using conditional segment initialization for multi-threading programs
author Brendan Hansen <brendan.f.hansen@gmail.com>
Tue, 19 Oct 2021 13:23:21 +0000 (08:23 -0500)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Tue, 19 Oct 2021 13:23:21 +0000 (08:23 -0500)
13 files changed:
bin/onyx
bin/onyx-js
bin/onyx-loader.js
bin/onyx-thread.js
core/builtin.onyx
core/runtime/common.onyx
include/astnodes.h
modules/ouit/ouit_build.sh
src/builtins.c
src/onyx.c
src/wasm.c
src/wasm_intrinsics.c
src/wasm_output.c

index 94c9ad5ce00980d0a81d74cf8f7ba45daa22b6d2..a1bcd690e401b97b9e7f5c4e71a832664adc06fa 100755 (executable)
Binary files a/bin/onyx and b/bin/onyx differ
index 8bf494fb20e8982295dc2e32ddba391574f5f5c1..90377bb495a2abfe0298d0f1214a8758b0ce3353 100755 (executable)
@@ -56,14 +56,10 @@ if (isMainThread) {
         wasm_memory = new WebAssembly.Memory({ initial: 1024, maximum: 32768, shared: true });
         ENV.onyx.memory = wasm_memory;
 
-        let data_module_bytes = fs.readFileSync(process.argv[2] + ".data");
-        WebAssembly.instantiate(new Uint8Array(data_module_bytes), ENV)
-            .then(_ => {
-                WebAssembly.instantiate(new Uint8Array(wasm_bytes), ENV)
-                    .then(res => {
-                        wasm_instance = res.instance;
-                        wasm_instance.exports._start();
-                    });
+        WebAssembly.instantiate(new Uint8Array(wasm_bytes), ENV)
+            .then(res => {
+                wasm_instance = res.instance;
+                wasm_instance.exports._start();
             });
 
     } else {
index 017dbbb168a4af49268e78c1e2133cdee44398d0..e60ae573048d720db9f335b89ab4e5f24c7a1088 100644 (file)
@@ -78,9 +78,9 @@ function launch_onyx_program(script_path, call_start) {
     });
 }
 
-function launch_multi_threaded_onyx_program(script_path, data_path, call_start) {
-    Promise.all([fetch(script_path), fetch(data_path)])
-    .then(function(xs) { return Promise.all([xs[0].arrayBuffer(), xs[1].arrayBuffer()]); })
+function launch_multi_threaded_onyx_program(script_path, call_start) {
+    fetch(script_path)
+    .then(function(res) { return res.arrayBuffer(); })
     .then(function(data) {
         var import_object = {};
 
@@ -90,15 +90,12 @@ function launch_multi_threaded_onyx_program(script_path, data_path, call_start)
 
         import_object["onyx"] = { memory: new WebAssembly.Memory({ initial: 1024, maximum: 65536, shared: true }) };
         window.ONYX_MEMORY = import_object["onyx"]["memory"];
-        window.ONYX_BYTES  = data[0];
-
-        WebAssembly.instantiate(data[1], import_object)
-        .then(function (data_module) {
-            WebAssembly.instantiate(data[0], import_object)
-            .then(function (code_module) {
-                window.ONYX_INSTANCE = code_module.instance;
-                code_module.instance.exports._start();
-            });
+        window.ONYX_BYTES  = data;
+
+        WebAssembly.instantiate(data, import_object)
+        .then(function (code_module) {
+            window.ONYX_INSTANCE = code_module.instance;
+            code_module.instance.exports._start();
         });
     });
 }
@@ -109,7 +106,7 @@ window.onload = function() {
     for (var i = 0; i < script_tags.length; i++) {
         if (script_tags[i].getAttribute("type") == "application/onyx") {
             if (script_tags[i].getAttribute("multi-threaded")) {
-                launch_multi_threaded_onyx_program(script_tags[i].getAttribute("src"), script_tags[i].getAttribute("data"), true);
+                launch_multi_threaded_onyx_program(script_tags[i].getAttribute("src"), true);
             } else {
                 launch_onyx_program(script_tags[i].getAttribute("src"), true);
             }
index 881b2ee609630eaa7750073834193cb2cf40f0ba..f58a8fdc525a25e4915b11710be636522d297ef7 100644 (file)
@@ -31,7 +31,7 @@ self.onmessage = function (msg) {
     WebAssembly.instantiate(new Uint8Array(data.wasm_bytes), import_object)
     .then(function(res) {
         self.ONYX_MEMORY = data.memory;
-        
+
         res.instance.exports._thread_start(data.thread_id, data.funcidx, data.dataptr);
         res.instance.exports._thread_exit(data.thread_id);
     });
index 27778d6d68128b105093f3a29ad352ad47f0b424..7ff4865080651ec2fc70f806ce7e2a5ca6899694 100644 (file)
@@ -148,7 +148,7 @@ cfree   :: (ptr: rawptr) do raw_free(context.allocator, ptr);
         // for forever.
         use package core.intrinsics.onyx { __initialize }
         if initialize do __initialize(res);
-        
+
         return res;
     }
 
@@ -206,5 +206,5 @@ Code :: struct {_:i32;}
 // This procedure is a special compiler generated procedure that initializes all the data segments
 // in the program. It should only be called once, by the main thread, at the start of execution. It
 // is undefined behaviour if it is called more than once.
-__initialize_data_segments :: () -> i32 ---
+__initialize_data_segments :: () -> void ---
 
index 6d6f5e4dd1ee9ca8d2d14ff3bcea6faf3a68b863..030d4d9416ba2e5157b889aa119113d3cddda4a3 100644 (file)
@@ -20,6 +20,8 @@ __assert_handler :: (msg: str, site: CallSite) {
 }
 
 __runtime_initialize :: () {
+    __initialize_data_segments();
+
     alloc.init();
     __thread_initialize();
 
index 24c18836ea22eb50ae674d43b30ff26afb39ac05..045c8f4114a87616f8cd59403dc18e0f197d29cb 100644 (file)
@@ -1357,6 +1357,7 @@ extern AstType  *builtin_callsite_type;
 extern AstType  *builtin_any_type;
 extern AstType  *builtin_code_type;
 extern AstTyped *type_table_node;
+extern AstFunction *builtin_initialize_data_segments;
 
 typedef struct BuiltinSymbol {
     char*    package;
index efa25c3bacdfe19f26306b771f13d37d16c529ff..0126c5d944353fd71556294e24adb7a25f276df5 100755 (executable)
@@ -6,5 +6,5 @@
 [ ! -s "./js" ]         && ln -s "$ONYX_FOLDER/bin"     $(pwd)/js
 [ ! -f "./index.html" ] && cp "$ONYX_FOLDER/modules/ouit/index.html" .
 
-onyx -r js -V --use-multi-threading --use-post-mvp-features -o ouit.wasm "$1"
+onyx -r js -V --use-multi-threading -o ouit.wasm "$1"
 
index 27db173f822e417ce33f46b9c64f3bac46065ead..d49c098df04a326422cdab91a4e78ad9d68723a0 100644 (file)
@@ -432,8 +432,8 @@ void initialize_builtins(bh_allocator a) {
         return;
     }
 
-    builtin_initialize_data_segments = (AstType *) symbol_raw_resolve(p->scope, "__initialize_data_segments");
-    if (builtin_code_type == NULL) {
+    builtin_initialize_data_segments = (AstFunction *) symbol_raw_resolve(p->scope, "__initialize_data_segments");
+    if (builtin_initialize_data_segments == NULL || builtin_initialize_data_segments->kind != Ast_Kind_Function) {
         onyx_report_error((OnyxFilePos) { 0 }, "'__initialize_data_segments' procedure not found in builtin package.");
         return;
     }
index 9b3eca832fd22b3a346dd2444c9fc8af1ee7a02b..7c19d37736bfa60ab90cdb6c919cc69663f84b8e 100644 (file)
@@ -65,7 +65,7 @@ static CompileOptions compile_opts_parse(bh_allocator alloc, int argc, char *arg
         .print_function_mappings = 0,
         .no_file_contents        = 0,
 
-        .use_post_mvp_features   = 0,
+        .use_post_mvp_features   = 1,
         .use_multi_threading     = 0,
 
         .runtime = Runtime_Wasi,
@@ -120,6 +120,9 @@ static CompileOptions compile_opts_parse(bh_allocator alloc, int argc, char *arg
             else if (!strcmp(argv[i], "--use-post-mvp-features")) {
                 options.use_post_mvp_features = 1;
             }
+            else if (!strcmp(argv[i], "--mvp-features-only")) {
+                options.use_post_mvp_features = 0;
+            }
             else if (!strcmp(argv[i], "--use-multi-threading")) {
                 options.use_multi_threading = 1;
             }
@@ -511,7 +514,7 @@ static i32 onyx_compile() {
     // to be fine since the browser is really the only place that multi-threading can be used to any
     // degree of competency. But still... This is god awful and I hope that there is some other way to
     // around this down the line.
-    if (context.options->use_multi_threading) {
+    if (context.options->use_multi_threading && !context.options->use_post_mvp_features) {
         bh_file data_file;
         if (bh_file_create(&data_file, bh_aprintf(global_scratch_allocator, "%s.data", context.options->target_file)) != BH_FILE_ERROR_NONE)
             return ONYX_COMPILER_PROGRESS_FAILED_OUTPUT;
index 197eb3dd879e6a69c3e045ebf4eff95846c447b5..1d1a20f1a43863c762b66b1b407fc01573a39866 100644 (file)
@@ -3176,25 +3176,20 @@ static void emit_function(OnyxWasmModule* mod, AstFunction* fd) {
 
     i32 type_idx = generate_type_idx(mod, fd->type);
 
-    WasmFunc wasm_func = {
-        .type_idx = type_idx,
-        .locals = {
-            .param_count = 0,
-
-            .allocated = { 0 },
-            .freed     = { 0 },
-
-            .max_stack = 0,
-            .curr_stack = 0,
-        },
-        .code = NULL,
-    };
+    WasmFunc wasm_func = { 0 };
+    wasm_func.type_idx = type_idx;
 
     bh_arr_new(mod->allocator, wasm_func.code, 4);
 
     i32 func_idx = (i32) bh_imap_get(&mod->index_map, (u64) fd);
 
-    // If there is no body then don't process the code
+    if (fd == builtin_initialize_data_segments) {
+        emit_initialize_data_segments_body(mod, &wasm_func.code);
+        bh_arr_push(wasm_func.code, ((WasmInstruction){ WI_BLOCK_END, 0x00 }));
+        bh_arr_set_at(mod->funcs, func_idx - mod->foreign_function_count, wasm_func);
+        return;
+    }
+
     if (fd->body != NULL) {
         // NOTE: Generate the local map
         u64 localidx = 0;
@@ -3253,13 +3248,11 @@ static void emit_function(OnyxWasmModule* mod, AstFunction* fd) {
 
     WasmFuncType* ft = mod->types[type_idx];
     emit_zero_value(mod, &wasm_func.code, ft->return_type);
-
     bh_arr_push(wasm_func.code, ((WasmInstruction){ WI_BLOCK_END, 0x00 }));
 
-    bh_arr_set_at(mod->funcs, func_idx - mod->foreign_function_count, wasm_func);
-
-    // NOTE: Clear the local map on exit of generating this function
     bh_imap_clear(&mod->local_map);
+
+    bh_arr_set_at(mod->funcs, func_idx - mod->foreign_function_count, wasm_func);
 }
 
 static void emit_foreign_function(OnyxWasmModule* mod, AstFunction* fd) {
index c563d51e360e56ff4c2d7b99dd68e02c8c9d732a..f39ffe21362195692eebd7d7ae5cefdefbb7c65d 100644 (file)
@@ -410,3 +410,21 @@ EMIT_FUNC(intrinsic_atomic_cmpxchg, Type* type, OnyxToken* where) {
 bad_type:
     onyx_report_error(where->pos, "Bad type for atomic cmpxchg, '%s'. Only u8, u16, u32, i32, u64, and i64 are supported.", type_get_name(type));
 }
+
+EMIT_FUNC_NO_ARGS(initialize_data_segments_body) {
+    if (!context.options->use_multi_threading || !context.options->use_post_mvp_features) return;
+
+    bh_arr(WasmInstruction) code = *pcode;
+
+    i32 index = 0;
+    bh_arr_each(WasmDatum, datum, mod->data) {
+        WID(WI_PTR_CONST,   datum->offset);
+        WID(WI_PTR_CONST,   0);
+        WID(WI_I32_CONST,   datum->length);
+        WID(WI_MEMORY_INIT, ((WasmInstructionData) { index, 0 }));
+
+        index += 1;
+    }
+
+    *pcode = code;
+}
\ No newline at end of file
index f6a2ab61d2cb5d325b6c0c452f060d2ec11b396a..305a8b0c50e687a96e20745f2faf3975fafb1919 100644 (file)
@@ -14,6 +14,7 @@
 #define WASM_SECTION_ID_EXPORT 7
 #define WASM_SECTION_ID_START 8
 #define WASM_SECTION_ID_ELEMENT 9
+#define WASM_SECTION_ID_DATACOUNT 12
 #define WASM_SECTION_ID_CODE 10
 #define WASM_SECTION_ID_DATA 11
 
@@ -28,14 +29,14 @@ static i32 output_vector(void** arr, i32 stride, i32 arrlen, vector_func elem, b
     i32 len;
     u8* leb = uint_to_uleb128((u64) arrlen, &len);
     bh_buffer_append(vec_buff, leb, len);
-    
+
     i32 i = 0;
     while (i < arrlen) {
         elem(*arr, vec_buff);
         arr = bh_pointer_add(arr, stride);
         i++;
     }
-    
+
     return vec_buff->length;
 }
 
@@ -54,112 +55,112 @@ static i32 output_limits(i32 min, i32 max, b32 shared, bh_buffer* buff) {
     u8 mem_type = 0x00;
     if (max >= 0) mem_type |= 0x01;
     if (shared)   mem_type |= 0x02;
-    
+
     bh_buffer_write_byte(buff, mem_type);
-    
+
     leb = uint_to_uleb128((u64) min, &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     if (max >= 0) {
         leb = uint_to_uleb128((u64) max, &leb_len);
         bh_buffer_append(buff, leb, leb_len);
     }
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_functype(WasmFuncType* type, bh_buffer* buff) {
     i32 prev_len = buff->length;
-    
+
     bh_buffer_write_byte(buff, 0x60);
-    
+
     i32 len;
     u8* leb_buff = uint_to_uleb128(type->param_count, &len);
     bh_buffer_append(buff, leb_buff, len);
     bh_buffer_append(buff, type->param_types, type->param_count);
-    
+
     if (type->return_type != WASM_TYPE_VOID) {
         bh_buffer_write_byte(buff, 0x01);
         bh_buffer_write_byte(buff, type->return_type);
     } else {
         bh_buffer_write_byte(buff, 0x00);
     }
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_typesection(OnyxWasmModule* module, bh_buffer* buff) {
     i32 prev_len = buff->length;
     bh_buffer_write_byte(buff, 0x01);
-    
+
     bh_buffer vec_buff;
     bh_buffer_init(&vec_buff, buff->allocator, 128);
-    
+
     i32 vec_len = output_vector(
                                 (void**) module->types,
                                 sizeof(WasmFuncType*),
                                 bh_arr_length(module->types),
                                 (vector_func *) output_functype,
                                 &vec_buff);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) vec_len, &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, vec_buff);
     bh_buffer_free(&vec_buff);
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_funcsection(OnyxWasmModule* module, bh_buffer* buff) {
     i32 prev_len = buff->length;
     bh_buffer_write_byte(buff, WASM_SECTION_ID_FUNCTION);
-    
+
     bh_buffer vec_buff;
     bh_buffer_init(&vec_buff, buff->allocator, 128);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) (bh_arr_length(module->funcs)), &leb_len);
     bh_buffer_append(&vec_buff, leb, leb_len);
-    
+
     bh_arr_each(WasmFunc, func, module->funcs) {
         leb = uint_to_uleb128((u64) (func->type_idx), &leb_len);
         bh_buffer_append(&vec_buff, leb, leb_len);
     }
-    
+
     leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, vec_buff);
     bh_buffer_free(&vec_buff);
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_tablesection(OnyxWasmModule* module, bh_buffer* buff) {
     if (bh_arr_length(module->elems) == 0) return 0;
-    
+
     i32 prev_len = buff->length;
     bh_buffer_write_byte(buff, WASM_SECTION_ID_TABLE);
-    
+
     bh_buffer vec_buff;
     bh_buffer_init(&vec_buff, buff->allocator, 128);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) 1, &leb_len);
     bh_buffer_append(&vec_buff, leb, leb_len);
-    
+
     // NOTE: funcrefs are the only valid table element type
     bh_buffer_write_byte(&vec_buff, 0x70);
     output_limits(bh_arr_length(module->elems), -1, 0, &vec_buff);
-    
+
     leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, vec_buff);
     bh_buffer_free(&vec_buff);
-    
+
     return buff->length - prev_len;
 }
 
@@ -168,70 +169,70 @@ static i32 output_memorysection(OnyxWasmModule* module, bh_buffer* buff) {
 
     i32 prev_len = buff->length;
     bh_buffer_write_byte(buff, WASM_SECTION_ID_MEMORY);
-    
+
     bh_buffer vec_buff;
     bh_buffer_init(&vec_buff, buff->allocator, 128);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) 1, &leb_len);
     bh_buffer_append(&vec_buff, leb, leb_len);
-    
+
     // FIXME: This needs to be dynamically chosen depending on the size of
     // the data section and stack size pre-requeseted.
     // :WasmMemory
     output_limits(1024, -1, 0, &vec_buff);
-    
+
     leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, vec_buff);
     bh_buffer_free(&vec_buff);
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_globalsection(OnyxWasmModule* module, bh_buffer* buff) {
     i32 prev_len = buff->length;
     bh_buffer_write_byte(buff, WASM_SECTION_ID_GLOBAL);
-    
+
     bh_buffer vec_buff;
     bh_buffer_init(&vec_buff, buff->allocator, 128);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) (bh_arr_length(module->globals)), &leb_len);
     bh_buffer_append(&vec_buff, leb, leb_len);
-    
+
     bh_arr_each(WasmGlobal, global, module->globals) {
         bh_buffer_write_byte(&vec_buff, global->type);
         bh_buffer_write_byte(&vec_buff, 0x01);
-        
+
         bh_arr_each(WasmInstruction, instr, global->initial_value)
             output_instruction(NULL, instr, &vec_buff);
-        
+
         // NOTE: Initial value expression terminator
         bh_buffer_write_byte(&vec_buff, (u8) WI_BLOCK_END);
     }
-    
+
     leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, vec_buff);
     bh_buffer_free(&vec_buff);
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_importsection(OnyxWasmModule* module, bh_buffer* buff) {
     i32 prev_len = buff->length;
     bh_buffer_write_byte(buff, WASM_SECTION_ID_IMPORT);
-    
+
     bh_buffer vec_buff;
     bh_buffer_init(&vec_buff, buff->allocator, 128);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) (bh_arr_length(module->imports)), &leb_len);
     bh_buffer_append(&vec_buff, leb, leb_len);
-    
+
     bh_arr_each(WasmImport, import, module->imports) {
         output_name(import->mod, strlen(import->mod), &vec_buff);
         output_name(import->name, strlen(import->name), &vec_buff);
@@ -258,49 +259,49 @@ static i32 output_importsection(OnyxWasmModule* module, bh_buffer* buff) {
             case WASM_FOREIGN_TABLE: assert(0);
         }
     }
-    
+
     leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, vec_buff);
     bh_buffer_free(&vec_buff);
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_exportsection(OnyxWasmModule* module, bh_buffer* buff) {
     i32 prev_len = buff->length;
     bh_buffer_write_byte(buff, WASM_SECTION_ID_EXPORT);
-    
+
     bh_buffer vec_buff;
     bh_buffer_init(&vec_buff, buff->allocator, 128);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) (module->export_count), &leb_len);
     bh_buffer_append(&vec_buff, leb, leb_len);
-    
+
     i32 key_len = 0;
     bh_table_each_start(WasmExport, module->exports);
     key_len = strlen(key);
     output_name(key, key_len, &vec_buff);
-    
+
     bh_buffer_write_byte(&vec_buff, (u8) (value.kind));
     leb = uint_to_uleb128((u64) value.idx, &leb_len);
     bh_buffer_append(&vec_buff, leb, leb_len);
     bh_table_each_end;
-    
+
     leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, vec_buff);
     bh_buffer_free(&vec_buff);
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_startsection(OnyxWasmModule* module, bh_buffer* buff) {
     i32 prev_len = buff->length;
-    
+
     i32 start_idx = -1;
     bh_table_each_start(WasmExport, module->exports) {
         if (value.kind == WASM_FOREIGN_FUNCTION) {
@@ -310,65 +311,65 @@ static i32 output_startsection(OnyxWasmModule* module, bh_buffer* buff) {
             }
         }
     } bh_table_each_end;
-    
+
     if (start_idx != -1) {
         bh_buffer_write_byte(buff, WASM_SECTION_ID_START);
-        
+
         i32 start_leb_len, section_leb_len;
         uint_to_uleb128((u64) start_idx, &start_leb_len);
         u8* section_leb = uint_to_uleb128((u64) start_leb_len, &section_leb_len);
         bh_buffer_append(buff, section_leb, section_leb_len);
-        
+
         u8* start_leb = uint_to_uleb128((u64) start_idx, &start_leb_len);
         bh_buffer_append(buff, start_leb, start_leb_len);
     }
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_elemsection(OnyxWasmModule* module, bh_buffer* buff) {
     if (bh_arr_length(module->elems) == 0) return 0;
-    
+
     i32 prev_len = buff->length;
-    
+
     bh_buffer_write_byte(buff, WASM_SECTION_ID_ELEMENT);
-    
+
     bh_buffer vec_buff;
     bh_buffer_init(&vec_buff, buff->allocator, 128);
-    
+
     i32 leb_len;
     u8* leb;
-    
+
     // NOTE: 0x01 count of elems
     bh_buffer_write_byte(&vec_buff, 0x01);
-    
+
     // NOTE: 0x00 table index
     bh_buffer_write_byte(&vec_buff, 0x00);
-    
+
     bh_buffer_write_byte(&vec_buff, WI_I32_CONST);
     bh_buffer_write_byte(&vec_buff, 0x00);
     bh_buffer_write_byte(&vec_buff, WI_BLOCK_END);
-    
+
     leb = uint_to_uleb128((u64) bh_arr_length(module->elems), &leb_len);
     bh_buffer_append(&vec_buff, leb, leb_len);
-    
+
     bh_arr_each(i32, elem, module->elems) {
         leb = uint_to_uleb128((u64) *elem, &leb_len);
         bh_buffer_append(&vec_buff, leb, leb_len);
     }
-    
+
     leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, vec_buff);
     bh_buffer_free(&vec_buff);
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_locals(WasmFunc* func, bh_buffer* buff) {
     i32 prev_len = buff->length;
-    
+
     // NOTE: Output vector length
     i32 total_locals =
         (i32) (func->locals.allocated[0] != 0) +
@@ -376,11 +377,11 @@ static i32 output_locals(WasmFunc* func, bh_buffer* buff) {
         (i32) (func->locals.allocated[2] != 0) +
         (i32) (func->locals.allocated[3] != 0) +
         (i32) (func->locals.allocated[4] != 0);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) total_locals, &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     if (func->locals.allocated[0] != 0) {
         leb = uint_to_uleb128((u64) func->locals.allocated[0], &leb_len);
         bh_buffer_append(buff, leb, leb_len);
@@ -406,21 +407,21 @@ static i32 output_locals(WasmFunc* func, bh_buffer* buff) {
         bh_buffer_append(buff, leb, leb_len);
         bh_buffer_write_byte(buff, WASM_TYPE_VAR128);
     }
-    
+
     return buff->length - prev_len;
 }
 
 static void output_instruction(WasmFunc* func, WasmInstruction* instr, bh_buffer* buff) {
     i32 leb_len;
     u8* leb;
-    
+
     if (instr->type == WI_NOP) return;
-    
+
     if (instr->type & SIMD_INSTR_MASK) {
         bh_buffer_write_byte(buff, 0xFD);
         leb = uint_to_uleb128((u64) (instr->type &~ SIMD_INSTR_MASK), &leb_len);
         bh_buffer_append(buff, leb, leb_len);
-        
+
     } else if (instr->type & EXT_INSTR_MASK) {
         bh_buffer_write_byte(buff, 0xFC);
         leb = uint_to_uleb128((u64) (instr->type &~ EXT_INSTR_MASK), &leb_len);
@@ -441,11 +442,11 @@ static void output_instruction(WasmFunc* func, WasmInstruction* instr, bh_buffer
             leb = uint_to_uleb128((u64) instr->data.i2, &leb_len);
             bh_buffer_append(buff, leb, leb_len);
         }
-        
+
     } else {
         bh_buffer_write_byte(buff, (u8) instr->type);
     }
-    
+
     switch (instr->type) {
         case WI_LOCAL_GET:
         case WI_LOCAL_SET:
@@ -455,7 +456,7 @@ static void output_instruction(WasmFunc* func, WasmInstruction* instr, bh_buffer
             bh_buffer_append(buff, leb, leb_len);
             break;
         }
-        
+
         case WI_GLOBAL_GET:
         case WI_GLOBAL_SET:
         case WI_CALL:
@@ -470,33 +471,33 @@ static void output_instruction(WasmFunc* func, WasmInstruction* instr, bh_buffer
             leb = uint_to_uleb128((u64) instr->data.i1, &leb_len);
             bh_buffer_append(buff, leb, leb_len);
             break;
-        
+
         case WI_MEMORY_INIT:
         case WI_MEMORY_COPY:
             leb = uint_to_uleb128((u64) instr->data.i1, &leb_len);
             bh_buffer_append(buff, leb, leb_len);
-        
+
             leb = uint_to_uleb128((u64) instr->data.i2, &leb_len);
             bh_buffer_append(buff, leb, leb_len);
             break;
-        
+
         case WI_JUMP_TABLE: {
             BranchTable* bt = (BranchTable *) instr->data.p;
-            
+
             leb = uint_to_uleb128((u64) bt->count, &leb_len);
             bh_buffer_append(buff, leb, leb_len);
-            
+
             fori (i, 0, bt->count) {
                 leb = uint_to_uleb128((u64) bt->cases[i], &leb_len);
                 bh_buffer_append(buff, leb, leb_len);
             }
-            
+
             leb = uint_to_uleb128((u64) bt->default_case, &leb_len);
             bh_buffer_append(buff, leb, leb_len);
             break;
         }
-        
-        
+
+
         case WI_CALL_INDIRECT:
         case WI_I32_STORE: case WI_I32_STORE_8: case WI_I32_STORE_16:
         case WI_I64_STORE: case WI_I64_STORE_8: case WI_I64_STORE_16: case WI_I64_STORE_32:
@@ -516,7 +517,7 @@ static void output_instruction(WasmFunc* func, WasmInstruction* instr, bh_buffer
             leb = uint_to_uleb128((u64) instr->data.i2, &leb_len);
             bh_buffer_append(buff, leb, leb_len);
             break;
-        
+
         case WI_I32_CONST:
             leb = int_to_leb128((i64) instr->data.i1, &leb_len);
             bh_buffer_append(buff, leb, leb_len);
@@ -533,12 +534,12 @@ static void output_instruction(WasmFunc* func, WasmInstruction* instr, bh_buffer
             leb = double_to_ieee754(instr->data.d, 0);
             bh_buffer_append(buff, leb, 8);
             break;
-        
+
         case WI_V128_CONST:
         case WI_I8X16_SHUFFLE:
             fori (i, 0, 16) bh_buffer_write_byte(buff, ((u8*) instr->data.p)[i]);
             break;
-        
+
         case WI_I8X16_EXTRACT_LANE_S: case WI_I8X16_EXTRACT_LANE_U: case WI_I8X16_REPLACE_LANE:
         case WI_I16X8_EXTRACT_LANE_S: case WI_I16X8_EXTRACT_LANE_U: case WI_I16X8_REPLACE_LANE:
         case WI_I32X4_EXTRACT_LANE: case WI_I32X4_REPLACE_LANE:
@@ -553,85 +554,110 @@ static void output_instruction(WasmFunc* func, WasmInstruction* instr, bh_buffer
 }
 
 static i32 output_code(WasmFunc* func, bh_buffer* buff) {
-    
+
     bh_buffer code_buff;
     bh_buffer_init(&code_buff, buff->allocator, 128);
-    
+
     // Output locals
     output_locals(func, &code_buff);
-    
+
     // Output code
     bh_arr_each(WasmInstruction, instr, func->code) output_instruction(func, instr, &code_buff);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) code_buff.length, &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, code_buff);
     bh_buffer_free(&code_buff);
-    
+
     return 0;
 }
 
 static i32 output_codesection(OnyxWasmModule* module, bh_buffer* buff) {
     i32 prev_len = buff->length;
-    
+
     bh_buffer_write_byte(buff, WASM_SECTION_ID_CODE);
-    
+
     bh_buffer vec_buff;
     bh_buffer_init(&vec_buff, buff->allocator, 128);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) bh_arr_length(module->funcs), &leb_len);
     bh_buffer_append(&vec_buff, leb, leb_len);
-    
+
     // DEBUG_HERE;
-    
+
     bh_arr_each(WasmFunc, func, module->funcs) output_code(func, &vec_buff);
-    
+
+    leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len);
+    bh_buffer_append(buff, leb, leb_len);
+
+    bh_buffer_concat(buff, vec_buff);
+    bh_buffer_free(&vec_buff);
+
+    return buff->length - prev_len;
+}
+
+static i32 output_datacountsection(OnyxWasmModule* module, bh_buffer* buff) {
+    i32 prev_len = buff->length;
+
+    bh_buffer_write_byte(buff, WASM_SECTION_ID_DATACOUNT);
+
+    bh_buffer vec_buff;
+    bh_buffer_init(&vec_buff, buff->allocator, 128);
+
+    i32 leb_len;
+    u8* leb = uint_to_uleb128((u64) bh_arr_length(module->data), &leb_len);
+    bh_buffer_append(&vec_buff, leb, leb_len);
+
     leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, vec_buff);
     bh_buffer_free(&vec_buff);
-    
+
     return buff->length - prev_len;
 }
 
 static i32 output_datasection(OnyxWasmModule* module, bh_buffer* buff) {
     i32 prev_len = buff->length;
-    
+
     bh_buffer_write_byte(buff, WASM_SECTION_ID_DATA);
-    
+
     bh_buffer vec_buff;
     bh_buffer_init(&vec_buff, buff->allocator, 128);
-    
+
     i32 leb_len;
     u8* leb = uint_to_uleb128((u64) bh_arr_length(module->data), &leb_len);
     bh_buffer_append(&vec_buff, leb, leb_len);
-    
+
     bh_arr_each(WasmDatum, datum, module->data) {
         if (datum->data == NULL) continue;
-        
-        // NOTE: 0x00 memory index
-        bh_buffer_write_byte(&vec_buff, 0x00);
-        
-        bh_buffer_write_byte(&vec_buff, WI_I32_CONST);
-        leb = int_to_leb128((i64) datum->offset, &leb_len);
-        bh_buffer_append(&vec_buff, leb, leb_len);
-        bh_buffer_write_byte(&vec_buff, WI_BLOCK_END);
-        
+
+        i32 memory_flags = 0x00;
+        if (context.options->use_multi_threading) memory_flags |= 0x01;
+
+        bh_buffer_write_byte(&vec_buff, memory_flags);
+
+        if (!context.options->use_multi_threading) {
+            bh_buffer_write_byte(&vec_buff, WI_I32_CONST);
+            leb = int_to_leb128((i64) datum->offset, &leb_len);
+            bh_buffer_append(&vec_buff, leb, leb_len);
+            bh_buffer_write_byte(&vec_buff, WI_BLOCK_END);
+        }
+
         leb = uint_to_uleb128((u64) datum->length, &leb_len);
         bh_buffer_append(&vec_buff, leb, leb_len);
         fori (i, 0, datum->length) bh_buffer_write_byte(&vec_buff, ((u8 *) datum->data)[i]);
     }
-    
+
     leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len);
     bh_buffer_append(buff, leb, leb_len);
-    
+
     bh_buffer_concat(buff, vec_buff);
     bh_buffer_free(&vec_buff);
-    
+
     return buff->length - prev_len;
 }
 
@@ -640,7 +666,7 @@ void onyx_wasm_module_write_to_file(OnyxWasmModule* module, bh_file file) {
     bh_buffer_init(&master_buffer, global_heap_allocator, 128);
     bh_buffer_append(&master_buffer, WASM_MAGIC_STRING, 4);
     bh_buffer_append(&master_buffer, WASM_VERSION, 4);
-    
+
     output_typesection(module, &master_buffer);
     output_importsection(module, &master_buffer);
     output_funcsection(module, &master_buffer);
@@ -650,8 +676,9 @@ void onyx_wasm_module_write_to_file(OnyxWasmModule* module, bh_file file) {
     output_exportsection(module, &master_buffer);
     output_startsection(module, &master_buffer);
     output_elemsection(module, &master_buffer);
+    output_datacountsection(module, &master_buffer);
     output_codesection(module, &master_buffer);
     output_datasection(module, &master_buffer);
-    
+
     bh_file_write(&file, master_buffer.data, master_buffer.length);
 }