added: tree shaking for globals and strings
author Brendan Hansen <brendan.f.hansen@gmail.com>
Thu, 11 Jan 2024 03:07:47 +0000 (21:07 -0600)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Thu, 11 Jan 2024 03:07:47 +0000 (21:07 -0600)
compiler/include/astnodes.h
compiler/include/wasm_emit.h
compiler/src/checker.c
compiler/src/onyx.c
compiler/src/wasm_emit.c
compiler/src/wasm_type_table.h

index 08290445ce395dd7dd81db9e922026222fea2885..3f9286e55d26e6b8b0baa96728bcfac157034d54 100644 (file)
@@ -1834,6 +1834,7 @@ struct CompileOptions {
     b32 use_multi_threading   : 1;
     b32 generate_foreign_info : 1;
     b32 generate_type_info    : 1;
+    b32 generate_method_info  : 1;
     b32 no_core               : 1;
     b32 no_stale_code         : 1;
     b32 show_all_errors       : 1;
index b16f7bee6ad0a5f51719f2bcba76a8f06bc005d1..cb671b0e991ff8df87f7fdefab4fa41a9cd337ca 100644 (file)
@@ -593,6 +593,9 @@ typedef enum CodePatchInfoKind {
     Code_Patch_Callee,
     Code_Patch_Element,
     Code_Patch_Export,
+    Code_Patch_Tls_Offset,
+    Code_Patch_String_Length,
+    Code_Patch_String_Length_In_Data,
 } CodePatchInfoKind;
 
 //
index d16ad3731041820e26040903062d7258324b8938..e1ba37ce1dc1b0bd71fa92da95f790fc3516c873 100644 (file)
@@ -3348,7 +3348,13 @@ CheckStatus check_memres_type(AstMemRes* memres) {
     CHECK(type, &memres->type_node);
     fill_in_type((AstTyped *) memres);
     if (memres->type_node && !memres->type) YIELD(memres->token->pos, "Waiting for global type to be constructed.");
-    return Check_Success;
+
+    if (bh_arr_length(memres->tags) > 0) {
+        memres->flags |= Ast_Flag_Has_Been_Scheduled_For_Emit;
+        return Check_Success;
+    }
+
+    return Check_Complete;
 }
 
 CheckStatus check_memres(AstMemRes* memres) {
@@ -3393,7 +3399,12 @@ CheckStatus check_memres(AstMemRes* memres) {
         CHECK(expression, ptag);
     }
 
-    return Check_Success;
+    if (bh_arr_length(memres->tags) > 0) {
+        memres->flags |= Ast_Flag_Has_Been_Scheduled_For_Emit;
+        return Check_Success;
+    }
+
+    return Check_Complete;
 }
 
 CheckStatus check_type(AstType** ptype) {
@@ -4040,6 +4051,7 @@ void check_entity(Entity* ent) {
         case Entity_Type_Expression:
             cs = check_expression(&ent->expr);
             resolve_expression_type(ent->expr);
+            if (cs == Check_Success) cs = Check_Complete;
             break;
 
         case Entity_Type_Type_Alias:
@@ -4055,6 +4067,7 @@ void check_entity(Entity* ent) {
             if (context.options->no_file_contents) {
                 onyx_report_error(ent->expr->token->pos, Error_Critical, "#file_contents is disabled for this compilation.");
             }
+            cs = Check_Complete;
             break;
 
         case Entity_Type_Job: cs = check_arbitrary_job(ent->job_data); break;
index c87f987437a60b06eb6973c798e58cd37040587a..1808f859a1be749791b9656462de3308cb88cd93 100644 (file)
@@ -90,6 +90,8 @@ static const char *build_docstring = DOCSTRING_HEADER
     "\t--no-core               Disable automatically including \"core/module\".\n"
     "\t--no-stale-code         Disables use of `#allow_stale_code` directive\n"
     "\t--no-type-info          Disables generating type information\n"
+    "\t--generate-method-info  Populate method information in type information structures.\n"
+    "\t                        Can drastically increase binary size.\n"
     "\t--generate-foreign-info Generate information for foreign blocks. Rarely needed, so disabled by default.\n"
     "\t--wasm-mvp              Use only WebAssembly MVP features.\n"
     "\n"
@@ -116,6 +118,7 @@ static CompileOptions compile_opts_parse(bh_allocator alloc, int argc, char *arg
         .use_multi_threading     = 0,
         .generate_foreign_info   = 0,
         .generate_type_info      = 1,
+        .generate_method_info    = 0,
         .no_core                 = 0,
         .no_stale_code           = 0,
         .show_all_errors         = 0,
@@ -262,6 +265,9 @@ static CompileOptions compile_opts_parse(bh_allocator alloc, int argc, char *arg
             else if (!strcmp(argv[i], "--generate-foreign-info")) {
                 options.generate_foreign_info = 1;
             }
+            else if (!strcmp(argv[i], "--generate-method-info")) {
+                options.generate_method_info = 1;
+            }
             else if (!strcmp(argv[i], "--no-type-info")) {
                 options.generate_type_info = 0;
             }
index 9624bfd180e781e135806b57415150cb75cf53f5..8b913745e414a513c30b0c7c72cd6146c3d67a62 100644 (file)
@@ -484,6 +484,7 @@ EMIT_FUNC(statement,                       AstNode* stmt);
 EMIT_FUNC_RETURNING(u64, local_allocation, AstTyped* stmt);
 EMIT_FUNC_NO_ARGS(free_local_allocations);
 EMIT_FUNC(data_relocation,                 u32 data_id);
+EMIT_FUNC(data_relocation_for_node,        AstNode *node);
 EMIT_FUNC(assignment,                      AstBinaryOp* assign);
 EMIT_FUNC(assignment_of_array,             AstTyped* left, AstTyped* right);
 EMIT_FUNC(compound_assignment,             AstBinaryOp* assign);
@@ -825,6 +826,25 @@ EMIT_FUNC(data_relocation, u32 data_id) {
     *pcode = code;
 }
 
+EMIT_FUNC(data_relocation_for_node, AstNode *node) { // Emits a pointer constant placeholder and records a data patch that refers to `node` instead of a known data_id.
+    bh_arr(WasmInstruction) code = *pcode;
+
+    u32 instr_idx = bh_arr_length(code); // Index the placeholder instruction will occupy once emitted on the next line.
+    WID(NULL, WI_PTR_CONST, 0); // Operand is a dummy 0; the patch recorded below points at this instruction.
+    assert(mod->current_func_idx >= 0);
+
+    DatumPatchInfo patch;
+    patch.kind = Datum_Patch_Instruction;
+    patch.index = mod->current_func_idx; // Function containing the placeholder instruction.
+    patch.location = instr_idx;
+    patch.data_id = 0; // 0 makes the data_patches loop in onyx_wasm_module_link read the data_id from the node below.
+    patch.offset = 0;
+    patch.node_to_use_if_data_id_is_null = node; // The link step handles Memres, StrLit and File_Contents nodes here.
+    bh_arr_push(mod->data_patches, patch);
+
+    *pcode = code; // Write the (possibly reallocated) instruction array back to the caller.
+}
+
 EMIT_FUNC(stack_address, u32 offset, OnyxToken *token) {
     bh_arr(WasmInstruction) code = *pcode;
 
@@ -2938,22 +2958,25 @@ EMIT_FUNC(field_access_location, AstFieldAccess* field, u64* offset_return) {
 EMIT_FUNC(memory_reservation_location, AstMemRes* memres) {
     bh_arr(WasmInstruction) code = *pcode;
 
+    ensure_node_has_been_submitted_for_emission((AstNode *) memres);
+
     if (memres->threadlocal) {
         u64 tls_base_idx = bh_imap_get(&mod->index_map, (u64) &builtin_tls_base);
 
-        if (memres->tls_offset > 0) {
-            WID(NULL, WI_PTR_CONST, memres->tls_offset);
-            WIL(NULL, WI_GLOBAL_GET, tls_base_idx);
-            WI(NULL, WI_PTR_ADD);
+        CodePatchInfo code_patch;
+        code_patch.kind = Code_Patch_Tls_Offset;
+        code_patch.func_idx = mod->current_func_idx;
+        code_patch.instr = bh_arr_length(code);
+        code_patch.node_related_to_patch = (AstNode *) memres;
+        bh_arr_push(mod->code_patches, code_patch);
 
-        } else {
-            WIL(NULL, WI_GLOBAL_GET, tls_base_idx);
-        }
+        WID(NULL, WI_PTR_CONST, 0);
+        WIL(NULL, WI_GLOBAL_GET, tls_base_idx);
+        WI(NULL, WI_PTR_ADD);
 
     } else {
         // :ProperLinking
-        assert(memres->data_id != 0);
-        emit_data_relocation(mod, &code, memres->data_id);
+        emit_data_relocation_for_node(mod, &code, (AstNode *) memres);
     }
 
     *pcode = code;
@@ -3569,11 +3592,19 @@ EMIT_FUNC(expression, AstTyped* expr) {
         case Ast_Kind_StrLit: {
             // :ProperLinking
             AstStrLit *strlit = (AstStrLit *) expr;
-            assert(strlit->data_id > 0);
-            emit_data_relocation(mod, &code, strlit->data_id);
-
-            if (strlit->is_cstr == 0)
-                WID(NULL, WI_I32_CONST, strlit->length);
+            ensure_node_has_been_submitted_for_emission((AstNode *) strlit);
+            emit_data_relocation_for_node(mod, &code, (AstNode *) strlit);
+
+            if (strlit->is_cstr == 0) {
+                CodePatchInfo code_patch;
+                code_patch.kind = Code_Patch_String_Length;
+                code_patch.func_idx = mod->current_func_idx;
+                code_patch.instr = bh_arr_length(code);
+                code_patch.node_related_to_patch = (AstNode *) strlit;
+                bh_arr_push(mod->code_patches, code_patch);
+
+                WID(NULL, WI_I32_CONST, 0);
+            }
             break;
         }
 
@@ -3854,12 +3885,18 @@ EMIT_FUNC(expression, AstTyped* expr) {
         case Ast_Kind_File_Contents: {
             AstFileContents* fc = (AstFileContents *) expr;
 
-            assert(fc->data_id > 0);
-            assert(fc->size > 0);
-
             // :ProperLinking
-            emit_data_relocation(mod, &code, fc->data_id);
-            WID(NULL, WI_I32_CONST, fc->size);
+            ensure_node_has_been_submitted_for_emission((AstNode *) fc);
+            emit_data_relocation_for_node(mod, &code, (AstNode *) fc);
+
+            CodePatchInfo code_patch;
+            code_patch.kind = Code_Patch_String_Length;
+            code_patch.func_idx = mod->current_func_idx;
+            code_patch.instr = bh_arr_length(code);
+            code_patch.node_related_to_patch = (AstNode *) fc;
+            bh_arr_push(mod->code_patches, code_patch);
+
+            WID(NULL, WI_I32_CONST, 0);
             break;
         }
 
@@ -4738,7 +4775,7 @@ static void emit_global(OnyxWasmModule* module, AstGlobal* global) {
         module->heap_start_ptr = &module->globals[global_idx].initial_value[0].data.i1;
 
     if (global == &builtin_tls_size)
-        module->globals[global_idx].initial_value[0].data.i1 =  module->next_tls_offset;
+        module->tls_size_ptr = &module->globals[global_idx].initial_value[0].data.i1;
 }
 
 static void emit_raw_string(OnyxWasmModule* mod, char *data, i32 len, u64 *out_data_id, u64 *out_len) {
@@ -4856,26 +4893,24 @@ static b32 emit_constexpr_(ConstExprContext *ctx, AstTyped *node, u32 offset) {
     case Ast_Kind_StrLit: {
         AstStrLit* sl = (AstStrLit *) node;
 
-        // NOTE: This assumes the data_id and the length fields have been filled out
-        // by emit_string_literal.
-        if (POINTER_SIZE == 4) {
-            CE(u32, 0) = 0;
-            CE(u32, 4) = sl->length;
-        } else {
-            CE(u64, 0) = 0;
-            CE(u64, 8) = sl->length;
-        }
-
-        assert(sl->data_id > 0);
+        ensure_node_has_been_submitted_for_emission((AstNode *) sl);
 
         DatumPatchInfo patch;
         patch.kind = Datum_Patch_Data;
         patch.index = ctx->data_id;
         patch.location = offset;
-        patch.data_id = sl->data_id;
+        patch.data_id = 0;
         patch.offset = 0;
+        patch.node_to_use_if_data_id_is_null = (AstNode *) sl;
         bh_arr_push(ctx->module->data_patches, patch);
 
+        CodePatchInfo code_patch;
+        code_patch.kind = Code_Patch_String_Length_In_Data;
+        code_patch.func_idx = ctx->data_id; // Repurposing func_idx for this.
+        code_patch.instr    = offset + POINTER_SIZE; // Repurposing instr for offset into section
+        code_patch.node_related_to_patch = (AstNode *) sl;
+        bh_arr_push(ctx->module->code_patches, code_patch);
+
         break;
     }
 
@@ -5364,6 +5399,41 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options)
                 module->exports[export_idx].value.idx = (i32) func_idx;
                 break;
             }
+
+            case Code_Patch_Tls_Offset: {
+                AstMemRes *memres = (AstMemRes *) patch->node_related_to_patch;
+                assert(memres->kind == Ast_Kind_Memres);
+                assert(memres->threadlocal);
+
+                module->funcs[patch->func_idx].code[patch->instr].data.l = memres->tls_offset;
+                break;
+            }
+
+            case Code_Patch_String_Length: {
+                i32 length;
+                switch (patch->node_related_to_patch->kind) {
+                    case Ast_Kind_StrLit:        length = ((AstStrLit *) patch->node_related_to_patch)->length; break;
+                    case Ast_Kind_File_Contents: length = ((AstFileContents *) patch->node_related_to_patch)->size; break;
+                    default: assert("Unexpected node kind in Code_Patch_String_Length." && 0);
+                }
+
+                module->funcs[patch->func_idx].code[patch->instr].data.l = length;
+                break;
+            }
+
+            case Code_Patch_String_Length_In_Data: {
+                i32 length;
+                switch (patch->node_related_to_patch->kind) {
+                    case Ast_Kind_StrLit:        length = ((AstStrLit *) patch->node_related_to_patch)->length; break;
+                    case Ast_Kind_File_Contents: length = ((AstFileContents *) patch->node_related_to_patch)->size; break;
+                    default: assert("Unexpected node kind in Code_Patch_String_Length_In_Data." && 0);
+                }
+
+                WasmDatum *datum = &module->data[patch->func_idx - 1];
+                assert(datum->id == patch->func_idx);
+                *(i32 *) bh_pointer_add(datum->data, patch->instr) = (i32) length;
+                break;
+            }
         }
     }
 
@@ -5423,15 +5493,22 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options)
     // we can emit the __initialize_data_segments function.
     emit_function(module, builtin_initialize_data_segments);
 
+#ifdef ENABLE_DEBUG_INFO
+    if (module->debug_context) {
+        bh_arr_each(DebugFuncContext, func, module->debug_context->funcs) {
+            func->func_index += module->next_foreign_func_idx;
+        }
+    }
+#endif
+
     bh_arr_each(DatumPatchInfo, patch, module->data_patches) {
         if (patch->data_id == 0) {
-            if (patch->node_to_use_if_data_id_is_null
-                && patch->node_to_use_if_data_id_is_null->kind == Ast_Kind_Memres) {
-
-                patch->data_id = ((AstMemRes *) patch->node_to_use_if_data_id_is_null)->data_id;
-
-            } else {
-                assert("Unexpected empty data_id in linking!" && 0);
+            assert(patch->node_to_use_if_data_id_is_null || ("Unexpected empty data_id in linking!" && 0));
+            switch (patch->node_to_use_if_data_id_is_null->kind) {
+                case Ast_Kind_Memres:        patch->data_id = ((AstMemRes *) patch->node_to_use_if_data_id_is_null)->data_id; break;
+                case Ast_Kind_StrLit:        patch->data_id = ((AstStrLit *) patch->node_to_use_if_data_id_is_null)->data_id; break;
+                case Ast_Kind_File_Contents: patch->data_id = ((AstFileContents *) patch->node_to_use_if_data_id_is_null)->data_id; break;
+                default: assert("Unexpected node kind in linking phase." && 0);
             }
         }
 
@@ -5478,6 +5555,11 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options)
 
     *module->heap_start_ptr = *module->stack_top_ptr + options->stack_size;
     bh_align(*module->heap_start_ptr, 16);
+
+    if (module->tls_size_ptr) {
+        *module->tls_size_ptr = module->next_tls_offset;
+        bh_align(*module->tls_size_ptr, 16);
+    }
 }
 
 void onyx_wasm_module_free(OnyxWasmModule* module) {
index e63de9642075a06aa3f95d7a300a5c63371edff9..f72e1b58f18b754074a9bedb2038e48e1ca5bb3e 100644 (file)
@@ -385,7 +385,12 @@ static u64 build_type_table(OnyxWasmModule* module) {
 
                 // Struct methods
                 bh_arr(StructMethodData) method_data=NULL;
+
                 AstType *ast_type = type->ast_type;
+                if (!context.options->generate_method_info) {
+                    goto no_methods;
+                }
+
                 if (ast_type && ast_type->kind == Ast_Kind_Struct_Type) {
                     AstStructType *struct_type  = (AstStructType *) ast_type;
                     Scope*         struct_scope = struct_type->scope;
@@ -644,7 +649,12 @@ static u64 build_type_table(OnyxWasmModule* module) {
 
                 // Union methods
                 bh_arr(StructMethodData) method_data=NULL;
+
                 AstType *ast_type = type->ast_type;
+                if (!context.options->generate_method_info) {
+                    goto no_union_methods;
+                }
+
                 if (ast_type && ast_type->kind == Ast_Kind_Union_Type) {
                     AstUnionType *union_type  = (AstUnionType *) ast_type;
                     Scope*        union_scope = union_type->scope;
@@ -666,7 +676,7 @@ static u64 build_type_table(OnyxWasmModule* module) {
                         // any data member
                         bh_buffer_align(&table_buffer, 4);
                         u32 data_loc = table_buffer.length;
-                        u32 func_idx = get_element_idx(module, node);
+                        u32 func_idx = 0; // get_element_idx(module, node);
                         bh_buffer_write_u32(&table_buffer, func_idx);
                         bh_buffer_write_u32(&table_buffer, 0);
                         bh_buffer_write_u32(&table_buffer, 0);