From 6dedd788443eecaa1eb541064717d137786b9257 Mon Sep 17 00:00:00 2001 From: Brendan Hansen Date: Wed, 10 Jan 2024 21:07:47 -0600 Subject: [PATCH] added: tree shaking for globals and strings --- compiler/include/astnodes.h | 1 + compiler/include/wasm_emit.h | 3 + compiler/src/checker.c | 17 +++- compiler/src/onyx.c | 6 ++ compiler/src/wasm_emit.c | 160 +++++++++++++++++++++++++-------- compiler/src/wasm_type_table.h | 12 ++- 6 files changed, 157 insertions(+), 42 deletions(-) diff --git a/compiler/include/astnodes.h b/compiler/include/astnodes.h index 08290445..3f9286e5 100644 --- a/compiler/include/astnodes.h +++ b/compiler/include/astnodes.h @@ -1834,6 +1834,7 @@ struct CompileOptions { b32 use_multi_threading : 1; b32 generate_foreign_info : 1; b32 generate_type_info : 1; + b32 generate_method_info : 1; b32 no_core : 1; b32 no_stale_code : 1; b32 show_all_errors : 1; diff --git a/compiler/include/wasm_emit.h b/compiler/include/wasm_emit.h index b16f7bee..cb671b0e 100644 --- a/compiler/include/wasm_emit.h +++ b/compiler/include/wasm_emit.h @@ -593,6 +593,9 @@ typedef enum CodePatchInfoKind { Code_Patch_Callee, Code_Patch_Element, Code_Patch_Export, + Code_Patch_Tls_Offset, + Code_Patch_String_Length, + Code_Patch_String_Length_In_Data, } CodePatchInfoKind; // diff --git a/compiler/src/checker.c b/compiler/src/checker.c index d16ad373..e1ba37ce 100644 --- a/compiler/src/checker.c +++ b/compiler/src/checker.c @@ -3348,7 +3348,13 @@ CheckStatus check_memres_type(AstMemRes* memres) { CHECK(type, &memres->type_node); fill_in_type((AstTyped *) memres); if (memres->type_node && !memres->type) YIELD(memres->token->pos, "Waiting for global type to be constructed."); - return Check_Success; + + if (bh_arr_length(memres->tags) > 0) { + memres->flags |= Ast_Flag_Has_Been_Scheduled_For_Emit; + return Check_Success; + } + + return Check_Complete; } CheckStatus check_memres(AstMemRes* memres) { @@ -3393,7 +3399,12 @@ CheckStatus check_memres(AstMemRes* memres) { CHECK(expression, ptag); } - return Check_Success; + if (bh_arr_length(memres->tags) > 0) { + memres->flags |= Ast_Flag_Has_Been_Scheduled_For_Emit; + return Check_Success; + } + + return Check_Complete; } CheckStatus check_type(AstType** ptype) { @@ -4040,6 +4051,7 @@ void check_entity(Entity* ent) { case Entity_Type_Expression: cs = check_expression(&ent->expr); resolve_expression_type(ent->expr); + if (cs == Check_Success) cs = Check_Complete; break; case Entity_Type_Type_Alias: @@ -4055,6 +4067,7 @@ void check_entity(Entity* ent) { if (context.options->no_file_contents) { onyx_report_error(ent->expr->token->pos, Error_Critical, "#file_contents is disabled for this compilation."); } + cs = Check_Complete; break; case Entity_Type_Job: cs = check_arbitrary_job(ent->job_data); break; diff --git a/compiler/src/onyx.c b/compiler/src/onyx.c index c87f9874..1808f859 100644 --- a/compiler/src/onyx.c +++ b/compiler/src/onyx.c @@ -90,6 +90,8 @@ static const char *build_docstring = DOCSTRING_HEADER "\t--no-core Disable automatically including \"core/module\".\n" "\t--no-stale-code Disables use of `#allow_stale_code` directive\n" "\t--no-type-info Disables generating type information\n" + "\t--generate-method-info Populate method information in type information structures.\n" + "\t Can drastically increase binary size.\n" "\t--generate-foreign-info Generate information for foreign blocks. Rarely needed, so disabled by default.\n" "\t--wasm-mvp Use only WebAssembly MVP features.\n" "\n" @@ -116,6 +118,7 @@ static CompileOptions compile_opts_parse(bh_allocator alloc, int argc, char *arg .use_multi_threading = 0, .generate_foreign_info = 0, .generate_type_info = 1, + .generate_method_info = 0, .no_core = 0, .no_stale_code = 0, .show_all_errors = 0, @@ -262,6 +265,9 @@ static CompileOptions compile_opts_parse(bh_allocator alloc, int argc, char *arg else if (!strcmp(argv[i], "--generate-foreign-info")) { options.generate_foreign_info = 1; } + else if (!strcmp(argv[i], "--generate-method-info")) { + options.generate_method_info = 1; + } else if (!strcmp(argv[i], "--no-type-info")) { options.generate_type_info = 0; } diff --git a/compiler/src/wasm_emit.c b/compiler/src/wasm_emit.c index 9624bfd1..8b913745 100644 --- a/compiler/src/wasm_emit.c +++ b/compiler/src/wasm_emit.c @@ -484,6 +484,7 @@ EMIT_FUNC(statement, AstNode* stmt); EMIT_FUNC_RETURNING(u64, local_allocation, AstTyped* stmt); EMIT_FUNC_NO_ARGS(free_local_allocations); EMIT_FUNC(data_relocation, u32 data_id); +EMIT_FUNC(data_relocation_for_node, AstNode *node); EMIT_FUNC(assignment, AstBinaryOp* assign); EMIT_FUNC(assignment_of_array, AstTyped* left, AstTyped* right); EMIT_FUNC(compound_assignment, AstBinaryOp* assign); @@ -825,6 +826,25 @@ EMIT_FUNC(data_relocation, u32 data_id) { *pcode = code; } +EMIT_FUNC(data_relocation_for_node, AstNode *node) { + bh_arr(WasmInstruction) code = *pcode; + + u32 instr_idx = bh_arr_length(code); + WID(NULL, WI_PTR_CONST, 0); + assert(mod->current_func_idx >= 0); + + DatumPatchInfo patch; + patch.kind = Datum_Patch_Instruction; + patch.index = mod->current_func_idx; + patch.location = instr_idx; + patch.data_id = 0; + patch.offset = 0; + patch.node_to_use_if_data_id_is_null = node; + bh_arr_push(mod->data_patches, patch); + + *pcode = code; +} + EMIT_FUNC(stack_address, u32 offset, OnyxToken *token) { bh_arr(WasmInstruction) code = *pcode; @@ -2938,22 +2958,25 @@ EMIT_FUNC(field_access_location, AstFieldAccess* field, u64* offset_return) { EMIT_FUNC(memory_reservation_location, AstMemRes* memres) { bh_arr(WasmInstruction) code = *pcode; + ensure_node_has_been_submitted_for_emission((AstNode *) memres); + if (memres->threadlocal) { u64 tls_base_idx = bh_imap_get(&mod->index_map, (u64) &builtin_tls_base); - if (memres->tls_offset > 0) { - WID(NULL, WI_PTR_CONST, memres->tls_offset); - WIL(NULL, WI_GLOBAL_GET, tls_base_idx); - WI(NULL, WI_PTR_ADD); + CodePatchInfo code_patch; + code_patch.kind = Code_Patch_Tls_Offset; + code_patch.func_idx = mod->current_func_idx; + code_patch.instr = bh_arr_length(code); + code_patch.node_related_to_patch = (AstNode *) memres; + bh_arr_push(mod->code_patches, code_patch); - } else { - WIL(NULL, WI_GLOBAL_GET, tls_base_idx); - } + WID(NULL, WI_PTR_CONST, 0); + WIL(NULL, WI_GLOBAL_GET, tls_base_idx); + WI(NULL, WI_PTR_ADD); } else { // :ProperLinking - assert(memres->data_id != 0); - emit_data_relocation(mod, &code, memres->data_id); + emit_data_relocation_for_node(mod, &code, (AstNode *) memres); } *pcode = code; @@ -3569,11 +3592,19 @@ EMIT_FUNC(expression, AstTyped* expr) { case Ast_Kind_StrLit: { // :ProperLinking AstStrLit *strlit = (AstStrLit *) expr; - assert(strlit->data_id > 0); - emit_data_relocation(mod, &code, strlit->data_id); - - if (strlit->is_cstr == 0) - WID(NULL, WI_I32_CONST, strlit->length); + ensure_node_has_been_submitted_for_emission((AstNode *) strlit); + emit_data_relocation_for_node(mod, &code, (AstNode *) strlit); + + if (strlit->is_cstr == 0) { + CodePatchInfo code_patch; + code_patch.kind = Code_Patch_String_Length; + code_patch.func_idx = mod->current_func_idx; + code_patch.instr = bh_arr_length(code); + code_patch.node_related_to_patch = (AstNode *) strlit; + bh_arr_push(mod->code_patches, code_patch); + + WID(NULL, WI_I32_CONST, 0); + } break; } @@ -3854,12 +3885,18 @@ EMIT_FUNC(expression, AstTyped* expr) { case Ast_Kind_File_Contents: { AstFileContents* fc = (AstFileContents *) expr; - assert(fc->data_id > 0); - assert(fc->size > 0); - // :ProperLinking - emit_data_relocation(mod, &code, fc->data_id); - WID(NULL, WI_I32_CONST, fc->size); + ensure_node_has_been_submitted_for_emission((AstNode *) fc); + emit_data_relocation_for_node(mod, &code, (AstNode *) fc); + + CodePatchInfo code_patch; + code_patch.kind = Code_Patch_String_Length; + code_patch.func_idx = mod->current_func_idx; + code_patch.instr = bh_arr_length(code); + code_patch.node_related_to_patch = (AstNode *) fc; + bh_arr_push(mod->code_patches, code_patch); + + WID(NULL, WI_I32_CONST, 0); break; } @@ -4738,7 +4775,7 @@ static void emit_global(OnyxWasmModule* module, AstGlobal* global) { module->heap_start_ptr = &module->globals[global_idx].initial_value[0].data.i1; if (global == &builtin_tls_size) - module->globals[global_idx].initial_value[0].data.i1 = module->next_tls_offset; + module->tls_size_ptr = &module->globals[global_idx].initial_value[0].data.i1; } static void emit_raw_string(OnyxWasmModule* mod, char *data, i32 len, u64 *out_data_id, u64 *out_len) { @@ -4856,26 +4893,24 @@ static b32 emit_constexpr_(ConstExprContext *ctx, AstTyped *node, u32 offset) { case Ast_Kind_StrLit: { AstStrLit* sl = (AstStrLit *) node; - // NOTE: This assumes the data_id and the length fields have been filled out - // by emit_string_literal. - if (POINTER_SIZE == 4) { - CE(u32, 0) = 0; - CE(u32, 4) = sl->length; - } else { - CE(u64, 0) = 0; - CE(u64, 8) = sl->length; - } - - assert(sl->data_id > 0); + ensure_node_has_been_submitted_for_emission((AstNode *) sl); DatumPatchInfo patch; patch.kind = Datum_Patch_Data; patch.index = ctx->data_id; patch.location = offset; - patch.data_id = sl->data_id; + patch.data_id = 0; patch.offset = 0; + patch.node_to_use_if_data_id_is_null = (AstNode *) sl; bh_arr_push(ctx->module->data_patches, patch); + CodePatchInfo code_patch; + code_patch.kind = Code_Patch_String_Length_In_Data; + code_patch.func_idx = ctx->data_id; // Repurposing func_idx for this. + code_patch.instr = offset + POINTER_SIZE; // Repurposing instr for offset into section + code_patch.node_related_to_patch = (AstNode *) sl; + bh_arr_push(ctx->module->code_patches, code_patch); + break; } @@ -5364,6 +5399,41 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options) module->exports[export_idx].value.idx = (i32) func_idx; break; } + + case Code_Patch_Tls_Offset: { + AstMemRes *memres = (AstMemRes *) patch->node_related_to_patch; + assert(memres->kind == Ast_Kind_Memres); + assert(memres->threadlocal); + + module->funcs[patch->func_idx].code[patch->instr].data.l = memres->tls_offset; + break; + } + + case Code_Patch_String_Length: { + i32 length; + switch (patch->node_related_to_patch->kind) { + case Ast_Kind_StrLit: length = ((AstStrLit *) patch->node_related_to_patch)->length; break; + case Ast_Kind_File_Contents: length = ((AstFileContents *) patch->node_related_to_patch)->size; break; + default: assert("Unexpected node kind in Code_Patch_String_Length." && 0); + } + + module->funcs[patch->func_idx].code[patch->instr].data.l = length; + break; + } + + case Code_Patch_String_Length_In_Data: { + i32 length; + switch (patch->node_related_to_patch->kind) { + case Ast_Kind_StrLit: length = ((AstStrLit *) patch->node_related_to_patch)->length; break; + case Ast_Kind_File_Contents: length = ((AstFileContents *) patch->node_related_to_patch)->size; break; + default: assert("Unexpected node kind in Code_Patch_String_Length_In_Data." && 0); + } + + WasmDatum *datum = &module->data[patch->func_idx - 1]; + assert(datum->id == patch->func_idx); + *(i32 *) bh_pointer_add(datum->data, patch->instr) = (i32) length; + break; + } } } @@ -5423,15 +5493,22 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options) // we can emit the __initialize_data_segments function. emit_function(module, builtin_initialize_data_segments); +#ifdef ENABLE_DEBUG_INFO + if (module->debug_context) { + bh_arr_each(DebugFuncContext, func, module->debug_context->funcs) { + func->func_index += module->next_foreign_func_idx; + } + } +#endif + bh_arr_each(DatumPatchInfo, patch, module->data_patches) { if (patch->data_id == 0) { - if (patch->node_to_use_if_data_id_is_null - && patch->node_to_use_if_data_id_is_null->kind == Ast_Kind_Memres) { - - patch->data_id = ((AstMemRes *) patch->node_to_use_if_data_id_is_null)->data_id; - - } else { - assert("Unexpected empty data_id in linking!" && 0); + assert(patch->node_to_use_if_data_id_is_null || ("Unexpected empty data_id in linking!" && 0)); + switch (patch->node_to_use_if_data_id_is_null->kind) { + case Ast_Kind_Memres: patch->data_id = ((AstMemRes *) patch->node_to_use_if_data_id_is_null)->data_id; break; + case Ast_Kind_StrLit: patch->data_id = ((AstStrLit *) patch->node_to_use_if_data_id_is_null)->data_id; break; + case Ast_Kind_File_Contents: patch->data_id = ((AstFileContents *) patch->node_to_use_if_data_id_is_null)->data_id; break; + default: assert("Unexpected node kind in linking phase." && 0); } } @@ -5478,6 +5555,11 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options) *module->heap_start_ptr = *module->stack_top_ptr + options->stack_size; bh_align(*module->heap_start_ptr, 16); + + if (module->tls_size_ptr) { + *module->tls_size_ptr = module->next_tls_offset; + bh_align(*module->tls_size_ptr, 16); + } } void onyx_wasm_module_free(OnyxWasmModule* module) { diff --git a/compiler/src/wasm_type_table.h b/compiler/src/wasm_type_table.h index e63de964..f72e1b58 100644 --- a/compiler/src/wasm_type_table.h +++ b/compiler/src/wasm_type_table.h @@ -385,7 +385,12 @@ static u64 build_type_table(OnyxWasmModule* module) { // Struct methods bh_arr(StructMethodData) method_data=NULL; + AstType *ast_type = type->ast_type; + if (!context.options->generate_method_info) { + goto no_methods; + } + if (ast_type && ast_type->kind == Ast_Kind_Struct_Type) { AstStructType *struct_type = (AstStructType *) ast_type; Scope* struct_scope = struct_type->scope; @@ -644,7 +649,12 @@ static u64 build_type_table(OnyxWasmModule* module) { // Union methods bh_arr(StructMethodData) method_data=NULL; + AstType *ast_type = type->ast_type; + if (!context.options->generate_method_info) { + goto no_union_methods; + } + if (ast_type && ast_type->kind == Ast_Kind_Union_Type) { AstUnionType *union_type = (AstUnionType *) ast_type; Scope* union_scope = union_type->scope; @@ -666,7 +676,7 @@ static u64 build_type_table(OnyxWasmModule* module) { // any data member bh_buffer_align(&table_buffer, 4); u32 data_loc = table_buffer.length; - u32 func_idx = get_element_idx(module, node); + u32 func_idx = 0; // get_element_idx(module, node); bh_buffer_write_u32(&table_buffer, func_idx); bh_buffer_write_u32(&table_buffer, 0); bh_buffer_write_u32(&table_buffer, 0); -- 2.25.1