From f57d08ffd3546a6aa6b01d319028fe084b2261d1 Mon Sep 17 00:00:00 2001 From: Brendan Hansen Date: Sun, 17 Jul 2022 22:52:07 -0500 Subject: [PATCH] code cleanup; added ways to control linking options in the code --- core/builtin.onyx | 36 +++++++ core/runtime/default_link_options.onyx | 5 + core/std.onyx | 1 + docs/link_options.md | 83 ++++++++++++++++ docs/todo | 10 ++ include/astnodes.h | 5 +- include/wasm_emit.h | 23 +++++ src/astnodes.c | 26 +++++ src/builtins.c | 9 +- src/onyx.c | 18 +++- src/wasm_emit.c | 132 ++++++++++++++++++++----- src/wasm_output.h | 10 +- 12 files changed, 322 insertions(+), 36 deletions(-) create mode 100644 core/runtime/default_link_options.onyx create mode 100644 docs/link_options.md diff --git a/core/builtin.onyx b/core/builtin.onyx index 5977704d..c0ed2211 100644 --- a/core/builtin.onyx +++ b/core/builtin.onyx @@ -278,3 +278,39 @@ __initialize_data_segments :: () -> void --- // This is also a special compiler generated procedure that calls all procedures specified with // #init, in the specified order. It should theoritically only be called once on the main thread. __run_init_procedures :: () -> void --- + + +#local { + #if runtime.runtime == .Onyx { + IMPORT_MEMORY_DEFAULT :: true; + IMPORT_MEMORY_MODULE_NAME_DEFAULT :: "onyx"; + IMPORT_MEMORY_IMPORT_NAME_DEFAULT :: "memory"; + + } else { + IMPORT_MEMORY_DEFAULT :: false; + IMPORT_MEMORY_MODULE_NAME_DEFAULT :: ""; + IMPORT_MEMORY_IMPORT_NAME_DEFAULT :: ""; + } +} + +// Should this be here? and/or should its name be so generic? 
+Link_Options :: struct { + stack_first := false; + stack_size := 16; // 16 pages * 65536 bytes per page = 1 MiB stack + stack_alignment := 16; + + null_reserve_size := 16; + + import_memory := IMPORT_MEMORY_DEFAULT; + import_memory_module_name := IMPORT_MEMORY_MODULE_NAME_DEFAULT; + import_memory_import_name := IMPORT_MEMORY_IMPORT_NAME_DEFAULT; + + export_memory := true; + export_memory_name := "memory"; + + export_func_table := true; + export_func_table_name := "func_table"; + + memory_min_size := 1024; + memory_max_size := 65536; +} \ No newline at end of file diff --git a/core/runtime/default_link_options.onyx b/core/runtime/default_link_options.onyx new file mode 100644 index 00000000..2dc1cb3d --- /dev/null +++ b/core/runtime/default_link_options.onyx @@ -0,0 +1,5 @@ +package runtime.vars + +#if !#defined(link_options) { + link_options :: Link_Options.{} +} diff --git a/core/std.onyx b/core/std.onyx index 15afa529..99a01875 100644 --- a/core/std.onyx +++ b/core/std.onyx @@ -34,6 +34,7 @@ package core #load "./runtime/build_opts" #load "./runtime/common" +#load "./runtime/default_link_options" #load "./arg_parse" #local runtime :: package runtime diff --git a/docs/link_options.md b/docs/link_options.md new file mode 100644 index 00000000..aed784bd --- /dev/null +++ b/docs/link_options.md @@ -0,0 +1,83 @@ +Link Options for Onyx +=== + +## Preface + +As Onyx compiles to WebAssembly, a sophisticated link-phase is not +necessary. That is why, up until a week ago, Onyx did not have a way +to specify any linking options, as Onyx could determine what it wants +to do with the linear memory space. However, as I am trying to use +Onyx with more things, I'm realizing that other WebAssembly runtimes +are a bit stricter, and expect things to be in a particular way. + +Specifically, I was looking at the WASM4 "game engine" as something +interesting Onyx could target. 
As WASM4 is trying to be as restrictive +as possible to increase creativity, the memory layout for the program +is defined. This means that Onyx's default layout will not suffice. +Instead, you need to be able to control where the stack and data section +elements go in the program. I have recently renovated the code that +determines where a piece of data will be placed, as well as added a +"link-phase" to update all references in the program to the data section +element. Now I need to determine what options you will be allowed to +control and what the syntax / semantics / method of communication will +be. A good reference for which options should be supported is +[wasm-ld](https://lld.llvm.org/WebAssembly.html). + +Because I am trying to stay away from a ton of command line options, +especially options that are *required* for your program to compile and work +correctly, I would like specifying link options to be contained in the +syntax of the program. Command-line options should be reserved for +changing meta-level parameters about the program, such as the runtime +and whether or not to disable features based on where the program is +being compiled. *Fundamental* options, such as how to lay out the data +section, should be within the program. + +## Proposal + +That being said, I think the syntax should look like so: + +```onyx + +#link_options .{ + .stack_size = 16, // 16 pages * 65536 bytes per page = 1 MiB + .stack_alignment = 16, // Align the start of the stack to 16 bytes + .stack_first = true, // Stack-before data section + + .null_reserve_size = 16, // Reserve 16 bytes for null + + .import_memory = true, + .import_memory_module_name = "onyx", + .import_memory_import_name = "memory", + + .memory_min_size = 16, // 16 * 65536 Bytes + .memory_max_size = 24, // 24 * 65536 Bytes +} + +``` + +There will only be one `#link_options` directive in the entire set +of included files. After it, it takes an expression that is of type +`runtime.Link_Options`, which is type inferred, as seen above. 
All +members of this structure will have default values given by the settings +that the program is compiling under. If no `#link_options` is provided, +these default values are used. + +Alternatively, there could just be an optional variable in +`package runtime.vars` that would define the link options. And if +one is not specified, then a definition in the standard library +would define it like so: + +```onyx +package runtime.vars + +#if !#defined(link_options) { + link_options :: runtime.Link_Options.{} +} + +``` + +This would simplify a lot, as there would not have to be any other logic +to deduplicate multiple `#link_options`. The only inconvenience is that +it will have to be part of a separate package, which currently means +a separate file. That is a separate issue that will hopefully be tackled +later. diff --git a/docs/todo b/docs/todo index 3ba0ab4c..2ca2809b 100644 --- a/docs/todo +++ b/docs/todo @@ -224,3 +224,13 @@ Revamping File System: use data: runtime.fs.FileData; } [x] Most file functionality will be provided using the stream API. 
+ + +Making a proper "linkage" phase of the compiler: :ProperLinking + [x] References to data section entries should be patched after compilation + Should just be leaving 4-bytes blank to be filled in later + [x] Be able to specify the start address: + - The stack + - The data section + - The stack size + - The memory constraints (in pages) \ No newline at end of file diff --git a/include/astnodes.h b/include/astnodes.h index f3b2ee00..510698a1 100644 --- a/include/astnodes.h +++ b/include/astnodes.h @@ -1607,6 +1607,7 @@ extern AstType *builtin_iterator_type; extern AstType *builtin_callsite_type; extern AstType *builtin_any_type; extern AstType *builtin_code_type; +extern AstType *builtin_link_options_type; extern AstTyped *type_table_node; extern AstTyped *foreign_blocks_node; extern AstType *foreign_block_type; @@ -1656,7 +1657,9 @@ typedef enum TypeMatch { #define unify_node_and_type(node, type) (unify_node_and_type_((node), (type), 1)) TypeMatch unify_node_and_type_(AstTyped** pnode, Type* type, b32 permanent); Type* resolve_expression_type(AstTyped* node); -i64 get_expression_integer_value(AstTyped* node, b32 *out_is_valid); + +i64 get_expression_integer_value(AstTyped* node, b32 *out_is_valid); +char *get_expression_string_value(AstTyped* node, b32 *out_is_valid); b32 cast_is_legal(Type* from_, Type* to_, char** err_msg); char* get_function_name(AstFunction* func); diff --git a/include/wasm_emit.h b/include/wasm_emit.h index 3c7e44ed..5475473b 100644 --- a/include/wasm_emit.h +++ b/include/wasm_emit.h @@ -728,6 +728,9 @@ typedef struct OnyxWasmModule { bh_arr(i32) elems; bh_arr(char *) libraries; bh_arr(char *) library_paths; + b32 needs_memory_section; + u32 memory_min_size; + u32 memory_max_size; // NOTE: Set of things used when compiling; not part of the actual module u32 export_count; @@ -749,8 +752,28 @@ typedef struct OnyxWasmModule { } OnyxWasmModule; typedef struct OnyxWasmLinkOptions { + b32 stack_first; + u32 stack_size; + u32 stack_alignment; + + 
u32 null_reserve_size; + + b32 import_memory; + char *import_memory_module_name; + char *import_memory_import_name; + + b32 export_memory; + char *export_memory_name; + + b32 export_func_table; + char *export_func_table_name; + + u32 memory_min_size; + u32 memory_max_size; } OnyxWasmLinkOptions; +b32 onyx_wasm_build_link_options_from_node(OnyxWasmLinkOptions *opts, struct AstTyped *node); + OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc); void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options); void onyx_wasm_module_free(OnyxWasmModule* module); diff --git a/src/astnodes.c b/src/astnodes.c index 4cfffce1..a888fa3f 100644 --- a/src/astnodes.c +++ b/src/astnodes.c @@ -941,6 +941,32 @@ i64 get_expression_integer_value(AstTyped* node, b32 *is_valid) { return 0; } +char *get_expression_string_value(AstTyped* node, b32 *out_is_valid) { + resolve_expression_type(node); + + if (out_is_valid) *out_is_valid = 1; + + if (node->kind == Ast_Kind_StrLit) { + AstStrLit *str = (AstStrLit *) node; + + // CLEANUP: Maybe this should allocate on the heap? + // I guess if in all cases the memory is allocated on the heap, + // then the caller can free the memory. 
+ i8* strdata = bh_alloc_array(global_heap_allocator, i8, str->token->length + 1); + i32 length = string_process_escape_seqs(strdata, str->token->text, str->token->length); + strdata[length] = '\0'; + + return strdata; + } + + if (node->kind == Ast_Kind_Alias) { + return get_expression_string_value(((AstAlias *) node)->alias, out_is_valid); + } + + if (out_is_valid) *out_is_valid = 0; + return NULL; +} + static const b32 cast_legality[][12] = { /* I8 */ { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, /* U8 */ { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, diff --git a/src/builtins.c b/src/builtins.c index b964d20c..b6eacef0 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -59,6 +59,7 @@ AstType *builtin_iterator_type; AstType *builtin_callsite_type; AstType *builtin_any_type; AstType *builtin_code_type; +AstType *builtin_link_options_type; AstTyped *type_table_node = NULL; AstTyped *foreign_blocks_node = NULL; @@ -451,7 +452,13 @@ void initialize_builtins(bh_allocator a) { builtin_run_init_procedures = (AstFunction *) symbol_raw_resolve(p->scope, "__run_init_procedures"); if (builtin_run_init_procedures == NULL || builtin_run_init_procedures->kind != Ast_Kind_Function) { - onyx_report_error((OnyxFilePos) { 0 }, Error_Critical, "'__run_init_procedures"); + onyx_report_error((OnyxFilePos) { 0 }, Error_Critical, "'__run_init_procedures' procedure not found."); + return; + } + + builtin_link_options_type = (AstType *) symbol_raw_resolve(p->scope, "Link_Options"); + if (builtin_link_options_type == NULL) { + onyx_report_error((OnyxFilePos) { 0 }, Error_Critical, "'Link_Options' type not found."); return; } diff --git a/src/onyx.c b/src/onyx.c index f394815b..ae47fab0 100644 --- a/src/onyx.c +++ b/src/onyx.c @@ -649,8 +649,22 @@ static i32 onyx_compile() { return ONYX_COMPILER_PROGRESS_SUCCESS; } +static void link_wasm_module() { + Package *runtime_var_package = package_lookup("runtime.vars"); + assert(runtime_var_package); + + AstTyped *link_options_node = (AstTyped *) 
symbol_raw_resolve(runtime_var_package->scope, "link_options"); + Type *link_options_type = type_build_from_ast(context.ast_alloc, builtin_link_options_type); + + assert(unify_node_and_type(&link_options_node, link_options_type) == TYPE_MATCH_SUCCESS); + + OnyxWasmLinkOptions link_opts; + onyx_wasm_build_link_options_from_node(&link_opts, link_options_node); + onyx_wasm_module_link(context.wasm_module, &link_opts); +} + static CompilerProgress onyx_flush_module() { - onyx_wasm_module_link(context.wasm_module, NULL); + link_wasm_module(); // NOTE: Output to file bh_file output_file; @@ -702,7 +716,7 @@ static CompilerProgress onyx_flush_module() { #ifdef ENABLE_RUN_WITH_WASMER static b32 onyx_run() { - onyx_wasm_module_link(context.wasm_module, NULL); + link_wasm_module(); bh_buffer code_buffer; onyx_wasm_module_write_to_buffer(context.wasm_module, &code_buffer); diff --git a/src/wasm_emit.c b/src/wasm_emit.c index b8beb9ec..d016b8a4 100644 --- a/src/wasm_emit.c +++ b/src/wasm_emit.c @@ -3945,6 +3945,10 @@ OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc) { .elems = NULL, .next_elem_idx = 0, + .needs_memory_section = 0, + .memory_min_size = 0, + .memory_max_size = 0, + .structured_jump_target = NULL, .return_location_stack = NULL, .local_allocations = NULL, @@ -4103,44 +4107,55 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options) // the code will probably need to be altered. static_assert(POINTER_SIZE == 4); - if (context.options->use_multi_threading) { + module->memory_min_size = options->memory_min_size; + module->memory_max_size = options->memory_max_size; + + if (context.options->use_multi_threading || options->import_memory) { + module->needs_memory_section = 0; + WasmImport mem_import = { .kind = WASM_FOREIGN_MEMORY, - .min = 1024, - .max = 65536, // NOTE: Why not use all 4 Gigs of memory? + .min = options->memory_min_size, + .max = options->memory_max_size, // NOTE: Why not use all 4 Gigs of memory? 
.shared = context.options->runtime == Runtime_Js, - .mod = "onyx", - .name = "memory", + .mod = options->import_memory_module_name, + .name = options->import_memory_import_name, }; bh_arr_push(module->imports, mem_import); + + } else { + module->needs_memory_section = 1; } - WasmExport mem_export = { - .kind = WASM_FOREIGN_MEMORY, - .idx = 0, - }; + if (options->export_memory) { + WasmExport mem_export = { + .kind = WASM_FOREIGN_MEMORY, + .idx = 0, + }; - shput(module->exports, "memory", mem_export); - module->export_count++; + shput(module->exports, options->export_memory_name, mem_export); + module->export_count++; + } - WasmExport func_table_export = { - .kind = WASM_FOREIGN_TABLE, - .idx = 0, - }; - shput(module->exports, "func_type", func_table_export); - module->export_count++; + if (options->export_func_table) { + WasmExport func_table_export = { + .kind = WASM_FOREIGN_TABLE, + .idx = 0, + }; + + shput(module->exports, options->export_func_table_name, func_table_export); + module->export_count++; + } - u32 datum_offset = 32; // :LinkOption + u32 datum_offset = options->null_reserve_size; bh_arr_each(WasmDatum, datum, module->data) { assert(datum->id > 0); bh_align(datum_offset, datum->alignment); datum->offset_ = datum_offset; - // printf("Data ID %d -> %d\n", datum->id, datum->offset); - datum_offset += datum->length; } @@ -4152,7 +4167,6 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options) switch (patch->kind) { case Datum_Patch_Instruction: { WasmFunc *func = &module->funcs[patch->index - module->foreign_function_count]; - // printf("Patching instruction %d in func[%d] with %d\n", patch->location, patch->index, datum->offset); assert(func->code[patch->location].type == WI_PTR_CONST); func->code[patch->location].data.l = (u64) datum->offset_ + patch->offset; @@ -4162,7 +4176,6 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options) case Datum_Patch_Data: { WasmDatum *datum_to_alter = 
&module->data[patch->index - 1]; assert(datum_to_alter->id == patch->index); - // printf("Patching data %d in data[%d] with %d + %d\n", patch->location, patch->index, target_datum->offset, patch->offset); *((u32 *) bh_pointer_add(datum_to_alter->data, patch->location)) = (u32) datum->offset_ + patch->offset; break; @@ -4174,9 +4187,9 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options) u32 *addr = (u32 *) bh_pointer_add(datum_to_alter->data, patch->location); if (*addr != 0) { - // printf("Patching data %d in data[%d] with %d + %d + %d\n", patch->location, patch->index, target_datum->offset, *addr, patch->offset); *addr += (u32) datum->offset_ + patch->offset; } + break; } @@ -4187,9 +4200,9 @@ void onyx_wasm_module_link(OnyxWasmModule *module, OnyxWasmLinkOptions *options) assert(module->stack_top_ptr && module->heap_start_ptr); *module->stack_top_ptr = datum_offset; - bh_align(*module->stack_top_ptr, 16); // :LinkOption + bh_align(*module->stack_top_ptr, options->stack_alignment); - *module->heap_start_ptr = *module->stack_top_ptr + (1 << 20); // :LinkOption + *module->heap_start_ptr = *module->stack_top_ptr + (options->stack_size << 16); bh_align(*module->heap_start_ptr, 16); } @@ -4206,4 +4219,73 @@ void onyx_wasm_module_free(OnyxWasmModule* module) { } +b32 onyx_wasm_build_link_options_from_node(OnyxWasmLinkOptions *opts, AstTyped *node) { + node = (AstTyped *) strip_aliases((AstNode *) node); + + assert(node && node->kind == Ast_Kind_Struct_Literal); + assert(builtin_link_options_type); + + Type *link_options_type = type_build_from_ast(context.ast_alloc, builtin_link_options_type); + + AstStructLiteral *input = (AstStructLiteral *) node; + + StructMember smem; + b32 out_is_valid; + + // TODO: These should be properly error handled. 
+ assert(type_lookup_member(link_options_type, "stack_first", &smem)); + opts->stack_first = get_expression_integer_value(input->args.values[smem.idx], &out_is_valid) != 0; + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "stack_size", &smem)); + opts->stack_size = get_expression_integer_value(input->args.values[smem.idx], &out_is_valid); + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "stack_alignment", &smem)); + opts->stack_alignment = get_expression_integer_value(input->args.values[smem.idx], &out_is_valid); + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "null_reserve_size", &smem)); + opts->null_reserve_size = get_expression_integer_value(input->args.values[smem.idx], &out_is_valid); + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "import_memory", &smem)); + opts->import_memory = get_expression_integer_value(input->args.values[smem.idx], &out_is_valid) != 0; + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "import_memory_module_name", &smem)); + opts->import_memory_module_name = get_expression_string_value(input->args.values[smem.idx], &out_is_valid); + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "import_memory_import_name", &smem)); + opts->import_memory_import_name = get_expression_string_value(input->args.values[smem.idx], &out_is_valid); + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "export_memory", &smem)); + opts->export_memory = get_expression_integer_value(input->args.values[smem.idx], &out_is_valid) != 0; + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "export_memory_name", &smem)); + opts->export_memory_name = get_expression_string_value(input->args.values[smem.idx], &out_is_valid); + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "export_func_table", &smem)); + 
opts->export_func_table = get_expression_integer_value(input->args.values[smem.idx], &out_is_valid) != 0; + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "export_func_table_name", &smem)); + opts->export_func_table_name = get_expression_string_value(input->args.values[smem.idx], &out_is_valid); + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "memory_min_size", &smem)); + opts->memory_min_size = get_expression_integer_value(input->args.values[smem.idx], &out_is_valid); + if (!out_is_valid) return 0; + + assert(type_lookup_member(link_options_type, "memory_max_size", &smem)); + opts->memory_max_size = get_expression_integer_value(input->args.values[smem.idx], &out_is_valid); + if (!out_is_valid) return 0; + + return 1; +} + #include "wasm_output.h" diff --git a/src/wasm_output.h b/src/wasm_output.h index 2398f9fb..99c19d92 100644 --- a/src/wasm_output.h +++ b/src/wasm_output.h @@ -168,7 +168,8 @@ static i32 output_tablesection(OnyxWasmModule* module, bh_buffer* buff) { static i32 output_memorysection(OnyxWasmModule* module, bh_buffer* buff) { // :ProperLinking - if (context.options->use_multi_threading) return 0; + // if (context.options->use_multi_threading) return 0; + if (!module->needs_memory_section) return 0; i32 prev_len = buff->length; bh_buffer_write_byte(buff, WASM_SECTION_ID_MEMORY); @@ -180,10 +181,7 @@ static i32 output_memorysection(OnyxWasmModule* module, bh_buffer* buff) { u8* leb = uint_to_uleb128((u64) 1, &leb_len); bh_buffer_append(&vec_buff, leb, leb_len); - // FIXME: This needs to be dynamically chosen depending on the size of - // the data section and stack size pre-requeseted. 
- // :WasmMemory :ProperLinking - output_limits(1024, -1, 0, &vec_buff); + output_limits(module->memory_min_size, -1, 0, &vec_buff); leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len); bh_buffer_append(buff, leb, leb_len); @@ -590,8 +588,6 @@ static i32 output_codesection(OnyxWasmModule* module, bh_buffer* buff) { u8* leb = uint_to_uleb128((u64) bh_arr_length(module->funcs), &leb_len); bh_buffer_append(&vec_buff, leb, leb_len); - // DEBUG_HERE; - bh_arr_each(WasmFunc, func, module->funcs) output_code(func, &vec_buff); leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len); -- 2.25.1