From 8b3a0c7fcc79c58403333f7e35d2914fd6d71869 Mon Sep 17 00:00:00 2001 From: Brendan Hansen Date: Wed, 22 Jun 2022 22:41:12 -0500 Subject: [PATCH] started parsing instructions --- build.sh | 2 +- include/vm_codebuilder.h | 24 ++ src/ovm_cli_test.c | 2 +- src/vm/code_builder.c | 10 + src/wasm/module.c | 350 +------------------------- src/wasm/module_parsing.c.incl | 439 +++++++++++++++++++++++++++++++++ 6 files changed, 476 insertions(+), 351 deletions(-) create mode 100644 src/wasm/module_parsing.c.incl diff --git a/build.sh b/build.sh index 8ebf101..42da70a 100755 --- a/build.sh +++ b/build.sh @@ -6,7 +6,7 @@ FLAGS="-g3" LIBS= INCLUDES="-I include" TARGET="libonyx_embedder.so" -C_FILES="src/wasm.c src/vm/* src/wasm/*" +C_FILES="src/wasm.c src/vm/*.c src/wasm/*.c" $CC $FLAGS $INCLUDES -shared -fPIC -o $TARGET $C_FILES $LIBS $WARNINGS diff --git a/include/vm_codebuilder.h b/include/vm_codebuilder.h index 671e335..9d5cbe8 100644 --- a/include/vm_codebuilder.h +++ b/include/vm_codebuilder.h @@ -4,6 +4,9 @@ #include "vm.h" typedef struct ovm_code_builder_t ovm_code_builder_t; +typedef struct label_target_t label_target_t; +typedef struct branch_patch_t branch_patch_t; +typedef enum label_kind_t label_kind_t; // // A new code builder will be "made" for each function @@ -11,13 +14,34 @@ typedef struct ovm_code_builder_t ovm_code_builder_t; struct ovm_code_builder_t { bh_arr(i32) execution_stack; + i32 next_label_idx; + bh_arr(label_target_t) label_stack; + bh_arr(branch_patch_t) branch_patches; + i32 param_count, local_count; ovm_program_t *program; i32 start_instr; }; +enum label_kind_t { + label_kind_block, + label_kind_loop, +}; + +struct label_target_t { + i32 idx; + label_kind_t kind; + i32 instr; +}; + +struct branch_patch_t { + i32 branch_instr; + i32 label_idx; +}; + ovm_code_builder_t ovm_code_builder_new(ovm_program_t *program, i32 param_count, i32 local_count); +void ovm_code_builder_free(ovm_code_builder_t *builder); void 
ovm_code_builder_add_binop(ovm_code_builder_t *builder, u32 instr); void ovm_code_builder_add_imm(ovm_code_builder_t *builder, u32 ovm_type, void *imm); diff --git a/src/ovm_cli_test.c b/src/ovm_cli_test.c index 330685e..3e5a0ca 100644 --- a/src/ovm_cli_test.c +++ b/src/ovm_cli_test.c @@ -28,7 +28,7 @@ int main(int argc, char *argv[]) { ovm_engine_t *engine = ovm_engine_new(store); ovm_state_t *state = ovm_state_new(engine, prog); - ovm_program_load_from_file(prog, engine, state, "./out.ovm"); + ovm_program_load_from_file(prog, engine, state, argv[1]); ovm_program_print_instructions(prog, 0, bh_arr_length(prog->code)); ovm_state_link_native_funcs(state, native_funcs); diff --git a/src/vm/code_builder.c b/src/vm/code_builder.c index c64e368..f1f102b 100644 --- a/src/vm/code_builder.c +++ b/src/vm/code_builder.c @@ -17,9 +17,19 @@ ovm_code_builder_t ovm_code_builder_new(ovm_program_t *program, i32 param_count, builder.execution_stack = NULL; bh_arr_new(bh_heap_allocator(), builder.execution_stack, 32); + builder.next_label_idx = 0; + builder.label_stack = NULL; + bh_arr_new(bh_heap_allocator(), builder.label_stack, 32); + bh_arr_new(bh_heap_allocator(), builder.branch_patches, 32); + return builder; } +void ovm_code_builder_free(ovm_code_builder_t *builder) { + bh_arr_free(builder->execution_stack); + bh_arr_free(builder->label_stack); + bh_arr_free(builder->branch_patches); +} void ovm_code_builder_add_binop(ovm_code_builder_t *builder, u32 instr) { i32 right = POP_VALUE(builder); diff --git a/src/wasm/module.c b/src/wasm/module.c index 452497e..e62bd32 100644 --- a/src/wasm/module.c +++ b/src/wasm/module.c @@ -3,355 +3,7 @@ #include "onyx_wasm.h" #include "vm_codebuilder.h" -typedef struct build_context build_context; -struct build_context { - wasm_byte_vec_t binary; - unsigned int offset; - - wasm_module_t *module; - ovm_program_t *program; - ovm_store_t *store; -}; - -#define CONSUME_BYTE(ctx) ((ctx)->binary.data[(ctx)->offset++]) - -enum wasm_section_numbers_t { 
- WASM_CUSTOM_SECTION = 0, - WASM_TYPE_SECTION = 1, - WASM_IMPORT_SECTION = 2, - WASM_FUNC_SECTION = 3, - WASM_TABLE_SECTION = 4, - WASM_MEMORY_SECTION = 5, - WASM_GLOBAL_SECTION = 6, - WASM_EXPORT_SECTION = 7, - WASM_START_SECTION = 8, - WASM_ELEM_SECTION = 9, - WASM_CODE_SECTION = 10, - WASM_DATA_SECTION = 11, - WASM_DATAC_SECTION = 12, -}; - -static inline wasm_valkind_t parse_valtype(build_context *ctx) { - switch (CONSUME_BYTE(ctx)) { - case 0x7f: return WASM_I32; - case 0x7e: return WASM_I64; - case 0x7d: return WASM_F32; - case 0x7c: return WASM_F64; - case 0x7b: assert(("SIMD values are not currently supported", 0)); - case 0x70: return WASM_FUNCREF; - case 0x6F: return WASM_ANYREF; - default: assert(("Invalid valtype.", 0)); - } -} - -static void parse_custom_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - ctx->offset += section_size; -} - -static void parse_type_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - unsigned int type_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - wasm_functype_vec_new_uninitialized(&ctx->module->type_section, type_count); - - fori (i, 0, (int) type_count) { - assert(CONSUME_BYTE(ctx) == 0x60); // @ReportError - - unsigned int param_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - wasm_valtype_vec_t param_types; - wasm_valtype_vec_new_uninitialized(¶m_types, param_count); - fori (p, 0, (int) param_count) { - param_types.data[p] = wasm_valtype_new(parse_valtype(ctx)); - } - - unsigned int result_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - wasm_valtype_vec_t result_types; - wasm_valtype_vec_new_uninitialized(&result_types, result_count); - fori (p, 0, (int) result_count) { - result_types.data[p] = wasm_valtype_new(parse_valtype(ctx)); - } - - wasm_functype_t *functype = wasm_functype_new(¶m_types, &result_types); - ctx->module->type_section.data[i] = functype; - } -} - 
-static wasm_limits_t parse_limits(build_context *ctx) { - bool maximum_present = CONSUME_BYTE(ctx) == 0x01; - - wasm_limits_t limits; - limits.min = uleb128_to_uint(ctx->binary.data, &ctx->offset); - if (maximum_present) { - limits.max = uleb128_to_uint(ctx->binary.data, &ctx->offset); - } else { - limits.max = wasm_limits_max_default; - } - - return limits; -} - -static wasm_tabletype_t *parse_tabletype(build_context *ctx) { - assert(CONSUME_BYTE(ctx) == 0x70); // @ReportError - - wasm_limits_t limits = parse_limits(ctx); - wasm_tabletype_t *tt = wasm_tabletype_new(wasm_valtype_new(WASM_FUNCREF), &limits); - return tt; -} - -static wasm_memorytype_t *parse_memorytype(build_context *ctx) { - wasm_limits_t limits = parse_limits(ctx); - wasm_memorytype_t *mt = wasm_memorytype_new(&limits); - return mt; -} - -static wasm_globaltype_t *parse_globaltype(build_context *ctx) { - wasm_valtype_t *valtype = wasm_valtype_new(parse_valtype(ctx)); - bool mutable = CONSUME_BYTE(ctx) == 0x01; - - wasm_globaltype_t *gt = wasm_globaltype_new(valtype, mutable); - return gt; -} - -static void parse_import_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - unsigned int import_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - wasm_importtype_vec_new_uninitialized(&ctx->module->imports, import_count); - - fori (i, 0, (int) import_count) { - unsigned int mod_name_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - wasm_byte_vec_t module_name; - wasm_byte_vec_new_uninitialized(&module_name, mod_name_size); - fori (n, 0, mod_name_size) module_name.data[n] = CONSUME_BYTE(ctx); - - unsigned int import_name_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - wasm_byte_vec_t import_name; - wasm_byte_vec_new_uninitialized(&import_name, import_name_size); - fori (n, 0, import_name_size) import_name.data[n] = CONSUME_BYTE(ctx); - - wasm_externtype_t *import_type; - switch (CONSUME_BYTE(ctx)) { - case 0x00: { - 
unsigned int type_idx = uleb128_to_uint(ctx->binary.data, &ctx->offset); - import_type = wasm_functype_as_externtype(ctx->module->type_section.data[type_idx]); - break; - } - - case 0x01: import_type = wasm_tabletype_as_externtype(parse_tabletype(ctx)); break; - case 0x02: import_type = wasm_memorytype_as_externtype(parse_memorytype(ctx)); break; - case 0x03: import_type = wasm_globaltype_as_externtype(parse_globaltype(ctx)); break; - } - - wasm_importtype_t *import = wasm_importtype_new(&module_name, &import_name, import_type); - ctx->module->imports.data[i] = import; - } -} - -static void parse_func_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - unsigned int func_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - wasm_functype_vec_new_uninitialized(&ctx->module->functypes, func_count); - - fori (i, 0, (int) func_count) { - unsigned int index = uleb128_to_uint(ctx->binary.data, &ctx->offset); - ctx->module->functypes.data[i] = ctx->module->type_section.data[index]; - } -} - -static void parse_table_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - unsigned int table_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - wasm_tabletype_vec_new_uninitialized(&ctx->module->tabletypes, table_count); - - fori (i, 0, (int) table_count) { - ctx->module->tabletypes.data[i] = parse_tabletype(ctx); - } -} - -static void parse_memory_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - unsigned int memory_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - wasm_memorytype_vec_new_uninitialized(&ctx->module->memorytypes, memory_count); - - fori (i, 0, (int) memory_count) { - ctx->module->memorytypes.data[i] = parse_memorytype(ctx); - } -} - -static void parse_global_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, 
&ctx->offset); - unsigned int global_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - wasm_globaltype_vec_new_uninitialized(&ctx->module->globaltypes, global_count); - - fori (i, 0, (int) global_count) { - wasm_globaltype_t *gt = parse_globaltype(ctx); - - switch (CONSUME_BYTE(ctx)) { - case 0x41: { - gt->type.global.initial_value.kind = WASM_I32; - gt->type.global.initial_value.of.i32 = (i32) uleb128_to_uint(ctx->binary.data, &ctx->offset); - break; - } - - case 0x42: { - gt->type.global.initial_value.kind = WASM_I64; - gt->type.global.initial_value.of.i64 = (i64) uleb128_to_uint(ctx->binary.data, &ctx->offset); - break; - } - - case 0x43: { - gt->type.global.initial_value.kind = WASM_F32; - gt->type.global.initial_value.of.f32 = *(f32 *) &ctx->binary.data[ctx->offset]; // HACK: This assumes IEEE-754 floats - ctx->offset += 4; - break; - } - - case 0x44: { - gt->type.global.initial_value.kind = WASM_F64; - gt->type.global.initial_value.of.f64 = *(f64 *) &ctx->binary.data[ctx->offset]; // HACK: This assumes IEEE-754 floats - ctx->offset += 8; - break; - } - } - - assert(CONSUME_BYTE(ctx) == 0x0b); - - ctx->module->globaltypes.data[i] = gt; - } -} - -static void parse_export_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - unsigned int export_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - wasm_exporttype_vec_new_uninitialized(&ctx->module->exports, export_count); - - fori (i, 0, (int) export_count) { - unsigned int export_name_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - wasm_byte_vec_t export_name; - wasm_byte_vec_new_uninitialized(&export_name, export_name_size); - fori (n, 0, export_name_size) export_name.data[n] = CONSUME_BYTE(ctx); - - unsigned int type = CONSUME_BYTE(ctx); - unsigned int idx = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - wasm_externtype_t *export_type = NULL; - - switch (type) { - case 0x00: export_type = 
wasm_functype_as_externtype(wasm_module_index_functype(ctx->module, idx)); break; - case 0x01: export_type = wasm_tabletype_as_externtype(wasm_module_index_tabletype(ctx->module, idx)); break; - case 0x02: export_type = wasm_memorytype_as_externtype(wasm_module_index_memorytype(ctx->module, idx)); break; - case 0x03: export_type = wasm_globaltype_as_externtype(wasm_module_index_globaltype(ctx->module, idx)); break; - } - - wasm_exporttype_t *export = wasm_exporttype_new(&export_name, export_type); - export->index = idx; - ctx->module->exports.data[i] = export; - } -} - -static void parse_start_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - ctx->module->start_func_idx = uleb128_to_uint(ctx->binary.data, &ctx->offset); -} - -static void parse_elem_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - unsigned int elem_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - // This is going to be a mess... - // I am only going to handle the case of a single, active, offset-0, - // element entry. This is all that Onyx uses and will probably ever - // use. 
- assert(elem_count == 1); - assert(CONSUME_BYTE(ctx) == 0x00); - assert(CONSUME_BYTE(ctx) == 0x41); - assert(CONSUME_BYTE(ctx) == 0x00); - assert(CONSUME_BYTE(ctx) == 0x0B); - - unsigned int entry_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - ctx->module->elem_count = entry_count; - ctx->module->elem_entries = malloc(sizeof(unsigned int) * entry_count); - - fori (i, 0, (int) entry_count) { - ctx->module->elem_entries[i] = uleb128_to_uint(ctx->binary.data, &ctx->offset); - } -} - -static void parse_code_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - ctx->offset += section_size; - // TODO -} - -static void parse_data_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - unsigned int data_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - if (ctx->module->data_count_present) { - assert(ctx->module->data_count == data_count); - } else { - ctx->module->data_count = data_count; - } - - ctx->module->data_entries = malloc(sizeof(struct wasm_data_t) * data_count); - - fori (i, 0, (int) data_count) { - struct wasm_data_t data_entry; - data_entry.data = NULL; - data_entry.offset = 0; - data_entry.length = 0; - data_entry.passive = true; - - char data_type = CONSUME_BYTE(ctx); - if (data_type == 0x00) { - assert(CONSUME_BYTE(ctx) == 0x41); - data_entry.offset = uleb128_to_uint(ctx->binary.data, &ctx->offset); - data_entry.passive = false; - assert(CONSUME_BYTE(ctx) == 0x0B); - } - - data_entry.length = uleb128_to_uint(ctx->binary.data, &ctx->offset); - data_entry.data = bh_pointer_add(ctx->binary.data, ctx->offset); - ctx->offset += data_entry.length; - - ctx->module->data_entries[i] = data_entry; - } -} - -static void parse_data_count_section(build_context *ctx) { - unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - unsigned int data_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); - - 
ctx->module->data_count_present = true; - ctx->module->data_count = data_count; -} - -static void parse_section(build_context *ctx) { - char section_number = CONSUME_BYTE(ctx); - - switch (section_number) { - case WASM_CUSTOM_SECTION: parse_custom_section(ctx); break; - case WASM_TYPE_SECTION: parse_type_section(ctx); break; - case WASM_IMPORT_SECTION: parse_import_section(ctx); break; - case WASM_FUNC_SECTION: parse_func_section(ctx); break; - case WASM_TABLE_SECTION: parse_table_section(ctx); break; - case WASM_MEMORY_SECTION: parse_memory_section(ctx); break; - case WASM_GLOBAL_SECTION: parse_global_section(ctx); break; - case WASM_EXPORT_SECTION: parse_export_section(ctx); break; - case WASM_START_SECTION: parse_start_section(ctx); break; - case WASM_ELEM_SECTION: parse_elem_section(ctx); break; - case WASM_CODE_SECTION: parse_code_section(ctx); break; - case WASM_DATA_SECTION: parse_data_section(ctx); break; - case WASM_DATAC_SECTION: parse_data_count_section(ctx); break; - default: assert(("bad section number", 0)); break; - } -} +#include "./module_parsing.c.incl" static bool module_build(wasm_module_t *module, const wasm_byte_vec_t *binary) { wasm_engine_t *engine = module->store->engine; diff --git a/src/wasm/module_parsing.c.incl b/src/wasm/module_parsing.c.incl new file mode 100644 index 0000000..8906334 --- /dev/null +++ b/src/wasm/module_parsing.c.incl @@ -0,0 +1,439 @@ +// vim: ft=c: + +// +// This file is not to be compiled like normal. +// It is instead included in wasm/module.c +// + +typedef struct build_context build_context; +struct build_context { + wasm_byte_vec_t binary; + unsigned int offset; + + wasm_module_t *module; + ovm_program_t *program; + ovm_store_t *store; + + // This will be set/reset for every code (function) entry. 
+ ovm_code_builder_t builder; +}; + +#define PEEK_BYTE(ctx) ((ctx)->binary.data[(ctx)->offset]) +#define CONSUME_BYTE(ctx) ((ctx)->binary.data[(ctx)->offset++]) + +enum wasm_section_numbers_t { + WASM_CUSTOM_SECTION = 0, + WASM_TYPE_SECTION = 1, + WASM_IMPORT_SECTION = 2, + WASM_FUNC_SECTION = 3, + WASM_TABLE_SECTION = 4, + WASM_MEMORY_SECTION = 5, + WASM_GLOBAL_SECTION = 6, + WASM_EXPORT_SECTION = 7, + WASM_START_SECTION = 8, + WASM_ELEM_SECTION = 9, + WASM_CODE_SECTION = 10, + WASM_DATA_SECTION = 11, + WASM_DATAC_SECTION = 12, +}; + +static inline wasm_valkind_t parse_valtype(build_context *ctx) { + switch (CONSUME_BYTE(ctx)) { + case 0x7f: return WASM_I32; + case 0x7e: return WASM_I64; + case 0x7d: return WASM_F32; + case 0x7c: return WASM_F64; + case 0x7b: assert(("SIMD values are not currently supported", 0)); + case 0x70: return WASM_FUNCREF; + case 0x6F: return WASM_ANYREF; + default: assert(("Invalid valtype.", 0)); + } +} + +static void parse_custom_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + ctx->offset += section_size; +} + +static void parse_type_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int type_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_functype_vec_new_uninitialized(&ctx->module->type_section, type_count); + + fori (i, 0, (int) type_count) { + assert(CONSUME_BYTE(ctx) == 0x60); // @ReportError + + unsigned int param_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_valtype_vec_t param_types; + wasm_valtype_vec_new_uninitialized(¶m_types, param_count); + fori (p, 0, (int) param_count) { + param_types.data[p] = wasm_valtype_new(parse_valtype(ctx)); + } + + unsigned int result_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_valtype_vec_t result_types; + wasm_valtype_vec_new_uninitialized(&result_types, result_count); + fori (p, 0, (int) result_count) { + 
result_types.data[p] = wasm_valtype_new(parse_valtype(ctx)); + } + + wasm_functype_t *functype = wasm_functype_new(¶m_types, &result_types); + ctx->module->type_section.data[i] = functype; + } +} + +static wasm_limits_t parse_limits(build_context *ctx) { + bool maximum_present = CONSUME_BYTE(ctx) == 0x01; + + wasm_limits_t limits; + limits.min = uleb128_to_uint(ctx->binary.data, &ctx->offset); + if (maximum_present) { + limits.max = uleb128_to_uint(ctx->binary.data, &ctx->offset); + } else { + limits.max = wasm_limits_max_default; + } + + return limits; +} + +static wasm_tabletype_t *parse_tabletype(build_context *ctx) { + assert(CONSUME_BYTE(ctx) == 0x70); // @ReportError + + wasm_limits_t limits = parse_limits(ctx); + wasm_tabletype_t *tt = wasm_tabletype_new(wasm_valtype_new(WASM_FUNCREF), &limits); + return tt; +} + +static wasm_memorytype_t *parse_memorytype(build_context *ctx) { + wasm_limits_t limits = parse_limits(ctx); + wasm_memorytype_t *mt = wasm_memorytype_new(&limits); + return mt; +} + +static wasm_globaltype_t *parse_globaltype(build_context *ctx) { + wasm_valtype_t *valtype = wasm_valtype_new(parse_valtype(ctx)); + bool mutable = CONSUME_BYTE(ctx) == 0x01; + + wasm_globaltype_t *gt = wasm_globaltype_new(valtype, mutable); + return gt; +} + +static void parse_import_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int import_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_importtype_vec_new_uninitialized(&ctx->module->imports, import_count); + + fori (i, 0, (int) import_count) { + unsigned int mod_name_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_byte_vec_t module_name; + wasm_byte_vec_new_uninitialized(&module_name, mod_name_size); + fori (n, 0, mod_name_size) module_name.data[n] = CONSUME_BYTE(ctx); + + unsigned int import_name_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_byte_vec_t import_name; + 
wasm_byte_vec_new_uninitialized(&import_name, import_name_size); + fori (n, 0, import_name_size) import_name.data[n] = CONSUME_BYTE(ctx); + + wasm_externtype_t *import_type; + switch (CONSUME_BYTE(ctx)) { + case 0x00: { + unsigned int type_idx = uleb128_to_uint(ctx->binary.data, &ctx->offset); + import_type = wasm_functype_as_externtype(ctx->module->type_section.data[type_idx]); + break; + } + + case 0x01: import_type = wasm_tabletype_as_externtype(parse_tabletype(ctx)); break; + case 0x02: import_type = wasm_memorytype_as_externtype(parse_memorytype(ctx)); break; + case 0x03: import_type = wasm_globaltype_as_externtype(parse_globaltype(ctx)); break; + } + + wasm_importtype_t *import = wasm_importtype_new(&module_name, &import_name, import_type); + ctx->module->imports.data[i] = import; + } +} + +static void parse_func_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int func_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_functype_vec_new_uninitialized(&ctx->module->functypes, func_count); + + fori (i, 0, (int) func_count) { + unsigned int index = uleb128_to_uint(ctx->binary.data, &ctx->offset); + ctx->module->functypes.data[i] = ctx->module->type_section.data[index]; + } +} + +static void parse_table_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int table_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_tabletype_vec_new_uninitialized(&ctx->module->tabletypes, table_count); + + fori (i, 0, (int) table_count) { + ctx->module->tabletypes.data[i] = parse_tabletype(ctx); + } +} + +static void parse_memory_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int memory_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_memorytype_vec_new_uninitialized(&ctx->module->memorytypes, memory_count); + + fori (i, 0, 
(int) memory_count) { + ctx->module->memorytypes.data[i] = parse_memorytype(ctx); + } +} + +static void parse_global_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int global_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_globaltype_vec_new_uninitialized(&ctx->module->globaltypes, global_count); + + fori (i, 0, (int) global_count) { + wasm_globaltype_t *gt = parse_globaltype(ctx); + + switch (CONSUME_BYTE(ctx)) { + case 0x41: { + gt->type.global.initial_value.kind = WASM_I32; + gt->type.global.initial_value.of.i32 = (i32) uleb128_to_uint(ctx->binary.data, &ctx->offset); + break; + } + + case 0x42: { + gt->type.global.initial_value.kind = WASM_I64; + gt->type.global.initial_value.of.i64 = (i64) uleb128_to_uint(ctx->binary.data, &ctx->offset); + break; + } + + case 0x43: { + gt->type.global.initial_value.kind = WASM_F32; + gt->type.global.initial_value.of.f32 = *(f32 *) &ctx->binary.data[ctx->offset]; // HACK: This assumes IEEE-754 floats + ctx->offset += 4; + break; + } + + case 0x44: { + gt->type.global.initial_value.kind = WASM_F64; + gt->type.global.initial_value.of.f64 = *(f64 *) &ctx->binary.data[ctx->offset]; // HACK: This assumes IEEE-754 floats + ctx->offset += 8; + break; + } + } + + assert(CONSUME_BYTE(ctx) == 0x0b); + + ctx->module->globaltypes.data[i] = gt; + } +} + +static void parse_export_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int export_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_exporttype_vec_new_uninitialized(&ctx->module->exports, export_count); + + fori (i, 0, (int) export_count) { + unsigned int export_name_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_byte_vec_t export_name; + wasm_byte_vec_new_uninitialized(&export_name, export_name_size); + fori (n, 0, export_name_size) export_name.data[n] = CONSUME_BYTE(ctx); + + unsigned int type = 
CONSUME_BYTE(ctx); + unsigned int idx = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_externtype_t *export_type = NULL; + + switch (type) { + case 0x00: export_type = wasm_functype_as_externtype(wasm_module_index_functype(ctx->module, idx)); break; + case 0x01: export_type = wasm_tabletype_as_externtype(wasm_module_index_tabletype(ctx->module, idx)); break; + case 0x02: export_type = wasm_memorytype_as_externtype(wasm_module_index_memorytype(ctx->module, idx)); break; + case 0x03: export_type = wasm_globaltype_as_externtype(wasm_module_index_globaltype(ctx->module, idx)); break; + } + + wasm_exporttype_t *export = wasm_exporttype_new(&export_name, export_type); + export->index = idx; + ctx->module->exports.data[i] = export; + } +} + +static void parse_start_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + ctx->module->start_func_idx = uleb128_to_uint(ctx->binary.data, &ctx->offset); +} + +static void parse_elem_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int elem_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + // This is going to be a mess... + // I am only going to handle the case of a single, active, offset-0, + // element entry. This is all that Onyx uses and will probably ever + // use. 
+ assert(elem_count == 1); + assert(CONSUME_BYTE(ctx) == 0x00); + assert(CONSUME_BYTE(ctx) == 0x41); + assert(CONSUME_BYTE(ctx) == 0x00); + assert(CONSUME_BYTE(ctx) == 0x0B); + + unsigned int entry_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + ctx->module->elem_count = entry_count; + ctx->module->elem_entries = malloc(sizeof(unsigned int) * entry_count); + + fori (i, 0, (int) entry_count) { + ctx->module->elem_entries[i] = uleb128_to_uint(ctx->binary.data, &ctx->offset); + } +} + +static void parse_data_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int data_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + if (ctx->module->data_count_present) { + assert(ctx->module->data_count == data_count); + } else { + ctx->module->data_count = data_count; + } + + ctx->module->data_entries = malloc(sizeof(struct wasm_data_t) * data_count); + + fori (i, 0, (int) data_count) { + struct wasm_data_t data_entry; + data_entry.data = NULL; + data_entry.offset = 0; + data_entry.length = 0; + data_entry.passive = true; + + char data_type = CONSUME_BYTE(ctx); + if (data_type == 0x00) { + assert(CONSUME_BYTE(ctx) == 0x41); + data_entry.offset = uleb128_to_uint(ctx->binary.data, &ctx->offset); + data_entry.passive = false; + assert(CONSUME_BYTE(ctx) == 0x0B); + } + + data_entry.length = uleb128_to_uint(ctx->binary.data, &ctx->offset); + data_entry.data = bh_pointer_add(ctx->binary.data, ctx->offset); + ctx->offset += data_entry.length; + + ctx->module->data_entries[i] = data_entry; + } +} + +static void parse_data_count_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int data_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + ctx->module->data_count_present = true; + ctx->module->data_count = data_count; +} + + + +// +// Instruction building +// + +static void push_label_target(build_context *ctx, label_kind_t 
kind) { + label_target_t target; + target.kind = kind; + target.idx = ctx->builder.next_label_idx++; + target.instr = -1; + + if (kind == label_kind_loop) { + target.instr = bh_arr_length(ctx->program->code); + } + + bh_arr_push(ctx->builder.label_stack, target); +} + +static void pop_label_target(build_context *ctx) { + label_target_t target = bh_arr_pop(ctx->builder.label_stack); + if (target.instr == -1) { + target.instr = bh_arr_length(ctx->program->code); + } + + fori (i, 0, bh_arr_length(ctx->builder.branch_patches)) { + branch_patch_t patch = ctx->builder.branch_patches[i]; + if (patch.label_idx != target.idx) continue; + + ctx->program->code[patch.branch_instr].a = target.instr - patch.branch_instr - 1; + bh_arr_fastdelete(ctx->builder.branch_patches, i); + i--; + } +} + +static void parse_expression(build_context *ctx); + +static void parse_instruction(build_context *ctx) { + char instr_byte; + switch (instr_byte = CONSUME_BYTE(ctx)) { + case 0x00: return; + case 0x01: return; + case 0x02: { + // Currently, only "void" block types are valid. 
+ assert(CONSUME_BYTE(ctx) == 0x40); + push_label_target(ctx, label_kind_block); + parse_expression(ctx); + pop_label_target(ctx); + return; + } + } +} + +static void parse_expression(build_context *ctx) { + while (PEEK_BYTE(ctx) != 0x0B) { + parse_instruction(ctx); + } +} + +static void parse_code_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int code_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + assert(ctx->module->functypes.size == code_count); + + fori (i, 0, (int) code_count) { + unsigned int code_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int local_sections_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + unsigned int total_locals = 0; + fori (j, 0, (int) local_sections_count) { + unsigned int local_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_valkind_t valtype = parse_valtype(ctx); + + total_locals += local_count; + } + + + // Set up a lot of stuff... 
+ ctx->builder = ovm_code_builder_new(ctx->program, 0, total_locals); + parse_expression(ctx); + ovm_code_builder_free(&ctx->builder); + } +} + + +static void parse_section(build_context *ctx) { + char section_number = CONSUME_BYTE(ctx); + + switch (section_number) { + case WASM_CUSTOM_SECTION: parse_custom_section(ctx); break; + case WASM_TYPE_SECTION: parse_type_section(ctx); break; + case WASM_IMPORT_SECTION: parse_import_section(ctx); break; + case WASM_FUNC_SECTION: parse_func_section(ctx); break; + case WASM_TABLE_SECTION: parse_table_section(ctx); break; + case WASM_MEMORY_SECTION: parse_memory_section(ctx); break; + case WASM_GLOBAL_SECTION: parse_global_section(ctx); break; + case WASM_EXPORT_SECTION: parse_export_section(ctx); break; + case WASM_START_SECTION: parse_start_section(ctx); break; + case WASM_ELEM_SECTION: parse_elem_section(ctx); break; + case WASM_CODE_SECTION: parse_code_section(ctx); break; + case WASM_DATA_SECTION: parse_data_section(ctx); break; + case WASM_DATAC_SECTION: parse_data_count_section(ctx); break; + default: assert(("bad section number", 0)); break; + } +} -- 2.25.1