From 5de2a661a2aded130ea0bc6306f10bdf5b0627a5 Mon Sep 17 00:00:00 2001 From: Brendan Hansen Date: Mon, 20 Jun 2022 21:46:41 -0500 Subject: [PATCH] starting to parse the WASM module --- include/onyx_wasm.h | 46 ++++-- src/wasm/extern.c | 35 +++++ src/wasm/frame.c | 2 + src/wasm/module.c | 359 ++++++++++++++++++++++++++++++++++++++++++++ src/wasm/ref.c | 2 + src/wasm/store.c | 4 +- src/wasm/table.c | 2 + src/wasm/trap.c | 2 + src/wasm/type.c | 8 +- src/wasm/value.c | 14 +- 10 files changed, 451 insertions(+), 23 deletions(-) diff --git a/include/onyx_wasm.h b/include/onyx_wasm.h index a797dd1..971b68f 100644 --- a/include/onyx_wasm.h +++ b/include/onyx_wasm.h @@ -20,7 +20,7 @@ struct wasm_engine_t { }; struct wasm_store_t { - i32 nothing; + wasm_engine_t *engine; }; @@ -38,6 +38,8 @@ struct wasm_functype_inner_t { struct wasm_globaltype_inner_t { wasm_valtype_t *content; wasm_mutability_t mutability; + + wasm_val_t initial_value; }; struct wasm_tabletype_inner_t { @@ -65,8 +67,8 @@ struct wasm_tabletype_t { wasm_externtype_t type; }; struct wasm_memorytype_t { wasm_externtype_t type; }; struct wasm_importtype_t { - wasm_name_t *module_name; - wasm_name_t *import_name; + wasm_name_t module_name; + wasm_name_t import_name; wasm_externtype_t *type; }; @@ -91,39 +93,61 @@ struct wasm_foreign_t { }; struct wasm_module_t { + wasm_store_t *store; + + wasm_functype_vec_t type_section; + + wasm_functype_vec_t functypes; + wasm_globaltype_vec_t globaltypes; + wasm_tabletype_vec_t tabletypes; + wasm_memorytype_vec_t memorytypes; + wasm_importtype_vec_t imports; + wasm_exporttype_vec_t exports; + + int start_func_idx; + unsigned int *elem_entries; // Array of function indicies + ovm_program_t *program; + bool valid; }; -struct wasm_func_t { +struct wasm_func_inner_t { wasm_instance_t *instance; ovm_func_t *func; + + wasm_functype_t *type; }; -struct wasm_global_t { +struct wasm_global_inner_t { wasm_instance_t *instance; int register_index; wasm_globaltype_t *type; }; -struct 
wasm_table_t { +struct wasm_table_inner_t { wasm_tabletype_t *type; }; -struct wasm_memory_t { +struct wasm_memory_inner_t { wasm_memorytype_t *type; }; struct wasm_extern_t { wasm_externtype_t *type; union { - wasm_func_t *func; - wasm_global_t *global; - wasm_table_t *table; - wasm_memory_t *memory; + struct wasm_func_inner_t func; + struct wasm_global_inner_t global; + struct wasm_table_inner_t table; + struct wasm_memory_inner_t memory; }; }; +struct wasm_func_t { wasm_extern_t inner; }; +struct wasm_global_t { wasm_extern_t inner; }; +struct wasm_table_t { wasm_extern_t inner; }; +struct wasm_memory_t { wasm_extern_t inner; }; + struct wasm_instance_t { wasm_module_t *module; ovm_state_t *state; diff --git a/src/wasm/extern.c b/src/wasm/extern.c index e69de29..0a1b5d4 100644 --- a/src/wasm/extern.c +++ b/src/wasm/extern.c @@ -0,0 +1,35 @@ + + +#include "onyx_wasm.h" + + +WASM_DECLARE_VEC_IMPL(extern, *) + +wasm_externkind_t wasm_extern_kind(const wasm_extern_t* ext) { + return ext->type->kind; +} + +wasm_externtype_t* wasm_extern_type(const wasm_extern_t* ext) { + return ext->type; +} + +wasm_extern_t* wasm_func_as_extern(wasm_func_t* ext) { return (wasm_extern_t *) ext; } +wasm_extern_t* wasm_global_as_extern(wasm_global_t* ext) { return (wasm_extern_t *) ext; } +wasm_extern_t* wasm_table_as_extern(wasm_table_t* ext) { return (wasm_extern_t *) ext; } +wasm_extern_t* wasm_memory_as_extern(wasm_memory_t* ext) { return (wasm_extern_t *) ext; } + +wasm_func_t* wasm_extern_as_func(wasm_extern_t* ext) { return ext->type->kind == WASM_EXTERN_FUNC ? (wasm_func_t *) ext : NULL; } +wasm_global_t* wasm_extern_as_global(wasm_extern_t* ext) { return ext->type->kind == WASM_EXTERN_GLOBAL ? (wasm_global_t *) ext : NULL; } +wasm_table_t* wasm_extern_as_table(wasm_extern_t* ext) { return ext->type->kind == WASM_EXTERN_TABLE ? (wasm_table_t *) ext : NULL; } +wasm_memory_t* wasm_extern_as_memory(wasm_extern_t* ext) { return ext->type->kind == WASM_EXTERN_MEMORY ? 
(wasm_memory_t *) ext : NULL; } + +const wasm_extern_t* wasm_func_as_extern_const(const wasm_func_t* ext) { return (const wasm_extern_t *) ext; } +const wasm_extern_t* wasm_global_as_extern_const(const wasm_global_t* ext) { return (const wasm_extern_t *) ext; } +const wasm_extern_t* wasm_table_as_extern_const(const wasm_table_t* ext) { return (const wasm_extern_t *) ext; } +const wasm_extern_t* wasm_memory_as_extern_const(const wasm_memory_t* ext) { return (const wasm_extern_t *) ext; } + +const wasm_func_t* wasm_extern_as_func_const(const wasm_extern_t* ext) { return ext->type->kind == WASM_EXTERN_FUNC ? (const wasm_func_t *) ext : NULL; } +const wasm_global_t* wasm_extern_as_global_const(const wasm_extern_t* ext) { return ext->type->kind == WASM_EXTERN_GLOBAL ? (const wasm_global_t *) ext : NULL; } +const wasm_table_t* wasm_extern_as_table_const(const wasm_extern_t* ext) { return ext->type->kind == WASM_EXTERN_TABLE ? (const wasm_table_t *) ext : NULL; } +const wasm_memory_t* wasm_extern_as_memory_const(const wasm_extern_t* ext) { return ext->type->kind == WASM_EXTERN_MEMORY ? 
(const wasm_memory_t *) ext : NULL; } + diff --git a/src/wasm/frame.c b/src/wasm/frame.c index e69de29..0ade447 100644 --- a/src/wasm/frame.c +++ b/src/wasm/frame.c @@ -0,0 +1,2 @@ + +// TODO diff --git a/src/wasm/module.c b/src/wasm/module.c index e69de29..c42f910 100644 --- a/src/wasm/module.c +++ b/src/wasm/module.c @@ -0,0 +1,359 @@ + + +#include "onyx_wasm.h" +#include "vm_codebuilder.h" + +typedef struct build_context build_context; +struct build_context { + wasm_byte_vec_t binary; + unsigned int offset; + + wasm_module_t *module; + ovm_program_t *program; + ovm_store_t *store; +}; + +#define CONSUME_BYTE(ctx) ((ctx)->binary.data[(ctx)->offset++]) + +enum wasm_section_numbers_t { + WASM_CUSTOM_SECTION = 0, + WASM_TYPE_SECTION = 1, + WASM_IMPORT_SECTION = 2, + WASM_FUNC_SECTION = 3, + WASM_TABLE_SECTION = 4, + WASM_MEMORY_SECTION = 5, + WASM_GLOBAL_SECTION = 6, + WASM_EXPORT_SECTION = 7, + WASM_START_SECTION = 8, + WASM_ELEM_SECTION = 9, + WASM_CODE_SECTION = 10, + WASM_DATA_SECTION = 11, + WASM_DATAC_SECTION = 12, +}; + +static inline wasm_valkind_t parse_valtype(build_context *ctx) { + switch (CONSUME_BYTE(ctx)) { + case 0x7f: return WASM_I32; + case 0x7e: return WASM_I64; + case 0x7d: return WASM_F32; + case 0x7c: return WASM_F64; + case 0x7b: assert(("SIMD values are not currently supported", 0)); + case 0x70: return WASM_FUNCREF; + case 0x6F: return WASM_ANYREF; + default: assert(("Invalid valtype.", 0)); + } +} + +static void parse_custom_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + ctx->offset += section_size; +} + +static void parse_type_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int type_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_functype_vec_new_uninitialized(&ctx->module->type_section, type_count); + + fori (i, 0, (int) type_count) { + assert(CONSUME_BYTE(ctx) == 0x60); // @ReportError 
+ + unsigned int param_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_valtype_vec_t param_types; + wasm_valtype_vec_new_uninitialized(&param_types, param_count); + fori (p, 0, (int) param_count) { + param_types.data[p] = wasm_valtype_new(parse_valtype(ctx)); + } + + unsigned int result_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_valtype_vec_t result_types; + wasm_valtype_vec_new_uninitialized(&result_types, result_count); + fori (p, 0, (int) result_count) { + result_types.data[p] = wasm_valtype_new(parse_valtype(ctx)); + } + + wasm_functype_t *functype = wasm_functype_new(&param_types, &result_types); + ctx->module->type_section.data[i] = functype; + } +} + +static wasm_limits_t parse_limits(build_context *ctx) { + bool maximum_present = CONSUME_BYTE(ctx) == 0x01; + + wasm_limits_t limits; + limits.min = uleb128_to_uint(ctx->binary.data, &ctx->offset); + if (maximum_present) { + limits.max = uleb128_to_uint(ctx->binary.data, &ctx->offset); + } else { + limits.max = wasm_limits_max_default; + } + + return limits; +} + +static wasm_tabletype_t *parse_tabletype(build_context *ctx) { + assert(CONSUME_BYTE(ctx) == 0x70); // @ReportError + + wasm_limits_t limits = parse_limits(ctx); + wasm_tabletype_t *tt = wasm_tabletype_new(wasm_valtype_new(WASM_FUNCREF), &limits); /* valkind enum, not the raw 0x70 encoding byte */ + return tt; +} + +static wasm_memorytype_t *parse_memorytype(build_context *ctx) { + wasm_limits_t limits = parse_limits(ctx); + wasm_memorytype_t *mt = wasm_memorytype_new(&limits); + return mt; +} + +static wasm_globaltype_t *parse_globaltype(build_context *ctx) { + wasm_valtype_t *valtype = wasm_valtype_new(parse_valtype(ctx)); + bool mutable = CONSUME_BYTE(ctx) == 0x01; + + wasm_globaltype_t *gt = wasm_globaltype_new(valtype, mutable); + return gt; +} + +static void parse_import_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int import_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + +
wasm_importtype_vec_new_uninitialized(&ctx->module->imports, import_count); + + fori (i, 0, (int) import_count) { + unsigned int mod_name_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_byte_vec_t module_name; + wasm_byte_vec_new_uninitialized(&module_name, mod_name_size); + fori (n, 0, mod_name_size) module_name.data[n] = CONSUME_BYTE(ctx); + + unsigned int import_name_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_byte_vec_t import_name; + wasm_byte_vec_new_uninitialized(&import_name, import_name_size); + fori (n, 0, import_name_size) import_name.data[n] = CONSUME_BYTE(ctx); + + wasm_externtype_t *import_type = NULL; /* stays NULL for an unknown import kind */ + switch (CONSUME_BYTE(ctx)) { + case 0x00: { + unsigned int type_idx = uleb128_to_uint(ctx->binary.data, &ctx->offset); + import_type = wasm_functype_as_externtype(ctx->module->type_section.data[type_idx]); + break; + } + + case 0x01: import_type = wasm_tabletype_as_externtype(parse_tabletype(ctx)); break; + case 0x02: import_type = wasm_memorytype_as_externtype(parse_memorytype(ctx)); break; + case 0x03: import_type = wasm_globaltype_as_externtype(parse_globaltype(ctx)); break; + } + + wasm_importtype_t *import = wasm_importtype_new(&module_name, &import_name, import_type); + ctx->module->imports.data[i] = import; + } +} + +static void parse_func_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int func_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_functype_vec_new_uninitialized(&ctx->module->functypes, func_count); + + fori (i, 0, (int) func_count) { + ctx->module->functypes.data[i] = ctx->module->type_section.data[uleb128_to_uint(ctx->binary.data, &ctx->offset)]; /* each entry is a LEB128 type index that must be consumed */ + } +} + +static void parse_table_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int table_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_tabletype_vec_new_uninitialized(&ctx->module->tabletypes, table_count); + + fori (i, 0,
(int) table_count) { + ctx->module->tabletypes.data[i] = parse_tabletype(ctx); + } +} + +static void parse_memory_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int memory_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_memorytype_vec_new_uninitialized(&ctx->module->memorytypes, memory_count); + + fori (i, 0, (int) memory_count) { + ctx->module->memorytypes.data[i] = parse_memorytype(ctx); + } +} + +static void parse_global_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int global_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + wasm_globaltype_vec_new_uninitialized(&ctx->module->globaltypes, global_count); + + fori (i, 0, (int) global_count) { + wasm_globaltype_t *gt = parse_globaltype(ctx); + + switch (CONSUME_BYTE(ctx)) { + case 0x41: { + gt->type.global.initial_value.kind = WASM_I32; + gt->type.global.initial_value.of.i32 = (i32) uleb128_to_uint(ctx->binary.data, &ctx->offset); + break; + } + + case 0x42: { + gt->type.global.initial_value.kind = WASM_I64; + gt->type.global.initial_value.of.i64 = (i64) uleb128_to_uint(ctx->binary.data, &ctx->offset); + break; + } + + case 0x43: { + gt->type.global.initial_value.kind = WASM_F32; + gt->type.global.initial_value.of.f32 = *(f32 *) &ctx->binary.data[ctx->offset]; // HACK: This assumes IEEE-754 floats + ctx->offset += 4; + break; + } + + case 0x44: { + gt->type.global.initial_value.kind = WASM_F64; + gt->type.global.initial_value.of.f64 = *(f64 *) &ctx->binary.data[ctx->offset]; // HACK: This assumes IEEE-754 floats + ctx->offset += 8; + break; + } + } + + ctx->module->globaltypes.data[i] = gt; + } +} + +static void parse_export_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int export_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + 
wasm_exporttype_vec_new_uninitialized(&ctx->module->exports, export_count); + + fori (i, 0, (int) export_count) { + unsigned int export_name_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + wasm_byte_vec_t export_name; + wasm_byte_vec_new_uninitialized(&export_name, export_name_size); + fori (n, 0, export_name_size) export_name.data[n] = CONSUME_BYTE(ctx); + + wasm_externtype_t *export_type; + switch (CONSUME_BYTE(ctx)) { + case 0x00: { + unsigned int type_idx = uleb128_to_uint(ctx->binary.data, &ctx->offset); + export_type = wasm_functype_as_externtype(ctx->module->type_section.data[type_idx]); + break; + } + + case 0x01: export_type = wasm_tabletype_as_externtype(parse_tabletype(ctx)); break; + case 0x02: export_type = wasm_memorytype_as_externtype(parse_memorytype(ctx)); break; + case 0x03: export_type = wasm_globaltype_as_externtype(parse_globaltype(ctx)); break; + } + + wasm_exporttype_t *export = wasm_exporttype_new(&export_name, export_type); + ctx->module->exports.data[i] = export; + } +} + +static void parse_start_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + ctx->module->start_func_idx = uleb128_to_uint(ctx->binary.data, &ctx->offset); +} + +static void parse_elem_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + unsigned int elem_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + + + // This is going to be a mess... + // I am only going to handle the case of a single, active, offset-0, + // element entry. This is all that Onyx uses and will probably ever + // use. 
+ assert(elem_count == 1); + assert(uleb128_to_uint(ctx->binary.data, &ctx->offset) == 0x00); + assert(uleb128_to_uint(ctx->binary.data, &ctx->offset) == 0x41); /* i32.const opcode of the offset expression */ + assert(uleb128_to_uint(ctx->binary.data, &ctx->offset) == 0x00); + assert(uleb128_to_uint(ctx->binary.data, &ctx->offset) == 0x0B); + + unsigned int entry_count = uleb128_to_uint(ctx->binary.data, &ctx->offset); + ctx->module->elem_entries = malloc(sizeof(unsigned int) * entry_count); + + fori (i, 0, (int) entry_count) { + ctx->module->elem_entries[i] = uleb128_to_uint(ctx->binary.data, &ctx->offset); + } +} + +static void parse_code_section(build_context *ctx) { + unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); + ctx->offset += section_size; + // TODO +} + +static void parse_data_section(build_context *ctx) { unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); ctx->offset += section_size; /* TODO: payload skipped so the section loop stays in sync */ } +static void parse_data_count_section(build_context *ctx) { unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); ctx->offset += section_size; /* TODO: payload skipped so the section loop stays in sync */ } + +static void parse_section(build_context *ctx) { + char section_number = CONSUME_BYTE(ctx); + + switch (section_number) { + case WASM_CUSTOM_SECTION: parse_custom_section(ctx); break; + case WASM_TYPE_SECTION: parse_type_section(ctx); break; + case WASM_IMPORT_SECTION: parse_import_section(ctx); break; + case WASM_FUNC_SECTION: parse_func_section(ctx); break; + case WASM_TABLE_SECTION: parse_table_section(ctx); break; + case WASM_MEMORY_SECTION: parse_memory_section(ctx); break; + case WASM_GLOBAL_SECTION: parse_global_section(ctx); break; + case WASM_EXPORT_SECTION: parse_export_section(ctx); break; + case WASM_START_SECTION: parse_start_section(ctx); break; + case WASM_ELEM_SECTION: parse_elem_section(ctx); break; + case WASM_CODE_SECTION: parse_code_section(ctx); break; + case WASM_DATA_SECTION: parse_data_section(ctx); break; + case WASM_DATAC_SECTION: parse_data_count_section(ctx); break; + default: assert(("bad section number", 0)); break; + } +} + +static bool module_build(wasm_module_t *module, const wasm_byte_vec_t *binary) { + wasm_engine_t *engine = module->store->engine; + module->program =
ovm_program_new(engine->store); + + build_context ctx; + ctx.binary = *binary; + ctx.offset = 8; /* skip the 4-byte magic and 4-byte version that precede the first section */ + ctx.module = module; + ctx.program = module->program; + ctx.store = engine->store; + + while (ctx.offset < binary->size) { + parse_section(&ctx); + } + + return true; +} + + + +// Omitting the "sharable ref" crap that I don't think will +// ever be needed for a module. + + +wasm_module_t *wasm_module_new(wasm_store_t *store, const wasm_byte_vec_t *binary) { + wasm_module_t *module = malloc(sizeof(*module)); + module->store = store; + + bool success = module_build(module, binary); + return module; +} + +bool wasm_module_validate(wasm_store_t *store, const wasm_byte_vec_t *binary) { + // Hmmm... + return false; +} + +void wasm_module_imports(const wasm_module_t *module, wasm_importtype_vec_t *out_imports) { + *out_imports = module->imports; +} + +void wasm_module_exports(const wasm_module_t *module, wasm_exporttype_vec_t *out_exports) { + *out_exports = module->exports; +} + + + diff --git a/src/wasm/ref.c b/src/wasm/ref.c index e69de29..0ade447 100644 --- a/src/wasm/ref.c +++ b/src/wasm/ref.c @@ -0,0 +1,2 @@ + +// TODO diff --git a/src/wasm/store.c b/src/wasm/store.c index 1fc7eaf..0f30cf6 100644 --- a/src/wasm/store.c +++ b/src/wasm/store.c @@ -3,7 +3,9 @@ #include "vm.h" wasm_store_t *wasm_store_new(wasm_engine_t *engine) { - return malloc(sizeof(wasm_store_t)); + wasm_store_t *store = malloc(sizeof(wasm_store_t)); + store->engine = engine; + return store; } void wasm_store_delete(wasm_store_t *store) { diff --git a/src/wasm/table.c b/src/wasm/table.c index e69de29..0ade447 100644 --- a/src/wasm/table.c +++ b/src/wasm/table.c @@ -0,0 +1,2 @@ + +// TODO diff --git a/src/wasm/trap.c b/src/wasm/trap.c index e69de29..0ade447 100644 --- a/src/wasm/trap.c +++ b/src/wasm/trap.c @@ -0,0 +1,2 @@ + +// TODO diff --git a/src/wasm/type.c b/src/wasm/type.c index 2fa307c..eddf1d0 100644 --- a/src/wasm/type.c +++ b/src/wasm/type.c @@ -174,8 +174,8 @@ const wasm_memorytype_t*
wasm_externtype_as_memorytype_const(const wasm_externty wasm_importtype_t *wasm_importtype_new(wasm_name_t *module, wasm_name_t* name, wasm_externtype_t *ext) { wasm_importtype_t *importtype = malloc(sizeof(*importtype)); - importtype->module_name = module; - importtype->import_name = name; + importtype->module_name = *module; + importtype->import_name = *name; importtype->type = ext; return importtype; @@ -186,11 +186,11 @@ void wasm_importtype_delete(wasm_importtype_t *importtype) { } const wasm_name_t* wasm_importtype_module(const wasm_importtype_t* importtype) { - return importtype->module_name; + return &importtype->module_name; } const wasm_name_t* wasm_importtype_name(const wasm_importtype_t* importtype) { - return importtype->import_name; + return &importtype->import_name; } const wasm_externtype_t* wasm_importtype_type(const wasm_importtype_t* importtype) { diff --git a/src/wasm/value.c b/src/wasm/value.c index 0810fa7..f81dea9 100644 --- a/src/wasm/value.c +++ b/src/wasm/value.c @@ -1,20 +1,20 @@ #include "onyx_wasm.h" -void wasm_val_delete(own wasm_val_t* v) { +void wasm_val_delete(wasm_val_t* v) { // Apparently this is suppose to do nothing... } -void wasm_val_copy(own wasm_val_t* out, const wasm_val_t* in) { +void wasm_val_copy(wasm_val_t* out, const wasm_val_t* in) { out->kind = in->kind; switch (out->kind) { - case WASM_I32: out->i32 = in->i32; break; - case WASM_I64: out->i64 = in->i64; break; - case WASM_F32: out->f32 = in->i32; break; - case WASM_F64: out->f64 = in->f64; break; + case WASM_I32: out->of.i32 = in->of.i32; break; + case WASM_I64: out->of.i64 = in->of.i64; break; + case WASM_F32: out->of.f32 = in->of.f32; break; /* copy the float member; reading of.i32 would convert the integer bits to a float */ + case WASM_F64: out->of.f64 = in->of.f64; break; case WASM_ANYREF: case WASM_FUNCREF: - out->ref = in->ref; + out->of.ref = in->of.ref; break; } } -- 2.25.1