From 4f99eef3f43337c0e51a797cfffb6d877ed630d4 Mon Sep 17 00:00:00 2001 From: Brendan Hansen Date: Tue, 26 Jul 2022 18:11:54 -0500 Subject: [PATCH] loading debug info from wasm binaries --- include/ovm_debug.h | 65 +++++++++++++++++++++++ include/ovm_wasm.h | 10 ++++ include/vm_codebuilder.h | 5 +- src/debug/debug_info.c | 71 +++++++++++++++++++++++++ src/debug/debug_info_builder.c | 94 ++++++++++++++++++++++++++++++++++ src/vm/code_builder.c | 36 ++++++++++++- src/wasm/module.c | 6 +++ src/wasm/module_parsing.h | 46 +++++++++++++++-- 8 files changed, 328 insertions(+), 5 deletions(-) create mode 100644 include/ovm_debug.h create mode 100644 src/debug/debug_info.c create mode 100644 src/debug/debug_info_builder.c diff --git a/include/ovm_debug.h b/include/ovm_debug.h new file mode 100644 index 0000000..c76763e --- /dev/null +++ b/include/ovm_debug.h @@ -0,0 +1,65 @@ +#ifndef _OVM_DEBUG_H +#define _OVM_DEBUG_H + +#include "bh.h" + +typedef struct debug_loc_info_t { + u32 file_id; + u32 line; + u32 symbols; +} debug_loc_info_t; + +typedef struct debug_func_info_t { + u32 func_id; + u32 file_id; + u32 line; + char *name; + b32 internal; + u32 stack_ptr_idx; + + u32 debug_op_offset; +} debug_func_info_t; + +typedef struct debug_info_t { + bh_allocator alloc; + + bh_arr(debug_func_info_t) funcs; + bh_arr(debug_loc_info_t) line_info; + bh_arr(u32) instruction_reducer; + + bh_arr(char *) file_names; +} debug_info_t; + +void debug_info_init(debug_info_t *); +void debug_info_free(debug_info_t *); +void debug_info_import_file_info(debug_info_t *, u8 *data, u32 len); +void debug_info_import_func_info(debug_info_t *, u8 *data, u32 len); + +// +// This builder is used in conjunction with code builder to output +// debug information for each instruction that is generated in OVM. +// +typedef struct debug_info_builder_t { + debug_info_t *info; + + u8 *data; + u32 reader_offset; + + u32 current_file_id; + u32 current_line; + + bh_arr(char) symbol_scope_stack; + + u32 remaining_reps; + + b32 locked : 1; +} debug_info_builder_t; + +void debug_info_builder_init(debug_info_builder_t *, debug_info_t *); +void debug_info_builder_prepare(debug_info_builder_t *, u8 *); +void debug_info_builder_emit_location(debug_info_builder_t *); +void debug_info_builder_step(debug_info_builder_t *); +void debug_info_builder_begin_func(debug_info_builder_t *, i32 func_idx); +void debug_info_builder_end_func(debug_info_builder_t *); + +#endif \ No newline at end of file diff --git a/include/ovm_wasm.h b/include/ovm_wasm.h index c258ddd..71bb19c 100644 --- a/include/ovm_wasm.h +++ b/include/ovm_wasm.h @@ -3,6 +3,7 @@ #include "wasm.h" #include "vm.h" +#include "ovm_debug.h" // Core Utils @@ -118,6 +119,11 @@ struct wasm_data_t { bool passive; }; +struct wasm_custom_section_t { + unsigned int size; + char *data; +}; + struct wasm_module_t { wasm_store_t *store; @@ -144,6 +150,10 @@ struct wasm_module_t { int memory_init_idx; int memory_init_external_idx; + + Table(struct wasm_custom_section_t) custom_sections; + + debug_info_t debug_info; }; struct wasm_func_inner_t { diff --git a/include/vm_codebuilder.h b/include/vm_codebuilder.h index 6ecadad..80a062b 100644 --- a/include/vm_codebuilder.h +++ b/include/vm_codebuilder.h @@ -2,6 +2,7 @@ #define _OVM_CODE_BUILDER_H #include "vm.h" +#include "ovm_debug.h" typedef struct ovm_code_builder_t ovm_code_builder_t; typedef struct label_target_t label_target_t; @@ -25,6 +26,8 @@ struct ovm_code_builder_t { i32 func_table_arr_idx; i32 highest_value_number; + + debug_info_builder_t *debug_builder; }; enum label_kind_t { @@ -54,7 +57,7 @@ struct branch_patch_t { bool targets_else; }; -ovm_code_builder_t ovm_code_builder_new(ovm_program_t *program, i32 param_count, i32 local_count); +ovm_code_builder_t ovm_code_builder_new(ovm_program_t *program, debug_info_builder_t *debug, i32 param_count, i32 local_count); label_target_t ovm_code_builder_wasm_target_idx(ovm_code_builder_t *builder, i32 idx); i32 ovm_code_builder_push_label_target(ovm_code_builder_t *builder, label_kind_t kind); void ovm_code_builder_pop_label_target(ovm_code_builder_t *builder); diff --git a/src/debug/debug_info.c b/src/debug/debug_info.c new file mode 100644 index 0000000..3a41586 --- /dev/null +++ b/src/debug/debug_info.c @@ -0,0 +1,71 @@ + +#include "ovm_debug.h" + +void debug_info_init(debug_info_t *info) { + memset(info, 0, sizeof(*info)); + + info->alloc = bh_heap_allocator(); + bh_arr_new(info->alloc, info->funcs, 16); + bh_arr_new(info->alloc, info->line_info, 1024); + bh_arr_new(info->alloc, info->instruction_reducer, 4096); + bh_arr_new(info->alloc, info->file_names, 16); +} + +void debug_info_free(debug_info_t *info) { + bh_arr_free(info->funcs); + bh_arr_free(info->line_info); + bh_arr_free(info->instruction_reducer); + + bh_arr_each(char *, name, info->file_names) bh_free(info->alloc, *name); + bh_arr_free(info->file_names); +} + +void debug_info_import_file_info(debug_info_t *info, u8 *data, u32 len) { + u32 offset = 0; + + i32 count = uleb128_to_uint(data, &offset); + fori (i, 0, (i32) count) { + u32 file_id = uleb128_to_uint(data, &offset); + u32 name_length = uleb128_to_uint(data, &offset); + char *name = bh_alloc_array(info->alloc, char, name_length + 1); + memcpy(name, data + offset, name_length); + name[name_length] = 0; + offset += name_length; + + bh_arr_set_at(info->file_names, file_id, name); + } + + assert(offset == len); +} + +void debug_info_import_func_info(debug_info_t *info, u8 *data, u32 len) { + u32 offset = 0; + + i32 count = uleb128_to_uint(data, &offset); + fori (i, 0, (i32) count) { + debug_func_info_t func_info; + func_info.func_id = uleb128_to_uint(data, &offset); + func_info.file_id = uleb128_to_uint(data, &offset); + func_info.line = uleb128_to_uint(data, &offset); + + u32 name_length = uleb128_to_uint(data, &offset); + if (name_length == 0) { + func_info.name = NULL; + } else { + func_info.name = bh_alloc_array(info->alloc, char, name_length + 1); + memcpy(func_info.name, data + offset, name_length); + func_info.name[name_length] = 0; + offset += name_length; + } + + func_info.internal = data[offset++] != 0; + func_info.debug_op_offset = uleb128_to_uint(data, &offset); + func_info.stack_ptr_idx = uleb128_to_uint(data, &offset); + + uleb128_to_uint(data, &offset); + + bh_arr_set_at(info->funcs, func_info.func_id, func_info); + } + + assert(offset == len); +} diff --git a/src/debug/debug_info_builder.c b/src/debug/debug_info_builder.c new file mode 100644 index 0000000..343c452 --- /dev/null +++ b/src/debug/debug_info_builder.c @@ -0,0 +1,94 @@ + + +#include "ovm_debug.h" + + +void debug_info_builder_init(debug_info_builder_t *builder, debug_info_t *info) { + memset(builder, 0, sizeof(*builder)); + + builder->info = info; + bh_arr_new(bh_heap_allocator(), builder->symbol_scope_stack, 8); +} + +void debug_info_builder_prepare(debug_info_builder_t *builder, u8 *data) { + builder->data = data; + builder->reader_offset = 0; + builder->current_file_id = 0; + builder->current_line = 0; + builder->remaining_reps = 0; +} + +static void debug_info_builder_parse(debug_info_builder_t *builder) { + u32 count = 0; + + while (1) { + u8 instr = builder->data[builder->reader_offset++]; + switch (instr & 0b11000000) { + case 0b00000000: + instr &= 0b00111111; + switch (instr) { + case 0: builder->locked = 1; break; + case 1: + builder->current_file_id = uleb128_to_uint(builder->data, &builder->reader_offset); + builder->current_line = uleb128_to_uint(builder->data, &builder->reader_offset); + break; + case 2: break; + case 3: break; + case 4: + uleb128_to_uint(builder->data, &builder->reader_offset); + break; + } + break; + + case 0b01000000: + count = instr & 0x3f; + builder->current_line += count + 1; + builder->remaining_reps = 1; + return; + + case 0b10000000: + count = instr & 0x3f; + builder->current_line -= count + 1; + builder->remaining_reps = 1; + return; + + case 0b11000000: + count = instr & 0x3f; + builder->remaining_reps = count + 1; + return; + } + } +} + +void debug_info_builder_step(debug_info_builder_t *builder) { + while (builder->remaining_reps == 0) { + debug_info_builder_parse(builder); + + debug_loc_info_t info; + info.file_id = builder->current_file_id; + info.line = builder->current_line; + info.symbols = 0; + bh_arr_push(builder->info->line_info, info); + } + + if (builder->locked) return; + + builder->remaining_reps -= 1; + return; +} + +void debug_info_builder_emit_location(debug_info_builder_t *builder) { + bh_arr_push(builder->info->instruction_reducer, bh_arr_length(builder->info->line_info) - 1); +} + +void debug_info_builder_begin_func(debug_info_builder_t *builder, i32 func_idx) { + assert(func_idx < bh_arr_length(builder->info->funcs)); + debug_func_info_t *func_info = &builder->info->funcs[func_idx]; + + builder->reader_offset = func_info->debug_op_offset; + assert(builder->reader_offset < 20000); + builder->locked = 0; +} + +void debug_info_builder_end_func(debug_info_builder_t *builder) { +} diff --git a/src/vm/code_builder.c b/src/vm/code_builder.c index 0ed8ba6..ec84b47 100644 --- a/src/vm/code_builder.c +++ b/src/vm/code_builder.c @@ -1,5 +1,6 @@ #include "vm_codebuilder.h" +#include "ovm_debug.h" // #define BUILDER_DEBUG @@ -26,7 +27,7 @@ static inline int NEXT_VALUE(ovm_code_builder_t *b) { #endif } -ovm_code_builder_t ovm_code_builder_new(ovm_program_t *program, i32 param_count, i32 local_count) { +ovm_code_builder_t ovm_code_builder_new(ovm_program_t *program, debug_info_builder_t *debug, i32 param_count, i32 local_count) { ovm_code_builder_t builder; builder.param_count = param_count; builder.local_count = local_count; @@ -44,6 +45,8 @@ ovm_code_builder_t ovm_code_builder_new(ovm_program_t *program, i32 param_count, builder.highest_value_number = param_count + local_count; + builder.debug_builder = debug; + return builder; } @@ -123,6 +126,7 @@ void ovm_code_builder_add_nop(ovm_code_builder_t *builder) { ovm_instr_t nop = {0}; nop.full_instr = OVMI_NOP; + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &nop); } @@ -137,6 +141,7 @@ void ovm_code_builder_add_binop(ovm_code_builder_t *builder, u32 instr) { binop.a = left; binop.b = right; + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &binop); PUSH_VALUE(builder, result); } @@ -156,6 +161,7 @@ void ovm_code_builder_add_imm(ovm_code_builder_t *builder, u32 ovm_type, void *i default: assert(("bad ovm type for add_imm", 0)); } + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &imm_instr); PUSH_VALUE(builder, imm_instr.r); } @@ -168,6 +174,7 @@ void ovm_code_builder_add_unop(ovm_code_builder_t *builder, u32 instr) { unop.r = NEXT_VALUE(builder); unop.a = operand; + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &unop); PUSH_VALUE(builder, unop.r); } @@ -185,6 +192,7 @@ void ovm_code_builder_add_branch(ovm_code_builder_t *builder, i32 label_idx) { bh_arr_push(builder->branch_patches, patch); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &branch_instr); } @@ -207,6 +215,7 @@ void ovm_code_builder_add_cond_branch(ovm_code_builder_t *builder, i32 label_idx bh_arr_push(builder->branch_patches, patch); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &branch_instr); } @@ -263,6 +272,11 @@ void ovm_code_builder_add_branch_table(ovm_code_builder_t *builder, i32 count, i default_patch.targets_else = false; bh_arr_push(builder->branch_patches, default_patch); + debug_info_builder_emit_location(builder->debug_builder); + debug_info_builder_emit_location(builder->debug_builder); + debug_info_builder_emit_location(builder->debug_builder); + debug_info_builder_emit_location(builder->debug_builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 5, instrs); } @@ -277,6 +291,7 @@ void ovm_code_builder_add_return(ovm_code_builder_t *builder) { instr.a = POP_VALUE(builder); } + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &instr); } @@ -292,6 +307,7 @@ static void ovm_code_builder_add_params(ovm_code_builder_t *builder, i32 param_c param_instr.full_instr = OVMI_PARAM; param_instr.a = flipped_params[param_count - 1 - i]; + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, ¶m_instr); } } @@ -308,6 +324,7 @@ void ovm_code_builder_add_call(ovm_code_builder_t *builder, i32 func_idx, i32 pa call_instr.r = NEXT_VALUE(builder); } + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &call_instr); if (has_return_value) { @@ -334,6 +351,8 @@ void ovm_code_builder_add_indirect_call(ovm_code_builder_t *builder, i32 param_c call_instrs[1].r = NEXT_VALUE(builder); } + debug_info_builder_emit_location(builder->debug_builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 2, call_instrs); if (has_return_value) { @@ -355,6 +374,7 @@ void ovm_code_builder_add_local_get(ovm_code_builder_t *builder, i32 local_idx) // it was spec'd; but in the future for other things, // this will be incorrect. + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &instr); PUSH_VALUE(builder, instr.r); @@ -370,6 +390,7 @@ void ovm_code_builder_add_local_set(ovm_code_builder_t *builder, i32 local_idx) // this will be incorrect. instr.a = POP_VALUE(builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &instr); } @@ -383,6 +404,7 @@ void ovm_code_builder_add_local_tee(ovm_code_builder_t *builder, i32 local_idx) // this will be incorrect. instr.a = POP_VALUE(builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &instr); PUSH_VALUE(builder, instr.a); @@ -394,6 +416,7 @@ void ovm_code_builder_add_register_get(ovm_code_builder_t *builder, i32 reg_idx) instr.r = NEXT_VALUE(builder); instr.a = reg_idx; + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &instr); PUSH_VALUE(builder, instr.r); @@ -405,6 +428,7 @@ void ovm_code_builder_add_register_set(ovm_code_builder_t *builder, i32 reg_idx) instr.r = reg_idx; instr.a = POP_VALUE(builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &instr); } @@ -415,6 +439,7 @@ void ovm_code_builder_add_load(ovm_code_builder_t *builder, u32 ovm_type, i32 of load_instr.a = POP_VALUE(builder); load_instr.r = NEXT_VALUE(builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &load_instr); PUSH_VALUE(builder, load_instr.r); @@ -427,6 +452,7 @@ void ovm_code_builder_add_store(ovm_code_builder_t *builder, u32 ovm_type, i32 o store_instr.a = POP_VALUE(builder); store_instr.r = POP_VALUE(builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &store_instr); return; } @@ -439,6 +465,7 @@ void ovm_code_builder_add_cmpxchg(ovm_code_builder_t *builder, u32 ovm_type, i32 cmpxchg_instr.a = POP_VALUE(builder); cmpxchg_instr.r = POP_VALUE(builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &cmpxchg_instr); PUSH_VALUE(builder, cmpxchg_instr.r); @@ -469,6 +496,9 @@ void ovm_code_builder_add_cmpxchg(ovm_code_builder_t *builder, u32 ovm_type, i32 instrs[2].a = expected_reg; instrs[2].b = value_reg; + debug_info_builder_emit_location(builder->debug_builder); + debug_info_builder_emit_location(builder->debug_builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 3, instrs); PUSH_VALUE(builder, instrs[2].r); @@ -481,6 +511,7 @@ void ovm_code_builder_add_memory_copy(ovm_code_builder_t *builder) { instr.a = POP_VALUE(builder); instr.r = POP_VALUE(builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &instr); } @@ -491,6 +522,7 @@ void ovm_code_builder_add_memory_fill(ovm_code_builder_t *builder) { instr.a = POP_VALUE(builder); instr.r = POP_VALUE(builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &instr); } @@ -503,6 +535,7 @@ void ovm_code_builder_add_atomic_load(ovm_code_builder_t *builder, u32 ovm_type, load_instr.a = POP_VALUE(builder); load_instr.r = NEXT_VALUE(builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &load_instr); PUSH_VALUE(builder, load_instr.r); @@ -517,5 +550,6 @@ void ovm_code_builder_add_atomic_store(ovm_code_builder_t *builder, u32 ovm_type store_instr.a = POP_VALUE(builder); store_instr.r = POP_VALUE(builder); + debug_info_builder_emit_location(builder->debug_builder); ovm_program_add_instructions(builder->program, 1, &store_instr); } diff --git a/src/wasm/module.c b/src/wasm/module.c index a1786aa..f561afb 100644 --- a/src/wasm/module.c +++ b/src/wasm/module.c @@ -2,6 +2,7 @@ #include "ovm_wasm.h" #include "vm_codebuilder.h" +#include "stb_ds.h" #include "./module_parsing.h" @@ -17,6 +18,9 @@ static bool module_build(wasm_module_t *module, const wasm_byte_vec_t *binary) { ctx.store = engine->store; ctx.next_external_func_idx = 0; + debug_info_builder_init(&ctx.debug_builder, &module->debug_info); + sh_new_arena(module->custom_sections); + while (ctx.offset < binary->size) { parse_section(&ctx); } @@ -65,6 +69,8 @@ wasm_module_t *wasm_module_new(wasm_store_t *store, const wasm_byte_vec_t *binar memset(module, 0, sizeof(*module)); module->store = store; + debug_info_init(&module->debug_info); + bool success = module_build(module, binary); return module; } diff --git a/src/wasm/module_parsing.h b/src/wasm/module_parsing.h index 61d8101..1776d2c 100644 --- a/src/wasm/module_parsing.h +++ b/src/wasm/module_parsing.h @@ -23,6 +23,7 @@ struct build_context { // This will be set/reset for every code (function) entry. ovm_code_builder_t builder; + debug_info_builder_t debug_builder; }; #define PEEK_BYTE(ctx) ((ctx)->binary.data[(ctx)->offset]) @@ -59,7 +60,39 @@ static inline wasm_valkind_t parse_valtype(build_context *ctx) { static void parse_custom_section(build_context *ctx) { unsigned int section_size = uleb128_to_uint(ctx->binary.data, &ctx->offset); - ctx->offset += section_size; + unsigned int end_of_section = ctx->offset + section_size; + + struct wasm_custom_section_t cs; + + char name[256]; + unsigned int name_len = uleb128_to_uint(ctx->binary.data, &ctx->offset); + if (name_len < sizeof(name) - 1) { + strncpy(name, &((char *) ctx->binary.data)[ctx->offset], name_len); + name[name_len] = '\0'; + + ctx->offset += name_len; + cs.size = end_of_section - ctx->offset; + + unsigned int data_size = end_of_section - ctx->offset; + cs.data = bh_alloc_array(ctx->store->heap_allocator, char, data_size); + memcpy(cs.data, &((char *) ctx->binary.data)[ctx->offset], data_size); + + shput(ctx->module->custom_sections, name, cs); + + if (!strcmp(name, "ovm_debug_files")) { + debug_info_import_file_info(ctx->debug_builder.info, cs.data, cs.size); + } + + if (!strcmp(name, "ovm_debug_funcs")) { + debug_info_import_func_info(ctx->debug_builder.info, cs.data, cs.size); + } + + if (!strcmp(name, "ovm_debug_ops")) { + debug_info_builder_prepare(&ctx->debug_builder, cs.data); + } + } + + ctx->offset = end_of_section; } static void parse_type_section(build_context *ctx) { @@ -858,6 +891,8 @@ static void parse_instruction(build_context *ctx) { default: assert(("UNHANDLED INSTRUCTION", 0)); } + + debug_info_builder_step(&ctx->debug_builder); } static void parse_expression(build_context *ctx) { @@ -890,18 +925,23 @@ static void parse_code_section(build_context *ctx) { // Set up a lot of stuff... + i32 func_idx = bh_arr_length(ctx->program->funcs); i32 param_count = ctx->module->functypes.data[i]->type.func.params.size; - ctx->builder = ovm_code_builder_new(ctx->program, param_count, total_locals); + debug_info_builder_begin_func(&ctx->debug_builder, func_idx); + + ctx->builder = ovm_code_builder_new(ctx->program, &ctx->debug_builder, param_count, total_locals); ctx->builder.func_table_arr_idx = ctx->func_table_arr_idx; ovm_code_builder_push_label_target(&ctx->builder, label_kind_func); parse_expression(ctx); ovm_code_builder_add_return(&ctx->builder); - char *func_name = bh_aprintf(bh_heap_allocator(), "wasm_loaded_%d", bh_arr_length(ctx->program->funcs)); + char *func_name = bh_aprintf(bh_heap_allocator(), "wasm_loaded_%d", func_idx); ovm_program_register_func(ctx->program, func_name, ctx->builder.start_instr, ctx->builder.param_count, ctx->builder.highest_value_number + 1); + ovm_code_builder_free(&ctx->builder); + debug_info_builder_end_func(&ctx->debug_builder); } ovm_program_register_external_func(ctx->program, "__internal_wasm_memory_init", 4, ctx->module->memory_init_external_idx); -- 2.25.1