From: Brendan Hansen Date: Thu, 16 Apr 2020 02:33:19 +0000 (-0500) Subject: Renamed some files X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=76c4de889a253bbc472a4c0c04fbf8438fe9a31b;p=wasm-analyzer.git Renamed some files --- diff --git a/app.lua b/app.lua index fc319fb..9b3e223 100644 --- a/app.lua +++ b/app.lua @@ -1,6 +1,7 @@ import { Rectangle = "src.utils:Rectangle"; - wasm_decompile = "src.wasm.decompile:"; + wasm_decompile = "src.wasm.decompile:decompile_file"; + wasm_parse = "src.wasm.parse:"; wasm_analyze = "src.wasm.analyze:"; ui = "src.ui:"; @@ -46,7 +47,7 @@ function bootstrap() end function open_file(path) - globals.wasm_module = wasm_decompile(path) + globals.wasm_module = wasm_parse(path) globals.wasm_module = wasm_analyze(globals.wasm_module) -- Delete all old children diff --git a/clib/parse_helper.c b/clib/parse_helper.c index 63cd19f..e3a6415 100644 --- a/clib/parse_helper.c +++ b/clib/parse_helper.c @@ -94,6 +94,40 @@ LUA_FUNCTION(parse_sleb128) { return 1; } +// TODO: This is a hack that relies on an IEEE implementation +// in the C compiler but it works for now +union FloatConvert { unsigned char b[4]; float f; }; +LUA_FUNCTION(parse_ieee754_32) { + Parser* parser = (Parser*) lua_touserdata(L, 1); + + union FloatConvert tmp; + unsigned char* it = (unsigned char *) &tmp; + + for (int i = 0; i < 4; i++) { + *it = buffered_file_read_byte(parser->file); + it++; + } + + lua_pushnumber(L, tmp.f); + return 1; +} + +union DoubleConvert { unsigned char b[8]; double d; }; +LUA_FUNCTION(parse_ieee754_64) { + Parser* parser = (Parser*) lua_touserdata(L, 1); + + union DoubleConvert tmp; + unsigned char* it = (unsigned char *) &tmp; + + for (int i = 0; i < 8; i++) { + *it = buffered_file_read_byte(parser->file); + it++; + } + + lua_pushnumber(L, tmp.d); + return 1; +} + int luaopen_parse_helper(lua_State* L) { const luaL_Reg functions[] = { { "open_file", open_file }, @@ -103,6 +137,9 @@ int luaopen_parse_helper(lua_State* L) { { 
"parse_uleb128", parse_uleb128 }, { "parse_sleb128", parse_sleb128 }, + + { "parse_ieee754_32", parse_ieee754_32 }, + { "parse_ieee754_64", parse_ieee754_64 }, { NULL, NULL }, }; diff --git a/clib/parse_helper.so b/clib/parse_helper.so index 50ce0d0..4220cea 100755 Binary files a/clib/parse_helper.so and b/clib/parse_helper.so differ diff --git a/src/ui/components.lua b/src/ui/components.lua index 37d65c3..74588c9 100644 --- a/src/ui/components.lua +++ b/src/ui/components.lua @@ -4,7 +4,7 @@ import { Rectangle = "src.utils:Rectangle"; wasm_text = "src.wasm.text"; - wasm_exprs = "src.wasm.exprs"; + wasm_decompile = "src.wasm.decompile"; globals = "src.globals"; COLORS = "conf:COLOR_SCHEME"; @@ -256,7 +256,7 @@ function function_context_menu:init(x, y) ui.insert_child(self, with(ui.make_element("button", "View decompiled")) { click = function_context_menu.btn_view_decompiled; - rect = Rectangle(0, 0, 200, 40) + rect = Rectangle(0, 0, 200, 20) }) ui.focus(self) @@ -279,7 +279,7 @@ end function function_context_menu:btn_view_decompiled(button, x, y) local wasm_func = self.parent.parent.func print(wasm_func.name .. " ------------------------------------------------") - local lines = wasm_exprs.build_expr(wasm_func, globals.wasm_module) + local lines = wasm_decompile.decompile_func(wasm_func, globals.wasm_module) for _, r in ipairs(lines) do print(r) end end diff --git a/src/ui/text.lua b/src/ui/text.lua index 8eee352..7a490fb 100644 --- a/src/ui/text.lua +++ b/src/ui/text.lua @@ -3,6 +3,8 @@ import { } FONTS = {} Text = {} + +-- TODO: Rewrite because this is bad function Text.render_text(x, y, text) local curr_font = love.graphics.getFont() diff --git a/src/wasm/decompile.lua b/src/wasm/decompile.lua index f73351e..e4bf5e0 100644 --- a/src/wasm/decompile.lua +++ b/src/wasm/decompile.lua @@ -1,751 +1,448 @@ --- Recursive decent parser for the WASM v1.0 binary format --- Brendan Hansen 2020 - --- Look in clib folder for C-based libraries -package.cpath = package.cpath .. 
[[;./clib/?.so]] - import { - parse_helper = "parse_helper"; - pprint = "lualib.pprint"; - - build_str = "src.utils:build_str"; - random_str = "src.utils:random_str"; Stack = "src.utils:Stack"; + pprint = "lualib.pprint"; } -function parse_valtype(r) - local val = r:read_byte() - local valtypes_map = { - [0x7f] = "i32"; - [0x7E] = "i64"; - [0x7D] = "f32"; - [0x7C] = "f64"; - } +--[[ +Expressions in wasm are built off of a stack, so... +i32.const 1 +i32.const 3 +i32.mul +i32.const 8 +i32.add - return valtypes_map[val] -end +would translate to -function parse_vector(tp) - return function(r) - local n = r:read_uint(32) +i32.add (i32.mul (i32.const 1) (i32.const 3)) (i32.const 8). - local v = {} - for i=1, n do - table.insert(v, tp(r)) - end +Every instruction has some number of inputs and outputs to the stack. + inputs outputs +i32.const 0 1 +i32.add 2 1 - return v - end -end +Currently every instruction can output at most one thing to the stack. +This should be quick, just need to look up all the instructions -function parse_byte(r) - return r:read_byte() -end +--]] -function parse_name(r) - local name = parse_vector(parse_byte)(r) - return name -end +local build_instr_list -function parse_blocktype(r) - if r:peek_byte() == 0x40 then - r:read_byte() - return {} - else - return { parse_valtype(r) } +function binop(op) + return function(stack, instr) + local right = stack:pop() + local left = stack:pop() + stack:push("(" .. left .. " " .. op .. " " .. right .. 
")") end end -function parse_functype(r) - assert(r:read_byte() == 0x60, "functype expected 0x60 as first byte") - - local params = parse_vector(parse_valtype)(r) - local results = parse_vector(parse_valtype)(r) - - return { param_types = params, result_types = results } -end - -function parse_limits(r) - local t = r:read_byte() - - local min = r:read_uint(32) - local max = nil - if t == 0x01 then - max = r:read_uint(32) +function cast(to) + return function(stack, instr) + local thing = stack:pop() + stack:push("<" .. to .. ">" .. thing) end - - return { min = min, max = max } end -function parse_memtype(r) - local lims = parse_limits(r) - - return lims -end - -function parse_tabletype(r) - local elemtype = parse_elemtype(r) - local limits = parse_limits(r) - - return { lim = limits, et = elemtype } -end - -function parse_elemtype(r) - assert(r:read_byte() == 0x70, "elemtype should be 0x70") - return "funcref" -end - -function parse_globaltype(r) - local valtype = parse_valtype(r) - local ismut = parse_mut(r) == 0x01 - - return { t = valtype, m = ismut } -end - -function parse_mut(r) - local v = r:read_byte() - assert(v == 0x00 or v == 0x01, "mut should be 0x00 or 0x01") - return v -end - -function parse_instr(r) - local instr = r:read_byte() - - return match(instr) { - [0x00] = function() return { "unreachable" } end; - [0x01] = function() return { "nop" } end; - [0x02] = function() - local rt = parse_blocktype(r) - local instrs = {} - local name = random_str(8) - - local block = { "block", label = name, rt = rt } - r.label_stack:push(block) - - while true do - if r:peek_byte() == 0x0B then - r:read_byte() - break - else - table.insert(instrs, parse_instr(r)) - end - end - - r.label_stack:pop() - - block.instrs = instrs - return block - end; - [0x03] = function() - local rt = parse_blocktype(r) - local instrs = {} - local name = random_str(8) - - local block = { "loop", label = name, rt = rt } - r.label_stack:push(block) - - while true do - if r:peek_byte() == 
0x0B then - r:read_byte() - break - else - table.insert(instrs, parse_instr(r)) - end - end - - r.label_stack:pop() - - block.instrs = instrs - return block - end; - [0x04] = function() - local rt = parse_blocktype(r) - local instrs = {} - local else_instrs = {} - local inelse = false - local name = random_str(8) - - local block = { "if", label = name, rt = rt } - r.label_stack:push(block) - - while true do - local peek = r:peek_byte() - if peek == 0x0B then - r:read_byte() - break - elseif peek == 0x05 then - r:read_byte() - r.label_stack:pop() - inelse = true - r.label_stack:push({ "else", label = name, rt = rt }) - else - if not inelse then - table.insert(instrs, parse_instr(r)) - else - table.insert(else_instrs, parse_instr(r)) - end - end - end - - r.label_stack:pop() - - return { "if", label = name, rt = rt, instrs = instrs, else_instrs = else_instrs } - end; - - [0x0C] = function() return { "br", x = parse_labelidx(r) } end; - [0x0D] = function() return { "br_if", x = parse_labelidx(r) } end; - [0x0E] = function() - local labels = parse_vector(parse_labelidx)(r) - local labeln = parse_labelidx(r) - - return { "br_table", labels = labels, labeln = labeln } - end; - - [0x0F] = function() return { "return" } end; - - [0x10] = function() return { "call", x = parse_funcidx(r) } end; - [0x11] = function() - local x = parse_typeidx(r) - -- assert(r:read_byte() ~= 0x00, "call_indirect expects 0x00 at end") - return { "call_indirect", x = x, table = r:read_byte() } - end; - - [0x1A] = function() return { "drop" } end; - [0x1B] = function() return { "select" } end; - - [0x20] = function() return { "local.get", x = parse_localidx(r) } end; - [0x21] = function() return { "local.set", x = parse_localidx(r) } end; - [0x22] = function() return { "local.tee", x = parse_localidx(r) } end; - [0x23] = function() return { "global.get", x = parse_globalidx(r) } end; - [0x24] = function() return { "global.set", x = parse_globalidx(r) } end; - - [0x28] = function() return { 
"i32.load", x = parse_memarg(r) } end; - [0x29] = function() return { "i64.load", x = parse_memarg(r) } end; - [0x2A] = function() return { "f32.load", x = parse_memarg(r) } end; - [0x2B] = function() return { "f64.load", x = parse_memarg(r) } end; - [0x2C] = function() return { "i32.load8_s", x = parse_memarg(r) } end; - [0x2D] = function() return { "i32.load8_u", x = parse_memarg(r) } end; - [0x2E] = function() return { "i32.load16_s", x = parse_memarg(r) } end; - [0x2F] = function() return { "i32.load16_u", x = parse_memarg(r) } end; - [0x30] = function() return { "i64.load8_s", x = parse_memarg(r) } end; - [0x31] = function() return { "i64.load8_u", x = parse_memarg(r) } end; - [0x32] = function() return { "i64.load16_s", x = parse_memarg(r) } end; - [0x33] = function() return { "i64.load16_u", x = parse_memarg(r) } end; - [0x34] = function() return { "i64.load32_s", x = parse_memarg(r) } end; - [0x35] = function() return { "i64.load32_u", x = parse_memarg(r) } end; - [0x36] = function() return { "i32.store", x = parse_memarg(r) } end; - [0x37] = function() return { "i64.store", x = parse_memarg(r) } end; - [0x38] = function() return { "f32.store", x = parse_memarg(r) } end; - [0x39] = function() return { "f64.store", x = parse_memarg(r) } end; - [0x3A] = function() return { "i32.store8", x = parse_memarg(r) } end; - [0x3B] = function() return { "i32.store16", x = parse_memarg(r) } end; - [0x3C] = function() return { "i64.store8", x = parse_memarg(r) } end; - [0x3D] = function() return { "i64.store16", x = parse_memarg(r) } end; - [0x3E] = function() return { "i64.store32", x = parse_memarg(r) } end; - [0x3F] = function() - assert(r:read_byte() == 0x00, "memory.size expects 0x00") - return { "memory.size" } - end; - [0x40] = function() - assert(r:read_byte() == 0x00, "memory.grow expects 0x00") - return { "memory.grow" } - end; - - [0x41] = function() return { "i32.const", x = r:read_sint(32) } end; - [0x42] = function() return { "i64.const", x = 
r:read_sint(64) } end; - [0x43] = function() return { "f32.const", x = r:read_float(32) } end; - [0x44] = function() return { "f64.const", x = r:read_float(64) } end; - - [0x45] = function() return { "i32.eqz" } end; - [0x46] = function() return { "i32.eq" } end; - [0x47] = function() return { "i32.ne" } end; - [0x48] = function() return { "i32.lt_s" } end; - [0x49] = function() return { "i32.lt_u" } end; - [0x4A] = function() return { "i32.gt_s" } end; - [0x4B] = function() return { "i32.gt_u" } end; - [0x4C] = function() return { "i32.le_s" } end; - [0x4D] = function() return { "i32.le_u" } end; - [0x4E] = function() return { "i32.ge_s" } end; - [0x4F] = function() return { "i32.ge_u" } end; - [0x50] = function() return { "i64.eqz" } end; - [0x51] = function() return { "i64.eq" } end; - [0x52] = function() return { "i64.ne" } end; - [0x53] = function() return { "i64.lt_s" } end; - [0x54] = function() return { "i64.lt_u" } end; - [0x55] = function() return { "i64.gt_s" } end; - [0x56] = function() return { "i64.gt_u" } end; - [0x57] = function() return { "i64.le_s" } end; - [0x58] = function() return { "i64.le_u" } end; - [0x59] = function() return { "i64.ge_s" } end; - [0x5A] = function() return { "i64.ge_u" } end; - [0x5B] = function() return { "f32.eq" } end; - [0x5C] = function() return { "f32.ne" } end; - [0x5D] = function() return { "f32.lt" } end; - [0x5E] = function() return { "f32.gt" } end; - [0x5F] = function() return { "f32.le" } end; - [0x60] = function() return { "f32.ge" } end; - [0x61] = function() return { "f64.eq" } end; - [0x62] = function() return { "f64.ne" } end; - [0x63] = function() return { "f64.lt" } end; - [0x64] = function() return { "f64.gt" } end; - [0x65] = function() return { "f64.le" } end; - [0x66] = function() return { "f64.ge" } end; - [0x67] = function() return { "i32.clz" } end; - [0x68] = function() return { "i32.ctz" } end; - [0x69] = function() return { "i32.popcnt" } end; - [0x6A] = function() return { "i32.add" } end; - 
[0x6B] = function() return { "i32.sub" } end; - [0x6C] = function() return { "i32.mul" } end; - [0x6D] = function() return { "i32.div_s" } end; - [0x6E] = function() return { "i32.div_u" } end; - [0x6F] = function() return { "i32.rem_s" } end; - [0x70] = function() return { "i32.rem_u" } end; - [0x71] = function() return { "i32.and" } end; - [0x72] = function() return { "i32.or" } end; - [0x73] = function() return { "i32.xor" } end; - [0x74] = function() return { "i32.shl" } end; - [0x75] = function() return { "i32.shr_s" } end; - [0x76] = function() return { "i32.shr_u" } end; - [0x77] = function() return { "i32.rotl" } end; - [0x78] = function() return { "i32.rotr" } end; - [0x79] = function() return { "i64.clz" } end; - [0x7A] = function() return { "i64.ctz" } end; - [0x7B] = function() return { "i64.popcnt" } end; - [0x7C] = function() return { "i64.add" } end; - [0x7D] = function() return { "i64.sub" } end; - [0x7E] = function() return { "i64.mul" } end; - [0x7F] = function() return { "i64.div_s" } end; - [0x80] = function() return { "i64.div_u" } end; - [0x81] = function() return { "i64.rem_s" } end; - [0x82] = function() return { "i64.rem_u" } end; - [0x83] = function() return { "i64.and" } end; - [0x84] = function() return { "i64.or" } end; - [0x85] = function() return { "i64.xor" } end; - [0x86] = function() return { "i64.shl" } end; - [0x87] = function() return { "i64.shr_s" } end; - [0x88] = function() return { "i64.shr_u" } end; - [0x89] = function() return { "i64.rotl" } end; - [0x8A] = function() return { "i64.rotr" } end; - [0x8B] = function() return { "f32.abs" } end; - [0x8C] = function() return { "f32.neg" } end; - [0x8D] = function() return { "f32.ceil" } end; - [0x8E] = function() return { "f32.floor" } end; - [0x8F] = function() return { "f32.trunc" } end; - [0x90] = function() return { "f32.nearest" } end; - [0x91] = function() return { "f32.sqrt" } end; - [0x92] = function() return { "f32.add" } end; - [0x93] = function() return { "f32.sub" } 
end; - [0x94] = function() return { "f32.mul" } end; - [0x95] = function() return { "f32.div" } end; - [0x96] = function() return { "f32.min" } end; - [0x97] = function() return { "f32.max" } end; - [0x98] = function() return { "f32.copysign" } end; - [0x99] = function() return { "f64.abs" } end; - [0x9A] = function() return { "f64.neg" } end; - [0x9B] = function() return { "f64.ceil" } end; - [0x9C] = function() return { "f64.floor" } end; - [0x9D] = function() return { "f64.trunc" } end; - [0x9E] = function() return { "f64.nearest" } end; - [0x9F] = function() return { "f64.sqrt" } end; - [0xA0] = function() return { "f64.add" } end; - [0xA1] = function() return { "f64.sub" } end; - [0xA2] = function() return { "f64.mul" } end; - [0xA3] = function() return { "f64.div" } end; - [0xA4] = function() return { "f64.min" } end; - [0xA5] = function() return { "f64.max" } end; - [0xA6] = function() return { "f64.copysign" } end; - - [0xA7] = function() return { "i32.wrap_i64" } end; - [0xA8] = function() return { "i32.trunc_f32_s" } end; - [0xA9] = function() return { "i32.trunc_f32_u" } end; - [0xAA] = function() return { "i32.trunc_f64_s" } end; - [0xAB] = function() return { "i32.trunc_f64_u" } end; - [0xAC] = function() return { "i64.extend_i32_s" } end; - [0xAD] = function() return { "i64.extend_i32_u" } end; - [0xAE] = function() return { "i64.trunc_f32_s" } end; - [0xAF] = function() return { "i64.trunc_f32_u" } end; - [0xB0] = function() return { "i64.trunc_f64_s" } end; - [0xB1] = function() return { "i64.trunc_f64_u" } end; - [0xB2] = function() return { "f32.convert_i32_s" } end; - [0xB3] = function() return { "f32.convert_i32_u" } end; - [0xB4] = function() return { "f32.convert_i64_s" } end; - [0xB5] = function() return { "f32.convert_i64_u" } end; - [0xB6] = function() return { "f32.demote_f64" } end; - [0xB7] = function() return { "f64.convert_i32_s" } end; - [0xB8] = function() return { "f64.convert_i32_u" } end; - [0xB9] = function() return { 
"f64.convert_i64_s" } end; - [0xBA] = function() return { "f64.convert_i64_u" } end; - [0xBB] = function() return { "f64.promote_f32" } end; - [0xBC] = function() return { "i32.reinterpret_f32" } end; - [0xBD] = function() return { "i64.reinterpret_f64" } end; - [0xBE] = function() return { "f32.reinterpret_i32" } end; - [0xBF] = function() return { "f64.reinterpret_i64" } end; - } -end - -function parse_memarg(r) - local a = r:read_uint(32) - local o = r:read_uint(32) - - return { "memarg"; align = a; offset = o } -end - -function parse_expr(r) - local instrs = {} - while true do - if r:peek_byte() == 0x0B then - r:read_byte() - break +function func_call(name, nparams, nreturns) + if nparams == nil then nparams = 1 end + if nreturns == nil then nreturns = 1 end + return function(stack, instr) + local str = name .. "(" + local args = "" + for i=1,nparams do + local val = stack:pop() + args = ", " .. val .. args + end + str = str .. args:sub(3) .. ")" + if nreturns == 0 then + return str else - table.insert(instrs, parse_instr(r)) + stack:push(str) end end - - return instrs -end - -function parse_typeidx(r) return r:read_uint(32) end -function parse_funcidx(r) return r:read_uint(32) end -function parse_tableidx(r) return r:read_uint(32) end -function parse_memidx(r) return r:read_uint(32) end -function parse_globalidx(r) return r:read_uint(32) end -function parse_localidx(r) return r:read_uint(32) end -function parse_labelidx(r) - local idx = r:read_uint(32) - local block = r.label_stack:at(idx) - if block == nil then block = { label = "return " } end - return { "labelidx", labelidx = idx, block = block } end -function parse_section(r, expectN, B) - if r:peek_byte() ~= expectN then return end - - local N = r:read_byte() - local size = r:read_uint(32) - local cont = B(r) - - return { "section", contents = cont, size = size } -end - -function parse_customsec(r) - local csect = parse_section(r, 0, parse_custom) - if not csect then return nil end - - for i=1, csect.size 
do - -- Discard csect.size bytes - r:read_byte() +function const(t) + return function(stack, instr) + stack:push(tostring(instr.x) .. t) end - return csect end -function parse_custom(r) - local name = parse_name(r) - - return { "custom", name = name } -end - -function parse_typesec(r) - return parse_section(r, 1, parse_vector(parse_functype)) +function expr_return(stack, instr) + if stack:size() >= 1 then + return "return " .. stack:pop() + else + return "return" + end end -function parse_importsec(r) - return parse_section(r, 2, parse_vector(parse_import)) +function expr_br(stack, instr) + return "br <" .. instr.x.block.label .. ">" end -function parse_import(r) - local mod = parse_name(r) - local nm = parse_name(r) - local d = parse_importdesc(r) - - return { "import", mod = mod, name = nm, desc = d } +function expr_br_if(stack, instr) + return "br_if <" .. instr.x.block.label .. "> (" .. stack:pop() .. ")" end -function parse_importdesc(r) - local t = r:read_byte() - if t == 0x00 then return { "func", x = parse_typeidx(r) } - elseif t == 0x01 then return { "table", tt = parse_tabletype(r) } - elseif t == 0x02 then return { "mem", mt = parse_memtype(r) } - elseif t == 0x03 then return { "global", gt = parse_globaltype(r) } +function expr_br_table(stack, instr) + local lines = {} + table.insert(lines, "br_table (" .. stack:pop() .. ") {") + local mappings = {} + for i, lab in ipairs(instr.labels) do + table.insert(mappings, tostring(i - 1) .. " => <" .. lab.block.label .. ">") end - - error("bad importdesc") + table.insert(mappings, "default => <" .. instr.labeln.block.label .. ">") + for _, line in ipairs(indent(mappings, " ")) do + table.insert(lines, line) + end + table.insert(lines, "}") + return lines end -function parse_funcsec(r) - return parse_section(r, 3, parse_vector(parse_typeidx)) +function expr_drop(stack, instr) + return "drop (" .. stack:pop() .. 
")" end -function parse_tablesec(r) - return parse_section(r, 4, parse_vector(parse_table)) +function expr_select(stack, instr) + stack:push("select (" .. stack:pop() .. [[) { + 0: ]] .. stack:pop() .. [[ + 1: ]] .. stack:pop() .. [[ +}]]) end -function parse_table(r) - local tt = parse_tabletype(r) - return { "table", type_ = tt } +function expr_local_get(stack, instr, func) + stack:push(func.locals[instr.x + 1].name) end -function parse_memsec(r) - return parse_section(r, 5, parse_vector(parse_mem)) +function expr_local_set(stack, instr, func) + local varname = func.locals[instr.x + 1].name + return varname .. " = " .. stack:pop() end -function parse_mem(r) - local mt = parse_memtype(r) - return { "mem", type_ = mt } +function expr_local_tee(stack, instr, func) + local varname = func.locals[instr.x + 1].name + stack:push(varname .. " = " .. stack:pop()) end -function parse_globalsec(r) - return parse_section(r, 6, parse_vector(parse_global)) +function expr_global_get(stack, instr) + stack:push("global" .. instr.x) end -function parse_global(r) - local gt = parse_globaltype(r) - local e = parse_expr(r) - return { "global", type_ = gt, init = e } +function expr_global_set(stack, instr) + return "global" .. instr.x .. " = " .. stack:pop() end -function parse_exportsec(r) - return parse_section(r, 7, parse_vector(parse_export)) +function expr_load(stack, instr) + stack:push("[" .. stack:pop() .. "]") end -function parse_export(r) - local nm = parse_name(r) - local d = parse_exportdesc(r) - return { "export", name = nm, desc = d } +function expr_store(stack, instr) + local value = stack:pop() + local location = stack:pop() + return "[" .. location .. "] <- " .. 
value end -function parse_exportdesc(r) - local t = r:read_byte() - if t == 0x00 then return { "func", x = parse_typeidx(r) } - elseif t == 0x01 then return { "table", tt = parse_tableidx(r) } - elseif t == 0x02 then return { "mem", mt = parse_memidx(r) } - elseif t == 0x03 then return { "global", gt = parse_globalidx(r) } +function indent(lines, prefix) + local newlines = {} + for _, line in ipairs(lines) do + table.insert(newlines, prefix .. line) + end + return newlines +end + +function expr_block(stack, instr, func, mod) + local lines = indent( + build_instr_list(instr.instrs, func, mod), + " " + ) + table.insert(lines, 1, "block <" .. instr.label .. "> {") + table.insert(lines, "}") + if #instr.rt == 0 then + return lines + else + stack:push(table.concat(lines, "\n")) end - - error("bad exportdesc: ", t) -end - -function parse_startsec(r) - return parse_section(r, 8, parse_start) -end - -function parse_start(r) - local x = parse_funcidx(r) - return { "start", func = x } -end - -function parse_elemsec(r) - return parse_section(r, 9, parse_vector(parse_elem)) -end - -function parse_elem(r) - local x = parse_tableidx(r) - local e = parse_expr(r) - local y = parse_vector(parse_funcidx)(r) - - return { "elem", table = x, offset = e, init = y } -end - -function parse_codesec(r) - return parse_section(r, 10, parse_vector(parse_code)) end -function parse_code(r) - local size = r:read_uint(32) - local code = parse_func(r) - return code +function expr_loop(stack, instr, func, mod) + local lines = indent( + build_instr_list(instr.instrs, func, mod), + " " + ) + table.insert(lines, 1, "loop <" .. instr.label .. 
"> {") + table.insert(lines, "}") + if #instr.rt == 0 then + return lines + else + stack:push(table.concat(lines, "\n")) + end end -function parse_func(r) - local t = parse_vector(parse_locals)(r) - local e = parse_expr(r) - - local localidx = 0 - local locals = {} - for _, v in ipairs(t) do - for _, l in ipairs(v) do - l.localidx = localidx - table.insert(locals, l) - localidx = localidx + 1 +function expr_if(stack, instr, func, mod) + local lines = indent( + build_instr_list(instr.instrs, func, mod), + " " + ) + table.insert(lines, 1, "if <" .. instr.label .. "> (" .. stack:pop() .. ") {") + if #instr.else_instrs > 0 then + table.insert(lines, "} else {") + local else_lines = indent( + build_instr_list(instr.else_instrs, func, mod), + " " + ) + for _, line in ipairs(else_lines) do + table.insert(lines, line) end end - - return { "func", locals = locals, body = e } + table.insert(lines, "}") + if #instr.rt == 0 then + return lines + else + stack:push(table.concat(lines, "\n")) + end end -function parse_locals(r) - local n = r:read_uint(32) - local t = parse_valtype(r) - - --TODO: Make a list of values with names like local0, local1, ... +function expr_call(stack, instr, func, mod) + local nparams = #mod.funcs[instr.x].type_.param_types + local nreturns = #mod.funcs[instr.x].type_.result_types + local name = mod.funcs[instr.x].name - local locals = {} - for i = 0, n - 1 do - table.insert(locals, { - name = "local" .. 
i, - type_ = t, - localidx = i - }) + local res = func_call(name, nparams, nreturns)(stack, instr, func, mod) + if nreturns >= 1 then + stack:push(res) + else + return res end - - return locals end -function parse_datasec(r) - return parse_section(r, 11, parse_vector(parse_data)) -end - -function parse_data(r) - local x = parse_memidx(r) - local e = parse_expr(r) - local b = parse_vector(parse_byte)(r) - - return { "data", data = x, offset = e, init = b } +function expr_call_indirect(stack, instr, func, mod) + local type_ = mod.types.contents[instr.x + 1] + local nparams = #type_.param_types + local nreturns = #type_.result_types + local addr = stack:pop() + local res = func_call("[" .. addr .. "]", nparams, nreturns)(stack, instr, func, mod) + if nreturns >= 1 then + stack:push(res) + else + return res + end end -function parse_magic(r) - assert(r:read_byte() == 0x00, "magic string is wrong") - assert(r:read_byte() == 0x61, "magic string is wrong") - assert(r:read_byte() == 0x73, "magic string is wrong") - assert(r:read_byte() == 0x6D, "magic string is wrong") -end +function expr_nop() return "nop" end +function expr_unreachable() return "unreachable" end + +local expr_generators = { +-- Name Inputs, Outputs + ["unreachable"] = expr_unreachable; + ["nop"] = expr_nop; + + ["block"] = expr_block; + ["loop"] = expr_loop; + ["if"] = expr_if; + + ["br"] = expr_br; + ["br_if"] = expr_br_if; + ["br_table"] = expr_br_table; + ["return"] = expr_return; + ["call"] = expr_call; + ["call_indirect"] = expr_call_indirect; + + ["drop"] = expr_drop; + ["select"] = expr_select; + + ["local.get"] = expr_local_get; + ["local.set"] = expr_local_set; + ["local.tee"] = expr_local_tee; + ["global.get"] = expr_global_get; + ["global.set"] = expr_global_set; + + ["i32.load"] = expr_load; + ["i64.load"] = expr_load; + ["f32.load"] = expr_load; + ["f64.load"] = expr_load; + ["i32.load8_s"] = expr_load; + ["i32.load8_u"] = expr_load; + ["i32.load16_s"] = expr_load; + ["i32.load16_u"] = 
expr_load; + ["i64.load8_s"] = expr_load; + ["i64.load8_u"] = expr_load; + ["i64.load16_s"] = expr_load; + ["i64.load16_u"] = expr_load; + ["i64.load32_s"] = expr_load; + ["i64.load32_u"] = expr_load; + ["i32.store"] = expr_store; + ["i64.store"] = expr_store; + ["f32.store"] = expr_store; + ["f64.store"] = expr_store; + ["i32.store8"] = expr_store; + ["i32.store16"] = expr_store; + ["i64.store8"] = expr_store; + ["i64.store16"] = expr_store; + ["i64.store32"] = expr_store; + + ["memory.size"] = func_call("memory.size", 0); + ["memory.grow"] = func_call "memory.grow"; + + ["i32.const"] = const "i32"; + ["i64.const"] = const "i64"; + ["f32.const"] = const "f32"; + ["f64.const"] = const "f64"; + + ["i32.eqz"] = func_call "eqz"; + ["i32.eq"] = binop "=="; + ["i32.ne"] = binop "!="; + ["i32.lt_s"] = binop "<"; + ["i32.lt_u"] = binop "<"; + ["i32.gt_s"] = binop ">"; + ["i32.gt_u"] = binop ">"; + ["i32.le_s"] = binop "<="; + ["i32.le_u"] = binop "<="; + ["i32.ge_s"] = binop ">="; + ["i32.ge_u"] = binop ">="; + ["i64.eqz"] = func_call "eqz"; + ["i64.eq"] = binop "=="; + ["i64.ne"] = binop "!="; + ["i64.lt_s"] = binop "<"; + ["i64.lt_u"] = binop "<"; + ["i64.gt_s"] = binop ">"; + ["i64.gt_u"] = binop ">"; + ["i64.le_s"] = binop "<="; + ["i64.le_u"] = binop "<="; + ["i64.ge_s"] = binop ">="; + ["i64.ge_u"] = binop ">="; + ["f32.eq"] = binop "=="; + ["f32.ne"] = binop "!="; + ["f32.lt"] = binop "<"; + ["f32.gt"] = binop ">"; + ["f32.le"] = binop "<="; + ["f32.ge"] = binop ">="; + ["f64.eq"] = binop "=="; + ["f64.ne"] = binop "!="; + ["f64.lt"] = binop "<"; + ["f64.gt"] = binop ">"; + ["f64.le"] = binop "<="; + ["f64.ge"] = binop ">="; + ["i32.clz"] = func_call "clz"; + ["i32.ctz"] = func_call "ctz"; + ["i32.popcnt"] = func_call "popcnt"; + ["i32.add"] = binop "+"; + ["i32.sub"] = binop "-"; + ["i32.mul"] = binop "*"; + ["i32.div_s"] = binop "/"; + ["i32.div_u"] = binop "/"; + ["i32.rem_s"] = binop "%"; + ["i32.rem_u"] = binop "%"; + ["i32.and"] = binop "&"; + ["i32.or"] = 
binop "|"; + ["i32.xor"] = binop "^"; + ["i32.shl"] = binop "<<"; + ["i32.shr_s"] = binop ">>"; + ["i32.shr_u"] = binop ">>>"; + ["i32.rotl"] = binop "rotl"; + ["i32.rotr"] = binop "rotr"; + ["i64.clz"] = func_call "clz"; + ["i64.ctz"] = func_call "ctz"; + ["i64.popcnt"] = func_call "popcnt"; + ["i64.add"] = binop "+"; + ["i64.sub"] = binop "-"; + ["i64.mul"] = binop "*"; + ["i64.div_s"] = binop "/"; + ["i64.div_u"] = binop "/"; + ["i64.rem_s"] = binop "%"; + ["i64.rem_u"] = binop "%"; + ["i64.and"] = binop "&"; + ["i64.or"] = binop "|"; + ["i64.xor"] = binop "^"; + ["i64.shl"] = binop "<<"; + ["i64.shr_s"] = binop ">>"; + ["i64.shr_u"] = binop ">>>"; + ["i64.rotl"] = binop "rotl"; + ["i64.rotr"] = binop "rotr"; + ["f32.abs"] = func_call "abs"; + ["f32.neg"] = func_call "-"; + ["f32.ceil"] = func_call "ceil"; + ["f32.floor"] = func_call "floor"; + ["f32.trunc"] = func_call "trunc"; + ["f32.nearest"] = func_call "nearest"; + ["f32.sqrt"] = func_call "sqrt"; + ["f32.add"] = binop "+"; + ["f32.sub"] = binop "-"; + ["f32.mul"] = binop "*"; + ["f32.div"] = binop "/"; + ["f32.min"] = func_call("min", 2); + ["f32.max"] = func_call("max", 2); + ["f32.copysign"] = func_call("copysign", 2); + ["f64.abs"] = func_call "abs"; + ["f64.neg"] = func_call "-"; + ["f64.ceil"] = func_call "ceil"; + ["f64.floor"] = func_call "floor"; + ["f64.trunc"] = func_call "trunc"; + ["f64.nearest"] = func_call "nearest"; + ["f64.sqrt"] = func_call "sqrt"; + ["f64.add"] = binop "+"; + ["f64.sub"] = binop "-"; + ["f64.mul"] = binop "*"; + ["f64.div"] = binop "/"; + ["f64.min"] = func_call("min", 2); + ["f64.max"] = func_call("max", 2); + ["f64.copysign"] = func_call("copysign", 2); + + ["i32.wrap_i64"] = cast "i32"; + ["i32.trunc_f32_s"] = cast "i32"; + ["i32.trunc_f32_u"] = cast "i32"; + ["i32.trunc_f64_s"] = cast "i32"; + ["i32.trunc_f64_u"] = cast "i32"; + ["i64.extend_i32_s"] = cast "i64"; + ["i64.extend_i32_u"] = cast "i64"; + ["i64.trunc_f32_s"] = cast "i64"; + ["i64.trunc_f32_u"] = cast 
"i64"; + ["i64.trunc_f64_s"] = cast "i64"; + ["i64.trunc_f64_u"] = cast "i64"; + ["f32.convert_i32_s"] = cast "f32"; + ["f32.convert_i32_u"] = cast "f32"; + ["f32.convert_i64_s"] = cast "f32"; + ["f32.convert_i64_u"] = cast "f32"; + ["f32.demote_f64"] = cast "f32"; + ["f64.convert_i32_s"] = cast "f64"; + ["f64.convert_i32_u"] = cast "f64"; + ["f64.convert_i64_s"] = cast "f64"; + ["f64.convert_i64_u"] = cast "f64"; + ["f64.promote_f32"] = cast "f64"; + ["i32.reinterpret_f32"] = cast "i32"; + ["i64.reinterpret_f64"] = cast "i64"; + ["f32.reinterpret_i32"] = cast "f32"; + ["f64.reinterpret_i64"] = cast "f64"; +} -function parse_version(r) - assert(r:read_byte() == 0x01, "version is wrong") - assert(r:read_byte() == 0x00, "version is wrong") - assert(r:read_byte() == 0x00, "version is wrong") - assert(r:read_byte() == 0x00, "version is wrong") -end +function build_instr_list(instrs, func, mod) + local lines = {} + local stack = Stack() -function parse_module(r) - parse_magic(r) - parse_version(r) - - local functypes = parse_typesec(r) - local imports = parse_importsec(r) - local typeidxs = parse_funcsec(r) - local tables = parse_tablesec(r) - local mems = parse_memsec(r) - local globals = parse_globalsec(r) - local exports = parse_exportsec(r) - local start = parse_startsec(r) - local elems = parse_elemsec(r) - local codes = parse_codesec(r) - local data = parse_datasec(r) - - local funcs = {} - local funcidx = 0 - - if imports then - for k, v in ipairs(imports.contents) do - if v.desc[1] == "func" then - funcs[funcidx] = { - name = build_str(v.mod) .. "." .. 
build_str(v.name); - funcidx = funcidx; - type_ = functypes.contents[v.desc.x + 1]; - imported = true; - } - funcidx = funcidx + 1 - end + for _, instr in ipairs(instrs) do + local res = expr_generators[instr[1]](stack, instr, func, mod) + if type(res) == "string" then + table.insert(lines, res) + elseif type(res) == "table" then + for _, line in ipairs(res) do table.insert(lines, line) end end end - if codes then - for i=1, #codes.contents do - local locals = codes.contents[i].locals; - local type_ = functypes.contents[typeidxs.contents[i] + 1]; - local param_types = type_.param_types; - - local new_locals = {} - local new_local_idx = 0 - for _, p in ipairs(param_types) do - table.insert(new_locals, { - name = "param" .. new_local_idx, - type_ = p; - localidx = new_local_idx - }) - new_local_idx = new_local_idx + 1 - end - for _, l in ipairs(locals) do - l.localidx = new_local_idx - table.insert(new_locals, l) - new_local_idx = new_local_idx + 1 - end - - funcs[funcidx] = { - funcidx = funcidx; - name = "func" .. 
funcidx; - type_ = functypes.contents[typeidxs.contents[i] + 1]; - locals = new_locals; - body = codes.contents[i].body; - imported = false; - } - funcidx = funcidx + 1 + if stack:size() ~= 0 then + while not stack:empty() do + table.insert(lines, stack:pop()) end end - return { - "module"; - types = functypes; - tables = tables; - mems = mems; - globals = globals; - exports = exports; - start = start; - elems = elems; - data = data; - imports = imports; - funcs = funcs; - } + return lines end +function decompile_func(wasm_func, wasm_mod) + local lines = build_instr_list(wasm_func.body, wasm_func, wasm_mod) --- Parser util class used for interfacing --- with the parse_helper c library -class "Parser" { - init = function(self, filename) - self.wasm_file = parse_helper.open_file(filename) - self.label_stack = Stack() - end; - - read_byte = function(self) - return parse_helper.read_byte(self.wasm_file) - end; - - peek_byte = function(self) - return parse_helper.peek_byte(self.wasm_file) - end; - - -- NOTE: N is unused - read_uint = function(self, N) - return parse_helper.parse_uleb128(self.wasm_file) - end; - - read_sint = function(self, N) - return parse_helper.parse_sleb128(self.wasm_file, N) - end; - - read_float = function(self, N) - if N >= 32 then - self:read_byte() - self:read_byte() - self:read_byte() - self:read_byte() - end - if N >= 64 then - self:read_byte() - self:read_byte() - self:read_byte() - self:read_byte() - end - return 0.0; - end; -} - -function decompile(filepath) - local reader = Parser(filepath) - - return parse_module(reader) + return lines end -return module { decompile } +return module { + decompile_func = decompile_func; + build_instr_list = build_instr_list; +} diff --git a/src/wasm/exprs.lua b/src/wasm/exprs.lua deleted file mode 100644 index 85c0b6d..0000000 --- a/src/wasm/exprs.lua +++ /dev/null @@ -1,448 +0,0 @@ -import { - Stack = "src.utils:Stack"; - pprint = "lualib.pprint"; -} - - ---[[ -Expressions in wasm are built off of a 
stack, so... -i32.const 1 -i32.const 3 -i32.mul -i32.const 8 -i32.add - -would translate to - -i32.add (i32.mul (i32.const 1) (i32.const 3)) (i32.const 8). - -Every instruction has some number of inputs and outputs to the stack. - inputs outputs -i32.const 0 1 -i32.add 2 1 - -Currently every instruction can output at most one thing to the stack. -This should be quick, just need to look up all the instructions - ---]] - -local build_instr_list - -function binop(op) - return function(stack, instr) - local right = stack:pop() - local left = stack:pop() - stack:push("(" .. left .. " " .. op .. " " .. right .. ")") - end -end - -function cast(to) - return function(stack, instr) - local thing = stack:pop() - stack:push("<" .. to .. ">" .. thing) - end -end - -function func_call(name, nparams, nreturns) - if nparams == nil then nparams = 1 end - if nreturns == nil then nreturns = 1 end - return function(stack, instr) - local str = name .. "(" - local args = "" - for i=1,nparams do - local val = stack:pop() - args = ", " .. val .. args - end - str = str .. args:sub(3) .. ")" - if nreturns == 0 then - return str - else - stack:push(str) - end - end -end - -function const(t) - return function(stack, instr) - stack:push(tostring(instr.x) .. t) - end -end - -function expr_return(stack, instr) - if stack:size() >= 1 then - return "return " .. stack:pop() - else - return "return" - end -end - -function expr_br(stack, instr) - return "br <" .. instr.x.block.label .. ">" -end - -function expr_br_if(stack, instr) - return "br_if <" .. instr.x.block.label .. "> (" .. stack:pop() .. ")" -end - -function expr_br_table(stack, instr) - local lines = {} - table.insert(lines, "br_table (" .. stack:pop() .. ") {") - local mappings = {} - for i, lab in ipairs(instr.labels) do - table.insert(mappings, tostring(i - 1) .. " => <" .. lab.block.label .. ">") - end - table.insert(mappings, "default => <" .. instr.labeln.block.label .. 
">") - for _, line in ipairs(indent(mappings, " ")) do - table.insert(lines, line) - end - table.insert(lines, "}") - return lines -end - -function expr_drop(stack, instr) - return "drop (" .. stack:pop() .. ")" -end - -function expr_select(stack, instr) - stack:push("select (" .. stack:pop() .. [[) { - 0: ]] .. stack:pop() .. [[ - 1: ]] .. stack:pop() .. [[ -}]]) -end - -function expr_local_get(stack, instr, func) - stack:push(func.locals[instr.x + 1].name) -end - -function expr_local_set(stack, instr, func) - local varname = func.locals[instr.x + 1].name - return varname .. " = " .. stack:pop() -end - -function expr_local_tee(stack, instr, func) - local varname = func.locals[instr.x + 1].name - stack:push(varname .. " = " .. stack:pop()) -end - -function expr_global_get(stack, instr) - stack:push("global" .. instr.x) -end - -function expr_global_set(stack, instr) - return "global" .. instr.x .. " = " .. stack:pop() -end - -function expr_load(stack, instr) - stack:push("[" .. stack:pop() .. "]") -end - -function expr_store(stack, instr) - local value = stack:pop() - local location = stack:pop() - return "[" .. location .. "] <- " .. value -end - -function indent(lines, prefix) - local newlines = {} - for _, line in ipairs(lines) do - table.insert(newlines, prefix .. line) - end - return newlines -end - -function expr_block(stack, instr, func, mod) - local lines = indent( - build_instr_list(instr.instrs, func, mod), - " " - ) - table.insert(lines, 1, "block <" .. instr.label .. "> {") - table.insert(lines, "}") - if #instr.rt == 0 then - return lines - else - stack:push(table.concat(lines, "\n")) - end -end - -function expr_loop(stack, instr, func, mod) - local lines = indent( - build_instr_list(instr.instrs, func, mod), - " " - ) - table.insert(lines, 1, "loop <" .. instr.label .. 
"> {") - table.insert(lines, "}") - if #instr.rt == 0 then - return lines - else - stack:push(table.concat(lines, "\n")) - end -end - -function expr_if(stack, instr, func, mod) - local lines = indent( - build_instr_list(instr.instrs, func, mod), - " " - ) - table.insert(lines, 1, "if <" .. instr.label .. "> (" .. stack:pop() .. ") {") - if #instr.else_instrs > 0 then - table.insert(lines, "} else {") - local else_lines = indent( - build_instr_list(instr.else_instrs, func, mod), - " " - ) - for _, line in ipairs(else_lines) do - table.insert(lines, line) - end - end - table.insert(lines, "}") - if #instr.rt == 0 then - return lines - else - stack:push(table.concat(lines, "\n")) - end -end - -function expr_call(stack, instr, func, mod) - local nparams = #mod.funcs[instr.x].type_.param_types - local nreturns = #mod.funcs[instr.x].type_.result_types - local name = mod.funcs[instr.x].name - - local res = func_call(name, nparams, nreturns)(stack, instr, func, mod) - if nreturns >= 1 then - stack:push(res) - else - return res - end -end - -function expr_call_indirect(stack, instr, func, mod) - local type_ = mod.types.contents[instr.x + 1] - local nparams = #type_.param_types - local nreturns = #type_.result_types - local addr = stack:pop() - local res = func_call("[" .. addr .. 
"]", nparams, nreturns)(stack, instr, func, mod) - if nreturns >= 1 then - stack:push(res) - else - return res - end -end - -function expr_nop() return "nop" end -function expr_unreachable() return "unreachable" end - -local expr_generators = { --- Name Inputs, Outputs - ["unreachable"] = expr_unreachable; - ["nop"] = expr_nop; - - ["block"] = expr_block; - ["loop"] = expr_loop; - ["if"] = expr_if; - - ["br"] = expr_br; - ["br_if"] = expr_br_if; - ["br_table"] = expr_br_table; - ["return"] = expr_return; - ["call"] = expr_call; - ["call_indirect"] = expr_call_indirect; - - ["drop"] = expr_drop; - ["select"] = expr_select; - - ["local.get"] = expr_local_get; - ["local.set"] = expr_local_set; - ["local.tee"] = expr_local_tee; - ["global.get"] = expr_global_get; - ["global.set"] = expr_global_set; - - ["i32.load"] = expr_load; - ["i64.load"] = expr_load; - ["f32.load"] = expr_load; - ["f64.load"] = expr_load; - ["i32.load8_s"] = expr_load; - ["i32.load8_u"] = expr_load; - ["i32.load16_s"] = expr_load; - ["i32.load16_u"] = expr_load; - ["i64.load8_s"] = expr_load; - ["i64.load8_u"] = expr_load; - ["i64.load16_s"] = expr_load; - ["i64.load16_u"] = expr_load; - ["i64.load32_s"] = expr_load; - ["i64.load32_u"] = expr_load; - ["i32.store"] = expr_store; - ["i64.store"] = expr_store; - ["f32.store"] = expr_store; - ["f64.store"] = expr_store; - ["i32.store8"] = expr_store; - ["i32.store16"] = expr_store; - ["i64.store8"] = expr_store; - ["i64.store16"] = expr_store; - ["i64.store32"] = expr_store; - - ["memory.size"] = func_call("memory.size", 0); - ["memory.grow"] = func_call "memory.grow"; - - ["i32.const"] = const "i32"; - ["i64.const"] = const "i64"; - ["f32.const"] = const "f32"; - ["f64.const"] = const "f64"; - - ["i32.eqz"] = func_call "eqz"; - ["i32.eq"] = binop "=="; - ["i32.ne"] = binop "!="; - ["i32.lt_s"] = binop "<"; - ["i32.lt_u"] = binop "<"; - ["i32.gt_s"] = binop ">"; - ["i32.gt_u"] = binop ">"; - ["i32.le_s"] = binop "<="; - ["i32.le_u"] = binop "<="; - 
["i32.ge_s"] = binop ">="; - ["i32.ge_u"] = binop ">="; - ["i64.eqz"] = func_call "eqz"; - ["i64.eq"] = binop "=="; - ["i64.ne"] = binop "!="; - ["i64.lt_s"] = binop "<"; - ["i64.lt_u"] = binop "<"; - ["i64.gt_s"] = binop ">"; - ["i64.gt_u"] = binop ">"; - ["i64.le_s"] = binop "<="; - ["i64.le_u"] = binop "<="; - ["i64.ge_s"] = binop ">="; - ["i64.ge_u"] = binop ">="; - ["f32.eq"] = binop "=="; - ["f32.ne"] = binop "!="; - ["f32.lt"] = binop "<"; - ["f32.gt"] = binop ">"; - ["f32.le"] = binop "<="; - ["f32.ge"] = binop ">="; - ["f64.eq"] = binop "=="; - ["f64.ne"] = binop "!="; - ["f64.lt"] = binop "<"; - ["f64.gt"] = binop ">"; - ["f64.le"] = binop "<="; - ["f64.ge"] = binop ">="; - ["i32.clz"] = func_call "clz"; - ["i32.ctz"] = func_call "ctz"; - ["i32.popcnt"] = func_call "popcnt"; - ["i32.add"] = binop "+"; - ["i32.sub"] = binop "-"; - ["i32.mul"] = binop "*"; - ["i32.div_s"] = binop "/"; - ["i32.div_u"] = binop "/"; - ["i32.rem_s"] = binop "%"; - ["i32.rem_u"] = binop "%"; - ["i32.and"] = binop "&"; - ["i32.or"] = binop "|"; - ["i32.xor"] = binop "^"; - ["i32.shl"] = binop "<<"; - ["i32.shr_s"] = binop ">>"; - ["i32.shr_u"] = binop ">>>"; - ["i32.rotl"] = binop "rotl"; - ["i32.rotr"] = binop "rotr"; - ["i64.clz"] = func_call "clz"; - ["i64.ctz"] = func_call "ctz"; - ["i64.popcnt"] = func_call "popcnt"; - ["i64.add"] = binop "+"; - ["i64.sub"] = binop "-"; - ["i64.mul"] = binop "*"; - ["i64.div_s"] = binop "/"; - ["i64.div_u"] = binop "/"; - ["i64.rem_s"] = binop "%"; - ["i64.rem_u"] = binop "%"; - ["i64.and"] = binop "&"; - ["i64.or"] = binop "|"; - ["i64.xor"] = binop "^"; - ["i64.shl"] = binop "<<"; - ["i64.shr_s"] = binop ">>"; - ["i64.shr_u"] = binop ">>>"; - ["i64.rotl"] = binop "rotl"; - ["i64.rotr"] = binop "rotr"; - ["f32.abs"] = func_call "abs"; - ["f32.neg"] = func_call "-"; - ["f32.ceil"] = func_call "ceil"; - ["f32.floor"] = func_call "floor"; - ["f32.trunc"] = func_call "trunc"; - ["f32.nearest"] = func_call "nearest"; - ["f32.sqrt"] = func_call 
"sqrt"; - ["f32.add"] = binop "+"; - ["f32.sub"] = binop "-"; - ["f32.mul"] = binop "*"; - ["f32.div"] = binop "/"; - ["f32.min"] = func_call("min", 2); - ["f32.max"] = func_call("max", 2); - ["f32.copysign"] = func_call("copysign", 2); - ["f64.abs"] = func_call "abs"; - ["f64.neg"] = func_call "-"; - ["f64.ceil"] = func_call "ceil"; - ["f64.floor"] = func_call "floor"; - ["f64.trunc"] = func_call "trunc"; - ["f64.nearest"] = func_call "nearest"; - ["f64.sqrt"] = func_call "sqrt"; - ["f64.add"] = binop "+"; - ["f64.sub"] = binop "-"; - ["f64.mul"] = binop "*"; - ["f64.div"] = binop "/"; - ["f64.min"] = func_call("min", 2); - ["f64.max"] = func_call("max", 2); - ["f64.copysign"] = func_call("copysign", 2); - - ["i32.wrap_i64"] = cast "i32"; - ["i32.trunc_f32_s"] = cast "i32"; - ["i32.trunc_f32_u"] = cast "i32"; - ["i32.trunc_f64_s"] = cast "i32"; - ["i32.trunc_f64_u"] = cast "i32"; - ["i64.extend_i32_s"] = cast "i64"; - ["i64.extend_i32_u"] = cast "i64"; - ["i64.trunc_f32_s"] = cast "i64"; - ["i64.trunc_f32_u"] = cast "i64"; - ["i64.trunc_f64_s"] = cast "i64"; - ["i64.trunc_f64_u"] = cast "i64"; - ["f32.convert_i32_s"] = cast "f32"; - ["f32.convert_i32_u"] = cast "f32"; - ["f32.convert_i64_s"] = cast "f32"; - ["f32.convert_i64_u"] = cast "f32"; - ["f32.demote_f64"] = cast "f32"; - ["f64.convert_i32_s"] = cast "f64"; - ["f64.convert_i32_u"] = cast "f64"; - ["f64.convert_i64_s"] = cast "f64"; - ["f64.convert_i64_u"] = cast "f64"; - ["f64.promote_f32"] = cast "f64"; - ["i32.reinterpret_f32"] = cast "i32"; - ["i64.reinterpret_f64"] = cast "i64"; - ["f32.reinterpret_i32"] = cast "f32"; - ["f64.reinterpret_i64"] = cast "f64"; -} - -function build_instr_list(instrs, func, mod) - local lines = {} - local stack = Stack() - - for _, instr in ipairs(instrs) do - local res = expr_generators[instr[1]](stack, instr, func, mod) - if type(res) == "string" then - table.insert(lines, res) - elseif type(res) == "table" then - for _, line in ipairs(res) do table.insert(lines, line) end 
- end - end - - if stack:size() ~= 0 then - while not stack:empty() do - table.insert(lines, stack:pop()) - end - end - - return lines -end - -function build_expr(wasm_func, wasm_mod) - local lines = build_instr_list(wasm_func.body, wasm_func, wasm_mod) - - return lines -end - -return module { - build_expr = build_expr; - build_instr_list = build_instr_list; -} diff --git a/src/wasm/parse.lua b/src/wasm/parse.lua new file mode 100644 index 0000000..0f74b4e --- /dev/null +++ b/src/wasm/parse.lua @@ -0,0 +1,743 @@ +-- Recursive decent parser for the WASM v1.0 binary format +-- Brendan Hansen 2020 + +-- Look in clib folder for C-based libraries +package.cpath = package.cpath .. [[;./clib/?.so]] + +import { + parse_helper = "parse_helper"; + pprint = "lualib.pprint"; + + build_str = "src.utils:build_str"; + random_str = "src.utils:random_str"; + Stack = "src.utils:Stack"; +} + +function parse_valtype(r) + local val = r:read_byte() + + local valtypes_map = { + [0x7f] = "i32"; + [0x7E] = "i64"; + [0x7D] = "f32"; + [0x7C] = "f64"; + } + + return valtypes_map[val] +end + +function parse_vector(tp) + return function(r) + local n = r:read_uint(32) + + local v = {} + for i=1, n do + table.insert(v, tp(r)) + end + + return v + end +end + +function parse_byte(r) + return r:read_byte() +end + +function parse_name(r) + local name = parse_vector(parse_byte)(r) + return name +end + +function parse_blocktype(r) + if r:peek_byte() == 0x40 then + r:read_byte() + return {} + else + return { parse_valtype(r) } + end +end + +function parse_functype(r) + assert(r:read_byte() == 0x60, "functype expected 0x60 as first byte") + + local params = parse_vector(parse_valtype)(r) + local results = parse_vector(parse_valtype)(r) + + return { param_types = params, result_types = results } +end + +function parse_limits(r) + local t = r:read_byte() + + local min = r:read_uint(32) + local max = nil + if t == 0x01 then + max = r:read_uint(32) + end + + return { min = min, max = max } +end + +function 
parse_memtype(r) + local lims = parse_limits(r) + + return lims +end + +function parse_tabletype(r) + local elemtype = parse_elemtype(r) + local limits = parse_limits(r) + + return { lim = limits, et = elemtype } +end + +function parse_elemtype(r) + assert(r:read_byte() == 0x70, "elemtype should be 0x70") + return "funcref" +end + +function parse_globaltype(r) + local valtype = parse_valtype(r) + local ismut = parse_mut(r) == 0x01 + + return { t = valtype, m = ismut } +end + +function parse_mut(r) + local v = r:read_byte() + assert(v == 0x00 or v == 0x01, "mut should be 0x00 or 0x01") + return v +end + +function parse_instr(r) + local instr = r:read_byte() + + return match(instr) { + [0x00] = function() return { "unreachable" } end; + [0x01] = function() return { "nop" } end; + [0x02] = function() + local rt = parse_blocktype(r) + local instrs = {} + local name = random_str(8) + + local block = { "block", label = name, rt = rt } + r.label_stack:push(block) + + while true do + if r:peek_byte() == 0x0B then + r:read_byte() + break + else + table.insert(instrs, parse_instr(r)) + end + end + + r.label_stack:pop() + + block.instrs = instrs + return block + end; + [0x03] = function() + local rt = parse_blocktype(r) + local instrs = {} + local name = random_str(8) + + local block = { "loop", label = name, rt = rt } + r.label_stack:push(block) + + while true do + if r:peek_byte() == 0x0B then + r:read_byte() + break + else + table.insert(instrs, parse_instr(r)) + end + end + + r.label_stack:pop() + + block.instrs = instrs + return block + end; + [0x04] = function() + local rt = parse_blocktype(r) + local instrs = {} + local else_instrs = {} + local inelse = false + local name = random_str(8) + + local block = { "if", label = name, rt = rt } + r.label_stack:push(block) + + while true do + local peek = r:peek_byte() + if peek == 0x0B then + r:read_byte() + break + elseif peek == 0x05 then + r:read_byte() + r.label_stack:pop() + inelse = true + r.label_stack:push({ "else", 
label = name, rt = rt }) + else + if not inelse then + table.insert(instrs, parse_instr(r)) + else + table.insert(else_instrs, parse_instr(r)) + end + end + end + + r.label_stack:pop() + + return { "if", label = name, rt = rt, instrs = instrs, else_instrs = else_instrs } + end; + + [0x0C] = function() return { "br", x = parse_labelidx(r) } end; + [0x0D] = function() return { "br_if", x = parse_labelidx(r) } end; + [0x0E] = function() + local labels = parse_vector(parse_labelidx)(r) + local labeln = parse_labelidx(r) + + return { "br_table", labels = labels, labeln = labeln } + end; + + [0x0F] = function() return { "return" } end; + + [0x10] = function() return { "call", x = parse_funcidx(r) } end; + [0x11] = function() + local x = parse_typeidx(r) + -- assert(r:read_byte() ~= 0x00, "call_indirect expects 0x00 at end") + return { "call_indirect", x = x, table = r:read_byte() } + end; + + [0x1A] = function() return { "drop" } end; + [0x1B] = function() return { "select" } end; + + [0x20] = function() return { "local.get", x = parse_localidx(r) } end; + [0x21] = function() return { "local.set", x = parse_localidx(r) } end; + [0x22] = function() return { "local.tee", x = parse_localidx(r) } end; + [0x23] = function() return { "global.get", x = parse_globalidx(r) } end; + [0x24] = function() return { "global.set", x = parse_globalidx(r) } end; + + [0x28] = function() return { "i32.load", x = parse_memarg(r) } end; + [0x29] = function() return { "i64.load", x = parse_memarg(r) } end; + [0x2A] = function() return { "f32.load", x = parse_memarg(r) } end; + [0x2B] = function() return { "f64.load", x = parse_memarg(r) } end; + [0x2C] = function() return { "i32.load8_s", x = parse_memarg(r) } end; + [0x2D] = function() return { "i32.load8_u", x = parse_memarg(r) } end; + [0x2E] = function() return { "i32.load16_s", x = parse_memarg(r) } end; + [0x2F] = function() return { "i32.load16_u", x = parse_memarg(r) } end; + [0x30] = function() return { "i64.load8_s", x = 
parse_memarg(r) } end; + [0x31] = function() return { "i64.load8_u", x = parse_memarg(r) } end; + [0x32] = function() return { "i64.load16_s", x = parse_memarg(r) } end; + [0x33] = function() return { "i64.load16_u", x = parse_memarg(r) } end; + [0x34] = function() return { "i64.load32_s", x = parse_memarg(r) } end; + [0x35] = function() return { "i64.load32_u", x = parse_memarg(r) } end; + [0x36] = function() return { "i32.store", x = parse_memarg(r) } end; + [0x37] = function() return { "i64.store", x = parse_memarg(r) } end; + [0x38] = function() return { "f32.store", x = parse_memarg(r) } end; + [0x39] = function() return { "f64.store", x = parse_memarg(r) } end; + [0x3A] = function() return { "i32.store8", x = parse_memarg(r) } end; + [0x3B] = function() return { "i32.store16", x = parse_memarg(r) } end; + [0x3C] = function() return { "i64.store8", x = parse_memarg(r) } end; + [0x3D] = function() return { "i64.store16", x = parse_memarg(r) } end; + [0x3E] = function() return { "i64.store32", x = parse_memarg(r) } end; + [0x3F] = function() + assert(r:read_byte() == 0x00, "memory.size expects 0x00") + return { "memory.size" } + end; + [0x40] = function() + assert(r:read_byte() == 0x00, "memory.grow expects 0x00") + return { "memory.grow" } + end; + + [0x41] = function() return { "i32.const", x = r:read_sint(32) } end; + [0x42] = function() return { "i64.const", x = r:read_sint(64) } end; + [0x43] = function() return { "f32.const", x = r:read_float(32) } end; + [0x44] = function() return { "f64.const", x = r:read_float(64) } end; + + [0x45] = function() return { "i32.eqz" } end; + [0x46] = function() return { "i32.eq" } end; + [0x47] = function() return { "i32.ne" } end; + [0x48] = function() return { "i32.lt_s" } end; + [0x49] = function() return { "i32.lt_u" } end; + [0x4A] = function() return { "i32.gt_s" } end; + [0x4B] = function() return { "i32.gt_u" } end; + [0x4C] = function() return { "i32.le_s" } end; + [0x4D] = function() return { "i32.le_u" } end; + 
[0x4E] = function() return { "i32.ge_s" } end; + [0x4F] = function() return { "i32.ge_u" } end; + [0x50] = function() return { "i64.eqz" } end; + [0x51] = function() return { "i64.eq" } end; + [0x52] = function() return { "i64.ne" } end; + [0x53] = function() return { "i64.lt_s" } end; + [0x54] = function() return { "i64.lt_u" } end; + [0x55] = function() return { "i64.gt_s" } end; + [0x56] = function() return { "i64.gt_u" } end; + [0x57] = function() return { "i64.le_s" } end; + [0x58] = function() return { "i64.le_u" } end; + [0x59] = function() return { "i64.ge_s" } end; + [0x5A] = function() return { "i64.ge_u" } end; + [0x5B] = function() return { "f32.eq" } end; + [0x5C] = function() return { "f32.ne" } end; + [0x5D] = function() return { "f32.lt" } end; + [0x5E] = function() return { "f32.gt" } end; + [0x5F] = function() return { "f32.le" } end; + [0x60] = function() return { "f32.ge" } end; + [0x61] = function() return { "f64.eq" } end; + [0x62] = function() return { "f64.ne" } end; + [0x63] = function() return { "f64.lt" } end; + [0x64] = function() return { "f64.gt" } end; + [0x65] = function() return { "f64.le" } end; + [0x66] = function() return { "f64.ge" } end; + [0x67] = function() return { "i32.clz" } end; + [0x68] = function() return { "i32.ctz" } end; + [0x69] = function() return { "i32.popcnt" } end; + [0x6A] = function() return { "i32.add" } end; + [0x6B] = function() return { "i32.sub" } end; + [0x6C] = function() return { "i32.mul" } end; + [0x6D] = function() return { "i32.div_s" } end; + [0x6E] = function() return { "i32.div_u" } end; + [0x6F] = function() return { "i32.rem_s" } end; + [0x70] = function() return { "i32.rem_u" } end; + [0x71] = function() return { "i32.and" } end; + [0x72] = function() return { "i32.or" } end; + [0x73] = function() return { "i32.xor" } end; + [0x74] = function() return { "i32.shl" } end; + [0x75] = function() return { "i32.shr_s" } end; + [0x76] = function() return { "i32.shr_u" } end; + [0x77] = function() 
return { "i32.rotl" } end; + [0x78] = function() return { "i32.rotr" } end; + [0x79] = function() return { "i64.clz" } end; + [0x7A] = function() return { "i64.ctz" } end; + [0x7B] = function() return { "i64.popcnt" } end; + [0x7C] = function() return { "i64.add" } end; + [0x7D] = function() return { "i64.sub" } end; + [0x7E] = function() return { "i64.mul" } end; + [0x7F] = function() return { "i64.div_s" } end; + [0x80] = function() return { "i64.div_u" } end; + [0x81] = function() return { "i64.rem_s" } end; + [0x82] = function() return { "i64.rem_u" } end; + [0x83] = function() return { "i64.and" } end; + [0x84] = function() return { "i64.or" } end; + [0x85] = function() return { "i64.xor" } end; + [0x86] = function() return { "i64.shl" } end; + [0x87] = function() return { "i64.shr_s" } end; + [0x88] = function() return { "i64.shr_u" } end; + [0x89] = function() return { "i64.rotl" } end; + [0x8A] = function() return { "i64.rotr" } end; + [0x8B] = function() return { "f32.abs" } end; + [0x8C] = function() return { "f32.neg" } end; + [0x8D] = function() return { "f32.ceil" } end; + [0x8E] = function() return { "f32.floor" } end; + [0x8F] = function() return { "f32.trunc" } end; + [0x90] = function() return { "f32.nearest" } end; + [0x91] = function() return { "f32.sqrt" } end; + [0x92] = function() return { "f32.add" } end; + [0x93] = function() return { "f32.sub" } end; + [0x94] = function() return { "f32.mul" } end; + [0x95] = function() return { "f32.div" } end; + [0x96] = function() return { "f32.min" } end; + [0x97] = function() return { "f32.max" } end; + [0x98] = function() return { "f32.copysign" } end; + [0x99] = function() return { "f64.abs" } end; + [0x9A] = function() return { "f64.neg" } end; + [0x9B] = function() return { "f64.ceil" } end; + [0x9C] = function() return { "f64.floor" } end; + [0x9D] = function() return { "f64.trunc" } end; + [0x9E] = function() return { "f64.nearest" } end; + [0x9F] = function() return { "f64.sqrt" } end; + [0xA0] = 
function() return { "f64.add" } end; + [0xA1] = function() return { "f64.sub" } end; + [0xA2] = function() return { "f64.mul" } end; + [0xA3] = function() return { "f64.div" } end; + [0xA4] = function() return { "f64.min" } end; + [0xA5] = function() return { "f64.max" } end; + [0xA6] = function() return { "f64.copysign" } end; + + [0xA7] = function() return { "i32.wrap_i64" } end; + [0xA8] = function() return { "i32.trunc_f32_s" } end; + [0xA9] = function() return { "i32.trunc_f32_u" } end; + [0xAA] = function() return { "i32.trunc_f64_s" } end; + [0xAB] = function() return { "i32.trunc_f64_u" } end; + [0xAC] = function() return { "i64.extend_i32_s" } end; + [0xAD] = function() return { "i64.extend_i32_u" } end; + [0xAE] = function() return { "i64.trunc_f32_s" } end; + [0xAF] = function() return { "i64.trunc_f32_u" } end; + [0xB0] = function() return { "i64.trunc_f64_s" } end; + [0xB1] = function() return { "i64.trunc_f64_u" } end; + [0xB2] = function() return { "f32.convert_i32_s" } end; + [0xB3] = function() return { "f32.convert_i32_u" } end; + [0xB4] = function() return { "f32.convert_i64_s" } end; + [0xB5] = function() return { "f32.convert_i64_u" } end; + [0xB6] = function() return { "f32.demote_f64" } end; + [0xB7] = function() return { "f64.convert_i32_s" } end; + [0xB8] = function() return { "f64.convert_i32_u" } end; + [0xB9] = function() return { "f64.convert_i64_s" } end; + [0xBA] = function() return { "f64.convert_i64_u" } end; + [0xBB] = function() return { "f64.promote_f32" } end; + [0xBC] = function() return { "i32.reinterpret_f32" } end; + [0xBD] = function() return { "i64.reinterpret_f64" } end; + [0xBE] = function() return { "f32.reinterpret_i32" } end; + [0xBF] = function() return { "f64.reinterpret_i64" } end; + } +end + +function parse_memarg(r) + local a = r:read_uint(32) + local o = r:read_uint(32) + + return { "memarg"; align = a; offset = o } +end + +function parse_expr(r) + local instrs = {} + while true do + if r:peek_byte() == 0x0B 
then + r:read_byte() + break + else + table.insert(instrs, parse_instr(r)) + end + end + + return instrs +end + +function parse_typeidx(r) return r:read_uint(32) end +function parse_funcidx(r) return r:read_uint(32) end +function parse_tableidx(r) return r:read_uint(32) end +function parse_memidx(r) return r:read_uint(32) end +function parse_globalidx(r) return r:read_uint(32) end +function parse_localidx(r) return r:read_uint(32) end +function parse_labelidx(r) + local idx = r:read_uint(32) + local block = r.label_stack:at(idx) + if block == nil then block = { label = "return " } end + return { "labelidx", labelidx = idx, block = block } +end + +function parse_section(r, expectN, B) + if r:peek_byte() ~= expectN then return end + + local N = r:read_byte() + local size = r:read_uint(32) + local cont = B(r) + + return { "section", contents = cont, size = size } +end + +function parse_customsec(r) + local csect = parse_section(r, 0, parse_custom) + if not csect then return nil end + + for i=1, csect.size do + -- Discard csect.size bytes + r:read_byte() + end + return csect +end + +function parse_custom(r) + local name = parse_name(r) + + return { "custom", name = name } +end + +function parse_typesec(r) + return parse_section(r, 1, parse_vector(parse_functype)) +end + +function parse_importsec(r) + return parse_section(r, 2, parse_vector(parse_import)) +end + +function parse_import(r) + local mod = parse_name(r) + local nm = parse_name(r) + local d = parse_importdesc(r) + + return { "import", mod = mod, name = nm, desc = d } +end + +function parse_importdesc(r) + local t = r:read_byte() + if t == 0x00 then return { "func", x = parse_typeidx(r) } + elseif t == 0x01 then return { "table", tt = parse_tabletype(r) } + elseif t == 0x02 then return { "mem", mt = parse_memtype(r) } + elseif t == 0x03 then return { "global", gt = parse_globaltype(r) } + end + + error("bad importdesc") +end + +function parse_funcsec(r) + return parse_section(r, 3, parse_vector(parse_typeidx)) 
+end
+
+-- Parses the table section (section id 4) as a vector of tables.
+function parse_tablesec(r)
+    return parse_section(r, 4, parse_vector(parse_table))
+end
+
+-- Parses one table entry and wraps its table type in a tagged "table" node.
+function parse_table(r)
+    local tt = parse_tabletype(r)
+    return { "table", type_ = tt }
+end
+
+-- Parses the memory section (section id 5) as a vector of memories.
+function parse_memsec(r)
+    return parse_section(r, 5, parse_vector(parse_mem))
+end
+
+-- Parses one memory entry and wraps its memory type in a tagged "mem" node.
+function parse_mem(r)
+    local mt = parse_memtype(r)
+    return { "mem", type_ = mt }
+end
+
+-- Parses the global section (section id 6) as a vector of globals.
+function parse_globalsec(r)
+    return parse_section(r, 6, parse_vector(parse_global))
+end
+
+-- Parses one global: its global type followed by its initializer expression.
+function parse_global(r)
+    local gt = parse_globaltype(r)
+    local e = parse_expr(r)
+    return { "global", type_ = gt, init = e }
+end
+
+-- Parses the export section (section id 7) as a vector of exports.
+function parse_exportsec(r)
+    return parse_section(r, 7, parse_vector(parse_export))
+end
+
+-- Parses one export: its name followed by its export descriptor.
+function parse_export(r)
+    local nm = parse_name(r)
+    local d = parse_exportdesc(r)
+    return { "export", name = nm, desc = d }
+end
+
+-- Parses an export descriptor: a one-byte kind tag followed by an index.
+-- The result is a tagged node ("func", "table", "mem" or "global") carrying
+-- the index under x, tt, mt or gt respectively.
+-- Raises an error that names the offending tag when it is not 0x00..0x03.
+function parse_exportdesc(r)
+    local t = r:read_byte()
+    -- NOTE(review): tag 0x00 reads its index with parse_typeidx, while the
+    -- WebAssembly spec describes it as a function index -- confirm the two
+    -- index helpers are interchangeable before changing this.
+    if t == 0x00 then return { "func", x = parse_typeidx(r) }
+    elseif t == 0x01 then return { "table", tt = parse_tableidx(r) }
+    elseif t == 0x02 then return { "mem", mt = parse_memidx(r) }
+    elseif t == 0x03 then return { "global", gt = parse_globalidx(r) }
+    end
+
+    -- error()'s second argument is the stack level, not part of the message,
+    -- so the tag has to be concatenated into the message itself.
+    error("bad exportdesc: " .. tostring(t))
+end
+
+-- Parses the start section (section id 8), which holds a single entry.
+function parse_startsec(r)
+    return parse_section(r, 8, parse_start)
+end
+
+-- Parses the start entry: the index of the function to run.
+function parse_start(r)
+    local x = parse_funcidx(r)
+    return { "start", func = x }
+end
+
+-- Parses the element section (section id 9) as a vector of element segments.
+function parse_elemsec(r)
+    return parse_section(r, 9, parse_vector(parse_elem))
+end
+
+-- Parses one element segment: table index, offset expression and a vector
+-- of function indices.
+function parse_elem(r)
+    local x = parse_tableidx(r)
+    local e = parse_expr(r)
+    local y = parse_vector(parse_funcidx)(r)
+
+    return { "elem", table = x, offset = e, init = y }
+end
+
+-- Parses the code section (section id 10) as a vector of code entries.
+function parse_codesec(r)
+    return parse_section(r, 10, parse_vector(parse_code))
+end
+
+-- Parses one code entry: a size prefix followed by the function itself.
+function parse_code(r)
+    -- The size is not used, but it still has to be consumed from the stream.
+    r:read_uint(32)
+    return parse_func(r)
+end
+
+-- Parses a function: a vector of local groups followed by the body
+-- expression. The groups are flattened into a single `locals` list whose
+-- entries carry consecutive `localidx` values starting at 0.
+function parse_func(r)
+    local t = parse_vector(parse_locals)(r)
+    local e = parse_expr(r)
+
+    local localidx = 0
+    local locals = {}
+    for _, v in ipairs(t) do
+        for _, l in ipairs(v) do
+            l.localidx = localidx
+            -- parse_locals numbers names per group, so every group starts
+            -- again at "local0"; rename from the flattened index so names
+            -- stay unique within the function.
+            l.name = "local" .. localidx
+            table.insert(locals, l)
+            localidx = localidx + 1
+        end
+    end
+
+    return { "func", locals = locals, body = e }
+end
+
+-- Parses one group of locals: a count followed by a value type.
+-- Returns a list with one entry per declared local; names and indices are
+-- numbered from 0 within this group only.
+function parse_locals(r)
+    local n = r:read_uint(32)
+    local t = parse_valtype(r)
+
+    --TODO: Make a list of values with names like local0, local1, ...
+
+    local locals = {}
+    for i = 0, n - 1 do
+        table.insert(locals, {
+            name = "local" .. i,
+            type_ = t,
+            localidx = i
+        })
+    end
+
+    return locals
+end
+
+-- Parses the data section (section id 11) as a vector of data segments.
+function parse_datasec(r)
+    return parse_section(r, 11, parse_vector(parse_data))
+end
+
+-- Parses one data segment: memory index, offset expression and a byte vector.
+-- Note that the memory index is stored under the `data` key.
+function parse_data(r)
+    local x = parse_memidx(r)
+    local e = parse_expr(r)
+    local b = parse_vector(parse_byte)(r)
+
+    return { "data", data = x, offset = e, init = b }
+end
+
+-- Consumes the four magic bytes 0x00 0x61 0x73 0x6D ("\0asm") and asserts
+-- on each one.
+function parse_magic(r)
+    assert(r:read_byte() == 0x00, "magic string is wrong")
+    assert(r:read_byte() == 0x61, "magic string is wrong")
+    assert(r:read_byte() == 0x73, "magic string is wrong")
+    assert(r:read_byte() == 0x6D, "magic string is wrong")
+end
+
+-- Consumes the four version bytes 0x01 0x00 0x00 0x00 and asserts on each one.
+function parse_version(r)
+    assert(r:read_byte() == 0x01, "version is wrong")
+    assert(r:read_byte() == 0x00, "version is wrong")
+    assert(r:read_byte() == 0x00, "version is wrong")
+    assert(r:read_byte() == 0x00, "version is wrong")
+end
+
+-- Parses a whole module: the magic and version headers, then each section in
+-- order (type, import, function, table, memory, global, export, start,
+-- element, code, data).
+--
+-- Besides the raw sections, the returned "module" node carries `funcs`, a
+-- table keyed by function index starting at 0. Imported functions come
+-- first, followed by the functions defined in the code section.
+function parse_module(r)
+    parse_magic(r)
+    parse_version(r)
+
+    local functypes = parse_typesec(r)
+    local imports = parse_importsec(r)
+    local typeidxs = parse_funcsec(r)
+    local tables = parse_tablesec(r)
+    local mems = parse_memsec(r)
+    local globals = parse_globalsec(r)
+    local exports = parse_exportsec(r)
+    local start = parse_startsec(r)
+    local elems = parse_elemsec(r)
+    local codes = parse_codesec(r)
+    local data = parse_datasec(r)
+
+    local funcs = {}
+    local funcidx = 0
+
+    -- Imported functions occupy the first function indices.
+    -- (presumably a section parser yields nil when its section is absent,
+    -- hence the guards -- verify against parse_section)
+    if imports then
+        for _, v in ipairs(imports.contents) do
+            if v.desc[1] == "func" then
+                funcs[funcidx] = {
+                    name = build_str(v.mod) .. "." .. build_str(v.name);
+                    funcidx = funcidx;
+                    -- contents is 1-based while type indices are 0-based.
+                    type_ = functypes.contents[v.desc.x + 1];
+                    imported = true;
+                }
+                funcidx = funcidx + 1
+            end
+        end
+    end
+
+    if codes then
+        for i = 1, #codes.contents do
+            local locals = codes.contents[i].locals
+            -- The i-th code entry is typed by the i-th function-section entry.
+            local type_ = functypes.contents[typeidxs.contents[i] + 1]
+            local param_types = type_.param_types
+
+            -- Parameters take the first local indices; the declared locals
+            -- are renumbered to follow them.
+            local new_locals = {}
+            local new_local_idx = 0
+            for _, p in ipairs(param_types) do
+                table.insert(new_locals, {
+                    name = "param" .. new_local_idx,
+                    type_ = p;
+                    localidx = new_local_idx
+                })
+                new_local_idx = new_local_idx + 1
+            end
+            for _, l in ipairs(locals) do
+                l.localidx = new_local_idx
+                table.insert(new_locals, l)
+                new_local_idx = new_local_idx + 1
+            end
+
+            funcs[funcidx] = {
+                funcidx = funcidx;
+                name = "func" .. funcidx;
+                type_ = type_;
+                locals = new_locals;
+                body = codes.contents[i].body;
+                imported = false;
+            }
+            funcidx = funcidx + 1
+        end
+    end
+
+    return {
+        "module";
+        types = functypes;
+        tables = tables;
+        mems = mems;
+        globals = globals;
+        exports = exports;
+        start = start;
+        elems = elems;
+        data = data;
+        imports = imports;
+        funcs = funcs;
+    }
+end
+
+
+-- Parser util class used for interfacing
+-- with the parse_helper c library
+class "Parser" {
+    -- Opens `filename` through parse_helper and creates an empty label stack.
+    init = function(self, filename)
+        self.wasm_file = parse_helper.open_file(filename)
+        self.label_stack = Stack()
+    end;
+
+    -- Reads the next byte via parse_helper.read_byte.
+    read_byte = function(self)
+        return parse_helper.read_byte(self.wasm_file)
+    end;
+
+    -- Returns the result of parse_helper.peek_byte for the open file.
+    peek_byte = function(self)
+        return parse_helper.peek_byte(self.wasm_file)
+    end;
+
+    -- Reads an unsigned LEB128 integer.
+    -- NOTE: N is unused
+    read_uint = function(self, N)
+        return parse_helper.parse_uleb128(self.wasm_file)
+    end;
+
+    -- Reads a signed LEB128 integer; N is forwarded to the C helper.
+    read_sint = function(self, N)
+        return parse_helper.parse_sleb128(self.wasm_file, N)
+    end;
+
+    -- Reads a 32- or 64-bit float depending on N.
+    -- Returns nothing for any other N.
+    read_float = function(self, N)
+        if N == 32 then
+            return parse_helper.parse_ieee754_32(self.wasm_file)
+        elseif N == 64 then
+            return parse_helper.parse_ieee754_64(self.wasm_file)
+        end
+    end;
+}
+
+-- Entry point: opens `filepath` with a Parser and parses it as a module.
+function parse(filepath)
+    local reader = Parser(filepath)
+
+    return parse_module(reader)
+end
+
+return module { parse }
diff --git a/testing.lua b/testing.lua index 9415924..51caa05 100644 --- a/testing.lua +++ b/testing.lua @@ -1,21 +1,21 @@ require "lualib.oop" import { - decompile = "src.wasm.decompile:"; + parse = "src.wasm.parse:"; analyze = "src.wasm.analyze:"; - build_expr = "src.wasm.exprs:build_expr"; + decompile_func = "src.wasm.decompile:decompile_func"; pprint = "lualib.pprint"; } -local mod = decompile(arg[1]) +local mod = parse(arg[1]) mod = analyze(mod) -- pprint(mod) for i, func in pairs(mod.funcs) do if func.body then print(func.name .. " -----------------------------------") - local res = build_expr(func, mod) + local res = decompile_func(func, mod) for _, r in ipairs(res) do print(r) end