From: Brendan Hansen Date: Sat, 11 Apr 2020 04:52:51 +0000 (-0500) Subject: Working on parsing binary to text format (C-style) X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=eace29ac8c5581cbd99f68e0f4d3304b2051e78a;p=wasm-analyzer.git Working on parsing binary to text format (C-style) --- diff --git a/src/utils.lua b/src/utils.lua index a08a676..db8880f 100644 --- a/src/utils.lua +++ b/src/utils.lua @@ -116,6 +116,10 @@ class "Stack" { return rt end; + clear = function(self) + self.data = {} + end; + at = function(self, x) return self.data[#self.data - x] end diff --git a/src/wasm/decompile.lua b/src/wasm/decompile.lua index b67f973..f73351e 100644 --- a/src/wasm/decompile.lua +++ b/src/wasm/decompile.lua @@ -164,6 +164,7 @@ function parse_instr(r) local instrs = {} local else_instrs = {} local inelse = false + local name = random_str(8) local block = { "if", label = name, rt = rt } r.label_stack:push(block) @@ -189,7 +190,7 @@ function parse_instr(r) r.label_stack:pop() - return { "if", rt = rt, instrs = instrs, else_instrs = else_instrs } + return { "if", label = name, rt = rt, instrs = instrs, else_instrs = else_instrs } end; [0x0C] = function() return { "br", x = parse_labelidx(r) } end; diff --git a/src/wasm/exprs.lua b/src/wasm/exprs.lua index e0dd1af..eb10f91 100644 --- a/src/wasm/exprs.lua +++ b/src/wasm/exprs.lua @@ -1,9 +1,284 @@ import { + Stack = "src.utils:Stack"; +} + + +--[[ +Expressions in wasm are built off of a stack, so... +i32.const 1 +i32.const 3 +i32.mul +i32.const 8 +i32.add + +would translate to + +i32.add (i32.mul (i32.const 1) (i32.const 3)) (i32.const 8). + +Every instruction has some number of inputs and outputs to the stack. + inputs outputs +i32.const 0 1 +i32.add 2 1 + +Currently every instruction can output at most one thing to the stack. +This should be quick, just need to look up all the instructions + +--]] + +-- TODO: Maybe add type checking at some point? 
-- Stack effect of every statically-known instruction, keyed by mnemonic:
--   { number of operands popped, number of results pushed }
-- Instructions whose effect depends on a type signature (block, loop, if/else,
-- call, call_indirect) are intentionally absent and handled separately.
-- NOTE(review): "br" and "return" are listed as { 0, 0 } although they can
-- carry values depending on the target label / function type -- confirm.
local stack_opts = {
--  Name                        Inputs, Outputs
    ["unreachable"]         = { 0, 0 };
    ["nop"]                 = { 0, 0 };

    --block is dynamic
    --loop is dynamic
    --if and else are dynamic

    ["br"]                  = { 0, 0 };
    ["br_if"]               = { 1, 0 };
    ["br_table"]            = { 1, 0 };
    ["return"]              = { 0, 0 };
    --call is dynamic
    --call_indirect is dynamic

    ["drop"]                = { 1, 0 };
    -- select pops (val1, val2, cond) and pushes the chosen value.
    ["select"]              = { 3, 1 };

    ["local.get"]           = { 0, 1 };
    ["local.set"]           = { 1, 0 };
    ["local.tee"]           = { 1, 1 };
    ["global.get"]          = { 0, 1 };
    ["global.set"]          = { 1, 0 };

    ["i32.load"]            = { 1, 1 };
    ["i64.load"]            = { 1, 1 };
    ["f32.load"]            = { 1, 1 };
    ["f64.load"]            = { 1, 1 };
    ["i32.load8_s"]         = { 1, 1 };
    ["i32.load8_u"]         = { 1, 1 };
    ["i32.load16_s"]        = { 1, 1 };
    ["i32.load16_u"]        = { 1, 1 };
    ["i64.load8_s"]         = { 1, 1 };
    ["i64.load8_u"]         = { 1, 1 };
    ["i64.load16_s"]        = { 1, 1 };
    ["i64.load16_u"]        = { 1, 1 };
    ["i64.load32_s"]        = { 1, 1 };
    ["i64.load32_u"]        = { 1, 1 };
    ["i32.store"]           = { 2, 0 };
    ["i64.store"]           = { 2, 0 };
    ["f32.store"]           = { 2, 0 };
    ["f64.store"]           = { 2, 0 };
    ["i32.store8"]          = { 2, 0 };
    ["i32.store16"]         = { 2, 0 };
    ["i64.store8"]          = { 2, 0 };
    ["i64.store16"]         = { 2, 0 };
    ["i64.store32"]         = { 2, 0 };

    ["memory.size"]         = { 0, 1 };
    ["memory.grow"]         = { 1, 1 };
    ["i32.const"]           = { 0, 1 };
    ["i64.const"]           = { 0, 1 };
    ["f32.const"]           = { 0, 1 };
    ["f64.const"]           = { 0, 1 };

    ["i32.eqz"]             = { 1, 1 };
    ["i32.eq"]              = { 2, 1 };
    ["i32.ne"]              = { 2, 1 };
    ["i32.lt_s"]            = { 2, 1 };
    ["i32.lt_u"]            = { 2, 1 };
    ["i32.gt_s"]            = { 2, 1 };
    ["i32.gt_u"]            = { 2, 1 };
    ["i32.le_s"]            = { 2, 1 };
    ["i32.le_u"]            = { 2, 1 };
    ["i32.ge_s"]            = { 2, 1 };
    ["i32.ge_u"]            = { 2, 1 };
    ["i64.eqz"]             = { 1, 1 };
    ["i64.eq"]              = { 2, 1 };
    ["i64.ne"]              = { 2, 1 };
    ["i64.lt_s"]            = { 2, 1 };
    ["i64.lt_u"]            = { 2, 1 };
    ["i64.gt_s"]            = { 2, 1 };
    ["i64.gt_u"]            = { 2, 1 };
    ["i64.le_s"]            = { 2, 1 };
    ["i64.le_u"]            = { 2, 1 };
    ["i64.ge_s"]            = { 2, 1 };
    ["i64.ge_u"]            = { 2, 1 };
    ["f32.eq"]              = { 2, 1 };
    ["f32.ne"]              = { 2, 1 };
    ["f32.lt"]              = { 2, 1 };
    ["f32.gt"]              = { 2, 1 };
    ["f32.le"]              = { 2, 1 };
    ["f32.ge"]              = { 2, 1 };
    ["f64.eq"]              = { 2, 1 };
    ["f64.ne"]              = { 2, 1 };
    ["f64.lt"]              = { 2, 1 };
    ["f64.gt"]              = { 2, 1 };
    ["f64.le"]              = { 2, 1 };
    ["f64.ge"]              = { 2, 1 };

    ["i32.clz"]             = { 1, 1 };
    ["i32.ctz"]             = { 1, 1 };
    ["i32.popcnt"]          = { 1, 1 };
    ["i32.add"]             = { 2, 1 };
    ["i32.sub"]             = { 2, 1 };
    ["i32.mul"]             = { 2, 1 };
    ["i32.div_s"]           = { 2, 1 };
    ["i32.div_u"]           = { 2, 1 };
    ["i32.rem_s"]           = { 2, 1 };
    ["i32.rem_u"]           = { 2, 1 };
    ["i32.and"]             = { 2, 1 };
    ["i32.or"]              = { 2, 1 };
    ["i32.xor"]             = { 2, 1 };
    ["i32.shl"]             = { 2, 1 };
    ["i32.shr_s"]           = { 2, 1 };
    ["i32.shr_u"]           = { 2, 1 };
    ["i32.rotl"]            = { 2, 1 };
    ["i32.rotr"]            = { 2, 1 };
    ["i64.clz"]             = { 1, 1 };
    ["i64.ctz"]             = { 1, 1 };
    ["i64.popcnt"]          = { 1, 1 };
    ["i64.add"]             = { 2, 1 };
    ["i64.sub"]             = { 2, 1 };
    ["i64.mul"]             = { 2, 1 };
    ["i64.div_s"]           = { 2, 1 };
    ["i64.div_u"]           = { 2, 1 };
    ["i64.rem_s"]           = { 2, 1 };
    ["i64.rem_u"]           = { 2, 1 };
    ["i64.and"]             = { 2, 1 };
    ["i64.or"]              = { 2, 1 };
    ["i64.xor"]             = { 2, 1 };
    ["i64.shl"]             = { 2, 1 };
    ["i64.shr_s"]           = { 2, 1 };
    ["i64.shr_u"]           = { 2, 1 };
    ["i64.rotl"]            = { 2, 1 };
    ["i64.rotr"]            = { 2, 1 };
    ["f32.abs"]             = { 1, 1 };
    ["f32.neg"]             = { 1, 1 };
    ["f32.ceil"]            = { 1, 1 };
    ["f32.floor"]           = { 1, 1 };
    ["f32.trunc"]           = { 1, 1 };
    ["f32.nearest"]         = { 1, 1 };
    ["f32.sqrt"]            = { 1, 1 };
    ["f32.add"]             = { 2, 1 };
    ["f32.sub"]             = { 2, 1 };
    ["f32.mul"]             = { 2, 1 };
    ["f32.div"]             = { 2, 1 };
    ["f32.min"]             = { 2, 1 };
    ["f32.max"]             = { 2, 1 };
    ["f32.copysign"]        = { 2, 1 };
    ["f64.abs"]             = { 1, 1 };
    ["f64.neg"]             = { 1, 1 };
    ["f64.ceil"]            = { 1, 1 };
    ["f64.floor"]           = { 1, 1 };
    ["f64.trunc"]           = { 1, 1 };
    ["f64.nearest"]         = { 1, 1 };
    ["f64.sqrt"]            = { 1, 1 };
    ["f64.add"]             = { 2, 1 };
    ["f64.sub"]             = { 2, 1 };
    ["f64.mul"]             = { 2, 1 };
    ["f64.div"]             = { 2, 1 };
    ["f64.min"]             = { 2, 1 };
    ["f64.max"]             = { 2, 1 };
    ["f64.copysign"]        = { 2, 1 };

    ["i32.wrap_i64"]        = { 1, 1 };
    ["i32.trunc_f32_s"]     = { 1, 1 };
    ["i32.trunc_f32_u"]     = { 1, 1 };
    ["i32.trunc_f64_s"]     = { 1, 1 };
    ["i32.trunc_f64_u"]     = { 1, 1 };
    ["i64.extend_i32_s"]    = { 1, 1 };
    ["i64.extend_i32_u"]    = { 1, 1 };
    ["i64.trunc_f32_s"]     = { 1, 1 };
    ["i64.trunc_f32_u"]     = { 1, 1 };
    ["i64.trunc_f64_s"]     = { 1, 1 };
    ["i64.trunc_f64_u"]     = { 1, 1 };
    ["f32.convert_i32_s"]   = { 1, 1 };
    ["f32.convert_i32_u"]   = { 1, 1 };
    ["f32.convert_i64_s"]   = { 1, 1 };
    ["f32.convert_i64_u"]   = { 1, 1 };
    ["f32.demote_f64"]      = { 1, 1 };
    ["f64.convert_i32_s"]   = { 1, 1 };
    ["f64.convert_i32_u"]   = { 1, 1 };
    ["f64.convert_i64_s"]   = { 1, 1 };
    ["f64.convert_i64_u"]   = { 1, 1 };
    ["f64.promote_f32"]     = { 1, 1 };
    ["i32.reinterpret_f32"] = { 1, 1 };
    ["i64.reinterpret_f64"] = { 1, 1 };
    ["f32.reinterpret_i32"] = { 1, 1 };
    ["f64.reinterpret_i64"] = { 1, 1 };
}

-- Prefix applied to every line of a nested instruction list.
-- NOTE(review): this is the literal as it appears in the extracted source;
-- runs of whitespace may have been collapsed there -- confirm the intended
-- indent width.
local INDENT = " "

-- Text rendered in place of an operand when the expression stack is empty.
local MISSING_OPERAND = "<?>"

-- Pops the top expression string, or yields MISSING_OPERAND when the stack is
-- empty. Underflow is expected for now because dynamic instructions such as
-- "call" do not push their results yet; a placeholder keeps the listing going
-- instead of failing on a nil concatenation.
local function pop_operand(stack)
    if stack:at(0) == nil then
        return MISSING_OPERAND
    end
    return stack:pop()
end

-- Appends every line of `nested` to `lines`, indented one level.
local function append_indented(lines, nested)
    for _, line in ipairs(nested) do
        lines[#lines + 1] = INDENT .. line
    end
end

-- Renders a flat/nested wasm instruction list as C-like pseudo code.
--
-- This works... ish but we should use proper symbols (+-*/) and other things.
-- Instruction immediates (constant values, local indices, ...) are not
-- rendered yet; everything is printed in function-call syntax.
--
-- @param instrs  array of instruction tables; instr[1] is the mnemonic,
--                "block"/"loop"/"if" carry .label and .instrs, "if" also
--                carries .else_instrs
-- @return array of strings, one per output line
function build_instr_list(instrs)
    local lines = {}
    local stack = Stack()

    for _, instr in ipairs(instrs) do
        local instr_name = instr[1]
        local opts = stack_opts[instr_name]

        if opts then
            local n_in, n_out = opts[1], opts[2]

            -- The last operand pushed is the first one popped, so fill the
            -- argument list back to front to keep source operand order
            -- (a b i32.sub  ->  i32.sub(a, b)).
            local args = {}
            for i = n_in, 1, -1 do
                args[i] = pop_operand(stack)
            end

            local str = instr_name .. "(" .. table.concat(args, ", ") .. ")"

            if n_out == 1 then
                -- Value-producing: keep it as an operand for a later consumer.
                stack:push(str)
            else
                lines[#lines + 1] = str
            end
        elseif instr_name == "block" or instr_name == "loop" then
            -- have a .instrs which is the instruction list
            lines[#lines + 1] = instr_name .. " { [" .. instr.label .. "]"
            append_indented(lines, build_instr_list(instr.instrs))
            lines[#lines + 1] = "}"
        elseif instr_name == "if" then
            -- The condition is whatever expression is on top of the stack.
            local cond = pop_operand(stack)
            lines[#lines + 1] = instr_name .. " (" .. cond .. " ) { [" .. instr.label .. "]"
            append_indented(lines, build_instr_list(instr.instrs))

            if #instr.else_instrs > 0 then
                lines[#lines + 1] = "} else {"
                append_indented(lines, build_instr_list(instr.else_instrs))
            end

            lines[#lines + 1] = "}"
        end
        -- TODO: "call" / "call_indirect" need the callee's type to know how
        -- many operands to pop and whether a result is pushed.
    end

    -- Expressions nobody consumed (typically the list's result value) would
    -- otherwise vanish from the output; emit them in the order they were
    -- pushed.
    local leftover = {}
    while stack:at(0) ~= nil do
        table.insert(leftover, 1, stack:pop())
    end
    for _, expr in ipairs(leftover) do
        lines[#lines + 1] = expr
    end

    return lines
end

-- Builds the pseudo-code listing for one decompiled function.
--
-- @param wasm_func  function table; its .body is the instruction list
-- @param wasm_mod   owning module (currently unused)
-- @return array of strings, one per output line
function build_expr(wasm_func, wasm_mod)
    return build_instr_list(wasm_func.body)
end

return module {
    build_expr = build_expr;
    build_instr_list = build_instr_list;
}

--[==[
Remainder of the patch this module was extracted from (hunk for testing.lua,
not part of this module), preserved verbatim:

diff --git a/testing.lua b/testing.lua index 935dc9c..033ef79 100644 --- a/testing.lua +++ b/testing.lua @@ -3,11 +3,17 @@ require "lualib.oop" import { decompile = "src.wasm.decompile:"; analyze = "src.wasm.analyze:"; - build_str = "src.utils:build_str"; + build_expr = "src.wasm.exprs:build_expr"; + pprint = "lualib.pprint"; } local mod = decompile(arg[1]) -analyze(mod) -pprint(mod) +mod = analyze(mod) +for i, func in ipairs(mod.funcs) do + if func.body then + print(func.name .. " -----------------------------------") + pprint(build_expr(func, mod)) + end +end
]==]