Working on parsing binary to text format (C-style)
author Brendan Hansen <brendan.f.hansen@gmail.com>
Sat, 11 Apr 2020 04:52:51 +0000 (23:52 -0500)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Sat, 11 Apr 2020 04:52:51 +0000 (23:52 -0500)
src/utils.lua
src/wasm/decompile.lua
src/wasm/exprs.lua
testing.lua

index a08a676603ecf932c6b94db70238c2cd091e56d7..db8880fed85bca5dd3a9d2e2734977d793d44794 100644 (file)
@@ -116,6 +116,10 @@ class "Stack" {
                return rt
        end;
 
+       -- Empties the stack by replacing its backing table with a fresh one.
+       clear = function(self)
+               self.data = {}
+       end;
+
        at = function(self, x)
                return self.data[#self.data - x]
        end
index b67f97312f6b588ef24c8b7962cc22f4a20e00a2..f73351e763734fc6dac60c5685db70d3e53ffd9f 100644 (file)
@@ -164,6 +164,7 @@ function parse_instr(r)
                        local instrs = {}
                        local else_instrs = {}
                        local inelse = false
+                       local name = random_str(8)
 
                        local block = { "if", label = name, rt = rt }
                        r.label_stack:push(block)
@@ -189,7 +190,7 @@ function parse_instr(r)
 
                        r.label_stack:pop()
 
-                       return { "if", rt = rt, instrs = instrs, else_instrs = else_instrs }
+                       return { "if", label = name, rt = rt, instrs = instrs, else_instrs = else_instrs }
                end;
 
                [0x0C] = function() return { "br", x = parse_labelidx(r) } end;
index e0dd1af166bef48a9b072b5044380b7c253f7327..eb10f91ab4ce9fd607d4568d662b77ffc2100406 100644 (file)
@@ -1,9 +1,284 @@
 import {
+       Stack = "src.utils:Stack";
+}
+
+
+--[[
+Expressions in wasm are built off of a stack, so...
+i32.const 1
+i32.const 3
+i32.mul
+i32.const 8
+i32.add
+
+would translate to
+
+i32.add (i32.mul (i32.const 1) (i32.const 3)) (i32.const 8).
+
+Every instruction has some number of inputs and outputs to the stack.
+                               inputs          outputs
+i32.const              0                       1
+i32.add                        2                       1
+
+Currently every instruction can push at most one value onto the stack.
+Filling in the table below should be quick: it only requires looking up
+the number of inputs and outputs of each instruction.
+
+--]]
+
+-- TODO: Maybe add type checking at some point?
+-- Stack signature of every fixed-arity instruction, keyed by mnemonic.
+-- Each entry is { inputs, outputs }: the number of values the instruction
+-- pops from the operand stack and the number it pushes back.
+-- Instructions marked "dynamic" below have no fixed signature and are
+-- deliberately left out of this table.
+local stack_opts = {
+--  Name                                       Inputs, Outputs
+       ["unreachable"]                 = { 0, 0 };
+       ["nop"]                                 = { 0, 0 };
+
+       --block is dynamic
+       --loop is dynamic
+       --if and else are dynamic
+
+       ["br"]                                  = { 0, 0 };
+       ["br_if"]                               = { 1, 0 };
+       ["br_table"]                    = { 1, 0 };
+       ["return"]                              = { 0, 0 };
+       --call is dynamic
+       --call_indirect is dynamic
+
+       ["drop"]                                = { 1, 0 };
+       -- select pops two candidates plus a condition and pushes the chosen one
+       ["select"]                              = { 3, 1 };
+
+       ["local.get"]                   = { 0, 1 };
+       ["local.set"]                   = { 1, 0 };
+       ["local.tee"]                   = { 1, 1 };
+       ["global.get"]                  = { 0, 1 };
+       ["global.set"]                  = { 1, 0 };
+
+       ["i32.load"]                    = { 1, 1 };
+       ["i64.load"]                    = { 1, 1 };
+       ["f32.load"]                    = { 1, 1 };
+       ["f64.load"]                    = { 1, 1 };
+       ["i32.load8_s"]                 = { 1, 1 };
+       ["i32.load8_u"]                 = { 1, 1 };
+       ["i32.load16_s"]                = { 1, 1 };
+       ["i32.load16_u"]                = { 1, 1 };
+       ["i64.load8_s"]                 = { 1, 1 };
+       ["i64.load8_u"]                 = { 1, 1 };
+       ["i64.load16_s"]                = { 1, 1 };
+       ["i64.load16_u"]                = { 1, 1 };
+       ["i64.load32_s"]                = { 1, 1 };
+       ["i64.load32_u"]                = { 1, 1 };
+       ["i32.store"]                   = { 2, 0 };
+       ["i64.store"]                   = { 2, 0 };
+       ["f32.store"]                   = { 2, 0 };
+       ["f64.store"]                   = { 2, 0 };
+       ["i32.store8"]                  = { 2, 0 };
+       ["i32.store16"]                 = { 2, 0 };
+       ["i64.store8"]                  = { 2, 0 };
+       ["i64.store16"]                 = { 2, 0 };
+       ["i64.store32"]                 = { 2, 0 };
+
+       ["memory.size"]                 = { 0, 1 };
+       ["memory.grow"]                 = { 1, 1 };
 
+       ["i32.const"]                   = { 0, 1 };
+       ["i64.const"]                   = { 0, 1 };
+       ["f32.const"]                   = { 0, 1 };
+       ["f64.const"]                   = { 0, 1 };
+
+       ["i32.eqz"]                             = { 1, 1 };
+       ["i32.eq"]                              = { 2, 1 };
+       ["i32.ne"]                              = { 2, 1 };
+       ["i32.lt_s"]                    = { 2, 1 };
+       ["i32.lt_u"]                    = { 2, 1 };
+       ["i32.gt_s"]                    = { 2, 1 };
+       ["i32.gt_u"]                    = { 2, 1 };
+       ["i32.le_s"]                    = { 2, 1 };
+       ["i32.le_u"]                    = { 2, 1 };
+       ["i32.ge_s"]                    = { 2, 1 };
+       ["i32.ge_u"]                    = { 2, 1 };
+       ["i64.eqz"]                             = { 1, 1 };
+       ["i64.eq"]                              = { 2, 1 };
+       ["i64.ne"]                              = { 2, 1 };
+       ["i64.lt_s"]                    = { 2, 1 };
+       ["i64.lt_u"]                    = { 2, 1 };
+       ["i64.gt_s"]                    = { 2, 1 };
+       ["i64.gt_u"]                    = { 2, 1 };
+       ["i64.le_s"]                    = { 2, 1 };
+       ["i64.le_u"]                    = { 2, 1 };
+       ["i64.ge_s"]                    = { 2, 1 };
+       ["i64.ge_u"]                    = { 2, 1 };
+       ["f32.eq"]                              = { 2, 1 };
+       ["f32.ne"]                              = { 2, 1 };
+       ["f32.lt"]                              = { 2, 1 };
+       ["f32.gt"]                              = { 2, 1 };
+       ["f32.le"]                              = { 2, 1 };
+       ["f32.ge"]                              = { 2, 1 };
+       ["f64.eq"]                              = { 2, 1 };
+       ["f64.ne"]                              = { 2, 1 };
+       ["f64.lt"]                              = { 2, 1 };
+       ["f64.gt"]                              = { 2, 1 };
+       ["f64.le"]                              = { 2, 1 };
+       ["f64.ge"]                              = { 2, 1 };
+       ["i32.clz"]                             = { 1, 1 };
+       ["i32.ctz"]                             = { 1, 1 };
+       ["i32.popcnt"]                  = { 1, 1 };
+       ["i32.add"]                             = { 2, 1 };
+       ["i32.sub"]                             = { 2, 1 };
+       ["i32.mul"]                             = { 2, 1 };
+       ["i32.div_s"]                   = { 2, 1 };
+       ["i32.div_u"]                   = { 2, 1 };
+       ["i32.rem_s"]                   = { 2, 1 };
+       ["i32.rem_u"]                   = { 2, 1 };
+       ["i32.and"]                             = { 2, 1 };
+       ["i32.or"]                              = { 2, 1 };
+       ["i32.xor"]                             = { 2, 1 };
+       ["i32.shl"]                             = { 2, 1 };
+       ["i32.shr_s"]                   = { 2, 1 };
+       ["i32.shr_u"]                   = { 2, 1 };
+       ["i32.rotl"]                    = { 2, 1 };
+       ["i32.rotr"]                    = { 2, 1 };
+       ["i64.clz"]                             = { 1, 1 };
+       ["i64.ctz"]                             = { 1, 1 };
+       ["i64.popcnt"]                  = { 1, 1 };
+       ["i64.add"]                             = { 2, 1 };
+       ["i64.sub"]                             = { 2, 1 };
+       ["i64.mul"]                             = { 2, 1 };
+       ["i64.div_s"]                   = { 2, 1 };
+       ["i64.div_u"]                   = { 2, 1 };
+       ["i64.rem_s"]                   = { 2, 1 };
+       ["i64.rem_u"]                   = { 2, 1 };
+       ["i64.and"]                             = { 2, 1 };
+       ["i64.or"]                              = { 2, 1 };
+       ["i64.xor"]                             = { 2, 1 };
+       ["i64.shl"]                             = { 2, 1 };
+       ["i64.shr_s"]                   = { 2, 1 };
+       ["i64.shr_u"]                   = { 2, 1 };
+       ["i64.rotl"]                    = { 2, 1 };
+       ["i64.rotr"]                    = { 2, 1 };
+       ["f32.abs"]                             = { 1, 1 };
+       ["f32.neg"]                             = { 1, 1 };
+       ["f32.ceil"]                    = { 1, 1 };
+       ["f32.floor"]                   = { 1, 1 };
+       ["f32.trunc"]                   = { 1, 1 };
+       ["f32.nearest"]                 = { 1, 1 };
+       ["f32.sqrt"]                    = { 1, 1 };
+       ["f32.add"]                             = { 2, 1 };
+       ["f32.sub"]                             = { 2, 1 };
+       ["f32.mul"]                             = { 2, 1 };
+       ["f32.div"]                             = { 2, 1 };
+       ["f32.min"]                             = { 2, 1 };
+       ["f32.max"]                             = { 2, 1 };
+       ["f32.copysign"]                = { 2, 1 };
+       ["f64.abs"]                             = { 1, 1 };
+       ["f64.neg"]                             = { 1, 1 };
+       ["f64.ceil"]                    = { 1, 1 };
+       ["f64.floor"]                   = { 1, 1 };
+       ["f64.trunc"]                   = { 1, 1 };
+       ["f64.nearest"]                 = { 1, 1 };
+       ["f64.sqrt"]                    = { 1, 1 };
+       ["f64.add"]                             = { 2, 1 };
+       ["f64.sub"]                             = { 2, 1 };
+       ["f64.mul"]                             = { 2, 1 };
+       ["f64.div"]                             = { 2, 1 };
+       ["f64.min"]                             = { 2, 1 };
+       ["f64.max"]                             = { 2, 1 };
+       ["f64.copysign"]                = { 2, 1 };
+
+       ["i32.wrap_i64"]                = { 1, 1 };
+       ["i32.trunc_f32_s"]             = { 1, 1 };
+       ["i32.trunc_f32_u"]             = { 1, 1 };
+       ["i32.trunc_f64_s"]             = { 1, 1 };
+       ["i32.trunc_f64_u"]             = { 1, 1 };
+       ["i64.extend_i32_s"]    = { 1, 1 };
+       ["i64.extend_i32_u"]    = { 1, 1 };
+       ["i64.trunc_f32_s"]             = { 1, 1 };
+       ["i64.trunc_f32_u"]             = { 1, 1 };
+       ["i64.trunc_f64_s"]             = { 1, 1 };
+       ["i64.trunc_f64_u"]             = { 1, 1 };
+       ["f32.convert_i32_s"]   = { 1, 1 };
+       ["f32.convert_i32_u"]   = { 1, 1 };
+       ["f32.convert_i64_s"]   = { 1, 1 };
+       ["f32.convert_i64_u"]   = { 1, 1 };
+       ["f32.demote_f64"]              = { 1, 1 };
+       ["f64.convert_i32_s"]   = { 1, 1 };
+       ["f64.convert_i32_u"]   = { 1, 1 };
+       ["f64.convert_i64_s"]   = { 1, 1 };
+       ["f64.convert_i64_u"]   = { 1, 1 };
+       ["f64.promote_f32"]             = { 1, 1 };
+       ["i32.reinterpret_f32"] = { 1, 1 };
+       ["i64.reinterpret_f64"] = { 1, 1 };
+       ["f32.reinterpret_i32"] = { 1, 1 };
+       ["f64.reinterpret_i64"] = { 1, 1 };
 }
 
 
+-- Turns a flat list of stack-machine instructions into lines of C-style
+-- pseudo code.
+--
+-- instrs:  sequence of instruction tables; instr[1] is the mnemonic.
+--          "block", "loop" and "if" entries are read for .label and .instrs,
+--          and "if" additionally for .else_instrs.
+-- returns: sequence of strings, one per output line.
+--
+-- Instructions listed in stack_opts with one output are kept as expression
+-- strings on a stack until something consumes them; those with no output
+-- are emitted as statements. Instruction immediates are not rendered yet.
+-- This works... ish but we should use proper symbols (+-*/) and other things
+function build_instr_list(instrs)
+       local lines = {}
+       local stack = Stack()
 
-return module {
+       -- Appends the lines generated for a nested body, indented one level.
+       local function emit_nested(body)
+               for _, line in ipairs(build_instr_list(body)) do
+                       table.insert(lines, "  " .. line)
+               end
+       end
+
+       -- Pops one operand; a placeholder stands in when nothing is available
+       -- (e.g. the value came from an instruction that is not handled yet).
+       -- NOTE(review): assumes Stack:pop returns nil on an empty stack - confirm
+       -- against src/utils.
+       local function pop_operand()
+               return stack:pop() or "<missing>"
+       end
+
+       for _, instr in ipairs(instrs) do
+               local instr_name = instr[1]
+               local opts = stack_opts[instr_name]
+
+               if opts then
+                       -- For now, everything will be built in a function calling syntax.
+                       -- Operands come off the stack last-pushed first, so the argument
+                       -- list is filled from the back to keep the order they were pushed in.
+                       local args = {}
+                       for i = opts[1], 1, -1 do
+                               args[i] = pop_operand()
+                       end
+
+                       local str = instr_name .. "(" .. table.concat(args, ", ") .. ")"
+
+                       if opts[2] == 1 then
+                               stack:push(str)
+                       else
+                               table.insert(lines, str)
+                       end
+               elseif instr_name == "block" or instr_name == "loop" then
+                       -- have a .instrs which is the instruction list
+                       table.insert(lines, instr_name .. " { [" .. instr.label .. "]")
+                       emit_nested(instr.instrs)
+                       table.insert(lines, "}")
+               elseif instr_name == "if" then
+                       -- the condition is whatever expression is on top of the stack
+                       local cond = pop_operand()
+                       table.insert(lines, "if (" .. cond .. " ) { [" .. instr.label .. "]")
+                       emit_nested(instr.instrs)
 
+                       if #instr.else_instrs > 0 then
+                               table.insert(lines, "} else {")
+                               emit_nested(instr.else_instrs)
+                       end
+
+                       table.insert(lines, "}")
+               elseif instr_name == "call" then
+                       -- TODO: call has a dynamic stack signature and is not handled yet
+               end
+       end
+
+       return lines
+end
+
+-- Builds the pseudo-code lines for a single function.
+-- wasm_func: function table; its .body is passed to build_instr_list.
+-- wasm_mod:  accepted but not used here.
+-- returns:   the result of build_instr_list for that body.
+function build_expr(wasm_func, wasm_mod)
+       return build_instr_list(wasm_func.body)
+end
+
+-- Exports: both builders are exposed under their own names.
+return module {
+       build_expr = build_expr;
+       build_instr_list = build_instr_list;
 }
index 935dc9caf3e6ea14eaa515216d8c19c26c05f4d7..033ef797eafe09e0ee4a356c8031d676d1972ed8 100644 (file)
@@ -3,11 +3,17 @@ require "lualib.oop"
 import {
        decompile = "src.wasm.decompile:";
        analyze = "src.wasm.analyze:";
-       build_str = "src.utils:build_str";
+       build_expr = "src.wasm.exprs:build_expr";
+
        pprint = "lualib.pprint";
 }
 
 local mod = decompile(arg[1])
-analyze(mod)
-pprint(mod)
+mod = analyze(mod)
 
+-- Print the generated pseudo code of every function that has a body.
+for _, func in ipairs(mod.funcs) do
+       if func.body then
+               local header = func.name .. " -----------------------------------"
+               print(header)
+               pprint(build_expr(func, mod))
+       end
+end