From dee781e75945d0cd66103b5fa3d6eba775dbc155 Mon Sep 17 00:00:00 2001 From: Brendan Hansen Date: Fri, 2 Jul 2021 14:30:25 -0500 Subject: [PATCH] wasm module parsing --- modules/wasm_utils/module.onyx | 8 +- modules/wasm_utils/parser.onyx | 195 +++++++++++++++++++++++++++++++-- modules/wasm_utils/types.onyx | 139 ++++++++++++++++++++++- 3 files changed, 327 insertions(+), 15 deletions(-) diff --git a/modules/wasm_utils/module.onyx b/modules/wasm_utils/module.onyx index 7d8a471c..e9111c91 100644 --- a/modules/wasm_utils/module.onyx +++ b/modules/wasm_utils/module.onyx @@ -8,7 +8,9 @@ package wasm_utils #load "./types" #load "./utils" +#load "./parser" -#private map :: package core.map -#private io :: package core.io -#private hash :: package core.hash +#private map :: package core.map +#private io :: package core.io +#private hash :: package core.hash +#private memory :: package core.memory diff --git a/modules/wasm_utils/parser.onyx b/modules/wasm_utils/parser.onyx index 006b4156..a9b2d128 100644 --- a/modules/wasm_utils/parser.onyx +++ b/modules/wasm_utils/parser.onyx @@ -1,14 +1,7 @@ // // Sections that still need to be parse-able -// - Function -// - Table -// - Memory -// - Global -// - Start -// - Element -// - Code -// - Data // - DataCount +// package wasm_utils @@ -126,6 +119,160 @@ parse_start_section :: (use bin: ^WasmBinary, allocator := context.allocator) -> return ~~read_uleb128(^reader); } +parse_memory_section :: (use bin: ^WasmBinary, allocator := context.allocator) -> [] WasmMemory { + if !map.has(^sections, .Memory) do return .{ null, 0 }; + + wasm_allocator = allocator; + + @Cleanup @WasmStream // These are going to be needed in many places + stream := io.string_stream_make(data); + reader := io.reader_make(^stream); + + io.stream_seek(^stream, map.get(^sections, .Memory).offset, .Start); + + return parse_vector(^reader, bin, read_memory); + + read_memory :: (reader: ^io.Reader, bin: ^WasmBinary) -> WasmMemory { + limits := parse_limits(reader, 
bin); + return .{ limits }; + } +} + +parse_table_section :: (use bin: ^WasmBinary, allocator := context.allocator) -> [] WasmTable { + if !map.has(^sections, .Table) do return .{ null, 0 }; + + wasm_allocator = allocator; + + @Cleanup @WasmStream // These are going to be needed in many places + stream := io.string_stream_make(data); + reader := io.reader_make(^stream); + + io.stream_seek(^stream, map.get(^sections, .Table).offset, .Start); + + return parse_vector(^reader, bin, read_table); + + read_table :: (reader: ^io.Reader, bin: ^WasmBinary) -> WasmTable { + type := cast(WasmTableType) io.read_byte(reader); + limits := parse_limits(reader, bin); + return .{ type, limits }; + } +} + +parse_global_section :: (use bin: ^WasmBinary, allocator := context.allocator) -> [] WasmGlobal { + if !map.has(^sections, .Global) do return .{ null, 0 }; + + wasm_allocator = allocator; + + @Cleanup @WasmStream // These are going to be needed in many places + stream := io.string_stream_make(data); + reader := io.reader_make(^stream); + + io.stream_seek(^stream, map.get(^sections, .Global).offset, .Start); + + return parse_vector(^reader, bin, read_global); + + read_global :: (reader: ^io.Reader, bin: ^WasmBinary) -> WasmGlobal { + type := read_val_type(reader, bin); + mutable := io.read_byte(reader) == 1; + return .{ type, mutable }; + } +} + +parse_element_section :: (use bin: ^WasmBinary, allocator := context.allocator) -> [] WasmElement { + if !map.has(^sections, .Element) do return .{ null, 0 }; + + wasm_allocator = allocator; + + @Cleanup @WasmStream // These are going to be needed in many places + stream := io.string_stream_make(data); + reader := io.reader_make(^stream); + + io.stream_seek(^stream, map.get(^sections, .Element).offset, .Start); + + return parse_vector(^reader, bin, read_element); + + read_element :: (reader: ^io.Reader, bin: ^WasmBinary) -> WasmElement { + table_index := read_uleb128(reader); + offset := parse_const_uint32(reader, bin); + funcs := 
parse_vector(reader, bin, read_function_index); + return .{ ~~table_index, offset, funcs }; + } + + read_function_index :: (reader: ^io.Reader, bin: ^WasmBinary) -> u32 { + return ~~read_uleb128(reader); + } +} + +parse_data_section :: (use bin: ^WasmBinary, allocator := context.allocator) -> [] WasmData { + if !map.has(^sections, .Data) do return .{ null, 0 }; + + wasm_allocator = allocator; + + @Cleanup @WasmStream // These are going to be needed in many places + stream := io.string_stream_make(data); + reader := io.reader_make(^stream); + + io.stream_seek(^stream, map.get(^sections, .Data).offset, .Start); + + return parse_vector(^reader, bin, read_data); + + read_data :: (reader: ^io.Reader, bin: ^WasmBinary) -> WasmData { + memory_index := read_uleb128(reader); + offset := parse_const_uint32(reader, bin); + + size := read_uleb128(reader); + _, pos := io.stream_tell(reader.stream); + data := bin.data.data[pos .. (pos + ~~size)]; + io.stream_seek(reader.stream, ~~size, .Current); + + return .{ ~~memory_index, offset, data }; + } +} + +parse_code_section :: (use bin: ^WasmBinary, allocator := context.allocator) -> [] WasmCode { + if !map.has(^sections, .Code) do return .{ null, 0 }; + + wasm_allocator = allocator; + + @Cleanup @WasmStream // These are going to be needed in many places + stream := io.string_stream_make(data); + reader := io.reader_make(^stream); + + io.stream_seek(^stream, map.get(^sections, .Code).offset, .Start); + + return parse_vector(^reader, bin, read_code); + + read_code :: (reader: ^io.Reader, bin: ^WasmBinary) -> WasmCode { + size := cast(u32) read_uleb128(reader); + _, before_locals := io.stream_tell(reader.stream); + + array :: package core.array + + locals := array.make(WasmLocal); + defer array.free(^locals); + local_block_count := cast(u32) read_uleb128(reader); + + local_index := 0; + for _: local_block_count { + locals_count := cast(u32) read_uleb128(reader); + local_type := read_val_type(reader, bin); + + for _: locals_count { + 
array.push(^locals, .{ local_type, local_index }); + local_index += 1; + } + } + + _, pos := io.stream_tell(reader.stream); + io.stream_seek(reader.stream, before_locals + size, .Start); + + locals_slice := array.copy_range(^locals, 0 .. local_index, allocator=wasm_allocator) + |> array.to_slice(); + + return .{ size, locals_slice, pos }; + } +} + #private parse_vector :: (reader: ^io.Reader, bin: ^WasmBinary, read: (^io.Reader, ^WasmBinary) -> $T) -> [] T { @@ -149,6 +296,29 @@ parse_name :: (reader: ^io.Reader, bin: ^WasmBinary) -> [] u8 { } } +#private +parse_limits :: (reader: ^io.Reader, bin: ^WasmBinary) -> WasmLimits { + byte := io.read_byte(reader); + + minimum, maximum : u32; + + switch byte { + case 0 { + minimum =~~ read_uleb128(reader); + maximum = 0; + } + + case 1 { + minimum =~~ read_uleb128(reader); + maximum =~~ read_uleb128(reader); + } + + case #default do assert(false, "Bad limit header"); + } + + return .{ minimum=minimum, maximum=maximum }; +} + #private read_val_type :: (reader: ^io.Reader, binary: ^WasmBinary) -> WasmValueType { byte := io.read_byte(reader); @@ -163,3 +333,12 @@ read_val_type :: (reader: ^io.Reader, binary: ^WasmBinary) -> WasmValueType { return ~~0; } + +#private +parse_const_uint32 :: (reader: ^io.Reader, binary: ^WasmBinary) -> u32 { + assert(io.read_byte(reader) == 65, "Expected integer constant"); + value := read_uleb128(reader); + assert(io.read_byte(reader) == 11, "Expected end for integer constant"); + + return ~~value; +} diff --git a/modules/wasm_utils/types.onyx b/modules/wasm_utils/types.onyx index e4b5f748..2577d122 100644 --- a/modules/wasm_utils/types.onyx +++ b/modules/wasm_utils/types.onyx @@ -23,23 +23,131 @@ WasmSection :: enum { WasmBinary :: struct { data: [] u8; - // Section number -> Offset into data + // Section number -> Offset+size of data // This does not work for custom sections, as they all have the same section number - section_locations : map.Map(WasmSection, i32); + sections: 
map.Map(WasmSection, WasmSectionData); + // Custom section name -> Offset into data // So there is a custom section location that maps the name of the custom section // to the offset into the file. The backing-store for the keys is just the data // itself, as the names are in the data for the binary. custom_section_locations : map.Map(str, i32); } +WasmSectionData :: struct { + offset: u32 = 0; + size: u32 = 0; +} + +WasmValueType :: enum (u8) { + I32; I64; + F32; F64; + V128; +} + +WasmFuncType :: struct { + params : [] WasmValueType; + results : [] WasmValueType; // This will probably have only 1 value most of the time, but it is a good idea to support the multi-return proposal + + reference : [] u8; // Where in the data is this function type +} + +WasmForeignType :: enum { + Function; + Table; + Memory; + Global; +} + +WasmImport :: struct { + module_name : str; + import_name : str; + + type : WasmForeignType; + index : u32; +} + +WasmExport :: struct { + name : str; + type : WasmForeignType; + index : u32; +} + +WasmFunction :: struct { + type_index : u32; +} + +WasmLimits :: struct { + minimum, maximum: u32; +} + +WasmMemory :: struct { + use limits: WasmLimits; +} + +WasmTableType :: enum { + AnyFunc :: 0x70; +} + +WasmTable :: struct { + element_type: WasmTableType; + + use limits: WasmLimits; +} + +WasmGlobal :: struct { + type : WasmValueType; + mutable : bool; +} + +WasmElement :: struct { + table_index : u32; + offset : u32; + funcs : [] u32; +} + +WasmData :: struct { + memory_index : u32; + offset : u32; + + data : [] u8; +} + +WasmCode :: struct { + size : u32; + locals : [] WasmLocal; + code_offset : u32; +} + +WasmLocal :: struct { + type : WasmValueType; + index : u32; // This realistically could be a u16, which would have better packing, but dealing with casts from u16 to u32 is annoying. +} + +@Relocate // This should maybe be in the WasmBinary structure? I'll need to see how I want to use this library. 
+WasmSections :: struct { + allocator: Allocator; + + type_section : [] WasmFuncType; + import_section : [] WasmImport; + export_section : [] WasmExport; + function_section : [] WasmFunction; + start_section : i32; // Index of function to start + memory_section : [] WasmMemory; + table_section : [] WasmTable; + global_section : [] WasmGlobal; + element_section : [] WasmElement; + code_section : [] WasmCode; + data_section : [] WasmData; +} + load :: (data: [] u8, allocator := context.allocator) -> WasmBinary { binary: WasmBinary; binary.data = data; #context_scope { context.allocator = allocator; - map.init(^binary.section_locations, -1); + map.init(^binary.sections, .{}); map.init(^binary.custom_section_locations, -1); } @@ -48,6 +156,25 @@ load :: (data: [] u8, allocator := context.allocator) -> WasmBinary { return binary; } +parse_sections :: (use bin: ^WasmBinary, allocator := context.allocator) -> WasmSections { + ws: WasmSections; + ws.allocator = allocator; + + ws.type_section = parse_type_section(bin, allocator); + ws.import_section = parse_import_section(bin, allocator); + ws.export_section = parse_export_section(bin, allocator); + ws.function_section = parse_function_section(bin, allocator); + ws.start_section = parse_start_section(bin, allocator); + ws.memory_section = parse_memory_section(bin, allocator); + ws.table_section = parse_table_section(bin, allocator); + ws.global_section = parse_global_section(bin, allocator); + ws.element_section = parse_element_section(bin, allocator); + ws.code_section = parse_code_section(bin, allocator); + ws.data_section = parse_data_section(bin, allocator); + + return ws; +} + #private_file parse_section_locations :: (use bin: ^WasmBinary) -> bool { @@ -58,6 +185,7 @@ parse_section_locations :: (use bin: ^WasmBinary) -> bool { // Checking the magic string magic_buffer: [4] u8; + @Bug // If these are string literals, then the null byte messes up the compiler and it thinks it's a 0-character string. 
if !(io.read_bytes(^reader, cast([] u8) magic_buffer) == ~~u8.[ 0, #char "a", #char "s", #char "m" ]) do return false; if !(io.read_bytes(^reader, cast([] u8) magic_buffer) == ~~u8.[ 1, 0, 0, 0 ]) do return false; // This may not be necessary } @@ -73,7 +201,10 @@ parse_section_locations :: (use bin: ^WasmBinary) -> bool { case .Type, .Import, .Function, .Table, .Memory, .Global, .Export, .Start, .Element, .Code, .Data, .DataCount { - map.put(^section_locations, section_number, pos); + map.put(^sections, section_number, .{ + offset = pos, + size = ~~section_size, + }); } case #default { -- 2.25.1