From: Brendan Hansen Date: Fri, 7 Apr 2023 16:46:44 +0000 (-0500) Subject: added: `core.encoding.json` as a stale code package X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=5c55029889c45a7314f64ef0eb765adb7a7595db;p=onyx.git added: `core.encoding.json` as a stale code package --- diff --git a/core/encoding/json/decoder.onyx b/core/encoding/json/decoder.onyx new file mode 100644 index 00000000..5f617e2e --- /dev/null +++ b/core/encoding/json/decoder.onyx @@ -0,0 +1,63 @@ +package core.encoding.json +#allow_stale_code + +use core {*} + +decode :: (data: str, allocator := context.allocator, print_errors := true) -> Json { + json: Json; + json.allocator = allocator; + json.root = cast(Value) null; + + root, err := parse(data, allocator); + if err.kind != .None && print_errors { + switch err.kind { + case .EOF do printf("Reached EOF\n"); + case .Illegal_Character do printf("Illegal Character: {}\n", err.pos); + case .String_Unterminated do printf("Unterminated String: {}\n", err.pos); + case .Unexpected_Token do printf("Unexpected Token: {}\n", err.pos); + } + + return json; + } + + json.root = root; + return json; +} + +#local +_Decode_Error :: struct { + errmsg: str; + pos: Position; +} + +Decode_Error :: #distinct ^_Decode_Error; + +#inject Decode_Error { + has_error :: (this: Decode_Error) => cast(^_Decode_Error) this != null; + message :: (this: Decode_Error) => (cast(^_Decode_Error) this).errmsg; + position :: (this: Decode_Error) => (cast(^_Decode_Error) this).pos; +} + +decode_with_error :: (data: str, allocator := context.allocator) -> (Json, Decode_Error) { + json: Json; + json.allocator = allocator; + json.root = null_value(); + + root, err := parse(data, allocator); + + if err.kind != .None { + decode_error := new_temp(_Decode_Error); + decode_error.pos = err.pos; + switch err.kind { + case .EOF do decode_error.errmsg = "Reached EOF"; + case .Illegal_Character do decode_error.errmsg = "Illegal Character"; + case .String_Unterminated do 
decode_error.errmsg = "Unterminated String"; + case .Unexpected_Token do decode_error.errmsg = "Unexpected Token"; + } + + return json, Decode_Error.{decode_error}; + } + + json.root = root; + return json, Decode_Error.{null}; +} diff --git a/core/encoding/json/encoder.onyx b/core/encoding/json/encoder.onyx new file mode 100644 index 00000000..c2d981a0 --- /dev/null +++ b/core/encoding/json/encoder.onyx @@ -0,0 +1,451 @@ +package core.encoding.json +#allow_stale_code + +use core {package, *} +use runtime +use runtime.info {*} +use core.encoding.json + +Encoding_Error :: enum { + None; + Unsupported_Type; +} + +encode_string :: (v: $T, allocator := context.allocator) -> (str, Encoding_Error) { + stream := io.buffer_stream_make(256); + writer := io.writer_make(^stream, 0); + defer io.buffer_stream_free(^stream); + + err := encode(^writer, v); + if err != .None { + return "", err; + } + + s := string.alloc_copy(io.buffer_stream_to_str(^stream), allocator); + return s, .None; +} + +// +// This could be changed to use the "any" type now, which would allow for any type to be +// represented as a json value. However, this eliminates the control that you get from +// this way. +// +encode :: #match { + (w: ^io.Writer, v: i32) -> Encoding_Error { + io.write_i32(w, ~~v); + return .None; + }, + + (w: ^io.Writer, v: i64) -> Encoding_Error { + io.write_i64(w, ~~v); + return .None; + }, + + (w: ^io.Writer, v: f32) -> Encoding_Error { + io.write_f32(w, ~~v); + return .None; + }, + + (w: ^io.Writer, v: f64) -> Encoding_Error { + io.write_f64(w, ~~v); + return .None; + }, + + (w: ^io.Writer, v: str) -> Encoding_Error { + io.write_escaped_str(w, v); + return .None; + }, + + (w: ^io.Writer, v: [] $T) -> Encoding_Error { + io.write_byte(w, #char "["); + + for i: v.count { + if i > 0 do io.write_byte(w, #char ","); + + err := encode(w, v[i]); + if err != .None do return err; + } + + io.write_byte(w, #char "]"); + return .None; + }, + + (w: ^io.Writer, v: [..] 
$T) -> Encoding_Error { + io.write_byte(w, #char "["); + + for i: v.count { + if i > 0 do io.write_byte(w, #char ","); + + err := encode(w, v[i]); + if err != .None do return err; + } + + io.write_byte(w, #char "]"); + return .None; + }, + + (w: ^io.Writer, v: Map(str, $T)) -> Encoding_Error { + io.write_byte(w, #char "{"); + + for i: v.entries.count { + if i > 0 do io.write_byte(w, #char ","); + entry := ^v.entries[i]; + + io.write_escaped_str(w, entry.key); + io.write_byte(w, #char ":"); + + err := encode(w, entry.value); + if err != .None do return err; + } + + io.write_byte(w, #char "}"); + return .None; + }, + + (w: ^io.Writer, v: Value) -> Encoding_Error { + switch (cast(^_Value) v).type { + case .Null do io.write_str(w, "null"); + case .Bool do io.write(w, v->as_bool()); + case .Integer do io.write(w, v->as_int()); + case .Float do io.write(w, v->as_float()); + case .String do encode(w, v->as_str()); + case .Array do encode(w, v->as_array()); + + case .Object { + io.write_byte(w, #char "{"); + obj := cast(^_Value_Object) cast(^_Value) v; + + for i: obj.object_.count { + if i > 0 do io.write_byte(w, #char ","); + + io.write_escaped_str(w, obj.object_[i].key); + io.write_byte(w, #char ":"); + + err := encode(w, obj.object_[i].value); + if err != .None do return err; + } + + io.write_byte(w, #char "}"); + } + + case #default { + return .Unsupported_Type; + } + } + + return .None; + }, + + // This is disabled because I would prefer to have a compile time error for an unsupported type, + // as opposed to a error to check programatically. 
+ // + // // Inserted after any of the #match directives + // #order 1000 (w: ^io.Writer, v: $T) -> Encoding_Error { + // return .Unsupported_Type; + // } +} + + +#overload #order 10000 +encode :: (w: ^io.Writer, data: any) -> Encoding_Error { + use runtime.info {*} + + info := get_type_info(data.type); + + switch info.kind { + case .Basic { + io.write_format_va(w, "{}", .[data]); + } + + case .Array { + io.write(w, "["); + + a := cast(^Type_Info_Array) info; + arr := data.data; + + for i: a.count { + if i != 0 do io.write(w, ","); + + encode(w, any.{ ~~(cast([^] u8) arr + get_type_info(a.of).size * i), a.of }); + } + + io.write(w, "]"); + } + + case .Slice, .Dynamic_Array { + if data.type == str { + io.write_format_va(w, "{\"}", .[data]); + break; + } + + io.write(w, "["); + + a := cast(^Type_Info_Dynamic_Array) info; + arr := cast(^core.array.Untyped_Array) data.data; + data := arr.data; + count := arr.count; + + for i: count { + if i != 0 do io.write(w, ","); + + encode(w, any.{ ~~(cast([^] u8) data + get_type_info(a.of).size * i), a.of }); + } + + io.write(w, "]"); + } + + case .Distinct { + if data.type == Value { + encode(w, *cast(^Value) data.data); + return .None; + } + + d := cast(^Type_Info_Distinct) info; + encode(w, any.{ data.data, d.base_type }); + } + + case .Struct { + s := cast(^runtime.info.Type_Info_Struct) info; + + io.write(w, "{"); + + for ^member: s.members { + key := member.name; + if tag := array.first(member.tags, #(it.type == Custom_Key)); tag != null { + key = (cast(^Custom_Key) tag.data).key; + } + + if tag := array.first(member.tags, #(it.type == type_expr)); tag != null { + if *cast(^type_expr, tag.data) == Ignore { + continue; + } + } + + if !#first do io.write(w, ","); + encode(w, key); + + io.write(w, ":"); + encode(w, any.{ ~~(cast([^] u8) data.data + member.offset), member.type }); + } + io.write(w, "}"); + } + + case #default { + return .Unsupported_Type; + } + } +} + + + +// +// Use this to change the name of the key in +// a 
JSON object to use for populating a structure member. +// +// Foo :: struct { +// @json.Custom_Key.{"other_key"} +// x: u32; +// } +Custom_Key :: struct {key: str;} + + +// +// Use this to ignore a field when parsing/formatting +// a JSON object. +// +// Foo :: struct { +// @json.Ignore +// x: u32; +// } +Ignore :: #distinct void + + +from_any :: #match #local {} + +#overload +from_any :: macro (in: ^$T, allocator := context.allocator) -> Value { + from_any :: from_any + return from_any(T, in, allocator); +} + +#overload +from_any :: (type: type_expr, in: rawptr, allocator := context.allocator) -> Value { + use runtime.info; + + t_info := get_type_info(type); + switch t_info.kind { + case .Basic do switch type { + // These functions handle the cases where the types do + // not match, so no additional checks are needed here. + case bool { v := new(_Value_Bool, allocator); v.bool_ = *cast(^bool) in; return Value.{v}; } + case i32, u32 { v := new(_Value_Integer, allocator); v.int_ = ~~ *cast(^i32) in; return Value.{v}; } + case i64, u64 { v := new(_Value_Integer, allocator); v.int_ = *cast(^i64) in; return Value.{v}; } + case f32 { v := new(_Value_Float, allocator); v.float_ = ~~ *cast(^f32) in; return Value.{v}; } + case f64 { v := new(_Value_Float, allocator); v.float_ = *cast(^f64) in; return Value.{v}; } + } + + case .Array { + a_info := cast(^Type_Info_Array) t_info; + + v := new(_Value_Array, allocator); + array.init(^v.array_, a_info.count, allocator); + + for i: a_info.count { + v.array_ << from_any(a_info.of, cast([^] u8) in + size_of(a_info.of) * i); + } + + return Value.{v}; + } + + case .Slice, .Dynamic_Array { + // Strings are handled differently + if type == str { + v := new(_Value_String, allocator); + v.str_ = string.alloc_copy(*cast(^str) in, allocator); + return Value.{v}; + } + + s_info := cast(^Type_Info_Slice) t_info; + s := cast(^core.array.Untyped_Array) in; + + v := new(_Value_Array, allocator); + array.init(^v.array_, s.count, allocator); + + 
for i: s.count { + v.array_ << from_any(s_info.of, cast([^] u8) s.data + size_of(s_info.of) * i); + } + + return Value.{v}; + } + + case .Struct { + s_info := cast(^Type_Info_Struct) t_info; + + v := new(_Value_Object, allocator); + array.init(^v.object_, s_info.members.count, allocator); + + for^ member: s_info.members { + key := member.name; + if tag := array.first(member.tags, #(it.type == Custom_Key)); tag != null { + key = (cast(^Custom_Key) tag.data).key; + } + + if tag := array.first(member.tags, #(it.type == type_expr)); tag != null { + if *cast(^type_expr, tag.data) == Ignore { + continue; + } + } + + json.set(Value.{v}, key, from_any(member.type, cast([^] u8) in + member.offset), dont_copy_key=true); + } + + return Value.{v}; + } + + case .Distinct { + if type == Value { + return *cast(^Value) in; + } + + d_info := cast(^Type_Info_Distinct) t_info; + return from_any(d_info.base_type, in); + } + } + + return null_value(); +} + + + +to_any :: as_any + +as_any :: #match #local {} + +#overload +as_any :: macro (value: Value, out: ^$T) { + #this_package.to_any(value, T, out); +} + +#overload +as_any :: (value: Value, type: type_expr, out: rawptr) { + use runtime.info; + + t_info := get_type_info(type); + switch t_info.kind { + case .Basic do switch type { + // These functions handle the cases where the types do + // not match, so no additional checks are needed here. 
+ case bool do *cast(^bool) out = value->as_bool(); + case i32, u32 do *cast(^i32) out = ~~(value->as_int()); + case i64, u64 do *cast(^i64) out = value->as_int(); + case f32 do *cast(^f32) out = ~~(value->as_float()); + case f64 do *cast(^f64) out = value->as_float(); + } + + case .Array { + a_info := cast(^Type_Info_Array) t_info; + + for i: a_info.count { + to_any(value[i], a_info.of, cast([^] u8) out + size_of(a_info.of) * i); + } + } + + case .Slice { + // Strings are handled differently + if type == str { + *cast(^str) out = string.alloc_copy(value->as_str()); + return; + } + + s_info := cast(^Type_Info_Slice) t_info; + s := cast(^core.array.Untyped_Array) out; + + if s.count == 0 { + if s.data != null { + return; + } + + to_copy := value->as_array(); + + size := size_of(s_info.of) * to_copy.count; + s.data = raw_alloc(context.allocator, size); + memory.set(s.data, 0, size); + + s.count = to_copy.count; + } + + for i: s.count { + to_any(value[i], s_info.of, cast([^] u8) s.data + size_of(s_info.of) * i); + } + } + + case .Struct { + s_info := cast(^Type_Info_Struct) t_info; + + for^ member: s_info.members { + key := member.name; + if tag := array.first(member.tags, #(it.type == Custom_Key)); tag != null { + key = (cast(^Custom_Key) tag.data).key; + } + + if tag := array.first(member.tags, #(it.type == type_expr)); tag != null { + if *cast(^type_expr, tag.data) == Ignore { + continue; + } + } + + to_any(value[key], member.type, cast([^] u8) out + member.offset); + } + } + + case .Distinct { + if type == Value { + *cast(^Value) out = value; + return; + } + + d_info := cast(^Type_Info_Distinct) t_info; + to_any(value, d_info.base_type, out); + } + } +} diff --git a/core/encoding/json/parser.onyx b/core/encoding/json/parser.onyx new file mode 100644 index 00000000..5ebb2f01 --- /dev/null +++ b/core/encoding/json/parser.onyx @@ -0,0 +1,429 @@ +package core.encoding.json +#allow_stale_code + +use core {*} + +#package +Parser :: struct { + tokenizer : Tokenizer; + 
allocator : Allocator; + + current_token : Token; + previous_token : Token; +} + +#package +make_parser :: (data: [] u8, allocator := context.allocator) -> Parser { + parser: Parser; + parser.tokenizer = Tokenizer.{ data = data }; + parser.allocator = allocator; + consume_token(^parser); + return parser; +} + +#package +parse :: (data: [] u8, allocator := context.allocator) -> (Value, Error) { + parser := make_parser(data, allocator); + return parse_value(^parser); +} + +#local +consume_token :: (use parser: ^Parser) -> (Token, Error) { + error: Error; + previous_token = current_token; + current_token, error = token_get(^tokenizer); + return previous_token, error; +} + +#local +consume_token_if_next :: (use parser: ^Parser, kind: Token.Kind) -> bool { + if current_token.kind == kind { + consume_token(parser); + return true; + } + + return false; +} + +#local +expect_token :: (use parser: ^Parser, kind: Token.Kind) -> (Token, Error) { + previous := current_token; + consume_token(parser); + error := Error.{ .None, previous.position }; + if previous.kind != kind do error.kind = .Unexpected_Token; + return previous, error; +} + +#package +parse_value :: (use parser: ^Parser) -> (Value, Error) { + return_value: ^_Value = null; + + current := current_token; + switch current.kind { + case .Null { + value := new(_Value, allocator); + + consume_token(parser); + return_value = value; + } + + case .False, .True { + value := new(_Value_Bool, allocator); + value.bool_ = current.kind == .True; + + consume_token(parser); + return_value = value; + } + + case .Integer { + value := new(_Value_Integer, allocator); + value.int_ = conv.str_to_i64(current.text); + + consume_token(parser); + return_value = value; + } + + case .Float { + value := new(_Value_Float, allocator); + value.float_ = conv.str_to_f64(current.text); + + consume_token(parser); + return_value = value; + } + + case .String { + value := new(_Value_String, allocator); + value.str_ = unescape_string(current, allocator); 
+ + consume_token(parser); + return_value = value; + } + + case .Open_Bracket { + value, err := parse_array(parser); + if err.kind != .None do return value, err; + + return_value = cast(^_Value) value; + } + + case .Open_Brace { + value, err := parse_object(parser); + if err.kind != .None do return value, err; + + return_value = cast(^_Value) value; + } + + case #default { + consume_token(parser); + return Value.{return_value}, .{ .Unexpected_Token, current.position }; + } + } + + return Value.{return_value}, .{ .None }; +} + +#local +parse_array :: (use parser: ^Parser) -> (Value, Error) { + value := new(_Value_Array, allocator); + + _, err := expect_token(parser, .Open_Bracket); + if err.kind != .None do return Value.{value}, err; + + // This uses the context allocators because the array resizing needs to happen in a general purpose heap allocator + arr := array.make(Value, allocator=context.allocator); + defer if err.kind != .None { + for elem: arr { + free(elem, allocator); + } + + array.free(^arr); + } + + while current_token.kind != .Close_Bracket { + elem, elem_err := parse_value(parser); + if elem_err.kind != .None { + err = elem_err; + return Value.{value}, err; + } + + array.push(^arr, elem); + + if !consume_token_if_next(parser, .Comma) { + break; + } + } + + _, close_err := expect_token(parser, .Close_Bracket); + if close_err.kind != .None { + err = close_err; + return Value.{value}, err; + } + + value.array_ = arr; + return Value.{value}, err; +} + + +#local +parse_object :: (use parser: ^Parser) -> (Value, Error) { + value := new(_Value_Object, allocator); + + _, err := expect_token(parser, .Open_Brace); + if err.kind != .None do return Value.{value}, err; + + // This uses the context allocators because the array resizing needs to happen in a general purpose heap allocator + array.init(^value.object_, allocator=context.allocator); + defer if err.kind != .None { + free(Value.{value}, allocator); + } + + while current_token.kind != .Close_Brace { + 
key_token, key_err := expect_token(parser, .String); + if key_err.kind != .None { + err = key_err; + return Value.{value}, err; + } + + key := unescape_string(key_token, allocator); + + _, colon_err := expect_token(parser, .Colon); + if colon_err.kind != .None { + err = colon_err; + return Value.{value}, err; + } + + elem, elem_err := parse_value(parser); + if elem_err.kind != .None { + err = elem_err; + return Value.{value}, err; + } + + // Checking for duplicate keys. I have it disabled for the moment. + #if false { + for elem: value.object_ { + if elem.key == key { + err.kind = .Duplicate_Keys; + err.pos = key_token.pos; + string.free(key, allocator); + return Value.{value}, err; + } + } + } + + array.push(^value.object_, .{ + key = key, + value = elem + }); + + if !consume_token_if_next(parser, .Comma) { + break; + } + } + + _, close_err := expect_token(parser, .Close_Brace); + if close_err.kind != .None { + err = close_err; + return Value.{value}, err; + } + + return Value.{value}, err; +} + + +#local +unescape_string :: (token: Token, allocator: Allocator) -> str { + if token.kind != .String do return ""; + + s := token.text; + if s.count <= 2 do return ""; + + s = s.data[1 .. s.count - 1]; + + i := 0; + for c: s { + if c == #char "\\" || c == #char "\"" || c < #char " " { + break; + } + + i += 1; + } + + if i == s.count { + return string.alloc_copy(s, allocator); + } + + buffer := memory.make_slice(u8, s.count, allocator=allocator); + string.copy(s.data[0 .. 
i], buffer); + buffer_write := i; + + while i < s.count { + c := s[i]; + + switch c { + case #char "\\" { + i += 1; + if i >= s.count do break break; + + switch s[i] { + case #char "\"", #char "\\", #char "/" { + buffer[buffer_write] = s[i]; + i += 1; + buffer_write += 1; + } + + case #char "n" { + buffer[buffer_write] = #char "\n"; + i += 1; + buffer_write += 1; + } + + case #char "t" { + buffer[buffer_write] = #char "\t"; + i += 1; + buffer_write += 1; + } + + case #char "r" { + buffer[buffer_write] = #char "\r"; + i += 1; + buffer_write += 1; + } + + case #char "b" { + buffer[buffer_write] = #char "\b"; + i += 1; + buffer_write += 1; + } + + case #char "f" { + buffer[buffer_write] = #char "\f"; + i += 1; + buffer_write += 1; + } + + case #char "v" { + buffer[buffer_write] = #char "\v"; + i += 1; + buffer_write += 1; + } + + case #char "u" { + i += 1; + wrote, consumed := parse_and_write_utf8_character(s[i..s.length], ~~&buffer[buffer_write]); + buffer_write += wrote; + i += consumed; + } + } + } + + case #default { + buffer[buffer_write] = c; + i += 1; + buffer_write += 1; + } + } + } + + buffer.count = buffer_write; + return buffer; +} + +#local +parse_and_write_utf8_character :: (s: str, out: [&] u8) -> (i32, i32) { + if s.length < 4 do return 0, 0; + + chars := 0; + codepoint: u32 = 0; + + for c: s[0 .. 
4] { + codepoint = codepoint << 4; + codepoint |= digit_to_hex(c); + } + + if codepoint <= 0x7F { + out[0] = ~~ codepoint; + return 1, 4; + } + + if codepoint <= 0x7FF { + out[0] = ~~(0xC0 | ((codepoint >> 6) & 0x1F)); + out[1] = ~~(0x80 | (codepoint & 0x3F)); + return 2, 4; + } + + if codepoint <= 0xFFFF && (codepoint < 0xD800 || codepoint > 0xDFFF) { + out[0] = ~~(0xE0 | ((codepoint >> 12) & 0x0F)); + out[1] = ~~(0x80 | ((codepoint >> 6) & 0x3F)); + out[2] = ~~(0x80 | (codepoint & 0x3F)); + return 3, 4; + } + + if codepoint < 0xD800 || codepoint > 0xDFFF { + out[0] = ~~(0xF0 | ((codepoint >> 18) & 0x07)); + out[1] = ~~(0x80 | ((codepoint >> 12) & 0x3F)); + out[2] = ~~(0x80 | ((codepoint >> 6) & 0x3F)); + out[3] = ~~(0x80 | (codepoint & 0x3F)); + return 4, 4; + } + + // + // If the code point is between D800 and DFFF, then it + // lies in the "surrogate zone" of the UTF spec, where + // there are no valid codepoints, and the entire space + // is reserved for surrogate pairs in UTF-16 space. + // + // When that is the case, we need to parse another 6 bytes, + // for the \uXXXX, and use that to build a second codepoint + // for the other surrogate pair. Then we stitch them + // together using this formula: + // + // actual = (first - 0xD800) << 10 + (second - 0xDC00) + 0x10000; + // + if s.length < 10 do return 0, 0; + + second_codepoint: u32 = 0; + if s[4 .. 6] != "\\u" do return 0, 0; + + for c: s[6 .. 
10] { + second_codepoint = second_codepoint << 4; + second_codepoint |= digit_to_hex(c); + } + + codepoint -= 0xD800; + second_codepoint -= 0xDC00; + + codepoint = (codepoint << 10) + second_codepoint + 0x10000; + + if codepoint <= 0x10FFFF { + out[0] = ~~(0xF0 | ((codepoint >> 18) & 0x07)); + out[1] = ~~(0x80 | ((codepoint >> 12) & 0x3F)); + out[2] = ~~(0x80 | ((codepoint >> 6) & 0x3F)); + out[3] = ~~(0x80 | (codepoint & 0x3F)); + return 4, 10; + } + + return 0, 0; + + digit_to_hex :: (c: u8) -> i32 { + switch c { + case #char "0" do return 0; + case #char "1" do return 1; + case #char "2" do return 2; + case #char "3" do return 3; + case #char "4" do return 4; + case #char "5" do return 5; + case #char "6" do return 6; + case #char "7" do return 7; + case #char "8" do return 8; + case #char "9" do return 9; + case #char "A", #char "a" do return 10; + case #char "B", #char "b" do return 11; + case #char "C", #char "c" do return 12; + case #char "D", #char "d" do return 13; + case #char "E", #char "e" do return 14; + case #char "F", #char "f" do return 15; + } + + return 0; + } +} diff --git a/core/encoding/json/tokenizer.onyx b/core/encoding/json/tokenizer.onyx new file mode 100644 index 00000000..10d00554 --- /dev/null +++ b/core/encoding/json/tokenizer.onyx @@ -0,0 +1,221 @@ +// Everything in this file is marked #package because I do not think +// that this code will be needed outside of this module. I do not see +// the value of having access to the tokenizer and parser of JSON directly. 
+ + +package core.encoding.json +#allow_stale_code + +#package +Tokenizer :: struct { + data: [] u8; + use position := Position.{ 0, 1, 1 }; +} + +#package +Token :: struct { + Kind :: enum { + Invalid; + + Open_Brace; // { + Close_Brace; // } + + Open_Bracket; // [ + Close_Bracket; // ] + + Comma; + Colon; + + Null; + True; + False; + + Integer; + Float; + String; + } + + kind: Kind = .Invalid; + text: str = null_str; + use position := Position.{ 0, 1, 1 }; +} + +#package +Position :: struct { + offset : u32; // Offset into the stream + line, column : u32; // Line and column number +} + +#package +token_get :: (use tkn: ^Tokenizer) -> (Token, Error) { + err := Error.{}; + + skip_whitespace(tkn); + token := Token.{}; + token.position = tkn.position; + + curr_char := data[offset]; + next_char, has_next := next_character(tkn); + if !has_next do return .{}, .{ .EOF, token.position }; + + switch curr_char { + case #char "{" do token.kind = .Open_Brace; + case #char "}" do token.kind = .Close_Brace; + case #char "[" do token.kind = .Open_Bracket; + case #char "]" do token.kind = .Close_Bracket; + case #char "," do token.kind = .Comma; + case #char ":" do token.kind = .Colon; + + case #char "a" .. #char "z" { + token.kind = .Invalid; + skip_alpha_numeric(tkn); + + identifier := data.data[token.offset .. offset]; + if identifier == "null" do token.kind = .Null; + if identifier == "true" do token.kind = .True; + if identifier == "false" do token.kind = .False; + } + + case #char "-" { + switch data[offset] { + case #char "0" .. #char "9" --- + case #default { + err.kind = .Illegal_Character; + err.pos = token.position; + break break; + } + } + + fallthrough; + } + + case #char "0" .. #char "9" { + token.kind = .Integer; + skip_numeric(tkn); + + if data[offset] == #char "." 
{ + token.kind = .Float; + next_character(tkn); + skip_numeric(tkn); + } + + if data[offset] == #char "e" || data[offset] == #char "E" { + next_character(tkn); + if data[offset] == #char "-" || data[offset] == #char "+" { + next_character(tkn); + } + skip_numeric(tkn); + } + } + + case #char "\"" { + token.kind = .String; + + while offset < data.count { + ch := data[offset]; + if ch == #char "\n" { + err.kind = .String_Unterminated; + err.pos = token.position; + break break; + } + + next_character(tkn); + if ch == #char "\"" { + break; + } + + if ch == #char "\\" { + skip_escape(tkn); + } + } + } + } + + token.text = data.data[token.offset .. offset]; + + if token.kind == .Invalid do err.kind = .Illegal_Character; + + return token, err; +} + +#local +next_character :: (use tkn: ^Tokenizer) -> (u8, bool) { + if offset >= data.count do return 0, false; + + retval := data[offset]; + offset += 1; + column += 1; + + return retval, true; +} + +#local +skip_whitespace :: (use tkn: ^Tokenizer) { + while offset < data.count { + switch data[offset] { + case #char "\t", #char " ", #char "\r", #char "\v" { + next_character(tkn); + } + + case #char "\n" { + line += 1; + column = 1; + offset += 1; + } + + case #default { + break break; + } + } + } +} + +#local +skip_alpha_numeric :: (use tkn: ^Tokenizer) { + while offset < data.count { + switch data[offset] { + case #char "A" .. #char "Z", #char "a" .. #char "z", #char "0" .. #char "9", #char "_" { + next_character(tkn); + continue; + } + } + + break; + } +} + +#local +skip_numeric :: (use tkn: ^Tokenizer) { + while offset < data.count { + switch data[offset] { + case #char "0" .. #char "9" { + next_character(tkn); + continue; + } + } + + break; + } +} + +#local +skip_escape :: (use tkn: ^Tokenizer) { + switch data[offset] { + case #char "u" { + for i: 4 { + ch, _ := next_character(tkn); + switch ch { + case #char "0" .. #char "9", + #char "A" .. #char "F", + #char "a" .. 
#char "f" --- + + case #default do return; + } + } + } + + case #default { + next_character(tkn); + } + } +} diff --git a/core/encoding/json/types.onyx b/core/encoding/json/types.onyx new file mode 100644 index 00000000..73c24a72 --- /dev/null +++ b/core/encoding/json/types.onyx @@ -0,0 +1,279 @@ +package core.encoding.json +#allow_stale_code + +use core {*} + +#package +_null_value := _Value.{} + +null_value :: () -> Value { + return Value.{^_null_value}; +} + +empty_object :: (allocator := context.allocator) -> Value { + o := new(_Value_Object, allocator); + o.type = .Object; + o.object_ = make(typeof o.object_, allocator); + return Value.{ o }; +} + + +Json :: struct { + // This is the allocator for all of the values in the JSON tree. + // It is not the allocator the arrays and objects however. Those + // have their own allocator, which I'm assuming will always be + // the general purpose heap allocator. + allocator: Allocator; + + root: Value; +} + +Error :: struct { + Kind :: enum { + None; + EOF; + Illegal_Character; + String_Unterminated; + Unexpected_Token; + } + + kind := Kind.None; + use pos := Position.{ 0, 1, 1 }; +} + +Value :: #distinct ^_Value + +#inject Value { + type :: (v: Value) -> Value_Type { + if cast(rawptr) v == null do return .Null; + return (cast(^_Value) v).type; + } + + as_bool :: (v: Value) -> bool { + if cast(rawptr) v == null do return false; + + if (cast(^_Value) v).type == .Bool do return (cast(^_Value_Bool) cast(^_Value) v).bool_; + return false; + } + + as_str :: (v: Value) -> str { + if cast(rawptr) v == null do return null_str; + + if (cast(^_Value) v).type == .String do return (cast(^_Value_String) cast(^_Value) v).str_; + return ""; + } + + as_int :: (v: Value) -> i64 { + if cast(rawptr) v == null do return 0; + + if (cast(^_Value) v).type == .Integer do return (cast(^_Value_Integer) cast(^_Value) v).int_; + if (cast(^_Value) v).type == .Float do return ~~ (cast(^_Value_Float) cast(^_Value) v).float_; + return 0; + } + + 
as_float :: (v: Value) -> f64 { + if cast(rawptr) v == null do return 0; + + if (cast(^_Value) v).type == .Float do return (cast(^_Value_Float) cast(^_Value) v).float_; + if (cast(^_Value) v).type == .Integer do return ~~ (cast(^_Value_Integer) cast(^_Value) v).int_; + return 0; + } + + as_array :: (v: Value) -> [..] Value { + if cast(rawptr) v == null do return .{}; + if (cast(^_Value) v).type != .Array do return .{}; + + return (cast(^_Value_Array) cast(^_Value) v).array_; + } + + as_map :: (v: Value) -> Map(str, Value) { + if cast(rawptr) v == null do return .{}; + if (cast(^_Value) v).type != .Object do return .{}; + + m: Map(str, Value); + for ^(cast(^_Value_Object, cast(^_Value, v))).object_ { + m->put(it.key, it.value); + } + + return m; + } + + is_null :: (v: Value) -> bool { + if cast(rawptr) v == null do return true; + return cast(^_Value) v == ^_null_value || (cast(^_Value) v).type == .Null; + } +} + +Value_Type :: enum { + Null :: 0x00; + Bool; + Integer; + Float; + String; + Array; + Object; +} + +#package +_Value :: struct { + type := Value_Type.Null; +} + + +#package +_Value_Bool :: struct { + use base := _Value.{ type = .Bool }; + bool_: bool; +} + +#package +_Value_Integer :: struct { + use base := _Value.{ type = .Integer }; + int_: i64; +} + +#package +_Value_Float :: struct { + use base := _Value.{ type = .Float }; + float_: f64; +} + +#package +_Value_String :: struct { + use base := _Value.{ type = .String }; + str_: str; + + // Set if the string should not be freed from the allocator. + dont_free := false; +} + +#package +_Value_Array :: struct { + use base := _Value.{ type = .Array }; + array_: [..] Value; +} + +#package +_Value_Object :: struct { + use base := _Value.{ type = .Object }; + object_: [..] 
struct { + key : str; + dont_free_key := false; + + value : Value; + }; +} + +#operator [] get +get :: (v: Value, key: str) -> Value { + v_ := cast(^_Value) v; + if v_.type != .Object do return Value.{^_null_value}; + + for ^entry: (cast(^_Value_Object) v_).object_ { + if entry.key == key do return entry.value; + } + return Value.{^_null_value}; +} + +// This is an interesting operator overload, as it completely disables the +// ability to do array lookups on an array of values. So you cannot have an +// [..] Value, because the implementation of dynamic arrays heavily relies +// on the ability to do arr.data[...]. This isn't a problem for this program, +// but this is why I waited on adding overloading to '[]'. +// +// The above was remedied when I added distinct pointer types, which allows +// this to not conflict with the subscript operator on a ^_Value. +#operator [] get_idx +get_idx :: (v: Value, idx: i32) -> Value { + v_ := cast(^_Value) v; + if v_.type != .Array do return Value.{^_null_value}; + + v_arr := cast(^_Value_Array) v_; + if idx < 0 || idx >= v_arr.array_.count do return Value.{^_null_value}; + + return v_arr.array_[idx]; +} + +set :: #match { + macro (v: Value, key: str, value: Value, dont_copy_key := false) { + use core {string} + + _Value :: _Value + _Value_Object :: _Value_Object; + + v_ := cast(^_Value) v; + if v_.type == .Object { + k := key if dont_copy_key else string.alloc_copy(key); + + (cast(^_Value_Object) v_).object_ << .{ k, dont_copy_key, value }; + } + }, + + // Quick thing for allocating json values on the stack. 
+ macro (v: Value, key: str, value: str, + dont_copy_key := false, dont_copy_value := false) { + _Value_String :: _Value_String; + _Value_Object :: _Value_Object; + + use core {string} + + v_ := cast(^_Value) v; + if v_.type == .Object { + k := key if dont_copy_key else string.alloc_copy(key); + v := value if dont_copy_value else string.alloc_copy(value); + + json_value := init(_Value_String); + json_value.str_ = v; + json_value.dont_free = dont_copy_value; + + (cast(^_Value_Object) v_).object_ << .{ k, dont_copy_key, ^json_value }; + } + } +} + +#overload +delete :: free + +free :: #match #local {} + +#overload +free :: (v: Value, allocator: Allocator) { + switch v_ := cast(^_Value) v; v_.type { + case .String { + v_str := cast(^_Value_String) v_; + if !v_str.str_.data do return; + + if !v_str.dont_free { + raw_free(allocator, v_str.str_.data); + } + } + + case .Array { + v_arr := cast(^_Value_Array) v_; + for elem: v_arr.array_ { + free(elem, allocator); + } + array.free(^v_arr.array_); + } + + case .Object { + v_obj := cast(^_Value_Object) v_; + for ^entry: v_obj.object_ { + if !entry.dont_free_key do raw_free(allocator, entry.key.data); + free(entry.value, allocator); + } + array.free(^v_obj.object_); + } + } + + raw_free(allocator, cast(^_Value) v); +} + +#overload +free :: (use j: Json) { + free(root, allocator); +} + +static_string :: (s: str) -> _Value_String { + return .{ str_ = s, dont_free = true }; +} diff --git a/core/std.onyx b/core/std.onyx index 37d9861f..e6761f88 100644 --- a/core/std.onyx +++ b/core/std.onyx @@ -59,6 +59,7 @@ use runtime #load "./encoding/base64" #load "./encoding/utf8" #load "./encoding/osad" +#load_all "./encoding/json" #load "./runtime/common" diff --git a/tests/json_test b/tests/json_test new file mode 100644 index 00000000..29001aa4 --- /dev/null +++ b/tests/json_test @@ -0,0 +1,6 @@ +[ { foo = 1 }, { foo = 2 } ] +[{"x":1},{"x":2}] +1 
+{"test":"Wow!","working":123,"data":[1,2,3,4,5],"people":[{"name":"Joe"},{"name":"Bob"}],"other":[{"x":1},{"x":2}]} +{"foo":{"data":5}} +🂡 diff --git a/tests/json_test.onyx b/tests/json_test.onyx new file mode 100644 index 00000000..72754c82 --- /dev/null +++ b/tests/json_test.onyx @@ -0,0 +1,48 @@ +use core +use core.encoding.json + +main :: () { + j, err := json.decode_with_error("""[ + { + "x": 1 + }, + { + "x": 2 + } + ]"""); + defer delete(j); + + arr: [] struct { + @json.Custom_Key.{"x"} + foo: i32 + }; + json.as_any(j.root, ^arr); + + core.println(arr); + + json.encode(^core.stdio.print_writer, arr); + core.print("\n"); + + core.println(j.root[0]["x"]->as_int()); + + ctx := json.from_any(^.{ + test = "Wow!", + working = 123, + data = .[1, 2, 3, 4, 5], + people = .[ .{ name = "Joe" }, .{ "Bob" } ], + other = j.root + }); + json.encode(^core.stdio.print_writer, ctx); + + core.print("\n"); + + o := json.empty_object(); + defer json.free(o, context.allocator); + + json.set(o, "foo", json.from_any(^.{data=5})); + json.encode(^core.stdio.print_writer, o); + + v := json.decode_with_error("""{ "key1": "\uD83C\uDCA1", "key2": "\u264C" }"""); + core.print("\n"); + core.println(v.root["key1"]->as_str()); +}