From abbd1d8e3d4ce015f7bac0c7f11d766059871883 Mon Sep 17 00:00:00 2001
From: Brendan Hansen
Date: Thu, 17 Jun 2021 11:25:09 -0500
Subject: [PATCH] nearly finished with json parsing; added float parsing and float test case

---
 core/conv.onyx              |  86 +++++++++++++-
 modules/json/decoder.onyx   |  43 +++++--
 modules/json/example.onyx   |  18 ++-
 modules/json/module.onyx    |   4 +-
 modules/json/parser.onyx    | 228 +++++++++++++++++++++++++++++++++++-
 modules/json/tokenizer.onyx |  15 +--
 modules/json/types.onyx     |  81 ++++++++++---
 tests/float_parsing         |  29 +++++
 tests/float_parsing.onyx    |  52 ++++++++
 9 files changed, 507 insertions(+), 49 deletions(-)
 create mode 100644 tests/float_parsing
 create mode 100644 tests/float_parsing.onyx

diff --git a/core/conv.onyx b/core/conv.onyx
index defdba68..fe93b1f6 100644
--- a/core/conv.onyx
+++ b/core/conv.onyx
@@ -11,6 +11,10 @@ str_to_i64 :: (s: str) -> i64 {
         s = string.advance(s, 1);
     }
 
+    if s.count > 0 && s[0] == #char "+" { // optional leading '+'; the count check keeps an empty string from being indexed
+        s = string.advance(s, 1);
+    }
+
     for c: s do switch c {
         case #char "0" .. #char "9" {
             value *= 10;
@@ -23,6 +27,73 @@ str_to_i64 :: (s: str) -> i64 {
     return value * ~~mul;
 }
 
+str_to_f64 :: (s_: str) -> f64 { // Grammar: [whitespace][+|-][digits][.digits][(e|E)[+|-]digits]; parsing stops at the first character that does not fit.
+    use package core
+
+    // 's' needs to live on the stack to take its address. Stupid optimization
+    // that simple structs turn into registers for parameters.
+    s := s_;
+    string.strip_leading_whitespace(^s);
+
+    sign := parse_sign(^s);
+    value, _ := parse_digits(^s);
+
+    if s.count > 0 && s[0] == #char "." { // the input may already be exhausted here (e.g. "12")
+        string.advance(^s, 1);
+        fraction, fraction_digits := parse_digits(^s);
+        while fraction_digits > 0 { // scale the digits after the point down to a fraction
+            fraction_digits -= 1;
+            fraction /= 10;
+        }
+        value += fraction;
+    }
+
+    value *= sign;
+
+    if s.count == 0 || (s[0] != #char "e" && s[0] != #char "E") do return value; // no exponent part
+    string.advance(^s, 1);
+
+    exponent_sign := parse_sign(^s);
+    exponent, _ := parse_digits(^s);
+    if exponent_sign > 0 {
+        while exponent > 0 {
+            value *= 10;
+            exponent -= 1;
+        }
+    } else {
+        while exponent > 0 {
+            value /= 10;
+            exponent -= 1;
+        }
+    }
+
+    return value;
+
+
+    parse_sign :: (s: ^str) -> f64 { // consumes one leading '-' or '+' if present; returns -1 or 1 (1 when there is no sign or no input left)
+        if s.count > 0 do switch s.data[0] {
+            case #char "-" { string.advance(s, 1); return -1; }
+            case #char "+" { string.advance(s, 1); return 1; }
+        }
+        return 1;
+    }
+
+    parse_digits :: (s: ^str) -> (f64, digit_count: i32) { // consumes leading decimal digits; returns their value and how many were consumed
+        value: f64 = 0;
+        count := 0;
+        while s.count > 0 do switch s.data[0] {
+            case #char "0" .. #char "9" {
+                value = value * 10 + ~~cast(i32)(s.data[0] - #char "0");
+                string.advance(s, 1);
+                count += 1;
+            }
+
+            case #default do break break;
+        }
+        return value, count;
+    }
+}
+
 i64_to_str :: (n: i64, base: u64, buf: [] u8, min_length := 0) -> str {
     is_neg := false;
     if n < 0 && base == 10 {
@@ -92,15 +163,20 @@ f64_to_str :: (f: f64, buf: [] u8) -> str {
 
     v := cast(i64) f;
     len := 0;
+    if v < ~~0 { // emit the sign once, then format the magnitude
+        v = -v;
+
+        buf[0] = #char "-";
+        len += 1;
+    }
 
     s1 := i64_to_str(v / 10000, 10, buf);
-    for i: 0 .. s1.count do buf.data[i] = s1.data[i];
-    buf.data[s1.count] = #char ".";
-    len = s1.count + 1;
+    for i: 0 .. s1.count do buf.data[i + len] = s1.data[i];
+    buf.data[s1.count + len] = #char ".";
+    len += s1.count + 1;
 
-    if v < ~~0 do v = -v;
     s2 := i64_to_str(v % 10000, 10, buf, min_length = 4);
-    for i: 0 .. s2.count do buf.data[s1.count + 1 + i] = s2.data[i];
+    for i: 0 .. s2.count do buf.data[len + i] = s2.data[i];
     len += s2.count;
 
     return str.{ buf.data, len };
diff --git a/modules/json/decoder.onyx b/modules/json/decoder.onyx
index 719a470f..39c9e902 100644
--- a/modules/json/decoder.onyx
+++ b/modules/json/decoder.onyx
@@ -1,17 +1,42 @@
 package json
 use package core
 
-decode_string :: (data: str, allocator := context.allocator) -> Json {
-    tokenizer := Tokenizer.{ data = data };
+decode :: (data: str, allocator := context.allocator) -> Json { // parses 'data'; on error prints a message and returns a Json whose root is null
 
-    err: Tokenizer_Error = .None;
-    tkn: Token;
+    json: Json;
+    json.allocator = allocator;
+    json.root = null;
 
-    while err == .None {
-        tkn, err = token_get(^tokenizer);
+    root, err := parse(data, allocator);
+    if err != .None {
+        switch err {
+            case .EOF do println("Reached EOF");
+            case .Illegal_Character do println("Illegal Character");
+            case .String_Unterminated do println("Unterminated String");
+            case .Unexpected_Token do println("Unexpected Token");
+        }
 
-        buf: [10] u8;
-        printf("%s %s\n", conv.i64_to_str(~~tkn.kind, 10, ~~buf, 2), tkn.text);
+        return json;
     }
-}
 
+    json.root = root;
+    return json;
+
+
+    // Old testing code
+    #if false {
+        tokenizer := Tokenizer.{ data = data };
+
+        err : = Error.None;
+        tkn: Token;
+
+        while err == .None {
+            tkn, err = token_get(^tokenizer);
+
+            buf: [10] u8;
+            printf("%s %s\n", conv.i64_to_str(~~tkn.kind, 10, ~~buf, 2), tkn.text);
+        }
+
+        return .{ allocator, null };
+    }
+}
diff --git a/modules/json/example.onyx b/modules/json/example.onyx
index f759351c..189a5c89 100644
--- a/modules/json/example.onyx
+++ b/modules/json/example.onyx
@@ -11,12 +11,22 @@ main :: (args: [] cstr) {
     arena := alloc.arena.make(context.allocator, 4096);
     defer alloc.arena.free(^arena);
 
-    decoded_json := json.decode_string(#file_contents "./dummy.json", alloc.arena.make_allocator(^arena));
+    decoded_json := json.decode(#file_contents "./dummy.json", alloc.arena.make_allocator(^arena));
+    // decoded_json := json.decode(json_string, alloc.arena.make_allocator(^arena));
     defer json.free(decoded_json);
 
 
-    test_str := decoded_json.root["test"] |> json.to_str();
-    println(test_str);
+    root := decoded_json.root;
+    for v: root->as_array() { // as_array returns an empty array when root is null or not an array
+        println(v["friends"][1]["name"]->as_str());
+    }
 
-    println("Done.");
+    #if false {
+        value := decoded_json.root["array"];
+        for v: value->as_array() {
+            println(v->as_int());
+        }
+        test_str := decoded_json.root["sub"]["mem"]->as_bool();
+        println(test_str);
+    }
 }
\ No newline at end of file
diff --git a/modules/json/module.onyx b/modules/json/module.onyx
index 251b9ea0..22c2e789 100644
--- a/modules/json/module.onyx
+++ b/modules/json/module.onyx
@@ -8,4 +8,6 @@ package json
 #load "./encoder"
 #load "./decoder"
 #load "./types"
-#load "./tokenizer"
\ No newline at end of file
+
+#load "./tokenizer"
+#load "./parser"
\ No newline at end of file
diff --git a/modules/json/parser.onyx b/modules/json/parser.onyx
index 09d59843..b505b617 100644
--- a/modules/json/parser.onyx
+++ b/modules/json/parser.onyx
@@ -1 +1,227 @@
-package json
\ No newline at end of file
+package json
+use package core
+
+#private
+Parser :: struct { // tokenizer state plus the current token and the one consumed before it
+    tokenizer : Tokenizer;
+    allocator : Allocator;
+
+    current_token : Token;
+    previous_token : Token;
+}
+
+#private
+make_parser :: (data: [] u8, allocator := context.allocator) -> Parser {
+    parser: Parser;
+    parser.tokenizer = Tokenizer.{ data = data };
+    parser.allocator = allocator;
+    consume_token(^parser); // primes current_token with the first token; the returned error is not inspected here
+    return parser;
+}
+
+#private
+parse :: (data: [] u8, allocator := context.allocator) -> (^Value, Error) { // parses a single value from the start of 'data'
+    parser := make_parser(data, allocator);
+    return parse_value(^parser);
+}
+
+#private_file
+consume_token :: (use parser: ^Parser) -> (Token, Error) { // advances one token; returns the token that was current before the call and token_get's error
+    error: Error;
+    previous_token = current_token;
+    current_token, error = token_get(^tokenizer);
+    return previous_token, error;
+}
+
+#private_file
+consume_token_if_next :: (use parser: ^Parser, kind: Token.Kind) -> bool { // consumes the current token only when it has the given kind; reports whether it did
+    if current_token.kind == kind {
+        consume_token(parser);
+        return true;
+    }
+
+    return false;
+}
+
+#private_file
+expect_token :: (use parser: ^Parser, kind: Token.Kind) -> (Token, Error) { // always consumes the current token; .Unexpected_Token when it was not of 'kind'
+    previous := current_token;
+    consume_token(parser);
+    if previous.kind == kind do return previous, .None;
+    else do return previous, .Unexpected_Token;
+}
+
+#private
+parse_value :: (use parser: ^Parser) -> (^Value, Error) { // parses one value starting at current_token; any other token kind yields (null, .Unexpected_Token)
+    return_value: ^Value = null;
+
+    current := current_token;
+    switch current.kind {
+        case .Null {
+            value := new(Value, allocator);
+
+            consume_token(parser);
+            return_value = value;
+        }
+
+        case .False, .True {
+            value := new(Value_Bool, allocator);
+            value.bool_ = current.kind == .True;
+
+            consume_token(parser);
+            return_value = value;
+        }
+
+        case .Integer {
+            value := new(Value_Integer, allocator);
+            value.int_ = conv.str_to_i64(current.text);
+
+            consume_token(parser);
+            return_value = value;
+        }
+
+        case .Float {
+            value := new(Value_Float, allocator);
+            value.float_ = conv.str_to_f64(current.text);
+
+            consume_token(parser);
+            return_value = value;
+        }
+
+        case .String {
+            value := new(Value_String, allocator);
+            @Todo // parse escaped strings
+            value.str_ = string.alloc_copy(current.text.data[1 .. current.text.count - 1], allocator);
+
+            consume_token(parser);
+            return_value = value;
+        }
+
+        case .Open_Bracket {
+            value, err := parse_array(parser);
+            if err != .None do return value, err;
+
+            return_value = value;
+        }
+
+        case .Open_Brace {
+            value, err := parse_object(parser);
+            if err != .None do return value, err;
+
+            return_value = value;
+        }
+
+        case #default {
+            consume_token(parser);
+            return return_value, .Unexpected_Token;
+        }
+    }
+
+    return return_value, .None;
+}
+
+#private_file
+parse_array :: (use parser: ^Parser) -> (^Value_Array, Error) { // parses a bracketed, comma-separated list of values into value.array_
+    value := new(Value_Array, allocator);
+
+    _, err := expect_token(parser, .Open_Bracket);
+    if err != .None do return value, err;
+
+    // This uses the context allocators because the array resizing needs to happen in a general purpose heap allocator
+    arr := array.make(#type ^Value, allocator=context.allocator);
+    defer if err != .None { // error cleanup: frees the elements parsed so far and the temporary array
+        for elem: arr {
+            free(elem, allocator);
+        }
+
+        array.free(^arr);
+    }
+
+    while current_token.kind != .Close_Bracket {
+        elem, elem_err := parse_value(parser);
+        if elem_err != .None {
+            err = elem_err;
+            return value, err;
+        }
+
+        array.push(^arr, elem);
+
+        if !consume_token_if_next(parser, .Comma) {
+            break;
+        }
+    }
+
+    _, close_err := expect_token(parser, .Close_Bracket);
+    if close_err != .None {
+        err = close_err;
+        return value, err;
+    }
+
+    value.array_ = arr;
+    return value, err;
+}
+
+
+#private_file
+parse_object :: (use parser: ^Parser) -> (^Value_Object, Error) { // parses '{' string ':' value entries separated by commas into value.object_; returns null with the error once cleanup has run
+    value := new(Value_Object, allocator);
+
+    _, err := expect_token(parser, .Open_Brace);
+    if err != .None do return value, err;
+
+    // This uses the context allocators because the array resizing needs to happen in a general purpose heap allocator
+    array.init(^value.object_, allocator=context.allocator);
+    defer if err != .None { // error cleanup; every error return below assigns 'err' first so this runs
+        free(value, allocator);
+    }
+
+    while current_token.kind != .Close_Brace {
+        key_token, key_err := expect_token(parser, .String);
+        if key_err != .None {
+            err = key_err;
+            return null, err; // 'value' is handed to free by the defer above, so it must not be returned
+        }
+
+        key := string.alloc_copy(key_token.text.data[1 .. key_token.text.count - 1], allocator);
+
+        _, colon_err := expect_token(parser, .Colon);
+        if colon_err != .None {
+            err = colon_err;
+            return null, err;
+        }
+
+        elem, elem_err := parse_value(parser);
+        if elem_err != .None {
+            err = elem_err;
+            return null, err;
+        }
+
+        // Checking for duplicate keys. I have it disabled for the moment.
+        #if false {
+            for elem: value.object_ {
+                if elem.key == key {
+                    err = .Duplicate_Keys;
+                    string.free(key, allocator);
+                    return null, err;
+                }
+            }
+        }
+
+        array.push(^value.object_, .{
+            key = key,
+            value = elem
+        });
+
+        if !consume_token_if_next(parser, .Comma) {
+            break;
+        }
+    }
+
+    _, close_err := expect_token(parser, .Close_Brace);
+    if close_err != .None {
+        err = close_err;
+        return null, err;
+    }
+
+    return value, err;
+}
\ No newline at end of file
diff --git a/modules/json/tokenizer.onyx b/modules/json/tokenizer.onyx
index ed71683d..5db0009f 100644
--- a/modules/json/tokenizer.onyx
+++ b/modules/json/tokenizer.onyx
@@ -37,7 +37,7 @@ Token :: struct {
 
     kind: Kind = .Invalid;
     text: str = null_str;
-    use position: Position = .{ 0, 1, 1 };
+    use position := Position.{ 0, 1, 1 };
 }
 
 #private
@@ -47,17 +47,8 @@ Position :: struct {
 }
 
 #private
-Tokenizer_Error :: enum {
-    None;
-    EOF;
-    Illegal_Character;
-    String_Unterminated;
-}
-
-
-#private
-token_get :: (use tkn: ^Tokenizer) -> (Token, Tokenizer_Error) {
-    err := Tokenizer_Error.None;
+token_get :: (use tkn: ^Tokenizer) -> (Token, Error) {
+    err := Error.None;
 
     skip_whitespace(tkn);
     token := Token.{};
diff --git a/modules/json/types.onyx b/modules/json/types.onyx
index 3d83d74c..fced87e0 100644
--- a/modules/json/types.onyx
+++ b/modules/json/types.onyx
@@ -13,6 +13,14 @@ Json :: struct {
     root: ^Value;
 }
 
+Error :: enum { // shared by the tokenizer (token_get) and the parser
+    None;
+    EOF;
+    Illegal_Character;
+    String_Unterminated;
+    Unexpected_Token;
+}
+
 Value :: struct {
     Type :: enum {
         Null :: 0x00;
@@ -25,6 +33,46 @@ Value :: struct {
     }
 
     type := Type.Null;
+
+    as_bool :: (v: ^Value) -> bool { // false when v is null or not a Bool
+        if v == null do return false;
+
+        if v.type == .Bool do return (cast(^Value_Bool) v).bool_;
+        return false;
+    }
+
+    as_str :: (v: ^Value) -> str { // null_str for a null pointer, "" for a non-string value
+        if v == null do return null_str;
+
+        if v.type == .String do return (cast(^Value_String) v).str_;
+        return "";
+    }
+
+    as_int :: (v: ^Value) -> i64 { // 0 when v is null or not an Integer
+        if v == null do return 0;
+
+        if v.type == .Integer do return (cast(^Value_Integer) v).int_;
+        return 0;
+    }
+
+    as_float :: (v: ^Value) -> f64 { // 0 when v is null or not a Float
+        if v == null do return 0;
+
+        if v.type == .Float do return (cast(^Value_Float) v).float_;
+        return 0;
+    }
+
+    as_array :: (v: ^Value) -> [..] ^Value { // empty dynamic array when v is null or not an Array
+        if v == null do return .{ null, 0, 0, .{ null, null_proc } };
+        if v.type != .Array do return .{ null, 0, 0, .{ null, null_proc } };
+
+        return (cast(^Value_Array) v).array_;
+    }
+
+    is_null :: (v: ^Value) -> bool { // true for a null pointer, the shared null_value, or a value of type Null
+        if v == null do return true;
+        return v == ^null_value || v.type == .Null;
+    }
 }
 
 Value_Bool :: struct {
@@ -60,32 +108,31 @@ Value_Object :: struct {
     };
 }
 
-is_null :: (v: ^Value) -> bool {
-    if v == null do return true;
-    return v == ^null_value || v.type == .Null;
-}
-
-to_str :: (v: ^Value) -> str {
-    if v == null do return null_str;
-
-    switch v.type {
-        case .String do return (cast(^Value_String) v).str_;
-        case #default do return "";
-    }
-}
-
 #operator [] get
 get :: (v: ^Value, key: str) -> ^Value {
     if v.type != .Object do return ^null_value;
 
-    v_obj := cast(^Value_Object) v;
-
-    for ^entry: v_obj.object_ {
+    for ^entry: (cast(^Value_Object) v).object_ {
         if entry.key == key do return entry.value;
     }
     return ^null_value;
 }
 
+// This is an interesting operator overload, as it completely disables the
+// ability to do array lookups on an array of values. So you cannot have an
+// [..] Value, because the implementation of dynamic arrays heavily relies
+// on the ability to do arr.data[...]. This isn't a problem for this program,
+// but this is why I waited on adding overloading to '[]'.
+#operator [] get_idx
+get_idx :: (v: ^Value, idx: i32) -> ^Value { // array element lookup; ^null_value for a null pointer, a non-array value, or an out-of-range index
+    if v == null || v.type != .Array do return ^null_value;
+
+    v_arr := cast(^Value_Array) v;
+    if idx < 0 || idx >= v_arr.array_.count do return ^null_value;
+
+    return v_arr.array_[idx];
+}
+
 free :: proc {
     (v: ^Value, allocator: Allocator) do switch v.type {
         case .String {
diff --git a/tests/float_parsing b/tests/float_parsing
new file mode 100644
index 00000000..4d983702
--- /dev/null
+++ b/tests/float_parsing
@@ -0,0 +1,29 @@
+12.0000
+12.0000
+8.0000
+12.3400
+0.3399
+2.0000
+1.0000
+1.0000
+1.0000
+10000.0000
+0.0010
+0.0002
+-5000000.0000
+-0.0500
+0.0000
+-1000000.0000
+5.0000
+10.0000
+0.0000
+0.0000
+0.0000
+1.0000
+-1.0000
+1.0000
+1.0000
+0.0000
+0.0000
+0.0000
+-1000000.0000
diff --git a/tests/float_parsing.onyx b/tests/float_parsing.onyx
new file mode 100644
index 00000000..f050ec00
--- /dev/null
+++ b/tests/float_parsing.onyx
@@ -0,0 +1,52 @@
+#load "core/std"
+
+use package core
+
+main :: (args: [] cstr) { // prints conv.str_to_f64 of every case below, one per line (presumably checked against tests/float_parsing)
+
+    @CoreLibraries // The commented out cases can be re-enabled when f64_to_str is better.
+    // Right now there is an integer overflow because it converts the float to an i64.
+    strings := str.[
+        /* these should parse fully */
+        "12",
+        "12.0",
+        "08", /* not octal! */
+        "+12.34",
+        ".34",
+        "\t \n2.",
+        "1e0",
+        "1e+0",
+        "1e-0",
+        "1.e4",
+        ".1e-2",
+        "2e-4",
+        "-5e006",
+        //"-5e+16",
+        "-.05",
+        "-.0",
+        "-1e6",
+        /* these should parse only the initial part */
+        "5c5",
+        "10ee5",
+        "0x06", /* not hex! */
+        "--1" ,
+        "-+1" ,
+        "1e--4" ,
+        "-1e.4",
+        "1e 4",
+        "1e-g",
+        "", "foobar", /* both 0 */
+        " e5", /* also 0 */
+        "-1e6",
+        /* overflow/underflow */
+        // "1e500000",
+        // "1e-500000",
+        // "-1e500000",
+        // "-1e-500000",
+    ];
+
+    for s: strings {
+        value := conv.str_to_f64(s);
+        println(value);
+    }
+}
\ No newline at end of file
-- 
2.25.1