nearly finished with json parsing; added float parsing and float test case

author Brendan Hansen <brendan.f.hansen@gmail.com>

Thu, 17 Jun 2021 16:25:09 +0000 (11:25 -0500)

committer Brendan Hansen <brendan.f.hansen@gmail.com>

Thu, 17 Jun 2021 16:25:09 +0000 (11:25 -0500)
author Brendan Hansen <brendan.f.hansen@gmail.com>
Thu, 17 Jun 2021 16:25:09 +0000 (11:25 -0500)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Thu, 17 Jun 2021 16:25:09 +0000 (11:25 -0500)
diff --git a/core/conv.onyx b/core/conv.onyx

index defdba68ce9927629ee5d9ba3c32bce2a2594674..fe93b1f62e1597eab1e0c45f29be623c1b4bb4a1 100644 (file)
--- a/core/conv.onyx
+++ b/core/conv.onyx
@@ -11,6 +11,10 @@ str_to_i64 :: (s: str) -> i64 {
          s = string.advance(s, 1);
      }
  
+    if s[0] == #char "+" {
+        s = string.advance(s, 1);
+    }
+
      for c: s do switch c {
          case #char "0" .. #char "9" {
              value *= 10;
@@ -23,6 +27,73 @@ str_to_i64 :: (s: str) -> i64 {
      return value * ~~mul;
  }
  
+// Parses a decimal floating point number from the front of a string.
+//
+// Accepts optional leading whitespace, an optional '+' or '-' sign, integer
+// digits, an optional '.' with fraction digits, and an optional exponent
+// ('e' or 'E', optional sign, digits). Parsing stops at the first character
+// that does not fit this shape and whatever was parsed so far is returned,
+// so a string that does not start with a number yields zero.
+str_to_f64 :: (s_: str) -> f64 {
+    use package core
+
+    // 's' needs to live on the stack to take its address. Stupid optimization
+    // that simple structs turn into registers for parameters.
+    s := s_;
+    string.strip_leading_whitespace(^s);
+
+    sign := parse_sign(^s);
+    value, _ := parse_digits(^s);
+
+    // The digits may have consumed the whole string, so every peek at s[0]
+    // below is preceded by a check on s.count.
+    if s.count == 0 do return value * sign;
+
+    if s[0] == #char "." {
+        string.advance(^s, 1);
+        fraction, fraction_digits := parse_digits(^s);
+
+        // Shift the fraction digits to the right of the decimal point.
+        while fraction_digits > 0 {
+            fraction_digits -= 1;
+            fraction /= 10;
+        }
+        value += fraction;
+    }
+
+    value *= sign;
+
+    if s.count == 0 do return value;
+    if s[0] != #char "e" && s[0] != #char "E" do return value;
+    string.advance(^s, 1);
+
+    // An exponent marker with no digits after it leaves 'exponent' at 0, so
+    // the value is returned unscaled.
+    exponent_sign := parse_sign(^s);
+    exponent, _   := parse_digits(^s);
+    if exponent_sign > 0 {
+        while exponent > 0 {
+            value *= 10;
+            exponent -= 1;
+        }
+    } else {
+        while exponent > 0 {
+            value /= 10;
+            exponent -= 1;
+        }
+    }
+
+    return value;
+
+
+    // Consumes one leading '+' or '-' if present. Returns -1 for '-',
+    // otherwise 1. An empty string is left untouched and counts as positive.
+    parse_sign :: (s: ^str) -> f64 {
+        if s.count == 0 do return 1;
+
+        switch s.data[0] {
+            case #char "-" { string.advance(s, 1); return -1; }
+            case #char "+" { string.advance(s, 1); return  1; }
+            case #default  { return 1; }
+        }
+    }
+
+    // Consumes a run of decimal digits. Returns the run's value and the
+    // number of digits consumed (0 when the string does not start with one).
+    parse_digits :: (s: ^str) -> (f64, digit_count: i32) {
+        value: f64 = 0;
+        count := 0;
+        while s.count > 0 do switch s.data[0] {
+            case #char "0" .. #char "9" {
+                value = value * 10 + ~~cast(i32)(s.data[0] - #char "0");
+                string.advance(s, 1);
+                count += 1;
+            }
+
+            case #default do break break;
+        }
+        return value, count;
+    }
+}
+
  i64_to_str :: (n: i64, base: u64, buf: [] u8, min_length := 0) -> str {
      is_neg := false;
      if n < 0 && base == 10 {
@@ -92,15 +163,20 @@ f64_to_str :: (f: f64, buf: [] u8) -> str {
      v := cast(i64) f;
  
      len := 0;
+    if v < ~~0 {
+        v = -v;
+
+        buf[0] = #char "-";
+        len += 1;
+    }
  
      s1 := i64_to_str(v / 10000, 10, buf);
-    for i: 0 .. s1.count do buf.data[i] = s1.data[i];
-    buf.data[s1.count] = #char ".";
-    len = s1.count + 1;
+    for i: 0 .. s1.count do buf.data[i + len] = s1.data[i];
+    buf.data[s1.count + len] = #char ".";
+    len += s1.count + 1;
  
-    if v < ~~0 do v = -v;
      s2 := i64_to_str(v % 10000, 10, buf, min_length = 4);
-    for i: 0 .. s2.count do buf.data[s1.count + 1 + i] = s2.data[i];
+    for i: 0 .. s2.count do buf.data[len + i] = s2.data[i];
      len += s2.count;
  
      return str.{ buf.data, len };
diff --git a/modules/json/decoder.onyx b/modules/json/decoder.onyx

index 719a470f714a236db6d2e95725655e2907946cea..39c9e902e311e4fc87296fa893772f7557b67f07 100644 (file)
--- a/modules/json/decoder.onyx
+++ b/modules/json/decoder.onyx
@@ -1,17 +1,42 @@
  package json
  use package core
  
-decode_string :: (data: str, allocator := context.allocator) -> Json {
-    tokenizer := Tokenizer.{ data = data };
+decode :: (data: str, allocator := context.allocator) -> Json {
  
-    err: Tokenizer_Error = .None;
-    tkn: Token;
+    json: Json;
+    json.allocator = allocator;
+    json.root = null;
  
-    while err == .None {
-        tkn, err = token_get(^tokenizer);
+    root, err := parse(data, allocator);
+    if err != .None {
+        switch err {
+            case .EOF do println("Reached EOF");
+            case .Illegal_Character do println("Illegal Character");
+            case .String_Unterminated do println("Unterminated String");
+            case .Unexpected_Token do println("Unexpected Token");
+        }
  
-        buf: [10] u8;
-        printf("%s  %s\n", conv.i64_to_str(~~tkn.kind, 10, ~~buf, 2), tkn.text);
+        return json;
      }
-}
  
+    json.root = root;
+    return json;
+
+
+    // Old testing code
+    #if false {
+        tokenizer := Tokenizer.{ data = data };
+
+        err : = Error.None;
+        tkn: Token;
+
+        while err == .None {
+            tkn, err = token_get(^tokenizer);
+
+            buf: [10] u8;
+            printf("%s  %s\n", conv.i64_to_str(~~tkn.kind, 10, ~~buf, 2), tkn.text);
+        }
+
+        return .{ allocator, null };
+    }
+}
diff --git a/modules/json/example.onyx b/modules/json/example.onyx

index f759351caf4d2358eb93669b20359d810dfb0e98..189a5c89188f71569b31eacabfdd9feb87de9c46 100644 (file)
--- a/modules/json/example.onyx
+++ b/modules/json/example.onyx
@@ -11,12 +11,22 @@ main :: (args: [] cstr) {
      arena := alloc.arena.make(context.allocator, 4096);
      defer alloc.arena.free(^arena);
  
-    decoded_json := json.decode_string(#file_contents "./dummy.json", alloc.arena.make_allocator(^arena));
+    decoded_json := json.decode(#file_contents "./dummy.json", alloc.arena.make_allocator(^arena));
+    // decoded_json := json.decode(json_string, alloc.arena.make_allocator(^arena));
      defer json.free(decoded_json);
  
-    test_str := decoded_json.root["test"] |> json.to_str();
-    println(test_str);
+    root := decoded_json.root;
+    for v: root->as_array() {
+        println(v["friends"][1]["name"]->as_str());
+    }
  
-    println("Done.");
+    #if false {
+        value := decoded_json.root["array"];
+        for v: value->as_array() {
+            println(v->as_int());
+        }
+        test_str := decoded_json.root["sub"]["mem"]->as_bool();
+        println(test_str);
+    }
  }
      
 \ No newline at end of file
diff --git a/modules/json/module.onyx b/modules/json/module.onyx

index 251b9ea0766e8747d584887cf39bc2deec26a511..22c2e789281d973c73498e6abc778c856b2e405f 100644 (file)
--- a/modules/json/module.onyx
+++ b/modules/json/module.onyx
@@ -8,4 +8,6 @@ package json
  #load "./encoder"
  #load "./decoder"
  #load "./types"
-#load "./tokenizer"
-\ No newline at end of file
+
+#load "./tokenizer"
+#load "./parser"
+\ No newline at end of file
diff --git a/modules/json/parser.onyx b/modules/json/parser.onyx

index 09d59843e04644472d2e4093886e20f237c8fead..b505b617e5a49880a9b48ef4ac280ca0406ec66f 100644 (file)
--- a/modules/json/parser.onyx
+++ b/modules/json/parser.onyx
@@ -1 +1,227 @@
-package json
-\ No newline at end of file
+package json
+use package core
+
+// State threaded through the parse_* procedures in this file.
+#private
+Parser :: struct {
+    // Token source; consume_token pulls the next token from it via token_get.
+    tokenizer : Tokenizer;
+    // Allocator used for the Value nodes and the string copies made while parsing.
+    allocator : Allocator;
+
+    // The token the parser is currently looking at (not yet consumed).
+    current_token  : Token;
+    // The token that was current before the most recent consume_token call.
+    previous_token : Token;
+}
+
+// Builds a Parser over 'data' and primes it so that current_token holds the
+// first token of the input. Any error from that first token fetch is ignored.
+#private
+make_parser :: (data: [] u8, allocator := context.allocator) -> Parser {
+    parser: Parser;
+    parser.allocator = allocator;
+    parser.tokenizer = Tokenizer.{ data = data };
+
+    // Load the first token.
+    consume_token(^parser);
+    return parser;
+}
+
+// Parses a single JSON value from 'data', allocating nodes with 'allocator'.
+// Returns the root value together with the error reported by parse_value.
+#private
+parse :: (data: [] u8, allocator := context.allocator) -> (^Value, Error) {
+    parser := make_parser(data, allocator);
+
+    root, err := parse_value(^parser);
+    return root, err;
+}
+
+// Advances the parser by one token: the current token becomes the previous
+// token and a new current token is fetched from the tokenizer.
+// Returns the token that was just stepped over and the tokenizer's error.
+#private_file
+consume_token :: (use parser: ^Parser) -> (Token, Error) {
+    previous_token = current_token;
+
+    fetched, err := token_get(^tokenizer);
+    current_token = fetched;
+
+    return previous_token, err;
+}
+
+// Consumes the current token only when it has the given kind.
+// Returns true if a token was consumed, false if the parser did not move.
+#private_file
+consume_token_if_next :: (use parser: ^Parser, kind: Token.Kind) -> bool {
+    if current_token.kind != kind do return false;
+
+    consume_token(parser);
+    return true;
+}
+
+// Consumes the current token unconditionally and reports whether it had the
+// expected kind. Returns the consumed token with .None on a match and with
+// .Unexpected_Token otherwise. The error returned by consume_token itself is
+// not inspected here.
+#private_file
+expect_token :: (use parser: ^Parser, kind: Token.Kind) -> (Token, Error) {
+    consumed := current_token;
+    consume_token(parser);
+
+    if consumed.kind != kind do return consumed, .Unexpected_Token;
+    return consumed, .None;
+}
+
+// Parses one JSON value starting at the current token and returns the newly
+// allocated node. Scalars are built here; arrays and objects are delegated
+// to parse_array and parse_object. A token that cannot start a value is
+// consumed and reported as .Unexpected_Token with a null node.
+#private
+parse_value :: (use parser: ^Parser) -> (^Value, Error) {
+    result: ^Value = null;
+
+    // Work from a copy so the token stays readable after it is consumed.
+    token := current_token;
+    switch token.kind {
+        case .Null {
+            node := new(Value, allocator);
+
+            consume_token(parser);
+            result = node;
+        }
+
+        case .False, .True {
+            node := new(Value_Bool, allocator);
+            node.bool_ = token.kind == .True;
+
+            consume_token(parser);
+            result = node;
+        }
+
+        case .Integer {
+            node := new(Value_Integer, allocator);
+            node.int_ = conv.str_to_i64(token.text);
+
+            consume_token(parser);
+            result = node;
+        }
+
+        case .Float {
+            node := new(Value_Float, allocator);
+            node.float_ = conv.str_to_f64(token.text);
+
+            consume_token(parser);
+            result = node;
+        }
+
+        case .String {
+            node := new(Value_String, allocator);
+            @Todo // parse escaped strings
+            // The first and last characters of the token text are dropped
+            // (presumably the surrounding quotes).
+            node.str_ = string.alloc_copy(token.text.data[1 .. token.text.count - 1], allocator);
+
+            consume_token(parser);
+            result = node;
+        }
+
+        case .Open_Bracket {
+            arr, arr_err := parse_array(parser);
+            if arr_err != .None do return arr, arr_err;
+
+            result = arr;
+        }
+
+        case .Open_Brace {
+            obj, obj_err := parse_object(parser);
+            if obj_err != .None do return obj, obj_err;
+
+            result = obj;
+        }
+
+        case #default {
+            // 'result' is still null on this path.
+            consume_token(parser);
+            return result, .Unexpected_Token;
+        }
+    }
+
+    return result, .None;
+}
+
+// Parses a bracketed, comma-separated list of values into a Value_Array.
+// On any error the elements collected so far are freed along with the
+// backing array, and the returned node's array_ field is never assigned.
+#private_file
+parse_array :: (use parser: ^Parser) -> (^Value_Array, Error) {
+    value := new(Value_Array, allocator);
+
+    _, err := expect_token(parser, .Open_Bracket);
+    if err != .None do return value, err;
+
+    // This uses the context allocators because the array resizing needs to happen in a general purpose heap allocator
+    elements := array.make(#type ^Value, allocator=context.allocator);
+
+    // Cleanup keys off 'err', so every failing path stores into 'err' before returning.
+    defer if err != .None {
+        for e: elements do free(e, allocator);
+
+        array.free(^elements);
+    }
+
+    while current_token.kind != .Close_Bracket {
+        elem, elem_err := parse_value(parser);
+        if elem_err != .None {
+            err = elem_err;
+            return value, err;
+        }
+
+        array.push(^elements, elem);
+
+        // No comma after an element means the list is finished.
+        if !consume_token_if_next(parser, .Comma) do break;
+    }
+
+    _, close_err := expect_token(parser, .Close_Bracket);
+    err = close_err;
+    if err != .None do return value, err;
+
+    value.array_ = elements;
+    return value, err;
+}
+
+
+// Parses a brace-delimited list of "key": value members into a Value_Object.
+// Members are appended in source order; duplicate keys are not rejected
+// (the check below is compiled out).
+//
+// Once the member array is initialized, any error makes the deferred block
+// free 'value' before the procedure returns, so callers must not use the
+// returned pointer when the error is not .None.
+#private_file
+parse_object :: (use parser: ^Parser) -> (^Value_Object, Error) {
+    value := new(Value_Object, allocator);
+
+    _, err := expect_token(parser, .Open_Brace);
+    if err != .None do return value, err;
+
+    // This uses the context allocators because the array resizing needs to happen in a general purpose heap allocator
+    array.init(^value.object_, allocator=context.allocator);
+    defer if err != .None {
+        free(value, allocator);
+    }
+
+    while current_token.kind != .Close_Brace {
+        key_token, key_err := expect_token(parser, .String);
+        if key_err != .None {
+            err = key_err;
+            return value, err;
+        }
+
+        _, colon_err := expect_token(parser, .Colon);
+        if colon_err != .None {
+            err = colon_err;
+            return value, err;
+        }
+
+        elem, elem_err := parse_value(parser);
+        if elem_err != .None {
+            err = elem_err;
+            return value, err;
+        }
+
+        // The key is copied only after the colon and the member value parsed
+        // successfully. Copying it earlier leaked the copy on both of those
+        // error paths, because it was never stored in object_.
+        // NOTE(review): assumes key_token.text stays valid after later tokens
+        // are consumed (i.e. it is a view into the input) -- confirm in the tokenizer.
+        key := string.alloc_copy(key_token.text.data[1 .. key_token.text.count - 1], allocator);
+
+        // Checking for duplicate keys. I have it disabled for the moment.
+        #if false {
+            for elem: value.object_ {
+                if elem.key == key {
+                    err = .Duplicate_Keys;
+                    string.free(key, allocator);
+                    return value, err;
+                }
+            }
+        }
+
+        array.push(^value.object_, .{
+            key = key,
+            value = elem
+        });
+
+        // No comma after a member means the member list is finished.
+        if !consume_token_if_next(parser, .Comma) {
+            break;
+        }
+    }
+
+    _, close_err := expect_token(parser, .Close_Brace);
+    if close_err != .None {
+        err = close_err;
+        return value, err;
+    }
+
+    return value, err;
+}
+\ No newline at end of file
diff --git a/modules/json/tokenizer.onyx b/modules/json/tokenizer.onyx

index ed71683d8513c40738f08a4bcb08b7cb0299b313..5db0009f7d6d554f66251405715c909529b22c9f 100644 (file)
--- a/modules/json/tokenizer.onyx
+++ b/modules/json/tokenizer.onyx
@@ -37,7 +37,7 @@ Token :: struct {
  
      kind: Kind = .Invalid;
      text: str  = null_str;
-    use position: Position = .{ 0, 1, 1 };
+    use position := Position.{ 0, 1, 1 };
  }
  
  #private
@@ -47,17 +47,8 @@ Position :: struct {
  }
  
  #private
-Tokenizer_Error :: enum {
-    None;
-    EOF;
-    Illegal_Character;
-    String_Unterminated;
-}
-
-
-#private
-token_get :: (use tkn: ^Tokenizer) -> (Token, Tokenizer_Error) {
-    err := Tokenizer_Error.None;
+token_get :: (use tkn: ^Tokenizer) -> (Token, Error) {
+    err := Error.None;
  
      skip_whitespace(tkn);
      token := Token.{};
diff --git a/modules/json/types.onyx b/modules/json/types.onyx

index 3d83d74c5a91e0c9b6dcefa86750d2c3c12c79d6..fced87e00bbe51535eafab4d83aaff1d48b49267 100644 (file)
--- a/modules/json/types.onyx
+++ b/modules/json/types.onyx
@@ -13,6 +13,14 @@ Json :: struct {
      root: ^Value;
  }
  
+// Error codes returned by the tokenizer (token_get) and the parser.
+Error :: enum {
+    // No error occurred.
+    None;
+    // decode reports this as "Reached EOF".
+    EOF;
+    // decode reports this as "Illegal Character".
+    Illegal_Character;
+    // decode reports this as "Unterminated String".
+    String_Unterminated;
+    // A token did not have the expected kind, or could not start a value.
+    Unexpected_Token;
+}
+
  Value :: struct {
      Type :: enum {
          Null :: 0x00;
@@ -25,6 +33,46 @@ Value :: struct {
      }
  
      type := Type.Null;
+
+    // Returns the stored boolean, or false when 'v' is null or not a Bool.
+    as_bool :: (v: ^Value) -> bool {
+        if v == null       do return false;
+        if v.type != .Bool do return false;
+
+        return (cast(^Value_Bool) v).bool_;
+    }
+
+    // Returns the stored string. A null 'v' yields null_str; a value that
+    // is not a String yields "".
+    as_str :: (v: ^Value) -> str {
+        if v == null         do return null_str;
+        if v.type != .String do return "";
+
+        return (cast(^Value_String) v).str_;
+    }
+
+    // Returns the stored integer, or 0 when 'v' is null or not an Integer.
+    as_int :: (v: ^Value) -> i64 {
+        if v == null          do return 0;
+        if v.type != .Integer do return 0;
+
+        return (cast(^Value_Integer) v).int_;
+    }
+
+    // Returns the stored float, or 0 when 'v' is null or not a Float.
+    // Integer values are not converted; they also yield 0.
+    as_float :: (v: ^Value) -> f64 {
+        if v == null        do return 0;
+        if v.type != .Float do return 0;
+
+        return (cast(^Value_Float) v).float_;
+    }
+
+    // Returns the stored element array. A null 'v' or a value that is not
+    // an Array yields an empty array (null data, zero count and capacity).
+    as_array :: (v: ^Value) -> [..] ^Value {
+        if v == null do return .{ null, 0, 0, .{ null, null_proc } };
+
+        if v.type == .Array do return (cast(^Value_Array) v).array_;
+
+        return .{ null, 0, 0, .{ null, null_proc } };
+    }
+
+    // True when 'v' is a null pointer, points at the shared null_value,
+    // or has type Null.
+    is_null :: (v: ^Value) -> bool {
+        if v == null        do return true;
+        if v == ^null_value do return true;
+
+        return v.type == .Null;
+    }
  }
  
  Value_Bool :: struct {
@@ -60,32 +108,31 @@ Value_Object :: struct {
      };
  }
  
-is_null :: (v: ^Value) -> bool {
-    if v == null do return true;
-    return v == ^null_value || v.type == .Null;
-}
-
-to_str :: (v: ^Value) -> str {
-    if v == null do return null_str;
-
-    switch v.type {
-        case .String do return (cast(^Value_String) v).str_;
-        case #default do return "";
-    }
-}
-
  #operator [] get
  get :: (v: ^Value, key: str) -> ^Value {
      if v.type != .Object do return ^null_value;
  
-    v_obj := cast(^Value_Object) v;
-
-    for ^entry: v_obj.object_ {
+    for ^entry: (cast(^Value_Object) v).object_ {
          if entry.key == key do return entry.value;
      }
      return ^null_value;
  }
  
+// This is an interesting operator overload, as it completely disables the
+// ability to do array lookups on an array of values. So you cannot have an
+// [..] Value, because the implementation of dynamic arrays heavily relies
+// on the ability to do arr.data[...]. This isn't a problem for this program,
+// but this is why I waited on adding overloading to '[]'.
+//
+// Looks up element 'idx' of an Array value. Returns ^null_value when 'v' is
+// not an Array or when 'idx' is out of range.
+#operator [] get_idx
+get_idx :: (v: ^Value, idx: i32) -> ^Value {
+    if v.type != .Array do return ^null_value;
+
+    elements := (cast(^Value_Array) v).array_;
+    if idx < 0               do return ^null_value;
+    if idx >= elements.count do return ^null_value;
+
+    return elements[idx];
+}
+
  free :: proc {
      (v: ^Value, allocator: Allocator) do switch v.type {
          case .String {
diff --git a/tests/float_parsing b/tests/float_parsing

new file mode 100644 (file)

index 0000000..4d98370
--- /dev/null
+++ b/tests/float_parsing
@@ -0,0 +1,29 @@
+12.0000
+12.0000
+8.0000
+12.3400
+0.3399
+2.0000
+1.0000
+1.0000
+1.0000
+10000.0000
+0.0010
+0.0002
+-5000000.0000
+-0.0500
+0.0000
+-1000000.0000
+5.0000
+10.0000
+0.0000
+0.0000
+0.0000
+1.0000
+-1.0000
+1.0000
+1.0000
+0.0000
+0.0000
+0.0000
+-1000000.0000
diff --git a/tests/float_parsing.onyx b/tests/float_parsing.onyx

new file mode 100644 (file)

index 0000000..f050ec0
--- /dev/null
+++ b/tests/float_parsing.onyx
@@ -0,0 +1,52 @@
+#load "core/std"
+
+use package core
+
+// Feeds a fixed list of inputs through conv.str_to_f64 and prints one result
+// per line, in order. The output is presumably compared against the
+// tests/float_parsing file added next to this test.
+main :: (args: [] cstr) {
+
+    @CoreLibraries // The commented out cases can be re-enabled when f64_to_str is better.
+    // Right now there is an integer overflow because it converts the float to an i64.
+    inputs := str.[
+        /* these should parse fully */
+        "12",
+        "12.0",
+        "08",                   /* not octal! */
+        "+12.34",
+        ".34",
+        "\t \n2.",
+        "1e0",
+        "1e+0",
+        "1e-0",
+        "1.e4",
+        ".1e-2",
+        "2e-4",
+        "-5e006",
+        //"-5e+16",
+        "-.05",
+        "-.0",
+        "-1e6",
+        /* these should parse only the initial part */
+        "5c5",
+        "10ee5",
+        "0x06",                 /* not hex! */
+        "--1" ,
+        "-+1" ,
+        "1e--4" ,
+        "-1e.4",
+        "1e 4",
+        "1e-g",
+        "", "foobar",           /* both 0 */
+        " e5",                  /* also 0 */
+        "-1e6",
+        /* overflow/underflow */
+        // "1e500000",
+        // "1e-500000",
+        // "-1e500000",
+        // "-1e-500000",
+    ];
+
+    for input: inputs do println(conv.str_to_f64(input));
+}
+\ No newline at end of file
author	Brendan Hansen <brendan.f.hansen@gmail.com>
	Thu, 17 Jun 2021 16:25:09 +0000 (11:25 -0500)
committer	Brendan Hansen <brendan.f.hansen@gmail.com>
	Thu, 17 Jun 2021 16:25:09 +0000 (11:25 -0500)
core/conv.onyx		patch \| blob \| history
modules/json/decoder.onyx		patch \| blob \| history
modules/json/example.onyx		patch \| blob \| history
modules/json/module.onyx		patch \| blob \| history
modules/json/parser.onyx		patch \| blob \| history
modules/json/tokenizer.onyx		patch \| blob \| history
modules/json/types.onyx		patch \| blob \| history
tests/float_parsing	[new file with mode: 0644]	patch \| blob
tests/float_parsing.onyx	[new file with mode: 0644]	patch \| blob