added: KDL support; bugfixed: segfault in lexer

author Brendan Hansen <brendan.f.hansen@gmail.com>

Tue, 21 Nov 2023 03:49:08 +0000 (21:49 -0600)

committer Brendan Hansen <brendan.f.hansen@gmail.com>

Tue, 21 Nov 2023 03:49:08 +0000 (21:49 -0600)
author Brendan Hansen <brendan.f.hansen@gmail.com>
Tue, 21 Nov 2023 03:49:08 +0000 (21:49 -0600)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Tue, 21 Nov 2023 03:49:08 +0000 (21:49 -0600)
diff --git a/build.sh b/build.sh

index d6ea7e014bc5e498ce267ccd1fbc7d92f79aa873..29cc7b6c052a11780411001070acd47077dcf637 100755 (executable)
--- a/build.sh
+++ b/build.sh
@@ -89,6 +89,7 @@ install_all() {
  for arg in $@; do
      case "$arg" in
          compile) compile_all ;;
+        debug) compile_all debug ;;
          package) package_all ;;
          compress) compress_all ;;
          install) install_all ;;
diff --git a/compiler/src/lex.c b/compiler/src/lex.c

index 5853b5c95a91ba7dc68ae371a0a06d368331d756..81e866bb1769e3dab67304a6a46ae4e29e5e948c 100644 (file)
--- a/compiler/src/lex.c
+++ b/compiler/src/lex.c
@@ -90,7 +90,7 @@ static const char* token_type_names[] = {
          (tkn)->line_number++; \
          (tkn)->line_start = (tkn)->curr + 1; \
      } \
-    (tkn)->curr++; \
+    if ((tkn)->curr != (tkn)->end) (tkn)->curr++; \
  }
  #endif
  
@@ -239,7 +239,7 @@ whitespace_skipped:
          INCREMENT_CURR_TOKEN(tokenizer);
          INCREMENT_CURR_TOKEN(tokenizer);
  
-        while (!(*tokenizer->curr == '"' && *(tokenizer->curr + 1) == '"' && *(tokenizer->curr + 2) == '"')) {
+        while (!(*tokenizer->curr == '"' && *(tokenizer->curr + 1) == '"' && *(tokenizer->curr + 2) == '"') && tokenizer->curr != tokenizer->end) {
              len++;
              INCREMENT_CURR_TOKEN(tokenizer);
          }
@@ -262,7 +262,7 @@ whitespace_skipped:
          char ch = *tk.text;
          INCREMENT_CURR_TOKEN(tokenizer);
  
-        while (!(*tokenizer->curr == ch && slash_count == 0)) {
+        while (tokenizer->curr != tokenizer->end && !(*tokenizer->curr == ch && slash_count == 0)) {
              len++;
  
              if (*tokenizer->curr == '\n' && ch == '\'') {
@@ -279,6 +279,10 @@ whitespace_skipped:
              }
  
              INCREMENT_CURR_TOKEN(tokenizer);
+            if (tokenizer->curr == tokenizer->end) {
+                onyx_report_error(tk.pos, Error_Critical, "String literal not closed. String literal starts here.");
+                break;
+            }
          }
  
          INCREMENT_CURR_TOKEN(tokenizer);
diff --git a/core/encoding/kdl/kdl.onyx b/core/encoding/kdl/kdl.onyx

new file mode 100644 (file)

index 0000000..b6653f4
--- /dev/null
+++ b/core/encoding/kdl/kdl.onyx
@@ -0,0 +1,134 @@
+package core.encoding.kdl
+#allow_stale_code
+
+#load "./parser"
+#load "./utils"
+#load "./kql"
+
+use core {Result, io, string}
+
+//
+// Cuddly Document Language (KDL)
+// https://kdl.dev
+//
+
+
+Document :: struct {
+    allocator: Allocator;
+    nodes: [..] &Node;
+}
+
+Node :: struct {
+    node: str;
+    type_annotation: ? str;
+    values: [..] Value;
+    props: Map(str, Value);
+    children: [..] &Node;
+}
+
+Value :: struct {
+    data: Value_Data;
+    type_annotation: ? str;
+
+    Value_Data :: union {
+        String: str;
+        Number: KDL_Number;
+        Boolean: bool;
+        Null: void;
+    }
+}
+
+KDL_Number :: union {
+    Integer: i64;
+    Float: f64;
+    String: str;
+}
+
+
+#doc """
+    Parses a string or `io.Reader` into a KDL document, using the allocator provided for internal allocations.
+
+    Call `core.encoding.kdl.free` to free the returned document.
+"""
+parse :: #match #local -> Result(Document, Parse_Error) {}
+
+#overload
+parse :: (s: str, allocator := context.allocator) -> Result(Document, Parse_Error) {
+    doc: Document;
+    doc.allocator = allocator;
+
+    parser := Parser.make(s);
+    error  := parser->parse(&doc);
+
+    if error.None {
+        return .{ Ok = doc };
+    } else {
+        return .{ Err = error };
+    }
+}
+
+#overload
+parse :: (r: &io.Reader, allocator := context.allocator) -> Result(Document, Parse_Error) {
+    doc: Document;
+    doc.allocator = allocator;
+
+    parser := Parser.make(r);
+    error  := parser->parse(&doc);
+
+    if error.None {
+        return .{ Ok = doc };
+    } else {
+        return .{ Err = error };
+    }
+}
+
+#overload
+builtin.delete :: free
+
+#doc """
+    Releases all resources allocated for the document.
+"""
+free :: (d: Document) {
+    for d.nodes do free_node(d.allocator, it);
+    delete(&d.nodes);
+}
+
+#local
+free_node :: (al: Allocator, n: &Node) {
+    string.free(n.node, al);
+    n.type_annotation->with([t] { string.free(t, al); });
+
+    for& v: n.values do free_value(al, v);
+    delete(&n.values); // This should use the allocator inside of the array
+
+    for n.props->as_iter() {
+        string.free(it.key, al);
+        free_value(al, &it.value);
+    }
+    delete(&n.props);
+
+    for n.children do free_node(al, it);
+}
+
+#local
+free_value :: (al: Allocator, v: &Value) {
+    v.type_annotation->with([t] { string.free(t, al); });
+
+    switch v.data {
+        case s: .String {
+            string.free(s, al);
+        }
+
+        case num: .Number do switch num {
+            case s: .String {
+                string.free(s, al);
+            } 
+
+            case #default ---
+        }
+
+        case #default ---
+    }
+}
+
+
diff --git a/core/encoding/kdl/kql.onyx b/core/encoding/kdl/kql.onyx

new file mode 100644 (file)

index 0000000..7d534e2
--- /dev/null
+++ b/core/encoding/kdl/kql.onyx
@@ -0,0 +1,490 @@
+package core.encoding.kdl
+#allow_stale_code
+
+use core {iter, alloc, array, string}
+
+#inject Document {
+    query     :: query
+    query_all :: query_all
+}
+
+query :: (d: &Document, query: str) -> ? &Node {
+    query_iter := query_all(d, query);
+    node, empty := iter.next(query_iter);
+    iter.close(query_iter);
+
+    if !empty do return node;
+    return null;
+}
+
+query_all :: (d: &Document, query: str) -> Iterator(&Node) {
+    arena := alloc.arena.make(context.allocator, 16 * 1024);
+    q     := parse_query(query, alloc.as_allocator(&arena));
+
+    ctx := .{
+        d = d,
+        q = q,
+        stack = make([..] QueryStack, 8, alloc.as_allocator(&arena)),
+        top_level_node = -1,
+        current_selector = 0,
+        arena = arena
+    };
+
+    return iter.generator(
+        &ctx,
+        query_next,
+        ctx => { alloc.arena.free(&ctx.arena); }
+    );
+}
+
+#local
+QueryStack :: struct {
+    node: &Node;
+    current_child: i32;
+}
+
+#local
+query_next :: ctx => {
+    while true {
+        if !ctx.stack {
+            // If the stack is empty, populate with a node
+            ctx.top_level_node += 1;
+            if ctx.top_level_node >= ctx.d.nodes.length do break;
+
+            ctx.stack << .{ ctx.d.nodes[ctx.top_level_node], 0 };
+        }
+
+        last_query := array.get_ptr(ctx.stack, -1);
+        if !last_query do break;
+
+        while last_query.current_child < last_query.node.children.length {
+            ctx.stack << .{ last_query.node.children[last_query.current_child], 0 };
+            last_query.current_child += 1;
+            last_query = array.get_ptr(ctx.stack, -1);
+        }
+
+        defer array.pop(&ctx.stack);
+        for ctx.q.matches_any {
+            if query_selector_matches(it, ctx.stack) {
+                return last_query.node, true;
+            }
+        }
+    }
+
+    return .{}, false;
+}
+
+#local
+query_selector_matches :: (s: &Selector, trail: [] QueryStack) -> bool {
+    if !trail do return false;
+
+    node_index: i32 = trail.count - 1;
+    if !query_matcher_matches(s.segments[0].matcher, trail[node_index].node) {
+        return false;
+    }
+
+    node_index -= 1;
+    for segment: s.segments[1 .. s.segments.length] {
+        switch segment.op->unwrap() {
+            case .Child, .Descendant {
+                while node_index >= 0 {
+                    defer node_index -= 1;
+                    if query_matcher_matches(segment.matcher, trail[node_index].node) {
+                        // Continue from the outer for loop
+                        continue continue;
+                    }
+
+                    if segment.op->unwrap() == .Child {
+                        break;
+                    }
+                }
+
+                return false;
+            }
+
+            case .Neighbor, .Sibling {
+                if node_index < 0 do return false;
+
+                parent_node := trail[node_index].node;
+
+                walker_index: i32 = trail[node_index].current_child - 1;
+                if walker_index <= 0 do return false;
+
+                walker_index -= 1;
+                while walker_index >= 0 {
+                    defer walker_index -= 1;
+
+                    if query_matcher_matches(segment.matcher, parent_node.children[walker_index]) {
+                        // Continue from the outer for loop
+                        continue continue;
+                    }
+
+                    if segment.op->unwrap() == .Neighbor {
+                        break;
+                    }
+                }
+
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+#local
+query_matcher_matches :: (s: &Matcher, node: &Node) =>
+    iter.as_iter(s.details)
+    |> iter.every((d, [node]) => {
+        if d.accessor.Scope do return false;
+
+        if d.accessor.Node {
+            name := node.node;
+            if !d.value && d.op == .Equal {
+                return true;
+            }
+
+            return operate_on_values(
+                .{ data = .{ String = name } },
+                d.value?,
+                d.op
+            );
+        }
+
+        d.accessor.Prop->with([prop] {
+            if !d.value {
+                return node.props->has(prop);
+            }
+
+            return operate_on_values(node.props->get(prop)?, d.value?, d.op);
+        });
+
+        d.accessor.Arg->with([index] {
+            if !d.value {
+                return index < node.values.length;
+            }
+
+            if index >= node.values.length do return false;
+
+            return operate_on_values(node.values[index], d.value?, d.op);
+        });
+
+        return false;
+    })
+
+#local
+operate_on_values :: (v1, v2: Value, op: AttributeOp) -> bool {
+    v1_ := v1;
+    v2_ := v2;
+    left := v1_->as_str()?;
+    right := v2_->as_str()?;
+
+    return switch op {
+        case .Equal    => left == right;
+        case .NotEqual => left != right;
+
+        case .StartsWith => string.starts_with(left, right);
+        case .EndsWith   => string.ends_with(left, right);
+        case .Contains   => string.contains(left, right);
+
+        case #default => false;
+    };
+}
+
+
+#local
+Query :: struct {
+    matches_any: [..] &Selector;
+}
+
+#local
+Selector :: struct {
+    segments: [..] &SelectorSegment;
+}
+
+#local
+SelectorSegment :: struct {
+    op: ? SelectorOp;
+    matcher: &Matcher;
+}
+
+#local
+SelectorOp :: enum {
+    Child;
+    Descendant;
+    Neighbor;
+    Sibling;
+}
+
+#local
+Matcher :: struct {
+    details: [..] MatcherDetails;
+}
+
+#local
+MatcherDetails :: struct {
+    accessor: MatcherAccessor;
+    op: AttributeOp;
+    value: ? Value;
+}
+
+#local
+MatcherAccessor :: union {
+    Scope: void;
+    Node: void;
+    Annotation: void;
+    Arg: u32;
+    Prop: str;
+}
+
+#local
+AttributeOp :: enum {
+    Equal;
+    NotEqual;
+    Gt;
+    Gte;
+    Lt;
+    Lte;
+    StartsWith;
+    EndsWith;
+    Contains;
+}
+
+
+#local
+QueryParser :: struct {
+    query: str;
+    cursor: u32;
+
+    al: Allocator;
+}
+
+#local
+parse_query :: (q: str, al: Allocator) -> Query {
+    query: Query;
+    query.matches_any = make(typeof query.matches_any, al);
+
+    parser: QueryParser;
+    parser.query = q;
+    parser.al = al;
+
+    while !reached_end(&parser) {
+        query.matches_any << parse_selector(&parser);
+
+        skip_whitespace(&parser);
+        if string.starts_with(parser->rem(), "||") {
+            parser.cursor += 2;
+            skip_whitespace(&parser);
+        }
+    }
+
+    return query;
+}
+
+#local
+parse_selector :: (p: &QueryParser) -> &Selector {
+    s := p.al->move(Selector.{
+        segments = make([..] &SelectorSegment, p.al)
+    });
+
+    while !reached_end(p) {
+        skip_whitespace(p);
+
+        switch parse_matcher(p) {
+            case matcher: .Some {
+                segment := p.al->move(SelectorSegment.{
+                    matcher = matcher
+                });
+
+                s.segments << segment;
+
+                skip_whitespace(p);
+                if reached_end(p) {
+                    break break;
+                }
+
+                segment.op = switch p.query[p.cursor] {
+                    case '>' => SelectorOp.Child;
+                    case '+' => .Neighbor;
+                    case '~' => .Sibling;
+                    case #default => .Descendant;
+                };
+
+                if segment.op->unwrap() != .Descendant {
+                    p.cursor += 1;
+                }
+            }
+
+            case .None do break break;
+        }
+
+        skip_whitespace(p);
+
+        if string.starts_with(p->rem(), "||") {
+            break;
+        }
+    }
+
+    // Reverse the segments here so it is easier to process with later.
+    array.reverse(s.segments);
+
+    return s;
+}
+
+#local
+parse_matcher :: (p: &QueryParser) -> ? &Matcher {
+    if reached_end(p) do return .{};
+
+    m := p.al->move(Matcher.{
+        details = make([..] MatcherDetails, p.al)
+    });
+
+    if p.query[p.cursor] != '[' {
+        id := parse_identifier(p);
+
+        m.details << .{
+            accessor = .{ Node = .{} },
+            op = .Equal,
+            value = .{ Some = .{ data = .{ String = id } } }
+        };
+    }
+
+    while p.query[p.cursor] == '[' {
+        p.cursor += 1;
+        
+        if p.query[p.cursor] == ']' {
+            p.cursor += 1;
+
+            m.details << .{
+                accessor = .{ Node = .{} },
+                op = .Equal,
+                value = .{ None = .{} },
+            };
+            continue;
+        }
+
+        accessor: MatcherAccessor;
+
+        if string.starts_with(p->rem(), "val(") {
+            p.cursor += 4; // "("
+            if string.starts_with(p->rem(), ")") {
+                p.cursor += 1;
+                accessor = .{ Arg = 0 };
+
+            } else {
+                index := parse_number(p);
+                accessor = .{ Arg = index };
+
+                if string.starts_with(p->rem(), ")") {
+                    p.cursor += 1;
+                }
+            }
+        }
+
+        elseif string.starts_with(p->rem(), "prop(") {
+            p.cursor += 5; // "prop("
+            prop_id := parse_identifier(p);
+            accessor = .{ Prop = prop_id };
+
+            if string.starts_with(p->rem(), ")") {
+                p.cursor += 1;
+            }
+        }
+
+        else {
+            prop_id := parse_identifier(p);
+            accessor = .{ Prop = prop_id };
+        }
+
+        skip_whitespace(p);
+
+        op := AttributeOp.Equal;
+        value: ? Value;
+
+        if p.query[p.cursor] != ']' {
+            rem := p->rem();
+            if     string.starts_with(rem, "=")  { op = .Equal; p.cursor += 1; }
+            elseif string.starts_with(rem, "!=") { op = .NotEqual; p.cursor += 2; }
+            elseif string.starts_with(rem, "<=") { op = .Lte; p.cursor += 2; }
+            elseif string.starts_with(rem, ">=") { op = .Gte; p.cursor += 2; }
+            elseif string.starts_with(rem, "<")  { op = .Lt; p.cursor += 1; }
+            elseif string.starts_with(rem, ">")  { op = .Gt; p.cursor += 1; }
+            elseif string.starts_with(rem, "^=") { op = .StartsWith; p.cursor += 2; }
+            elseif string.starts_with(rem, "$=") { op = .EndsWith; p.cursor += 2; }
+            elseif string.starts_with(rem, "*=") { op = .Contains; p.cursor += 2; }
+
+            skip_whitespace(p);
+
+            // TODO Make any value work!
+            v := Value.{ data = .{ String = "" } };
+            if string.starts_with(p->rem(), "\"") {
+                p.cursor += 1;
+                v.data = .{
+                    String = read_until(p, '"')
+                };
+                p.cursor += 1;
+            }
+
+            value = v;
+        }
+
+        m.details << .{
+            accessor,
+            op,
+            value
+        };
+
+        read_until(p, ']');
+        if p.query[p.cursor] == ']' {
+            p.cursor += 1;
+        }
+    }
+
+    return m;
+}
+
+#local
+skip_whitespace :: (p: &QueryParser) {
+    while p.query[p.cursor]->is_whitespace() {
+        p.cursor += 1;
+        if reached_end(p) do break;
+    }
+}
+
+#local
+reached_end :: macro (p: &QueryParser) => p.cursor >= p.query.length;
+
+#inject
+QueryParser.rem :: (p: &QueryParser) => p.query[p.cursor .. p.query.length];
+
+#local
+parse_identifier :: (p: &QueryParser) -> str {
+    c := p.cursor;
+    while !is_end_of_word(~~p.query[p.cursor]) && is_id(~~p.query[p.cursor]) && !reached_end(p) {
+        p.cursor += 1;
+    }
+
+    return p.query[c .. p.cursor];
+}
+
+#local
+parse_number :: (p: &QueryParser) -> u32 {
+    v := 0;
+    while p.query[p.cursor]->is_num() && !reached_end(p) {
+        v *= 10;
+        v += cast(u32) (p.query[p.cursor] - '0');
+        p.cursor += 1;
+    }
+
+    return v;
+}
+
+#local
+read_until :: (p: &QueryParser, c: u8) -> str {
+    start := p.cursor;
+    while p.query[p.cursor] != c && !reached_end(p) {
+        p.cursor += 1;
+    }
+
+    return p.query[start .. p.cursor];
+}
diff --git a/core/encoding/kdl/parser.onyx b/core/encoding/kdl/parser.onyx

new file mode 100644 (file)

index 0000000..bdb8d1f
--- /dev/null
+++ b/core/encoding/kdl/parser.onyx
@@ -0,0 +1,586 @@
+package core.encoding.kdl
+#allow_stale_code
+
+
+// TODO
+//  - Parse integers
+//  - Parse decimals
+//  - Types in the correct places
+//  - Escaped strings
+//  - Slashdash on children block
+
+use core {tprintf, Result, printf}
+use core.io
+use core.memory
+use core.slice
+use core.encoding.utf8
+use core.string
+
+#package
+Tokenizer :: struct {
+    doc: [] u8;
+    cursor: u32;
+    doc_is_owned := false;
+
+    peeked_token: ? Token;
+}
+
+#package
+Token :: union {
+    Error: void;
+    Start_Type: void;
+    End_Type: void;
+    Word: str;
+    String: str;
+    Raw_String: str;
+    Single_Line_Comment: str;
+    Slashdash: void;
+    Multi_Line_Comment: str;
+    Equals: void;
+    Start_Children: void;
+    End_Children: void;
+    Newline: void;
+    Semicolon: void;
+    Line_Continuation: void;
+    Whitespace: str;
+    EOF: void;
+}
+
+#inject Tokenizer {
+    make :: #match {
+        ((r: &io.Reader) => #Self.{ doc = r->read_all(), doc_is_owned = true }),
+        ((s: str)        => #Self.{ doc = s }),
+    }
+
+    destroy :: (self: &#Self) {
+        if self.doc_is_owned {
+            delete(&self.doc);
+        }
+    }
+
+    peek_char :: (self: &#Self) -> ? u32 {
+        codepoint_length := utf8.rune_length_from_first_byte(self.doc[self.cursor]);
+        if self.cursor + codepoint_length >= self.doc.length do return .{ None = .{} };
+        
+        value := utf8.decode_rune(string.advance(self.doc, self.cursor));
+        return value;
+    }
+
+    eat_char :: (self: &#Self) -> ? u32 {
+        codepoint_length := utf8.rune_length_from_first_byte(self.doc[self.cursor]);
+        if self.cursor + codepoint_length >= self.doc.length do return .{ None = .{} };
+        
+        value := utf8.decode_rune(string.advance(self.doc, self.cursor));
+        self.cursor += codepoint_length;
+
+        return value;
+    }
+
+    peek_token :: (use self: &#Self) -> Token {
+        if peeked_token do return peeked_token->unwrap();
+
+        // :CompilerBug
+        // There is a weird bug related to an optimization happening here.
+        // I would like the following code to just be:
+        //
+        //     peeked_token = self->next_token();
+        //
+        // But sadly, this does not work. This is because the next_token return value
+        // is being upcasted to an optional token. The tag for the optional saying
+        // that it is a "some" not a "none" is emitted first, then the actual call.
+        // The problem is that when assigning a structure literal (which is what this
+        // is internally implemented as), the assignment is done in parts (1st member
+        // emit and store, 2nd member emit and store, etc.). Well, the first member
+        // says that the result is a Some, so at this point peeked_token is a Some
+        // of invalid data. Then in next_token, this is consumed and returned as
+        // a valid token, even though it is not.
+        //
+        new_token := self->next_token();
+        peeked_token = new_token;
+
+        return peeked_token?;
+    }
+
+    next_token :: (self: &#Self) -> Token {
+        if self.peeked_token {
+            tkn := self.peeked_token->unwrap();
+            self.peeked_token->reset();
+            return tkn;
+        }
+
+        c := self->peek_char()->or_return(Token.{ EOF = .{} });
+
+        if is_whitespace(c) {
+            self->consume_while([c](is_whitespace(c)));
+            return self->next_token();
+        }
+
+        if c == '/' {
+            comment := self->handle_comment();
+            if comment.Slashdash do return comment;
+            return self->next_token();
+        }
+
+        if is_newline(c) {
+            self->eat_char();
+            if c == '\r' {
+                // Consume one more character for CRLF.
+                self->eat_char();
+            }
+
+            return .{ Newline = .{} };
+            // return self->next_token();
+        }
+
+        if c == ';'  { self->eat_char(); return .{ Semicolon = .{} }; }
+        if c == '\\' { self->eat_char(); return .{ Line_Continuation = .{} }; }
+        if c == '('  { self->eat_char(); return .{ Start_Type = .{} }; }
+        if c == ')'  { self->eat_char(); return .{ End_Type = .{} }; }
+        if c == '{'  { self->eat_char(); return .{ Start_Children = .{} }; }
+        if c == '}'  { self->eat_char(); return .{ End_Children = .{} }; }
+        if c == '='  { self->eat_char(); return .{ Equals = .{} }; }
+        if c == '"' {
+            return self->handle_string();
+        }
+        if is_id(c) {
+            return self->handle_word();
+        }
+
+        assert(false, tprintf("Unhandled character, {}", c));
+        return .{ Error = .{} };
+    }
+
+    consume_while :: macro (self: &#Self, cond: Code) -> str {
+        res := self.doc[self.cursor .. self.cursor];
+        while true {
+            codepoint := self->peek_char()->or_return(res);
+            if !(#unquote cond(codepoint)) {
+                return res;
+            } else {
+                self->eat_char();
+                res.length += 1;
+            }
+        }
+    }
+
+    handle_comment :: (self: &#Self) -> Token {
+        self->eat_char();
+        c := self->eat_char()->or_return(Token.{EOF=.{}});
+        switch c {
+            case '-' {
+                return .{ Slashdash = .{} };
+            }
+            case '/' {
+                body := self->consume_while([c](!is_newline(c)));
+                return .{ Single_Line_Comment = body };
+            }
+            case '*' {
+                cursor_start := self.cursor;
+
+                depth := 1;
+                prev_char := 0;
+                while depth >= 1 {
+                    c := self->eat_char()->or_return(Token.{ Error=.{} });
+                    if c == '*' && prev_char == '/' {
+                        depth += 1;
+                        c = 0;
+                    }
+                    if c == '/' && prev_char == '*' {
+                        depth -= 1;
+                        c = 0;
+                    }
+
+                    prev_char = c;
+                }
+
+                return .{ Multi_Line_Comment = self.doc[cursor_start .. self.cursor-2] };
+            }
+        }
+    }
+
+    handle_string :: (self: &#Self) -> Token {
+        c := self->eat_char()->or_return(Token.{EOF=.{}});
+        if c != '"' do return Token.{Error=.{}};
+
+        cursor_start := self.cursor;
+        prev_char := 0;
+        while true {
+            c := self->eat_char()->or_return(Token.{Error=.{}});
+            if c == '\\' && prev_char == '\\' {
+                c = 0;
+            }
+            if c == '"' && prev_char != '\\' {
+                break;
+            }
+            prev_char = c;
+        }
+
+        return .{ String = self.doc[cursor_start .. self.cursor-1] };
+    }
+
+    handle_word :: (self: &#Self) -> Token {
+        word := self->consume_while([c](!is_end_of_word(c) && is_id(c)));
+        return .{ Word = word };
+    }
+}
+
+
+#package
+Parser :: struct {
+    tokenizer: Tokenizer;
+    state: Parser_State;
+
+    depth: i32;
+
+    result_allocator: Allocator;
+}
+
+#local
+Parser_State :: enum #flags {
+    Outside_Node;
+    Inside_Node;
+
+    Line_Continuation :: 0x100;
+    Annotation_Start :: 0x200;
+    Annotation_End   :: 0x400;
+    Annotation_Ended :: 0x800;
+
+    In_Property :: 0x1000;
+
+    Whitespace_Banned :: Annotation_Start | Annotation_End | Annotation_Ended | In_Property;
+}
+
+Parse_Error :: union {
+    None: void;
+    Whitespace_Banned: void;
+    Parser_Error: str;
+}
+
+#inject Parser {
+    make :: #match {
+        ((r: &io.Reader) => Parser.{ Tokenizer.make(r) }),
+        ((s: str)        => Parser.{ Tokenizer.make(s) }),
+    }
+
+    parse :: (self: &#Self, doc: &Document) -> Parse_Error {
+        self.result_allocator = doc.allocator;
+
+        while true {
+            token := self.tokenizer->peek_token();
+            switch token {
+                case .EOF {
+                    break break;
+                }
+
+                case .Error {
+                    self.tokenizer->next_token();
+                    return .{ Parser_Error = tprintf("bad token: {}", token) };
+                }
+
+                case .Whitespace, .Newline {
+                    self.tokenizer->next_token();
+                    if self.state & .Whitespace_Banned {
+                        return .{ Whitespace_Banned = .{} };
+                    }
+                }
+
+                case .Single_Line_Comment, .Multi_Line_Comment {
+                    self.tokenizer->next_token();
+                    if self.state & .Whitespace_Banned {
+                        return .{ Whitespace_Banned = .{} };
+                    }
+                }
+
+                case #default {
+                    node_result := self->parse_node();
+                    if err := node_result->err(); err {
+                        logf(.Info, self.tokenizer.doc[self.tokenizer.cursor .. self.tokenizer.doc.length]);
+                        return err?;
+                    }
+
+                    node := node_result->ok()->unwrap();
+                    if node {
+                        doc.nodes << node;
+                    }
+                }
+            }
+        }
+
+        return .{};
+    }
+
+    parse_node :: (self: &#Self) -> Result(&Node, Parse_Error) {
+        self.depth += 1;
+        defer self.depth -= 1;
+
+        self->skip_linespace();
+
+        if_next_token_is(self, .End_Children, [] { return .{ Ok = null }; });
+
+        is_ignored := false;
+        if self.tokenizer->peek_token().Slashdash {
+            self.tokenizer->next_token();
+            is_ignored = true;
+        }
+
+        type_annotation := self->parse_type_if_present()?;
+        name := self->parse_identifier()?;
+
+        if !name do return .{ Ok = null };
+
+        node_to_return := self.result_allocator->move(Node.{
+            node = name->unwrap(),
+            type_annotation = type_annotation,
+            props     = make(Map(str, Value), self.result_allocator),
+            values    = make([..] Value, 0, self.result_allocator),
+            children  = make([..] &Node, 0, self.result_allocator),
+        });
+
+        while true {
+            switch tkn := self.tokenizer->peek_token(); tkn {
+                case .Newline, .Semicolon {
+                    self.tokenizer->next_token();
+                    _apply_slashdash(node_to_return);
+                    return .{ Ok = node_to_return };
+                }
+
+                case .Word, .Raw_String, .String {
+                    self.tokenizer->next_token();
+                    if_next_token_is(self, .Equals, [] {
+                        // Is this good? Or just too hacky?
+                        prop_name := self->parse_into_string(tkn)->or_return(
+                            Result(&Node, Parse_Error).{ Err = .{ Parser_Error = "Error parsing property key" } }
+                        );
+
+                        type := self->parse_type_if_present()?;
+                        value := self->parse_value(self.tokenizer->next_token()) ?? [] {
+                            return return .{ Err = .{ Parser_Error = "Error parsing property value" } };
+                        };
+
+                        value.type_annotation = type;
+
+                        node_to_return.props[prop_name] = value;
+                        continue;
+                    });
+
+                    value := self->parse_value(tkn) ?? [] {
+                        return return .{ Err = .{ Parser_Error = "Error parsing argument value" } };
+                    };
+
+                    node_to_return.values << value;
+                }
+
+                case .Start_Type {
+                    type := self->parse_type_if_present()?;
+
+                    value := self->parse_value(self.tokenizer->next_token()) ?? [] {
+                        return return .{ Err = .{ Parser_Error = "Error parsing argument value" } };
+                    };
+
+                    value.type_annotation = type;
+                    node_to_return.values << value;
+                }
+
+                case .Start_Children {
+                    self.tokenizer->next_token();
+                    while !self.tokenizer->peek_token().End_Children {
+                        child := self->parse_node()?;
+                        if child {
+                            node_to_return.children << child;
+                        }
+
+                        self->skip_linespace();
+                    }
+
+                    self->expect_token(.End_Children);
+                    break break;
+                }
+
+                case .End_Children {
+                    break break;
+                }
+
+                case #default {
+                    return .{ Err = .{ Parser_Error = tprintf("Unexpected token {}, expected node", tkn) } };
+                }
+            }
+        }
+        
+        _apply_slashdash(node_to_return);
+        return .{ Ok = node_to_return };
+
+        _apply_slashdash :: macro (n: &Node) {
+            if is_ignored {
+                n = null;
+            }
+        }
+    }
+
+    parse_value :: (self: &#Self, token: Token) -> ? Value {
+        switch token {
+            case s: .Raw_String {
+                return Value.{
+                    data = .{ String = string.alloc_copy(s, self.result_allocator) }
+                };
+            }
+
+            case s: .String {
+                // TODO: Handle escaped strings here
+                return Value.{
+                    data = .{ String = string.alloc_copy(s, self.result_allocator) }
+                };
+            }
+            
+            case w: .Word {
+                if w == "null" {
+                    return Value.{
+                        data = .{ Null = .{} }
+                    };
+                }
+
+                if w == "true" {
+                    return Value.{
+                        data = .{ Boolean = true }
+                    };
+                }
+
+                if w == "false" {
+                    return Value.{
+                        data = .{ Boolean = false }
+                    };
+                }
+
+                // TODO: parse numbers
+
+                return Value.{
+                    data = .{ String = string.alloc_copy(w, self.result_allocator) }
+                };
+            }
+
+            case #default do return .{};
+        }
+    }
+
+    parse_type_if_present :: (self: &#Self) -> Result(? str, Parse_Error) {
+        if_next_token_is(self, .Start_Type, [start_tkn] {
+            type_token := self.tokenizer->next_token();
+            switch type_token {
+                case .Word, .String, .Raw_String {
+                    self->expect_token(.End_Type);
+                    return .{ Ok = self->parse_into_string(type_token) };
+                }
+
+                case #default {
+                    return .{ Err = .{Parser_Error = tprintf("Expected identifier or string, got {}.", type_token)}};
+                }
+            }
+        });
+
+        return .{ Ok = .{ None = .{} } };
+    }
+
+    parse_identifier :: (self: &#Self) -> Result(? str, Parse_Error) {
+        id_token := self.tokenizer->next_token();
+        switch id_token {
+            case .Word, .String, .Raw_String {
+                name := self->parse_into_string(id_token)
+                            ->catch([] { fallthrough; });
+                return .{ Ok = .{ Some = name } };
+            }
+
+            case .EOF {
+                return .{ Ok = .{ None = .{} } };
+            }
+
+            case #default {
+                return .{ Err = .{Parser_Error = tprintf("Expected identifier or string, got {}.", id_token)}};
+            }
+        }
+    }
+
+    skip_linespace :: (self: &#Self) {
+        while true {
+            switch tkn := self.tokenizer->peek_token(); tkn {
+                case .Newline, .Single_Line_Comment {
+                    self.tokenizer->next_token();
+                }
+
+                case #default {
+                    return;
+                }
+            }
+        }
+    }
+
+
+    expect_token :: macro (self: &#Self, type: Token.tag_enum) -> Token {
+        tkn := self.tokenizer->next_token();
+        if tkn.tag != type {
+            return return .{ Err = .{Parser_Error = tprintf("Expected {}, got {}", type, tkn) } };
+        } else {
+            return tkn;
+        }
+    }
+
+    parse_into_string :: (self: &#Self, tkn: Token) -> ? str {
+        return self->parse_value(tkn)->and_then(x => x.data.String);
+    }
+}
+
+#package {
+    MIN_BUFFER_SIZE :: 1024
+    BUFFER_SIZE_INCREMENT :: 4096
+
+    is_whitespace :: (c: u32) -> bool {
+        #persist chars := u32.[
+            0x9, 0x20, 0xa0, 0x1680,
+            0x2000, 0x2001, 0x2002, 0x2003,
+            0x2004, 0x2005, 0x2006, 0x2007,
+            0x2008, 0x2009, 0x200A,
+            0x202F, 0x205F,
+            0x3000
+        ];
+        return slice.contains(chars, [it](it == c));
+    }
+
+    is_newline :: (c: u32) -> bool {
+        #persist chars := u32.[
+            0xd, 0xa, 0x85, 0xc, 0x2028, 0x2029
+        ];
+        return slice.contains(chars, [it](it == c));
+    }
+
+    is_id :: (c: u32) -> bool {
+        if c < 0x20 || c > 0x10ffff do return false;
+        if is_whitespace(c) || is_newline(c) do return false;
+
+        #persist chars := u32.[
+            '\\', '/', '(', ')', '{', '}', '<', '>', ';', '[', ']', '=', ',', '"'
+        ];
+
+        return !slice.contains(chars, [it](it == c));
+    }
+
+    is_id_start :: (c: u32) -> bool {
+        return is_id(c) && (c < '0' || c > '9');
+    }
+
+    is_end_of_word :: (c: u32) -> bool {
+        if is_whitespace(c) do return true;
+        if is_newline(c) do return true;
+
+        #persist chars := u32.[ ';', ')', '}', '/', '\\', '=' ];
+        return slice.contains(chars, [it](it == c));
+    }
+
+    if_next_token_is :: macro (p: &Parser, $type: Token.tag_enum, body: Code) {
+        switch __tkn := p.tokenizer->peek_token(); __tkn {
+            case type {
+                p.tokenizer->next_token();
+                #unquote body(__tkn);
+            }
+            case #default ---
+        }
+    }
+}
+
diff --git a/core/encoding/kdl/utils.onyx b/core/encoding/kdl/utils.onyx

new file mode 100644 (file)

index 0000000..8c7ba06
--- /dev/null
+++ b/core/encoding/kdl/utils.onyx
@@ -0,0 +1,43 @@
+package core.encoding.kdl
+#allow_stale_code
+
+use core {string}
+
+#inject Value {
+    as_str :: (v: &Value) -> ? str {
+        return v.data.String;
+    }
+
+    as_int :: (v: Value) -> ? i64 {
+        return v.data.Number?.Integer;
+    }
+
+    as_float :: (v: Value) -> ? f64 {
+        return v.data.Number?.Float;
+    }
+}
+
+#inject Document {
+    create_node :: (d: &Document, name: str) -> &Node {
+        return d.allocator->move(Node.{
+            node = name,
+            type_annotation = .{ None = .{} },
+            values = make([..] Value, d.allocator),
+            props = make(Map(str, Value), d.allocator),
+            children = make([..] &Node, d.allocator)
+        });
+    }
+}
+
+#inject Node {
+    add_value :: (n: &Node, value: Value.Value_Data) {
+        n.values << Value.{data = value};
+    }
+
+    set_prop :: (n: &Node, name: str, value: Value.Value_Data) {
+        n.props->put(
+            string.alloc_copy(name, n.props.allocator),
+            .{ data = value }
+        );
+    }
+}
+\ No newline at end of file
diff --git a/core/module.onyx b/core/module.onyx

index ab028a0380160701e2689e80d722be36ce5548e6..c8809b1582e7e3c8e3bed487f24f8ee859099777 100644 (file)
--- a/core/module.onyx
+++ b/core/module.onyx
@@ -64,6 +64,7 @@ use runtime
  #load "./encoding/utf8"
  #load "./encoding/osad"
  #load_all "./encoding/json"
+#load "./encoding/kdl/kdl"
  
  #load "./runtime/common"
  
diff --git a/scripts/onyx-pkg.onyx b/scripts/onyx-pkg.onyx

index 0fbb05a940262a18cdd1db46f62c71afebf591d6..1dc60aa162ca2112dda17ba9573ea10d72eaa58c 100644 (file)
--- a/scripts/onyx-pkg.onyx
+++ b/scripts/onyx-pkg.onyx
@@ -287,11 +287,9 @@ run_sync_command :: (args: [] cstr) {
  
      dependencies_to_install   := make([..] To_Install);
      dependencies_installed    := make(Map(str, SemVer));
-    needed_dependency_folders := make([..] str);
      defer {
          delete(&dependencies_to_install);
          delete(&dependencies_installed);
-        delete(&needed_dependency_folders);
      }
  
      for& config.dependencies.dependencies.entries {
@@ -312,8 +310,6 @@ run_sync_command :: (args: [] cstr) {
              return;
          }
  
-        needed_dependency_folders << installed_folder;
-
          inner_config := read_config_from_installed_dependency(installed_folder) ?? [] {
              error_print("Misconfigured onyx-pkg.ini in '{}'. Omitting.\n", to_install.repo);
              continue;
author	Brendan Hansen <brendan.f.hansen@gmail.com>
	Tue, 21 Nov 2023 03:49:08 +0000 (21:49 -0600)
committer	Brendan Hansen <brendan.f.hansen@gmail.com>
	Tue, 21 Nov 2023 03:49:08 +0000 (21:49 -0600)
build.sh		patch \| blob \| history
compiler/src/lex.c		patch \| blob \| history
core/encoding/kdl/kdl.onyx	[new file with mode: 0644]	patch \| blob
core/encoding/kdl/kql.onyx	[new file with mode: 0644]	patch \| blob
core/encoding/kdl/parser.onyx	[new file with mode: 0644]	patch \| blob
core/encoding/kdl/utils.onyx	[new file with mode: 0644]	patch \| blob
core/module.onyx		patch \| blob \| history
scripts/onyx-pkg.onyx		patch \| blob \| history