From: Brendan Hansen
Date: Tue, 21 Nov 2023 03:49:08 +0000 (-0600)
Subject: added: KDL support; bugfixed: segfault in lexer
X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=af999015c855ffad9cce55565b2f4ef0a123c95f;p=onyx.git

added: KDL support; bugfixed: segfault in lexer
---

diff --git a/build.sh b/build.sh
index d6ea7e01..29cc7b6c 100755
--- a/build.sh
+++ b/build.sh
@@ -89,6 +89,7 @@ install_all() {
 for arg in $@; do
     case "$arg" in
         compile) compile_all ;;
+        debug) compile_all debug ;;
        package) package_all ;;
        compress) compress_all ;;
        install) install_all ;;
diff --git a/compiler/src/lex.c b/compiler/src/lex.c
index 5853b5c9..81e866bb 100644
--- a/compiler/src/lex.c
+++ b/compiler/src/lex.c
@@ -90,7 +90,7 @@ static const char* token_type_names[] = {
         (tkn)->line_number++;                       \
         (tkn)->line_start = (tkn)->curr + 1;        \
     }                                               \
-    (tkn)->curr++;                                  \
+    if ((tkn)->curr != (tkn)->end) (tkn)->curr++;   \
 }
 #endif
 
@@ -239,7 +239,7 @@ whitespace_skipped:
         INCREMENT_CURR_TOKEN(tokenizer);
         INCREMENT_CURR_TOKEN(tokenizer);
 
-        while (!(*tokenizer->curr == '"' && *(tokenizer->curr + 1) == '"' && *(tokenizer->curr + 2) == '"')) {
+        while (!(*tokenizer->curr == '"' && *(tokenizer->curr + 1) == '"' && *(tokenizer->curr + 2) == '"') && tokenizer->curr != tokenizer->end) {
             len++;
             INCREMENT_CURR_TOKEN(tokenizer);
         }
@@ -262,7 +262,7 @@ whitespace_skipped:
         char ch = *tk.text;
         INCREMENT_CURR_TOKEN(tokenizer);
 
-        while (!(*tokenizer->curr == ch && slash_count == 0)) {
+        while (tokenizer->curr != tokenizer->end && !(*tokenizer->curr == ch && slash_count == 0)) {
             len++;
 
             if (*tokenizer->curr == '\n' && ch == '\'') {
@@ -279,6 +279,10 @@ whitespace_skipped:
             }
 
             INCREMENT_CURR_TOKEN(tokenizer);
+            if (tokenizer->curr == tokenizer->end) {
+                onyx_report_error(tk.pos, Error_Critical, "String literal not closed. String literal starts here.");
+                break;
+            }
         }
 
         INCREMENT_CURR_TOKEN(tokenizer);
diff --git a/core/encoding/kdl/kdl.onyx b/core/encoding/kdl/kdl.onyx
new file mode 100644
index 00000000..b6653f4a
--- /dev/null
+++ b/core/encoding/kdl/kdl.onyx
@@ -0,0 +1,134 @@
+package core.encoding.kdl
+#allow_stale_code
+
+#load "./parser"
+#load "./utils"
+#load "./kql"
+
+use core {Result, io, string}
+
+//
+// Cuddly Document Language (KDL)
+// https://kdl.dev
+//
+
+
+Document :: struct {
+    allocator: Allocator;
+    nodes: [..] &Node;
+}
+
+Node :: struct {
+    node: str;
+    type_annotation: ? str;
+    values: [..] Value;
+    props: Map(str, Value);
+    children: [..] &Node;
+}
+
+Value :: struct {
+    data: Value_Data;
+    type_annotation: ? str;
+
+    Value_Data :: union {
+        String: str;
+        Number: KDL_Number;
+        Boolean: bool;
+        Null: void;
+    }
+}
+
+KDL_Number :: union {
+    Integer: i64;
+    Float: f64;
+    String: str;
+}
+
+
+#doc """
+    Parses a string or `io.Reader` into a KDL document, using the allocator provided for internal allocations.
+
+    Call `core.encoding.kdl.free` to free the returned document.
+""" +parse :: #match #local -> Result(Document, Parse_Error) {} + +#overload +parse :: (s: str, allocator := context.allocator) -> Result(Document, Parse_Error) { + doc: Document; + doc.allocator = allocator; + + parser := Parser.make(s); + error := parser->parse(&doc); + + if error.None { + return .{ Ok = doc }; + } else { + return .{ Err = error }; + } +} + +#overload +parse :: (r: &io.Reader, allocator := context.allocator) -> Result(Document, Parse_Error) { + doc: Document; + doc.allocator = allocator; + + parser := Parser.make(r); + error := parser->parse(&doc); + + if error.None { + return .{ Ok = doc }; + } else { + return .{ Err = error }; + } +} + +#overload +builtin.delete :: free + +#doc """ + Releases all resources allocated for the document. +""" +free :: (d: Document) { + for d.nodes do free_node(d.allocator, it); + delete(&d.nodes); +} + +#local +free_node :: (al: Allocator, n: &Node) { + string.free(n.node, al); + n.type_annotation->with([t] { string.free(t, al); }); + + for& v: n.values do free_value(al, v); + delete(&n.values); // This should use the allocator inside of the array + + for n.props->as_iter() { + string.free(it.key, al); + free_value(al, &it.value); + } + delete(&n.props); + + for n.children do free_node(al, it); +} + +#local +free_value :: (al: Allocator, v: &Value) { + v.type_annotation->with([t] { string.free(t, al); }); + + switch v.data { + case s: .String { + string.free(s, al); + } + + case num: .Number do switch num { + case s: .String { + string.free(s, al); + } + + case #default --- + } + + case #default --- + } +} + + diff --git a/core/encoding/kdl/kql.onyx b/core/encoding/kdl/kql.onyx new file mode 100644 index 00000000..7d534e27 --- /dev/null +++ b/core/encoding/kdl/kql.onyx @@ -0,0 +1,490 @@ +package core.encoding.kdl +#allow_stale_code + +use core {iter, alloc, array, string} + +#inject Document { + query :: query + query_all :: query_all +} + +query :: (d: &Document, query: str) -> ? &Node { + query_iter := query_all(d, query); + node, empty := iter.next(query_iter); + iter.close(query_iter); + + if !empty do return node; + return null; +} + +query_all :: (d: &Document, query: str) -> Iterator(&Node) { + arena := alloc.arena.make(context.allocator, 16 * 1024); + q := parse_query(query, alloc.as_allocator(&arena)); + + ctx := .{ + d = d, + q = q, + stack = make([..] 
+        stack = make([..] QueryStack, 8, alloc.as_allocator(&arena)),
+        top_level_node = -1,
+        current_selector = 0,
+        arena = arena
+    };
+
+    return iter.generator(
+        &ctx,
+        query_next,
+        ctx => { alloc.arena.free(&ctx.arena); }
+    );
+}
+
+#local
+QueryStack :: struct {
+    node: &Node;
+    current_child: i32;
+}
+
+#local
+query_next :: ctx => {
+    while true {
+        if !ctx.stack {
+            // If the stack is empty, populate with a node
+            ctx.top_level_node += 1;
+            if ctx.top_level_node >= ctx.d.nodes.length do break;
+
+            ctx.stack << .{ ctx.d.nodes[ctx.top_level_node], 0 };
+        }
+
+        last_query := array.get_ptr(ctx.stack, -1);
+        if !last_query do break;
+
+        while last_query.current_child < last_query.node.children.length {
+            ctx.stack << .{ last_query.node.children[last_query.current_child], 0 };
+            last_query.current_child += 1;
+            last_query = array.get_ptr(ctx.stack, -1);
+        }
+
+        defer array.pop(&ctx.stack);
+        for ctx.q.matches_any {
+            if query_selector_matches(it, ctx.stack) {
+                return last_query.node, true;
+            }
+        }
+    }
+
+    return .{}, false;
+}
+
+#local
+query_selector_matches :: (s: &Selector, trail: [] QueryStack) -> bool {
+    if !trail do return false;
+
+    node_index: i32 = trail.count - 1;
+    if !query_matcher_matches(s.segments[0].matcher, trail[node_index].node) {
+        return false;
+    }
+
+    node_index -= 1;
+    for segment: s.segments[1 .. s.segments.length] {
+        switch segment.op->unwrap() {
+            case .Child, .Descendant {
+                while node_index >= 0 {
+                    defer node_index -= 1;
+                    if query_matcher_matches(segment.matcher, trail[node_index].node) {
+                        // Continue from the outer for loop
+                        continue continue;
+                    }
+
+                    if segment.op->unwrap() == .Child {
+                        break;
+                    }
+                }
+
+                return false;
+            }
+
+            case .Neighbor, .Sibling {
+                if node_index < 0 do return false;
+
+                parent_node := trail[node_index].node;
+
+                walker_index: i32 = trail[node_index].current_child - 1;
+                if walker_index <= 0 do return false;
+
+                walker_index -= 1;
+                while walker_index >= 0 {
+                    defer walker_index -= 1;
+
+                    if query_matcher_matches(segment.matcher, parent_node.children[walker_index]) {
+                        // Continue from the outer for loop
+                        continue continue;
+                    }
+
+                    if segment.op->unwrap() == .Neighbor {
+                        break;
+                    }
+                }
+
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+#local
+query_matcher_matches :: (s: &Matcher, node: &Node) =>
+    iter.as_iter(s.details)
+    |> iter.every((d, [node]) => {
+        if d.accessor.Scope do return false;
+
+        if d.accessor.Node {
+            name := node.node;
+            if !d.value && d.op == .Equal {
+                return true;
+            }
+
+            return operate_on_values(
+                .{ data = .{ String = name } },
+                d.value?,
+                d.op
+            );
+        }
+
+        d.accessor.Prop->with([prop] {
+            if !d.value {
+                return node.props->has(prop);
+            }
+
+            return operate_on_values(node.props->get(prop)?, d.value?, d.op);
+        });
+
+        d.accessor.Arg->with([index] {
+            if !d.value {
+                return index < node.values.length;
+            }
+
+            if index >= node.values.length do return false;
+
+            return operate_on_values(node.values[index], d.value?, d.op);
+        });
+
+        return false;
+    })
+
+#local
+operate_on_values :: (v1, v2: Value, op: AttributeOp) -> bool {
+    v1_ := v1;
+    v2_ := v2;
+    left := v1_->as_str()?;
+    right := v2_->as_str()?;
+
+    return switch op {
+        case .Equal => left == right;
+        case .NotEqual => left != right;
+
+        case .StartsWith => string.starts_with(left, right);
+        case .EndsWith => string.ends_with(left, right);
+        case .Contains => string.contains(left, right);
+
+        case #default => false;
+    };
+}
+
+
+#local
+Query :: struct {
+    matches_any: [..] &Selector;
+}
+
+#local
&SelectorSegment; +} + +#local +SelectorSegment :: struct { + op: ? SelectorOp; + matcher: &Matcher; +} + +#local +SelectorOp :: enum { + Child; + Descendant; + Neighbor; + Sibling; +} + +#local +Matcher :: struct { + details: [..] MatcherDetails; +} + +#local +MatcherDetails :: struct { + accessor: MatcherAccessor; + op: AttributeOp; + value: ? Value; +} + +#local +MatcherAccessor :: union { + Scope: void; + Node: void; + Annotation: void; + Arg: u32; + Prop: str; +} + +#local +AttributeOp :: enum { + Equal; + NotEqual; + Gt; + Gte; + Lt; + Lte; + StartsWith; + EndsWith; + Contains; +} + + +#local +QueryParser :: struct { + query: str; + cursor: u32; + + al: Allocator; +} + +#local +parse_query :: (q: str, al: Allocator) -> Query { + query: Query; + query.matches_any = make(typeof query.matches_any, al); + + parser: QueryParser; + parser.query = q; + parser.al = al; + + while !reached_end(&parser) { + query.matches_any << parse_selector(&parser); + + skip_whitespace(&parser); + if string.starts_with(parser->rem(), "||") { + parser.cursor += 2; + skip_whitespace(&parser); + } + } + + return query; +} + +#local +parse_selector :: (p: &QueryParser) -> &Selector { + s := p.al->move(Selector.{ + segments = make([..] &SelectorSegment, p.al) + }); + + while !reached_end(p) { + skip_whitespace(p); + + switch parse_matcher(p) { + case matcher: .Some { + segment := p.al->move(SelectorSegment.{ + matcher = matcher + }); + + s.segments << segment; + + skip_whitespace(p); + if reached_end(p) { + break break; + } + + segment.op = switch p.query[p.cursor] { + case '>' => SelectorOp.Child; + case '+' => .Neighbor; + case '~' => .Sibling; + case #default => .Descendant; + }; + + if segment.op->unwrap() != .Descendant { + p.cursor += 1; + } + } + + case .None do break break; + } + + skip_whitespace(p); + + if string.starts_with(p->rem(), "||") { + break; + } + } + + // Reverse the segments here so it is easier to process with later. + array.reverse(s.segments); + + return s; +} + +#local +parse_matcher :: (p: &QueryParser) -> ? &Matcher { + if reached_end(p) do return .{}; + + m := p.al->move(Matcher.{ + details = make([..] MatcherDetails, p.al) + }); + + if p.query[p.cursor] != '[' { + id := parse_identifier(p); + + m.details << .{ + accessor = .{ Node = .{} }, + op = .Equal, + value = .{ Some = .{ data = .{ String = id } } } + }; + } + + while p.query[p.cursor] == '[' { + p.cursor += 1; + + if p.query[p.cursor] == ']' { + p.cursor += 1; + + m.details << .{ + accessor = .{ Node = .{} }, + op = .Equal, + value = .{ None = .{} }, + }; + continue; + } + + accessor: MatcherAccessor; + + if string.starts_with(p->rem(), "val(") { + p.cursor += 4; // "(" + if string.starts_with(p->rem(), ")") { + p.cursor += 1; + accessor = .{ Arg = 0 }; + + } else { + index := parse_number(p); + accessor = .{ Arg = index }; + + if string.starts_with(p->rem(), ")") { + p.cursor += 1; + } + } + } + + elseif string.starts_with(p->rem(), "prop(") { + p.cursor += 5; // "prop(" + prop_id := parse_identifier(p); + accessor = .{ Prop = prop_id }; + + if string.starts_with(p->rem(), ")") { + p.cursor += 1; + } + } + + else { + prop_id := parse_identifier(p); + accessor = .{ Prop = prop_id }; + } + + skip_whitespace(p); + + op := AttributeOp.Equal; + value: ? 
+        value: ? Value;
+
+        if p.query[p.cursor] != ']' {
+            rem := p->rem();
+            if string.starts_with(rem, "=") { op = .Equal; p.cursor += 1; }
+            elseif string.starts_with(rem, "!=") { op = .NotEqual; p.cursor += 2; }
+            elseif string.starts_with(rem, "<=") { op = .Lte; p.cursor += 2; }
+            elseif string.starts_with(rem, ">=") { op = .Gte; p.cursor += 2; }
+            elseif string.starts_with(rem, "<") { op = .Lt; p.cursor += 1; }
+            elseif string.starts_with(rem, ">") { op = .Gt; p.cursor += 1; }
+            elseif string.starts_with(rem, "^=") { op = .StartsWith; p.cursor += 2; }
+            elseif string.starts_with(rem, "$=") { op = .EndsWith; p.cursor += 2; }
+            elseif string.starts_with(rem, "*=") { op = .Contains; p.cursor += 2; }
+
+            skip_whitespace(p);
+
+            // TODO Make any value work!
+            v := Value.{ data = .{ String = "" } };
+            if string.starts_with(p->rem(), "\"") {
+                p.cursor += 1;
+                v.data = .{
+                    String = read_until(p, '"')
+                };
+                p.cursor += 1;
+            }
+
+            value = v;
+        }
+
+        m.details << .{
+            accessor,
+            op,
+            value
+        };
+
+        read_until(p, ']');
+        if p.query[p.cursor] == ']' {
+            p.cursor += 1;
+        }
+    }
+
+    return m;
+}
+
+#local
+skip_whitespace :: (p: &QueryParser) {
+    while !reached_end(p) && p.query[p.cursor]->is_whitespace() {
+        p.cursor += 1;
+    }
+}
+
+#local
+reached_end :: macro (p: &QueryParser) => p.cursor >= p.query.length;
+
+#inject
+QueryParser.rem :: (p: &QueryParser) => p.query[p.cursor .. p.query.length];
+
+#local
+parse_identifier :: (p: &QueryParser) -> str {
+    c := p.cursor;
+    while !reached_end(p) && !is_end_of_word(~~p.query[p.cursor]) && is_id(~~p.query[p.cursor]) {
+        p.cursor += 1;
+    }
+
+    return p.query[c .. p.cursor];
+}
+
+#local
+parse_number :: (p: &QueryParser) -> u32 {
+    v := 0;
+    while !reached_end(p) && p.query[p.cursor]->is_num() {
+        v *= 10;
+        v += cast(u32) (p.query[p.cursor] - '0');
+        p.cursor += 1;
+    }
+
+    return v;
+}
+
+#local
+read_until :: (p: &QueryParser, c: u8) -> str {
+    start := p.cursor;
+    while !reached_end(p) && p.query[p.cursor] != c {
+        p.cursor += 1;
+    }
+
+    return p.query[start .. p.cursor];
+}
diff --git a/core/encoding/kdl/parser.onyx b/core/encoding/kdl/parser.onyx
new file mode 100644
index 00000000..bdb8d1fb
--- /dev/null
+++ b/core/encoding/kdl/parser.onyx
@@ -0,0 +1,586 @@
+package core.encoding.kdl
+#allow_stale_code
+
+
+// TODO
+// - Parse integers
+// - Parse decimals
+// - Types in the correct places
+// - Escaped strings
+// - Slashdash on children block
+
+use core {tprintf, Result, printf}
+use core.io
+use core.memory
+use core.slice
+use core.encoding.utf8
+use core.string
+
+#package
+Tokenizer :: struct {
+    doc: [] u8;
+    cursor: u32;
+    doc_is_owned := false;
+
+    peeked_token: ? Token;
+}
+
+#package
+Token :: union {
+    Error: void;
+    Start_Type: void;
+    End_Type: void;
+    Word: str;
+    String: str;
+    Raw_String: str;
+    Single_Line_Comment: str;
+    Slashdash: void;
+    Multi_Line_Comment: str;
+    Equals: void;
+    Start_Children: void;
+    End_Children: void;
+    Newline: void;
+    Semicolon: void;
+    Line_Continuation: void;
+    Whitespace: str;
+    EOF: void;
+}
+
+#inject Tokenizer {
+    make :: #match {
+        ((r: &io.Reader) => #Self.{ doc = r->read_all(), doc_is_owned = true }),
+        ((s: str) => #Self.{ doc = s }),
+    }
+
+    destroy :: (self: &#Self) {
+        if self.doc_is_owned {
+            delete(&self.doc);
+        }
+    }
+
+    peek_char :: (self: &#Self) -> ? u32 {
+        if self.cursor >= self.doc.length do return .{ None = .{} };
+
+        codepoint_length := utf8.rune_length_from_first_byte(self.doc[self.cursor]);
+        if self.cursor + codepoint_length > self.doc.length do return .{ None = .{} };
+
+        value := utf8.decode_rune(string.advance(self.doc, self.cursor));
+        return value;
+    }
+
+    eat_char :: (self: &#Self) -> ? u32 {
+        if self.cursor >= self.doc.length do return .{ None = .{} };
+
+        codepoint_length := utf8.rune_length_from_first_byte(self.doc[self.cursor]);
+        if self.cursor + codepoint_length > self.doc.length do return .{ None = .{} };
+
+        value := utf8.decode_rune(string.advance(self.doc, self.cursor));
+        self.cursor += codepoint_length;
+
+        return value;
+    }
+
+    peek_token :: (use self: &#Self) -> Token {
+        if peeked_token do return peeked_token->unwrap();
+
+        // :CompilerBug
+        // There is a weird bug related to an optimization happening here.
+        // I would like the following code to just be:
+        //
+        //     peeked_token = self->next_token();
+        //
+        // But sadly, this does not work. This is because the next_token return value
+        // is being upcasted to an optional token. The tag for the optional saying
+        // that it is a "some" not a "none" is emitted first, then the actual call.
+        // The problem is that when assigning a structure literal (which is what this
+        // is internally implemented as), the assignment is done in parts (1st member
+        // emit and store, 2nd member emit and store, etc.). Well, the first member
+        // says that the result is a Some, so at this point peeked_token is a Some
+        // of invalid data. Then in next_token, this is consumed and returned as
+        // a valid token, even though it is not.
+        //
+        new_token := self->next_token();
+        peeked_token = new_token;
+
+        return peeked_token?;
+    }
+
+    next_token :: (self: &#Self) -> Token {
+        if self.peeked_token {
+            tkn := self.peeked_token->unwrap();
+            self.peeked_token->reset();
+            return tkn;
+        }
+
+        c := self->peek_char()->or_return(Token.{ EOF = .{} });
+
+        if is_whitespace(c) {
+            self->consume_while([c](is_whitespace(c)));
+            return self->next_token();
+        }
+
+        if c == '/' {
+            comment := self->handle_comment();
+            if comment.Slashdash do return comment;
+            return self->next_token();
+        }
+
+        if is_newline(c) {
+            self->eat_char();
+            if c == '\r' {
+                // Consume one more character for CRLF.
+                self->eat_char();
+            }
+
+            return .{ Newline = .{} };
+            // return self->next_token();
+        }
+
+        if c == ';' { self->eat_char(); return .{ Semicolon = .{} }; }
+        if c == '\\' { self->eat_char(); return .{ Line_Continuation = .{} }; }
+        if c == '(' { self->eat_char(); return .{ Start_Type = .{} }; }
+        if c == ')' { self->eat_char(); return .{ End_Type = .{} }; }
+        if c == '{' { self->eat_char(); return .{ Start_Children = .{} }; }
+        if c == '}' { self->eat_char(); return .{ End_Children = .{} }; }
+        if c == '=' { self->eat_char(); return .{ Equals = .{} }; }
+        if c == '"' {
+            return self->handle_string();
+        }
+        if is_id(c) {
+            return self->handle_word();
+        }
+
+        assert(false, tprintf("Unhandled character, {}", c));
+        return .{ Error = .{} };
+    }
+
+    consume_while :: macro (self: &#Self, cond: Code) -> str {
+        res := self.doc[self.cursor .. self.cursor];
+        while true {
+            codepoint := self->peek_char()->or_return(res);
+            if !(#unquote cond(codepoint)) {
+                return res;
+            } else {
+                self->eat_char();
+                res.length += 1;
+            }
+        }
+    }
+
+    handle_comment :: (self: &#Self) -> Token {
+        self->eat_char();
+        c := self->eat_char()->or_return(Token.{EOF=.{}});
+        switch c {
+            case '-' {
+                return .{ Slashdash = .{} };
+            }
+
+            case '/' {
+                body := self->consume_while([c](!is_newline(c)));
+                return .{ Single_Line_Comment = body };
+            }
+
+            case '*' {
+                cursor_start := self.cursor;
+
+                depth := 1;
+                prev_char := 0;
+                while depth >= 1 {
+                    c := self->eat_char()->or_return(Token.{ Error=.{} });
+                    if c == '*' && prev_char == '/' {
+                        depth += 1;
+                        c = 0;
+                    }
+                    if c == '/' && prev_char == '*' {
+                        depth -= 1;
+                        c = 0;
+                    }
+
+                    prev_char = c;
+                }
+
+                return .{ Multi_Line_Comment = self.doc[cursor_start .. self.cursor-2] };
+            }
+
+            case #default {
+                return .{ Error = .{} };
+            }
+        }
+    }
+
+    handle_string :: (self: &#Self) -> Token {
+        c := self->eat_char()->or_return(Token.{EOF=.{}});
+        if c != '"' do return Token.{Error=.{}};
+
+        cursor_start := self.cursor;
+        prev_char := 0;
+        while true {
+            c := self->eat_char()->or_return(Token.{Error=.{}});
+            if c == '\\' && prev_char == '\\' {
+                c = 0;
+            }
+            if c == '"' && prev_char != '\\' {
+                break;
+            }
+            prev_char = c;
+        }
+
+        return .{ String = self.doc[cursor_start .. self.cursor-1] };
+    }
+
+    handle_word :: (self: &#Self) -> Token {
+        word := self->consume_while([c](!is_end_of_word(c) && is_id(c)));
+        return .{ Word = word };
+    }
+}
+
+
+#package
+Parser :: struct {
+    tokenizer: Tokenizer;
+    state: Parser_State;
+
+    depth: i32;
+
+    result_allocator: Allocator;
+}
+
+#local
+Parser_State :: enum #flags {
+    Outside_Node;
+    Inside_Node;
+
+    Line_Continuation :: 0x100;
+    Annotation_Start :: 0x200;
+    Annotation_End :: 0x400;
+    Annotation_Ended :: 0x800;
+
+    In_Property :: 0x1000;
+
+    Whitespace_Banned :: Annotation_Start | Annotation_End | Annotation_Ended | In_Property;
+}
+
+Parse_Error :: union {
+    None: void;
+    Whitespace_Banned: void;
+    Parser_Error: str;
+}
+
+#inject Parser {
+    make :: #match {
+        ((r: &io.Reader) => Parser.{ Tokenizer.make(r) }),
+        ((s: str) => Parser.{ Tokenizer.make(s) }),
+    }
+
+    parse :: (self: &#Self, doc: &Document) -> Parse_Error {
+        self.result_allocator = doc.allocator;
+
+        while true {
+            token := self.tokenizer->peek_token();
+            switch token {
+                case .EOF {
+                    break break;
+                }
+
+                case .Error {
+                    self.tokenizer->next_token();
+                    return .{ Parser_Error = tprintf("bad token: {}", token) };
+                }
+
+                case .Whitespace, .Newline {
+                    self.tokenizer->next_token();
+                    if self.state & .Whitespace_Banned {
+                        return .{ Whitespace_Banned = .{} };
+                    }
+                }
+
+                case .Single_Line_Comment, .Multi_Line_Comment {
+                    self.tokenizer->next_token();
+                    if self.state & .Whitespace_Banned {
+                        return .{ Whitespace_Banned = .{} };
+                    }
+                }
+
+                case #default {
+                    node_result := self->parse_node();
+                    if err := node_result->err(); err {
+                        logf(.Info, self.tokenizer.doc[self.tokenizer.cursor .. self.tokenizer.doc.length]);
+                        return err?;
+                    }
+
+                    node := node_result->ok()->unwrap();
+                    if node {
+                        doc.nodes << node;
+                    }
+                }
+            }
+        }
+
+        return .{};
+    }
+
+    parse_node :: (self: &#Self) -> Result(&Node, Parse_Error) {
+        self.depth += 1;
+        defer self.depth -= 1;
+
+        self->skip_linespace();
+
+        if_next_token_is(self, .End_Children, [] { return .{ Ok = null }; });
+
+        is_ignored := false;
+        if self.tokenizer->peek_token().Slashdash {
+            self.tokenizer->next_token();
+            is_ignored = true;
+        }
+
+        type_annotation := self->parse_type_if_present()?;
+        name := self->parse_identifier()?;
+
+        if !name do return .{ Ok = null };
+
+        node_to_return := self.result_allocator->move(Node.{
+            node = name->unwrap(),
+            type_annotation = type_annotation,
+            props = make(Map(str, Value), self.result_allocator),
+            values = make([..] Value, 0, self.result_allocator),
+            children = make([..] &Node, 0, self.result_allocator),
+        });
+
+        while true {
+            switch tkn := self.tokenizer->peek_token(); tkn {
+                case .Newline, .Semicolon {
+                    self.tokenizer->next_token();
+                    _apply_slashdash(node_to_return);
+                    return .{ Ok = node_to_return };
+                }
+
+                case .Word, .Raw_String, .String {
+                    self.tokenizer->next_token();
+                    if_next_token_is(self, .Equals, [] {
+                        // Is this good? Or just too hacky?
+                        prop_name := self->parse_into_string(tkn)->or_return(
+                            Result(&Node, Parse_Error).{ Err = .{ Parser_Error = "Error parsing property key" } }
+                        );
+
+                        type := self->parse_type_if_present()?;
+                        value := self->parse_value(self.tokenizer->next_token()) ?? [] {
+                            return return .{ Err = .{ Parser_Error = "Error parsing property value" } };
+                        };
+
+                        value.type_annotation = type;
+
+                        node_to_return.props[prop_name] = value;
+                        continue;
+                    });
+
+                    value := self->parse_value(tkn) ?? [] {
+                        return return .{ Err = .{ Parser_Error = "Error parsing argument value" } };
+                    };
+
+                    node_to_return.values << value;
+                }
+
+                case .Start_Type {
+                    type := self->parse_type_if_present()?;
+
+                    value := self->parse_value(self.tokenizer->next_token()) ?? [] {
+                        return return .{ Err = .{ Parser_Error = "Error parsing argument value" } };
+                    };
+
+                    value.type_annotation = type;
+                    node_to_return.values << value;
+                }
+
+                case .Start_Children {
+                    self.tokenizer->next_token();
+                    while !self.tokenizer->peek_token().End_Children {
+                        child := self->parse_node()?;
+                        if child {
+                            node_to_return.children << child;
+                        }
+
+                        self->skip_linespace();
+                    }
+
+                    self->expect_token(.End_Children);
+                    break break;
+                }
+
+                case .End_Children {
+                    break break;
+                }
+
+                case #default {
+                    return .{ Err = .{ Parser_Error = tprintf("Unexpected token {}, expected node", tkn) } };
+                }
+            }
+        }
+
+        _apply_slashdash(node_to_return);
+        return .{ Ok = node_to_return };
+
+        _apply_slashdash :: macro (n: &Node) {
+            if is_ignored {
+                n = null;
+            }
+        }
+    }
+
+    parse_value :: (self: &#Self, token: Token) -> ? Value {
+        switch token {
+            case s: .Raw_String {
+                return Value.{
+                    data = .{ String = string.alloc_copy(s, self.result_allocator) }
+                };
+            }
+
+            case s: .String {
+                // TODO: Handle escaped strings here
+                return Value.{
+                    data = .{ String = string.alloc_copy(s, self.result_allocator) }
+                };
+            }
+
+            case w: .Word {
+                if w == "null" {
+                    return Value.{
+                        data = .{ Null = .{} }
+                    };
+                }
+
+                if w == "true" {
+                    return Value.{
+                        data = .{ Boolean = true }
+                    };
+                }
+
+                if w == "false" {
+                    return Value.{
+                        data = .{ Boolean = false }
+                    };
+                }
+
+                // TODO: parse numbers
+
+                return Value.{
+                    data = .{ String = string.alloc_copy(w, self.result_allocator) }
+                };
+            }
+
+            case #default do return .{};
+        }
+    }
+
+    parse_type_if_present :: (self: &#Self) -> Result(? str, Parse_Error) {
+        if_next_token_is(self, .Start_Type, [start_tkn] {
+            type_token := self.tokenizer->next_token();
+            switch type_token {
+                case .Word, .String, .Raw_String {
+                    self->expect_token(.End_Type);
+                    return .{ Ok = self->parse_into_string(type_token) };
+                }
+
+                case #default {
+                    return .{ Err = .{Parser_Error = tprintf("Expected identifier or string, got {}.", type_token)}};
+                }
+            }
+        });
+
+        return .{ Ok = .{ None = .{} } };
+    }
+
+    parse_identifier :: (self: &#Self) -> Result(? str, Parse_Error) {
+        id_token := self.tokenizer->next_token();
+        switch id_token {
+            case .Word, .String, .Raw_String {
+                name := self->parse_into_string(id_token)
+                    ->catch([] { fallthrough; });
+                return .{ Ok = .{ Some = name } };
+            }
+
+            case .EOF {
+                return .{ Ok = .{ None = .{} } };
+            }
+
+            case #default {
+                return .{ Err = .{Parser_Error = tprintf("Expected identifier or string, got {}.", id_token)}};
+            }
+        }
+    }
+
+    skip_linespace :: (self: &#Self) {
+        while true {
+            switch tkn := self.tokenizer->peek_token(); tkn {
+                case .Newline, .Single_Line_Comment {
+                    self.tokenizer->next_token();
+                }
+
+                case #default {
+                    return;
+                }
+            }
+        }
+    }
+
+
+    expect_token :: macro (self: &#Self, type: Token.tag_enum) -> Token {
+        tkn := self.tokenizer->next_token();
+        if tkn.tag != type {
+            return return .{ Err = .{Parser_Error = tprintf("Expected {}, got {}", type, tkn) } };
+        } else {
+            return tkn;
+        }
+    }
+
+    parse_into_string :: (self: &#Self, tkn: Token) -> ? str {
+        return self->parse_value(tkn)->and_then(x => x.data.String);
+    }
+}
+
+#package {
+    MIN_BUFFER_SIZE :: 1024
+    BUFFER_SIZE_INCREMENT :: 4096
+
+    is_whitespace :: (c: u32) -> bool {
+        #persist chars := u32.[
+            0x9, 0x20, 0xa0, 0x1680,
+            0x2000, 0x2001, 0x2002, 0x2003,
+            0x2004, 0x2005, 0x2006, 0x2007,
+            0x2008, 0x2009, 0x200A,
+            0x202F, 0x205F,
+            0x3000
+        ];
+        return slice.contains(chars, [it](it == c));
+    }
+
+    is_newline :: (c: u32) -> bool {
+        #persist chars := u32.[
+            0xd, 0xa, 0x85, 0xc, 0x2028, 0x2029
+        ];
+        return slice.contains(chars, [it](it == c));
+    }
+
+    is_id :: (c: u32) -> bool {
+        if c < 0x20 || c > 0x10ffff do return false;
+        if is_whitespace(c) || is_newline(c) do return false;
+
+        #persist chars := u32.[
+            '\\', '/', '(', ')', '{', '}', '<', '>', ';', '[', ']', '=', ',', '"'
+        ];
+
+        return !slice.contains(chars, [it](it == c));
+    }
+
+    is_id_start :: (c: u32) -> bool {
+        return is_id(c) && (c < '0' || c > '9');
+    }
+
+    is_end_of_word :: (c: u32) -> bool {
+        if is_whitespace(c) do return true;
+        if is_newline(c) do return true;
+
+        #persist chars := u32.[ ';', ')', '}', '/', '\\', '=' ];
+        return slice.contains(chars, [it](it == c));
+    }
+
+    if_next_token_is :: macro (p: &Parser, $type: Token.tag_enum, body: Code) {
+        switch __tkn := p.tokenizer->peek_token(); __tkn {
+            case type {
+                p.tokenizer->next_token();
+                #unquote body(__tkn);
+            }
+
+            case #default ---
+        }
+    }
+}
+
diff --git a/core/encoding/kdl/utils.onyx b/core/encoding/kdl/utils.onyx
new file mode 100644
index 00000000..8c7ba068
--- /dev/null
+++ b/core/encoding/kdl/utils.onyx
@@ -0,0 +1,43 @@
+package core.encoding.kdl
+#allow_stale_code
+
+use core {string}
+
+#inject Value {
+    as_str :: (v: &Value) -> ? str {
+        return v.data.String;
+    }
+
+    as_int :: (v: Value) -> ? i64 {
+        return v.data.Number?.Integer;
+    }
+
+    as_float :: (v: Value) -> ? f64 {
+        return v.data.Number?.Float;
+    }
+}
+
+#inject Document {
+    create_node :: (d: &Document, name: str) -> &Node {
+        return d.allocator->move(Node.{
+            node = name,
+            type_annotation = .{ None = .{} },
+            values = make([..] Value, d.allocator),
+            props = make(Map(str, Value), d.allocator),
+            children = make([..] &Node, d.allocator)
+        });
+    }
+}
+
+#inject Node {
+    add_value :: (n: &Node, value: Value.Value_Data) {
+        n.values << Value.{data = value};
+    }
+
+    set_prop :: (n: &Node, name: str, value: Value.Value_Data) {
+        n.props->put(
+            string.alloc_copy(name, n.props.allocator),
+            .{ data = value }
+        );
+    }
+}
\ No newline at end of file
diff --git a/core/module.onyx b/core/module.onyx
index ab028a03..c8809b15 100644
--- a/core/module.onyx
+++ b/core/module.onyx
@@ -64,6 +64,7 @@ use runtime
 #load "./encoding/utf8"
 #load "./encoding/osad"
 #load_all "./encoding/json"
+#load "./encoding/kdl/kdl"
 
 #load "./runtime/common"
diff --git a/scripts/onyx-pkg.onyx b/scripts/onyx-pkg.onyx
index 0fbb05a9..1dc60aa1 100644
--- a/scripts/onyx-pkg.onyx
+++ b/scripts/onyx-pkg.onyx
@@ -287,11 +287,9 @@ run_sync_command :: (args: [] cstr) {
 
     dependencies_to_install   := make([..] To_Install);
     dependencies_installed    := make(Map(str, SemVer));
-    needed_dependency_folders := make([..] str);
     defer {
         delete(&dependencies_to_install);
         delete(&dependencies_installed);
-        delete(&needed_dependency_folders);
     }
 
     for& config.dependencies.dependencies.entries {
@@ -312,8 +310,6 @@ run_sync_command :: (args: [] cstr) {
             return;
         }
 
-        needed_dependency_folders << installed_folder;
-
         inner_config := read_config_from_installed_dependency(installed_folder) ?? [] {
Omitting.\n", to_install.repo); continue;