Progress on the initial parser #1
authorBrendan Hansen <brendan.f.hansen@gmail.com>
Tue, 19 May 2020 03:28:31 +0000 (22:28 -0500)
committerBrendan Hansen <brendan.f.hansen@gmail.com>
Tue, 19 May 2020 03:28:31 +0000 (22:28 -0500)
Makefile
bh.h
onyx
onyx.c
onyxlex.c
onyxlex.h
onyxmsgs.c [new file with mode: 0644]
onyxmsgs.h [new file with mode: 0644]
onyxparser.c
onyxparser.h
progs/minimal.onyx

index a3c6d82e3252d7f1d02975fba22fb936c21eafd2..b204645f96d8b26725ca9d3e28486da99aca77db 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
 OBJ_FILES=\
        onyxlex.o \
        onyxparser.o \
+       onyxmsgs.o \
        onyx.o
 
 CC=gcc
diff --git a/bh.h b/bh.h
index b6af93626c4a7c2f80b87e62d5bb9ff1d0d5f372..6ee6f88206566c155530c73bb887cfdf431cf766 100644 (file)
--- a/bh.h
+++ b/bh.h
@@ -321,6 +321,7 @@ typedef enum bh_file_standard {
 
 typedef struct bh_file_contents {
        bh_allocator allocator;
+       const char *filename;
        isize length;
        void* data;
 } bh_file_contents;
@@ -389,7 +390,7 @@ typedef struct bh__arr {
 #define bh_arr_last(arr)                       ((arr)[bh__arrhead(arr)->length - 1])
 #define bh_arr_end(arr, i)                     ((i) >= &(arr)[bh_arr_length(arr)])
 
-#define bh_arr_new(allocator_, arr, cap)       (bh__arr_grow((allocator_), (void**) &arr, sizeof(*(arr)), cap))
+#define bh_arr_new(allocator_, arr, cap)       (bh__arr_grow((allocator_), (void**) &(arr), sizeof(*(arr)), cap))
 #define bh_arr_free(arr)                                       (bh__arr_free((void**) &(arr)))
 #define bh_arr_copy(allocator_, arr)           (bh__arr_copy((allocator_), (arr), sizeof(*(arr))))
 
@@ -601,7 +602,7 @@ i64 chars_match(char* ptr1, char* ptr2) {
 
 
 //-------------------------------------------------------------------------------------
-// CUSTOM ALLOCATORS IMPLEMENTATION 
+// CUSTOM ALLOCATORS IMPLEMENTATION
 //-------------------------------------------------------------------------------------
 
 
@@ -716,7 +717,7 @@ BH_ALLOCATOR_PROC(bh_arena_allocator_proc) {
                        // Size too large for the arena
                        return NULL;
                }
-               
+
                if (alloc_arena->size + size >= alloc_arena->arena_size) {
                        alloc_arena->size = sizeof(ptr);
                        bh__arena_internal* new_arena = (bh__arena_internal *) bh_alloc(alloc_arena->backing, alloc_arena->arena_size);
@@ -790,7 +791,7 @@ BH_ALLOCATOR_PROC(bh_scratch_allocator_proc) {
 
                if (scratch->curr >= scratch->end) {
                        scratch->curr = scratch->memory;
-                       retval = scratch->curr; 
+                       retval = scratch->curr;
                }
        } break;
 
@@ -1103,6 +1104,7 @@ i64 bh_file_size(bh_file* file) {
 bh_file_contents bh_file_read_contents_bh_file(bh_allocator alloc, bh_file* file) {
        bh_file_contents fc = {
                .allocator = alloc,
+               .filename  = file->filename,
                .length = 0, .data = NULL
        };
 
@@ -1225,7 +1227,7 @@ void* bh__arr_copy(bh_allocator alloc, void *arr, i32 elemsize) {
 }
 
 void bh__arr_deleten(void **arr, i32 elemsize, i32 index, i32 numelems) {
-       bh__arr* arrptr = bh__arrhead(*arr);    
+       bh__arr* arrptr = bh__arrhead(*arr);
 
        if (index >= arrptr->length) return; // Can't delete past the end of the array
        if (numelems <= 0) return; // Can't delete nothing
@@ -1328,13 +1330,13 @@ found_matching:
 }
 
 b32 bh__hash_has(bh__hash *table, i32 elemsize, char *key) {
-       u64 index = bh__hash_function(key, 0);  
+       u64 index = bh__hash_function(key, 0);
 
        ptr arrptr = table->arrs[index];
        if (arrptr == NULL) return 0;
 
        i32 len = bh_arr_length(arrptr);
-       i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;       
+       i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;
 
        while (len--) {
                if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) return 1;
@@ -1348,11 +1350,11 @@ ptr bh__hash_get(bh__hash *table, i32 elemsize, char *key) {
        u64 index = bh__hash_function(key, 0);
 
        ptr arrptr = table->arrs[index];
-       i32 len = bh_arr_length(arrptr);
-       assert(arrptr != NULL);
+       if (arrptr == NULL) return NULL;
 
        i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;
 
+       i32 len = bh_arr_length(arrptr);
        while (len--) {
                if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) {
                        return bh_pointer_add(arrptr, BH__HASH_STORED_KEY_SIZE);
@@ -1361,7 +1363,7 @@ ptr bh__hash_get(bh__hash *table, i32 elemsize, char *key) {
                return bh_pointer_add(arrptr, stride);
        }
 
-       return 0;
+       return NULL;
 }
 
 void bh__hash_delete(bh__hash *table, i32 elemsize, char *key) {
diff --git a/onyx b/onyx
index 40a17c46c671440e0ad8fe44d735b485029ac837..1cada87b18070428c1c4e5a7f8e4f39090250da9 100755 (executable)
Binary files a/onyx and b/onyx differ
diff --git a/onyx.c b/onyx.c
index 42fbb892abf16f5fbd161cacdbbd03f19ab6499e..8406fc07ec0206b25ad9e4faca790c114e19a07f 100644 (file)
--- a/onyx.c
+++ b/onyx.c
@@ -6,6 +6,7 @@
 #include <stdio.h> // TODO: Replace with custom lib
 
 #include "onyxlex.h"
+#include "onyxmsgs.h"
 #include "onyxparser.h"
 
 int main(int argc, char *argv[]) {
@@ -21,16 +22,39 @@ int main(int argc, char *argv[]) {
        bh_file_contents fc = bh_file_read_contents(alloc, &source_file);
        bh_file_close(&source_file);
 
-       bh_arr(OnyxToken) token_arr = onyx_parse_tokens(alloc, &fc);
+       OnyxTokenizer tokenizer = onyx_tokenizer_create(alloc, &fc);
+       onyx_parse_tokens(&tokenizer);
+       bh_arr(OnyxToken) token_arr = tokenizer.tokens;
 
        printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr));
 
        for (OnyxToken* it = token_arr; !bh_arr_end(token_arr, it); it++) {
-               printf("%s '%c' (Line %ld, Col %ld)\n", onyx_get_token_type_name(*it), *(char *)it->token, it->line_number, it->line_column);
+               onyx_token_null_toggle(*it);
+               printf("%s '%s' (%s:%ld:%ld)\n", onyx_get_token_type_name(*it), it->token, it->pos.filename, it->pos.line, it->pos.column);
+               onyx_token_null_toggle(*it);
        }
 
+       bh_arena msg_arena;
+       bh_arena_init(&msg_arena, alloc, 4096);
+       bh_allocator msg_alloc = bh_arena_allocator(&msg_arena);
+
+       OnyxMessages msgs;
+       onyx_message_create(msg_alloc, &msgs);
+
+       bh_arena ast_arena;
+       bh_arena_init(&ast_arena, alloc, 16 * 1024 * 1024); // 16MB
+       bh_allocator ast_alloc = bh_arena_allocator(&ast_arena);
+
+       OnyxParser parser = onyx_parser_create(ast_alloc, &tokenizer, &msgs);
+       OnyxAstNode* program = onyx_parse(&parser);
+
+       onyx_message_print(&msgs);
+
        bh_file_contents_delete(&fc);
-       bh_arr_free(token_arr);
+       onyx_tokenizer_free(&tokenizer);
+       bh_arena_free(&msg_arena);
+       bh_arena_free(&ast_arena);
+
 
        return 0;
 }
index b25256f0800c5e5644b2514e32e065a6f6c2de4d..2f790a1b88696caf9bda79f99eb2c8251251c155 100644 (file)
--- a/onyxlex.c
+++ b/onyxlex.c
@@ -37,7 +37,6 @@ static const char* onyx_token_type_names[] = {
        "TOKEN_TYPE_SYM_DOT",
        "TOKEN_TYPE_SYM_FSLASH",
        "TOKEN_TYPE_SYM_BSLASH",
-       "TOKEN_TYPE_SYM_TYPE_SIGNATURE",
        "TOKEN_TYPE_SYM_COLON",
        "TOKEN_TYPE_SYM_SEMICOLON",
        "TOKEN_TYPE_SYM_COMMA",
@@ -76,8 +75,8 @@ static b32 token_lit(OnyxTokenizer* tokenizer, OnyxToken* tk, char* lit, OnyxTok
                tk->type = type;
                tk->token = tokenizer->curr;
                tk->length = len;
-               tk->line_number = tokenizer->line_number;
-               tk->line_column = (i32)(tokenizer->curr - tokenizer->line_start) + 1;
+               tk->pos.line = tokenizer->line_number;
+               tk->pos.column = (i32)(tokenizer->curr - tokenizer->line_start) + 1;
 
                tokenizer->curr += len;
 
@@ -90,7 +89,14 @@ const char* onyx_get_token_type_name(OnyxToken tkn) {
        return onyx_token_type_names[tkn.type];
 }
 
-OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
+void onyx_token_null_toggle(OnyxToken tkn) {
+       static char backup = 0;                           // Byte written by this call; 0 (a NUL terminator) before the first call
+       const char displaced = *(tkn.token + tkn.length); // Byte currently sitting just past the token text
+       *(tkn.token + tkn.length) = backup;               // Paired calls on one token: the first terminates it, the second restores the byte
+       backup = displaced;                               // Remember the displaced byte for the following call
+}
+
+OnyxToken* onyx_get_token(OnyxTokenizer* tokenizer) {
        OnyxToken tk;
 
        // Skip whitespace
@@ -100,8 +106,9 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
        tk.type = TOKEN_TYPE_UNKNOWN;
        tk.token = tokenizer->curr;
        tk.length = 1;
-       tk.line_number = tokenizer->line_number;
-       tk.line_column = (i32)(tokenizer->curr - tokenizer->line_start) + 1;
+       tk.pos.filename = tokenizer->filename;
+       tk.pos.line = tokenizer->line_number;
+       tk.pos.column = (i32)(tokenizer->curr - tokenizer->line_start) + 1;
 
        if (tokenizer->curr == tokenizer->end) {
                tk.type = TOKEN_TYPE_END_STREAM;
@@ -110,7 +117,7 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
 
        // Comments
        if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') {
-               tokenizer->curr += 2;   
+               tokenizer->curr += 2;
                tk.type = TOKEN_TYPE_COMMENT;
                tk.token = tokenizer->curr;
                u16 layers = 1;
@@ -122,7 +129,7 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
                                tk.type = TOKEN_TYPE_END_STREAM;
                                break;
                        }
-                       
+
                        if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') {
                                layers++;
                                INCREMENT_CURR_TOKEN(tokenizer);
@@ -132,7 +139,7 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
                                layers--;
                                INCREMENT_CURR_TOKEN(tokenizer);
                        }
-               }       
+               }
 
                INCREMENT_CURR_TOKEN(tokenizer);
 
@@ -168,7 +175,6 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
        LITERAL_TOKEN("%", TOKEN_TYPE_SYM_PERCENT);
        LITERAL_TOKEN("/", TOKEN_TYPE_SYM_FSLASH);
        LITERAL_TOKEN("\\", TOKEN_TYPE_SYM_BSLASH);
-       LITERAL_TOKEN("::", TOKEN_TYPE_SYM_TYPE_SIGNATURE);
        LITERAL_TOKEN(":", TOKEN_TYPE_SYM_COLON);
        LITERAL_TOKEN(";", TOKEN_TYPE_SYM_SEMICOLON);
        LITERAL_TOKEN(",", TOKEN_TYPE_SYM_COMMA);
@@ -235,26 +241,35 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
        INCREMENT_CURR_TOKEN(tokenizer);
 
 token_parsed:
-       return tk;
+       bh_arr_push(tokenizer->tokens, tk);
+
+       return &tokenizer->tokens[bh_arr_length(tokenizer->tokens) - 1];
 }
 
-bh_arr(OnyxToken) onyx_parse_tokens(bh_allocator tk_alloc, bh_file_contents *fc) {
+OnyxTokenizer onyx_tokenizer_create(bh_allocator allocator, bh_file_contents *fc) {
        OnyxTokenizer tknizer = {
                .start                  = fc->data,
                .curr                   = fc->data,
                .end                    = fc->data + fc->length,
+
+               .filename               = fc->filename,
+
                .line_number    = 1,
                .line_start     = fc->data,
+               .tokens                 = NULL,
        };
 
-       bh_arr(OnyxToken) token_arr = NULL;
-       bh_arr_new(tk_alloc, token_arr, 512);
+       bh_arr_new(allocator, tknizer.tokens, 512);
+       return tknizer;
+}
 
-       OnyxToken tk;
-       do {
-               tk = onyx_get_token(&tknizer);
-               bh_arr_push(token_arr, tk);
-       } while (tk.type != TOKEN_TYPE_END_STREAM);
+void onyx_tokenizer_free(OnyxTokenizer* tokenizer) {
+       bh_arr_free(tokenizer->tokens); // Only the token array is released here; nothing else in the tokenizer is touched
+}
 
-       return token_arr;
-}
\ No newline at end of file
+void onyx_parse_tokens(OnyxTokenizer* tokenizer) {
+       // onyx_get_token appends every token it produces to tokenizer->tokens and returns a pointer to it,
+       // so the loop only has to keep pulling tokens until the end-of-stream token has been produced.
+       while (onyx_get_token(tokenizer)->type != TOKEN_TYPE_END_STREAM) {
+       }
+}
index 15b19b4038267651c2781581012fb4348cf87d63..9a3b3f1231cc6448fa2fc2ba0784006d5dfef0f6 100644 (file)
--- a/onyxlex.h
+++ b/onyxlex.h
@@ -3,13 +3,6 @@
 
 #include "bh.h"
 
-typedef struct OnyxTokenizer {
-       char *start, *curr, *end;
-
-       char* line_start;
-       u64 line_number;
-} OnyxTokenizer;
-
 typedef enum OnyxTokenType {
        TOKEN_TYPE_UNKNOWN,
        TOKEN_TYPE_END_STREAM,
@@ -46,7 +39,6 @@ typedef enum OnyxTokenType {
        TOKEN_TYPE_SYM_DOT,
        TOKEN_TYPE_SYM_FSLASH,
        TOKEN_TYPE_SYM_BSLASH,
-       TOKEN_TYPE_SYM_TYPE_SIGNATURE,
        TOKEN_TYPE_SYM_COLON,
        TOKEN_TYPE_SYM_SEMICOLON,
        TOKEN_TYPE_SYM_COMMA,
@@ -64,15 +56,34 @@ typedef enum OnyxTokenType {
        TOKEN_TYPE_COUNT
 } OnyxTokenType;
 
+typedef struct OnyxFilePos {
+       const char* filename;   // File the position refers to; the message printer treats NULL as "no file name"
+       u64 line, column;       // As filled in by the tokenizer: line counting starts at 1, column is the offset from the line start plus 1
+} OnyxFilePos;
+
 typedef struct OnyxToken {
        OnyxTokenType type;
        isize length;
        char* token;
-       u64 line_number, line_column;
+       OnyxFilePos pos;
 } OnyxToken;
 
+typedef struct OnyxTokenizer {
+       char *start, *curr, *end;       // Start of the source text, current read position, and one past its last byte
+       // Taken from the bh_file_contents the tokenizer was created from; copied into each token's pos
+       const char* filename;
+       // Start of the current line and its number; token line/column values are derived from these
+       char* line_start;
+       u64 line_number;
+       // Every token produced so far by onyx_get_token, in the order it was produced
+       bh_arr(OnyxToken) tokens;
+} OnyxTokenizer;
+
 const char* onyx_get_token_type_name(OnyxToken tkn);
-OnyxToken onyx_get_token(OnyxTokenizer* tokenizer);
-bh_arr(OnyxToken) onyx_parse_tokens(bh_allocator tk_alloc, bh_file_contents *fc);
+void onyx_token_null_toggle(OnyxToken tkn);
+OnyxToken* onyx_get_token(OnyxTokenizer* tokenizer);
+OnyxTokenizer onyx_tokenizer_create(bh_allocator allocator, bh_file_contents *fc);
+void onyx_tokenizer_free(OnyxTokenizer* tokenizer);
+void onyx_parse_tokens(OnyxTokenizer* tokenizer);
 
-#endif
\ No newline at end of file
+#endif
diff --git a/onyxmsgs.c b/onyxmsgs.c
new file mode 100644 (file)
index 0000000..9838fa0
--- /dev/null
@@ -0,0 +1,44 @@
+
+#include "onyxmsgs.h"
+// printf-style templates indexed by OnyxMessageType; the order of the entries must follow the order of the enum
+static const char* msg_formats[] = {
+       "expected token '%s'",
+       "unexpected token '%s'",
+       "unknown type '%s'"
+};
+// Formats a message of the given type (the varargs feed msg_formats[type]) and inserts it into msgs ordered by line, then column.
+void onyx_message_add(OnyxMessages* msgs, OnyxMessageType type, OnyxFilePos pos, ...) {
+       OnyxMessage* msg = bh_alloc_item(msgs->allocator, OnyxMessage);
+       if (msg == NULL) return; // The allocator can fail (the arena proc returns NULL); drop the message instead of writing through NULL
+       msg->type = type;
+       msg->pos = pos;
+       va_list arg_list;
+       va_start(arg_list, pos);
+       vsnprintf(msg->text, ONYX_MSG_BUFFER_SIZE, msg_formats[type], arg_list);
+       va_end(arg_list);
+
+       // Walk past every message that sorts before pos: earlier lines first, then earlier columns on the same line
+       OnyxMessage** walker = &msgs->first;
+       while (*walker && (*walker)->pos.line < pos.line) walker = &(*walker)->next;
+       while (*walker && (*walker)->pos.line == pos.line && (*walker)->pos.column < pos.column) walker = &(*walker)->next;
+
+       msg->next = *walker;
+       *walker = msg;
+}
+// Prints every message in list order as "(file:line:col) text"; the file part is left out when pos.filename is NULL.
+void onyx_message_print(OnyxMessages* msgs) {
+       OnyxMessage* msg = msgs->first;
+       // pos.line and pos.column are u64: cast and print as unsigned long long so the specifier always matches the argument
+       while (msg) {
+               if (msg->pos.filename) {
+                       printf("(%s:%llu:%llu) %s\n", msg->pos.filename, (unsigned long long) msg->pos.line, (unsigned long long) msg->pos.column, msg->text);
+               } else {
+                       printf("(%llu:%llu) %s\n", (unsigned long long) msg->pos.line, (unsigned long long) msg->pos.column, msg->text);
+               }
+               msg = msg->next;
+       }
+}
+// Prepares msgs for use: the list starts out empty and allocator is remembered for the messages added later.
+void onyx_message_create(bh_allocator allocator, OnyxMessages* msgs) {
+       msgs->first = NULL;
+       msgs->allocator = allocator;
+}
diff --git a/onyxmsgs.h b/onyxmsgs.h
new file mode 100644 (file)
index 0000000..db3a22c
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef ONYXMSGS_H
+#define ONYXMSGS_H
+
+#include "bh.h"
+#include "onyxlex.h"
+
+#include <stdarg.h>
+
+#define ONYX_MSG_BUFFER_SIZE 256
+// Kinds of diagnostics; each value is used as an index into the msg_formats table in onyxmsgs.c
+typedef enum OnyxMessageType {
+       ONYX_MESSAGE_TYPE_EXPECTED_TOKEN,
+       ONYX_MESSAGE_TYPE_UNEXPECTED_TOKEN,
+       ONYX_MESSAGE_TYPE_UNKNOWN_TYPE,
+       // Number of message types above; keep this entry last
+       ONYX_MESSAGE_TYPE_COUNT,
+} OnyxMessageType;
+// One diagnostic, stored as a node of a singly linked list
+typedef struct OnyxMessage {
+       OnyxMessageType type;
+       OnyxFilePos pos;                 // Source position the message refers to
+       struct OnyxMessage* next;        // Following message in the list, or NULL at the end
+       char text[ONYX_MSG_BUFFER_SIZE]; // Formatted message text; formatting is bounded by the buffer size
+} OnyxMessage;
+// A list of diagnostics together with the allocator that new messages are taken from
+typedef struct OnyxMessages {
+       bh_allocator allocator;
+       // Head of the list; onyx_message_add inserts so that it stays ordered by line, then column
+       OnyxMessage* first;
+} OnyxMessages;
+
+void onyx_message_add(OnyxMessages* msgs, OnyxMessageType type, OnyxFilePos pos, ...);
+void onyx_message_print(OnyxMessages* msgs);
+void onyx_message_create(bh_allocator allocator, OnyxMessages* msgs);
+
+#endif
\ No newline at end of file
index 8db202c7597bd1001c840a63cf822a5830612818..002dd6ce2aa0afef0bcd8307e998e4479e6b9203 100644 (file)
@@ -1,4 +1,5 @@
 
+#include "onyxlex.h"
 #include "onyxparser.h"
 
 struct OnyxTypeInfo builtin_types[] = {
@@ -20,4 +21,271 @@ struct OnyxTypeInfo builtin_types[] = {
        { ONYX_TYPE_INFO_KIND_FLOAT32, 4, "f32", 0, 0, 1, 0 },
        { ONYX_TYPE_INFO_KIND_FLOAT64, 8, "f64", 0, 0, 1, 0 },
        { ONYX_TYPE_INFO_KIND_SOFT_FLOAT, 8, "sf64", 0, 0, 1, 0 },
-};
\ No newline at end of file
+
+       { 0xffffffff }
+};
+// Shared sentinel node returned by parse routines to signal failure; callers recognise it by comparing against its address
+static OnyxAstNode error_node = { { ONYX_AST_NODE_KIND_ERROR, 0, NULL, &builtin_types[0], NULL, NULL, NULL } };
+// Records the current token in prev_token and moves to the next one; the cursor stays put on END_STREAM (the last token produced) so it cannot run off the token array.
+static void parser_next_token(OnyxParser* parser) {
+       parser->prev_token = parser->curr_token;
+       if (parser->curr_token->type != TOKEN_TYPE_END_STREAM) parser->curr_token++;
+}
+// Returns 1 when token_type is one of the terminating token types, 0 otherwise.
+// The terminating types are: semicolon, close brace, open brace and end of stream.
+static b32 is_terminating_token(OnyxTokenType token_type) {
+       // The checks are mutually exclusive, so their order does not matter
+       if (token_type == TOKEN_TYPE_SYM_SEMICOLON) return 1;
+       if (token_type == TOKEN_TYPE_CLOSE_BRACE) return 1;
+       if (token_type == TOKEN_TYPE_OPEN_BRACE) return 1;
+       if (token_type == TOKEN_TYPE_END_STREAM) return 1;
+
+       // Every other token type is non-terminating
+       return 0;
+}
+// Consumes the current token. Returns it when its type is token_type; otherwise records an "expected token" message and returns NULL.
+// Advances to the next token no matter what, except that it never steps past END_STREAM.
+static OnyxToken* expect(OnyxParser* parser, OnyxTokenType token_type) {
+       OnyxToken* token = parser->curr_token;
+       if (token->type != TOKEN_TYPE_END_STREAM) parser_next_token(parser); // Advancing on a mismatch too keeps callers that loop on expect() from spinning
+       if (token->type == token_type) return token;
+
+       // Name the token type that was expected rather than the one that was found; the name lookup only reads .type
+       OnyxToken expected = { .type = token_type };
+       onyx_message_add(parser->msgs, ONYX_MESSAGE_TYPE_EXPECTED_TOKEN, token->pos, onyx_get_token_type_name(expected));
+       return NULL;
+}
+// Placeholder: expression parsing is not implemented yet, so this always returns the error node and consumes nothing.
+static OnyxAstNode* parse_expression(OnyxParser* parser) {
+       return &error_node;
+}
+// Placeholder: if-statement parsing is not implemented yet, so this always returns the error node and consumes nothing.
+static OnyxAstNode* parse_if_stmt(OnyxParser* parser) {
+       return &error_node;
+}
+// Placeholder for block parsing: asserts that the current token is an open brace, then returns the error node.
+static OnyxAstNode* parse_block(OnyxParser* parser) {
+       assert(parser->curr_token->type == TOKEN_TYPE_OPEN_BRACE);
+       // No tokens are consumed and no block node is built yet
+       return &error_node;
+}
+
+static OnyxAstNode* parse_expression_statement(OnyxParser* parser) {
+
+}
+
+static OnyxAstNode* parse_return_statement(OnyxParser* parser) {
+       // Only should get here with a return as the current token
+       assert(parser->curr_token->type == TOKEN_TYPE_KEYWORD_RETURN);
+
+       OnyxAstNode* expr = NULL;
+
+       OnyxToken* return_token = parser->curr_token;
+       parser_next_token(parser);
+       if (parser->curr_token->type != TOKEN_TYPE_SYM_SEMICOLON) {
+               expr = parse_expression(parser);
+
+               if (expr == &error_node) {
+                       return &error_node;
+               }
+       }
+}
+// Dispatches on the current token to the matching statement parser and returns its result; returns NULL when no node is produced.
+static OnyxAstNode* parse_statement(OnyxParser* parser, b32 is_top_level) {
+       // is_top_level is accepted but not consulted by this implementation
+       const OnyxTokenType leading = parser->curr_token->type;
+       if (leading == TOKEN_TYPE_KEYWORD_RETURN) return parse_return_statement(parser);
+       if (leading == TOKEN_TYPE_OPEN_BRACE)     return (OnyxAstNode *) parse_block(parser);
+       if (leading == TOKEN_TYPE_KEYWORD_IF)     return parse_if_stmt(parser);
+
+       // A lone semicolon yields no node; the token itself is not consumed here
+       if (leading == TOKEN_TYPE_SYM_SEMICOLON)  return NULL;
+
+       // Token types that are routed to the expression-statement parser
+       switch (leading) {
+       case TOKEN_TYPE_SYMBOL:
+       case TOKEN_TYPE_OPEN_PAREN:
+       case TOKEN_TYPE_SYM_PLUS:
+       case TOKEN_TYPE_SYM_MINUS:
+       case TOKEN_TYPE_SYM_BANG:
+       case TOKEN_TYPE_LITERAL_NUMERIC:
+       case TOKEN_TYPE_LITERAL_STRING:
+               return parse_expression_statement(parser);
+
+       default:
+               break;
+       }
+       // Anything else: report it, step over the token and produce no node
+       printf("ERROR\n");
+       parser_next_token(parser);
+       return NULL;
+}
+// Expects a symbol token and looks it up in parser->identifiers; returns its type, or the UNKNOWN builtin entry when it cannot be resolved.
+static OnyxTypeInfo* parse_type(OnyxParser* parser) {
+       OnyxTypeInfo* resolved = &builtin_types[ONYX_TYPE_INFO_KIND_UNKNOWN];
+
+       OnyxToken* name = expect(parser, TOKEN_TYPE_SYMBOL);
+       if (!name) return resolved;
+
+       // Temporarily NUL-terminate the token text so it can be used as a C-string key for the lookup
+       onyx_token_null_toggle(*name);
+       if (bh_hash_has(OnyxAstNode*, parser->identifiers, name->token)) {
+               OnyxAstNode* entry = bh_hash_get(OnyxAstNode*, parser->identifiers, name->token);
+
+               // Only identifiers registered as TYPE nodes change the result
+               if (entry->kind == ONYX_AST_NODE_KIND_TYPE) resolved = entry->type;
+       } else {
+               onyx_message_add(parser->msgs, ONYX_MESSAGE_TYPE_UNKNOWN_TYPE, name->pos, name->token);
+       }
+       // Second toggle puts the original byte back
+       onyx_token_null_toggle(*name);
+
+       return resolved;
+}
+// Parses "( name type, ... )" into a singly linked list of PARAM nodes in source order; returns the first node, or NULL for "()".
+static OnyxAstNodeParam* parse_function_params(OnyxParser* parser) {
+       expect(parser, TOKEN_TYPE_OPEN_PAREN);
+
+       if (parser->curr_token->type == TOKEN_TYPE_CLOSE_PAREN) {
+               parser_next_token(parser);
+               return NULL;
+       }
+
+       OnyxAstNodeParam* first_param = NULL;
+       OnyxAstNodeParam* last_param = NULL; // Tail of the list built so far
+
+       // Also stop at end of stream, so an unterminated parameter list cannot walk off the token array
+       while (parser->curr_token->type != TOKEN_TYPE_CLOSE_PAREN && parser->curr_token->type != TOKEN_TYPE_END_STREAM) {
+               OnyxToken* iteration_start = parser->curr_token;
+               if (parser->curr_token->type == TOKEN_TYPE_SYM_COMMA) parser_next_token(parser);
+
+               OnyxToken* symbol = expect(parser, TOKEN_TYPE_SYMBOL);
+               OnyxAstNodeParam* curr_param = (OnyxAstNodeParam *) onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_PARAM);
+               curr_param->token = symbol;
+               curr_param->type = parse_type(parser);
+               curr_param->next = NULL;
+
+               // Append after the tail node. Tracking the node itself (not the address of a local variable) keeps earlier params linked.
+               if (first_param == NULL) first_param = curr_param;
+               else last_param->next = curr_param;
+               last_param = curr_param;
+
+               // Malformed input can leave the cursor where the iteration began; skip one token so the loop always makes progress
+               if (parser->curr_token == iteration_start) parser_next_token(parser);
+       }
+
+       if (parser->curr_token->type == TOKEN_TYPE_CLOSE_PAREN) parser_next_token(parser); // Skip the )
+       return first_param;
+}
+// Parses "proc (params) -> type" into a FUNCDEF node; the body is not parsed yet and is left as NULL.
+static OnyxAstNodeFuncDef* parse_function_definition(OnyxParser* parser) {
+       expect(parser, TOKEN_TYPE_KEYWORD_PROC);
+
+       OnyxAstNodeFuncDef* func_def = (OnyxAstNodeFuncDef *) onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_FUNCDEF);
+       func_def->params = parse_function_params(parser);
+
+       // Count the parameters by walking the list that was just built
+       u64 counted = 0;
+       OnyxAstNode* cursor = (OnyxAstNode *) func_def->params;
+       while (cursor != NULL) {
+               counted++;
+               cursor = cursor->next;
+       }
+       func_def->param_count = counted;
+
+       expect(parser, TOKEN_TYPE_RIGHT_ARROW);
+       func_def->return_type = parse_type(parser);
+       func_def->body = NULL;
+       return func_def;
+}
+// Parses one top-level construct. Only "name :: proc ..." produces a node at present; every other path yields NULL.
+static OnyxAstNode* parse_top_level_statement(OnyxParser* parser) {
+       const OnyxTokenType leading = parser->curr_token->type;
+
+       // 'use' and 'export' are recognised but not implemented yet
+       if (leading == TOKEN_TYPE_KEYWORD_USE || leading == TOKEN_TYPE_KEYWORD_EXPORT) {
+               assert(0);
+       }
+
+       if (leading == TOKEN_TYPE_SYMBOL) {
+               OnyxToken* symbol = parser->curr_token;
+               parser_next_token(parser);
+
+               // A top-level binding is written "name ::", which arrives as two separate colon tokens
+               expect(parser, TOKEN_TYPE_SYM_COLON);
+               expect(parser, TOKEN_TYPE_SYM_COLON);
+
+               switch (parser->curr_token->type) {
+               case TOKEN_TYPE_KEYWORD_PROC: {
+                       OnyxAstNodeFuncDef* func_def = parse_function_definition(parser);
+                       func_def->token = symbol;
+                       return (OnyxAstNode *) func_def;
+               }
+               case TOKEN_TYPE_KEYWORD_STRUCT:
+                       // Handle struct case
+                       assert(0);
+                       break;
+               default:
+                       onyx_message_add(parser->msgs, ONYX_MESSAGE_TYPE_UNEXPECTED_TOKEN,
+                               parser->curr_token->pos, onyx_get_token_type_name(*parser->curr_token));
+                       break;
+               }
+       }
+
+       // Reached on every path that did not return a node: step over one token so the caller's loop moves on
+       parser_next_token(parser);
+       return NULL;
+}
+// Allocates one OnyxAstNode from alloc and tags it with kind; returns NULL when the allocation fails.
+// The allocation is not assumed to be zeroed, so the common header fields are cleared here. Without that,
+// the "next" chains the parser builds would end in an uninitialised pointer instead of NULL.
+OnyxAstNode* onyx_ast_node_new(bh_allocator alloc, OnyxAstNodeKind kind) {
+       OnyxAstNode* node = (OnyxAstNode *) bh_alloc(alloc, sizeof(OnyxAstNode));
+       if (node == NULL) return NULL;
+       node->kind = kind;
+       node->flags = 0;
+       node->token = NULL;
+       node->type = NULL;
+       node->next = node->left = node->right = NULL;
+       return node;
+}
+// Builds a parser positioned at the tokenizer's first token and registers every builtin type as a TYPE identifier.
+OnyxParser onyx_parser_create(bh_allocator alloc, OnyxTokenizer *tokenizer, OnyxMessages* msgs) {
+       OnyxParser parser;
+
+       // The identifier table is created on the heap allocator rather than on alloc
+       bh_hash_init(bh_heap_allocator(), parser.identifiers);
+
+       // builtin_types is terminated by a sentinel entry whose kind is 0xffffffff
+       for (OnyxTypeInfo* builtin = &builtin_types[0]; builtin->kind != 0xffffffff; builtin++) {
+               OnyxAstNode* type_node = onyx_ast_node_new(alloc, ONYX_AST_NODE_KIND_TYPE);
+               type_node->type = builtin;
+               bh_hash_put(OnyxAstNode*, parser.identifiers, (char *)builtin->name, type_node);
+       }
+
+       parser.allocator = alloc;
+       parser.tokenizer = tokenizer;
+       parser.curr_token = tokenizer->tokens;
+       parser.prev_token = NULL;
+       parser.msgs = msgs;
+
+       return parser;
+}
+// Parses top-level statements until the end-of-stream token and chains them, in order, off the PROGRAM node's "next" link.
+OnyxAstNode* onyx_parse(OnyxParser *parser) {
+       OnyxAstNode* program = onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_PROGRAM);
+
+       // Most recently linked node; each new statement is attached to its "next" field
+       OnyxAstNode* tail = program;
+       while (parser->curr_token->type != TOKEN_TYPE_END_STREAM) {
+               OnyxAstNode* stmt = parse_top_level_statement(parser);
+
+               // Statements that produced nothing, or the error node, are not linked in
+               if (stmt == NULL || stmt == &error_node) continue;
+
+               tail->next = stmt;
+               tail = stmt;
+       }
+
+       return program;
+}
index c8b2c7e261c5bd127554731b81f651bdaf9ee179..5b2731fd9201401dbaf4e89eabfd44a436decf21 100644 (file)
@@ -1,50 +1,63 @@
+#ifndef ONYXPARSER_H
+#define ONYXPARSER_H
+
 #define BH_NO_STRING
 #include "bh.h"
 
 #include "onyxlex.h"
+#include "onyxmsgs.h"
+
+typedef union OnyxAstNode OnyxAstNode;
+typedef struct OnyxAstNodeBlock OnyxAstNodeBlock;
+typedef struct OnyxAstNodeParam OnyxAstNodeParam;
+typedef struct OnyxAstNodeFuncDef OnyxAstNodeFuncDef;
 
 typedef struct OnyxParser {
-       OnyxTokenizer tokenizer;
-       OnyxToken *prev;
-       OnyxToken *curr;
+       OnyxTokenizer *tokenizer;
+       OnyxToken *prev_token;
+       OnyxToken *curr_token;
+
+       bh_hash(OnyxAstNode*) identifiers;
+
+       OnyxMessages *msgs;
 
        bh_allocator allocator;
 } OnyxParser;
 
 typedef enum OnyxAstNodeKind {
-       ONYX_PARSE_NODE_KIND_ERROR,
-       ONYX_PARSE_NODE_KIND_PROGRAM,
-
-       ONYX_PARSE_NODE_KIND_FUNCDEF,
-       ONYX_PARSE_NODE_KIND_BLOCK,
-       ONYX_PARSE_NODE_KIND_SCOPE,
-
-       ONYX_PARSE_NODE_KIND_ADD,
-       ONYX_PARSE_NODE_KIND_SUB,
-       ONYX_PARSE_NODE_KIND_MUL,
-       ONYX_PARSE_NODE_KIND_DIVIDE,
-       ONYX_PARSE_NODE_KIND_MODULUS,
-       ONYX_PARSE_NODE_KIND_NEGATE,
-
-       ONYX_PARSE_NODE_KIND_TYPE,
-       ONYX_PARSE_NODE_KIND_LITERAL,
-       ONYX_PARSE_NODE_KIND_CAST,
-       ONYX_PARSE_NODE_KIND_PARAM,
-       ONYX_PARSE_NODE_KIND_CALL,
-       ONYX_PARSE_NODE_KIND_RETURN,
-
-       ONYX_PARSE_NODE_KIND_EQUAL,
-       ONYX_PARSE_NODE_KIND_NOT_EQUAL,
-       ONYX_PARSE_NODE_KIND_GREATER,
-       ONYX_PARSE_NODE_KIND_GREATER_EQUAL,
-       ONYX_PARSE_NODE_KIND_LESS,
-       ONYX_PARSE_NODE_KIND_LESS_EQUAL,
-       ONYX_PARSE_NODE_KIND_NOT,
-
-       ONYX_PARSE_NODE_KIND_IF,
-       ONYX_PARSE_NODE_KIND_LOOP,
-
-       ONYX_PARSE_NODE_KIND_COUNT
+       ONYX_AST_NODE_KIND_ERROR,
+       ONYX_AST_NODE_KIND_PROGRAM,
+
+       ONYX_AST_NODE_KIND_FUNCDEF,
+       ONYX_AST_NODE_KIND_BLOCK,
+       ONYX_AST_NODE_KIND_SCOPE,
+
+       ONYX_AST_NODE_KIND_ADD,
+       ONYX_AST_NODE_KIND_SUB,
+       ONYX_AST_NODE_KIND_MUL,
+       ONYX_AST_NODE_KIND_DIVIDE,
+       ONYX_AST_NODE_KIND_MODULUS,
+       ONYX_AST_NODE_KIND_NEGATE,
+
+       ONYX_AST_NODE_KIND_TYPE,
+       ONYX_AST_NODE_KIND_LITERAL,
+       ONYX_AST_NODE_KIND_CAST,
+       ONYX_AST_NODE_KIND_PARAM,
+       ONYX_AST_NODE_KIND_CALL,
+       ONYX_AST_NODE_KIND_RETURN,
+
+       ONYX_AST_NODE_KIND_EQUAL,
+       ONYX_AST_NODE_KIND_NOT_EQUAL,
+       ONYX_AST_NODE_KIND_GREATER,
+       ONYX_AST_NODE_KIND_GREATER_EQUAL,
+       ONYX_AST_NODE_KIND_LESS,
+       ONYX_AST_NODE_KIND_LESS_EQUAL,
+       ONYX_AST_NODE_KIND_NOT,
+
+       ONYX_AST_NODE_KIND_IF,
+       ONYX_AST_NODE_KIND_LOOP,
+
+       ONYX_AST_NODE_KIND_COUNT
 } OnyxAstNodeKind;
 
 typedef enum OnyxTypeInfoKind {
@@ -79,11 +92,6 @@ typedef struct OnyxTypeInfo {
 
 extern OnyxTypeInfo builtin_types[];
 
-typedef union OnyxAstNode OnyxAstNode;
-typedef struct OnyxAstNodeBlock OnyxAstNodeBlock;
-typedef struct OnyxAstNodeParam OnyxAstNodeParam;
-typedef struct OnyxAstNodeFuncDef OnyxAstNodeFuncDef;
-
 typedef enum OnyxAstFlags {
        ONYX_AST_BLOCK_FLAG_HAS_RETURN = BH_BIT(1),
        ONYX_AST_BLOCK_FLAG_TOP_LEVEL  = BH_BIT(2),
@@ -103,9 +111,9 @@ struct OnyxAstNodeBlock {
 struct OnyxAstNodeParam {
        OnyxAstNodeKind kind;
        u32 flags;
-       OnyxToken *token;
+       OnyxToken *token;                       // Symbol name i.e. 'a', 'b'
        OnyxTypeInfo *type;
-       OnyxAstNode *next;
+       OnyxAstNodeParam *next;
        OnyxAstNode *left;
        OnyxAstNode *right;     
 };
@@ -118,7 +126,6 @@ struct OnyxAstNodeFuncDef {
        OnyxAstNodeBlock *body;
        OnyxAstNodeParam *params;
        u64 param_count; // Same size as ptr
-       u64 unused1;
 };
 
 union OnyxAstNode {
@@ -128,15 +135,17 @@ union OnyxAstNode {
                OnyxAstNodeKind kind;
                u32 flags;
                OnyxToken *token;
-               OnyxTypeInfotype;
+               OnyxTypeInfo *type;
                OnyxAstNode *next;
                OnyxAstNode *left;
                OnyxAstNode *right;
-       } as_node;
+       };
 
        OnyxAstNodeBlock as_block;
 };
 
-ptr onyx_ast_node_new(bh_allocator alloc, OnyxAstNodeKind kind);
-OnyxParser onyx_parser_create(bh_allocator alloc, OnyxTokenizer tokenizer);
-OnyxAstNode* onyx_parse(OnyxParser parser);
\ No newline at end of file
+OnyxAstNode* onyx_ast_node_new(bh_allocator alloc, OnyxAstNodeKind kind);
+OnyxParser onyx_parser_create(bh_allocator alloc, OnyxTokenizer *tokenizer, OnyxMessages* msgs);
+OnyxAstNode* onyx_parse(OnyxParser *parser);
+
+#endif // #ifndef ONYXPARSER_H
\ No newline at end of file
index 7b9209ad7ca5fe488baf4ab211ee43941c321e59..0d4bee4b33677b668a55ef2e761bcf4811def043 100644 (file)
@@ -1,3 +1,3 @@
 add :: proc(a i32, b i32) -> i32 {
-       return a + b
-};
\ No newline at end of file
+       return a + b;
+};