More parser progress; added notion of 'const'
authorBrendan Hansen <brendan.f.hansen@gmail.com>
Sat, 23 May 2020 20:07:14 +0000 (15:07 -0500)
committerBrendan Hansen <brendan.f.hansen@gmail.com>
Sat, 23 May 2020 20:07:14 +0000 (15:07 -0500)
onyx
onyx.c
onyxlex.c
onyxlex.h
onyxmsgs.c
onyxmsgs.h
onyxparser.c
onyxparser.h
progs/minimal.onyx

diff --git a/onyx b/onyx
index 7759fcd93b530e775dfbe5c5d3faf33cff8f6c0d..2c67a97770447bdd8cd6786329c985c095e460e5 100755 (executable)
Binary files a/onyx and b/onyx differ
diff --git a/onyx.c b/onyx.c
index ebda7f6a23defc23e066c9ee757d5770a8fb7d4d..cd94fdc3de7673eec6e84fe25c7c4336db256422 100644 (file)
--- a/onyx.c
+++ b/onyx.c
@@ -25,13 +25,15 @@ int main(int argc, char *argv[]) {
        onyx_lex_tokens(&tokenizer);
        bh_arr(OnyxToken) token_arr = tokenizer.tokens;
 
-       // bh_printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr));
+#if 0
+       bh_printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr));
 
-       // for (OnyxToken* it = token_arr; !bh_arr_end(token_arr, it); it++) {
-       //      onyx_token_null_toggle(*it);
-       //      bh_printf("%s (%s:%l:%l)\n", onyx_get_token_type_name(it->type), it->pos.filename, it->pos.line, it->pos.column);
-       //      onyx_token_null_toggle(*it);
-       // }
+       for (OnyxToken* it = token_arr; !bh_arr_end(token_arr, it); it++) {
+               onyx_token_null_toggle(*it);
+               bh_printf("%s (%s:%l:%l)\n", onyx_get_token_type_name(it->type), it->pos.filename, it->pos.line, it->pos.column);
+               onyx_token_null_toggle(*it);
+       }
+#endif
 
        bh_arena msg_arena;
        bh_arena_init(&msg_arena, alloc, 4096);
@@ -47,8 +49,13 @@ int main(int argc, char *argv[]) {
        OnyxParser parser = onyx_parser_create(ast_alloc, &tokenizer, &msgs);
        OnyxAstNode* program = onyx_parse(&parser);
 
-       onyx_message_print(&msgs);
-       onyx_ast_print(program);
+       // NOTE: if there are errors, assume the parse tree was generated wrong,
+       // even if it may have still been generated correctly.
+       if (onyx_message_has_errors(&msgs)) {
+               onyx_message_print(&msgs);
+       } else {
+               onyx_ast_print(program);
+       }
 
        bh_file_contents_delete(&fc);
        onyx_tokenizer_free(&tokenizer);
index ca877d6c70547382fae56cafbcd60bcd6b7cc887..b7bf7a541f8efe56a92ce47fbe0ae0a4f0137d2b 100644 (file)
--- a/onyxlex.c
+++ b/onyxlex.c
@@ -15,6 +15,7 @@ static const char* onyx_token_type_names[] = {
        "for",                  //"TOKEN_TYPE_KEYWORD_FOR",
        "do",                   //"TOKEN_TYPE_KEYWORD_DO",
        "return",               //"TOKEN_TYPE_KEYWORD_RETURN",
+       "const",                //"TOKEN_TYPE_KEYWORD_CONST",
        "foreign",              //"TOKEN_TYPE_KEYWORD_FOREIGN",
        "proc",                 //"TOKEN_TYPE_KEYWORD_PROC",
        "global",               //"TOKEN_TYPE_KEYWORD_GLOBAL",
@@ -155,6 +156,7 @@ OnyxToken* onyx_get_token(OnyxTokenizer* tokenizer) {
        LITERAL_TOKEN("foreign", TOKEN_TYPE_KEYWORD_FOREIGN);
        LITERAL_TOKEN("for", TOKEN_TYPE_KEYWORD_FOR);
        LITERAL_TOKEN("return", TOKEN_TYPE_KEYWORD_RETURN);
+       LITERAL_TOKEN("const", TOKEN_TYPE_KEYWORD_CONST);
        LITERAL_TOKEN("do", TOKEN_TYPE_KEYWORD_DO);
        LITERAL_TOKEN("proc", TOKEN_TYPE_KEYWORD_PROC);
        LITERAL_TOKEN("global", TOKEN_TYPE_KEYWORD_GLOBAL);
index 1fbce3a91569b2e88d3ce0c7891551b6755e9f14..a64b0ec699134fab9eacc717cbbebb8426568c2a 100644 (file)
--- a/onyxlex.h
+++ b/onyxlex.h
@@ -17,6 +17,7 @@ typedef enum OnyxTokenType {
        TOKEN_TYPE_KEYWORD_FOR,
        TOKEN_TYPE_KEYWORD_DO,
        TOKEN_TYPE_KEYWORD_RETURN,
+       TOKEN_TYPE_KEYWORD_CONST,
        TOKEN_TYPE_KEYWORD_FOREIGN,
        TOKEN_TYPE_KEYWORD_PROC,
        TOKEN_TYPE_KEYWORD_GLOBAL,
index 53b35975f613c75734dcbfa27810ea4c99bd12b9..fae03d8576c7c014eebbbc1dc223310e254abe7c 100644 (file)
@@ -4,7 +4,10 @@
 static const char* msg_formats[] = {
        "expected token '%s', got '%s'",
        "unexpected token '%s'",
-       "unknown type '%s'"
+       "unknown type '%s'", // ONYX_MESSAGE_TYPE_UNKNOWN_TYPE (order appears to mirror OnyxMessageType; confirm against onyx_message_add)
+       "expected lval '%s'", // ONYX_MESSAGE_TYPE_NOT_LVAL
+       "attempt to assign to constant '%s'", // ONYX_MESSAGE_TYPE_ASSIGN_CONST
+       "unknown symbol '%s'", // ONYX_MESSAGE_TYPE_UNKNOWN_SYMBOL
 };
 
 void onyx_message_add(OnyxMessages* msgs, OnyxMessageType type, OnyxFilePos pos, ...) {
@@ -38,6 +41,10 @@ void onyx_message_print(OnyxMessages* msgs) {
        }
 }
 
+b32 onyx_message_has_errors(OnyxMessages* msgs) { // Reports whether any message has been recorded
+       return msgs->first != NULL; // nonzero when msgs->first is set; no severity check is made here
+}
+
 void onyx_message_create(bh_allocator allocator, OnyxMessages* msgs) {
        msgs->allocator = allocator;
        msgs->first = NULL;
index 1a2ae502a03d5b1621bee67fd40c82432bb47a30..81c667b26cef29c44b45b24337bc5a581d02171c 100644 (file)
@@ -12,10 +12,13 @@ typedef enum OnyxMessageType {
        ONYX_MESSAGE_TYPE_EXPECTED_TOKEN,
        ONYX_MESSAGE_TYPE_UNEXPECTED_TOKEN,
        ONYX_MESSAGE_TYPE_UNKNOWN_TYPE,
+       ONYX_MESSAGE_TYPE_NOT_LVAL,
+       ONYX_MESSAGE_TYPE_ASSIGN_CONST,
+       ONYX_MESSAGE_TYPE_UNKNOWN_SYMBOL,
 
        ONYX_MESSAGE_TYPE_COUNT,
 } OnyxMessageType;
-       
+
 typedef struct OnyxMessage {
        OnyxMessageType type;
        OnyxFilePos pos;
@@ -31,6 +34,7 @@ typedef struct OnyxMessages {
 
 void onyx_message_add(OnyxMessages* msgs, OnyxMessageType type, OnyxFilePos pos, ...);
 void onyx_message_print(OnyxMessages* msgs);
+b32 onyx_message_has_errors(OnyxMessages* msgs);
 void onyx_message_create(bh_allocator allocator, OnyxMessages* msgs);
 
 #endif
index 475d38fc483824093f5225018944bc9f4712f078..020c88d34269d9f7722d82a1eb5a054a370074b2 100644 (file)
@@ -72,12 +72,12 @@ static b32 is_terminating_token(OnyxTokenType token_type);
 static OnyxToken* expect(OnyxParser* parser, OnyxTokenType token_type);
 static OnyxAstNodeScope* enter_scope(OnyxParser* parser);
 static OnyxAstNodeScope* leave_scope(OnyxParser* parser);
-static void insert_identifier(OnyxParser* parser, OnyxAstNodeLocal* local);
+static void insert_local(OnyxParser* parser, OnyxAstNodeLocal* local);
 static OnyxAstNode* parse_factor(OnyxParser* parser);
 static OnyxAstNode* parse_bin_op(OnyxParser* parser, OnyxAstNode* left);
 static OnyxAstNode* parse_expression(OnyxParser* parser);
 static OnyxAstNode* parse_if_stmt(OnyxParser* parser);
-static b32 parse_expression_statement(OnyxParser* parser, OnyxAstNode** ret);
+static b32 parse_symbol_statement(OnyxParser* parser, OnyxAstNode** ret);
 static OnyxAstNode* parse_return_statement(OnyxParser* parser);
 static OnyxAstNodeBlock* parse_block(OnyxParser* parser, b32 belongs_to_function);
 static OnyxAstNode* parse_statement(OnyxParser* parser);
@@ -112,6 +112,12 @@ static b32 is_terminating_token(OnyxTokenType token_type) {
        }
 }
 
+static void find_token(OnyxParser* parser, OnyxTokenType token_type) { // Skips ahead until curr_token is token_type or a terminating token
+       while (parser->curr_token->type != token_type && !is_terminating_token(parser->curr_token->type)) {
+               parser_next_token(parser); // the loop never advances past the token that ends it
+       }
+}
+
 // Advances to next token no matter what
 static OnyxToken* expect(OnyxParser* parser, OnyxTokenType token_type) {
        OnyxToken* token = parser->curr_token;
@@ -168,7 +174,7 @@ static OnyxAstNode* lookup_identifier(OnyxParser* parser, OnyxToken* token) {
        return ident;
 }
 
-static void insert_identifier(OnyxParser* parser, OnyxAstNodeLocal* local) {
+static void insert_local(OnyxParser* parser, OnyxAstNodeLocal* local) {
        OnyxAstNodeScope* scope = parser->curr_scope;
        local->prev_local = scope->last_local;
        scope->last_local = local;
@@ -263,7 +269,7 @@ static OnyxAstNode* parse_if_stmt(OnyxParser* parser) {
 
 // Returns 1 if the symbol was consumed. Returns 0 otherwise
 // ret is set to the statement to insert
-static b32 parse_expression_statement(OnyxParser* parser, OnyxAstNode** ret) {
+static b32 parse_symbol_statement(OnyxParser* parser, OnyxAstNode** ret) {
        if (parser->curr_token->type != TOKEN_TYPE_SYMBOL) return 0;
        OnyxToken* symbol = expect(parser, TOKEN_TYPE_SYMBOL);
 
@@ -272,6 +278,13 @@ static b32 parse_expression_statement(OnyxParser* parser, OnyxAstNode** ret) {
        case TOKEN_TYPE_SYM_COLON: {
                parser_next_token(parser);
                OnyxTypeInfo* type = &builtin_types[ONYX_TYPE_INFO_KIND_UNKNOWN];
+               u32 flags = ONYX_AST_FLAG_LVAL;
+
+               // NOTE: var: const ...
+               if (parser->curr_token->type == TOKEN_TYPE_KEYWORD_CONST) {
+                       parser_next_token(parser);
+                       flags |= ONYX_AST_FLAG_CONST;
+               }
 
                // NOTE: var: type
                if (parser->curr_token->type == TOKEN_TYPE_SYMBOL) {
@@ -281,8 +294,9 @@ static b32 parse_expression_statement(OnyxParser* parser, OnyxAstNode** ret) {
                OnyxAstNodeLocal* local = (OnyxAstNodeLocal*) onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_LOCAL);
                local->token = symbol;
                local->type = type;
+               local->flags |= flags;
 
-               insert_identifier(parser, local);
+               insert_local(parser, local);
 
                if (parser->curr_token->type == TOKEN_TYPE_SYM_EQUALS) {
                        parser_next_token(parser);
@@ -302,15 +316,36 @@ static b32 parse_expression_statement(OnyxParser* parser, OnyxAstNode** ret) {
 
                OnyxAstNode* lval = lookup_identifier(parser, symbol);
 
+               if (lval != NULL && lval->flags & ONYX_AST_FLAG_LVAL && (lval->flags & ONYX_AST_FLAG_CONST) == 0) {
+                       OnyxAstNode* rval = parse_expression(parser);
+                       OnyxAstNode* assignment = onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_ASSIGNMENT);
+                       assignment->right = rval;
+                       assignment->left = lval;
+                       *ret = assignment;
+                       return 1;
+               }
+
+               onyx_token_null_toggle(*symbol);
                if (lval == NULL) {
-                       // TODO: error handling
+                       onyx_message_add(parser->msgs,
+                               ONYX_MESSAGE_TYPE_UNKNOWN_SYMBOL,
+                               symbol->pos, symbol->token);
+               }
+
+               else if ((lval->flags & ONYX_AST_FLAG_LVAL) == 0) {
+                       onyx_message_add(parser->msgs,
+                               ONYX_MESSAGE_TYPE_NOT_LVAL,
+                               symbol->pos, symbol->token);
                }
 
-               OnyxAstNode* rval = parse_expression(parser);
-               OnyxAstNode* assignment = onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_ASSIGNMENT);
-               assignment->right = rval;
-               assignment->left = lval;
-               *ret = assignment;
+               else if (lval->flags & ONYX_AST_FLAG_CONST) {
+                       onyx_message_add(parser->msgs,
+                               ONYX_MESSAGE_TYPE_ASSIGN_CONST,
+                               symbol->pos, symbol->token);
+               }
+               onyx_token_null_toggle(*symbol);
+
+               find_token(parser, TOKEN_TYPE_SYM_SEMICOLON);
                return 1;
        }
 
@@ -351,7 +386,7 @@ static OnyxAstNode* parse_statement(OnyxParser* parser) {
 
        case TOKEN_TYPE_SYMBOL: {
                OnyxAstNode* ret = NULL;
-               if (parse_expression_statement(parser, &ret)) return ret;
+               if (parse_symbol_statement(parser, &ret)) return ret;
                // fallthrough
        }
 
@@ -404,6 +439,8 @@ static OnyxAstNodeBlock* parse_block(OnyxParser* parser, b32 belongs_to_function
                                parser->curr_token->pos,
                                onyx_get_token_type_name(TOKEN_TYPE_SYM_SEMICOLON),
                                onyx_get_token_type_name(parser->curr_token->type));
+
+                       find_token(parser, TOKEN_TYPE_SYM_SEMICOLON);
                }
                parser_next_token(parser);
        }
@@ -491,7 +528,8 @@ static OnyxAstNodeFuncDef* parse_function_definition(OnyxParser* parser) {
        OnyxTypeInfo* return_type = parse_type(parser);
        func_def->return_type = return_type;
 
-       // TODO: Add params to parser.identifiers
+       // BUG: if a param has the same name as a global or function, that global/function
+       // will no longer be in scope after the function body ends
        for (OnyxAstNodeParam* p = func_def->params; p != NULL; p = p->next) {
                onyx_token_null_toggle(*p->token);
                bh_hash_put(OnyxAstNode*, parser->identifiers, p->token->token, (OnyxAstNode*) p);
@@ -500,7 +538,6 @@ static OnyxAstNodeFuncDef* parse_function_definition(OnyxParser* parser) {
 
        func_def->body = parse_block(parser, 1);
 
-       // TODO: Remove params from parser.identifiers
        for (OnyxAstNodeParam* p = func_def->params; p != NULL; p = p->next) {
                onyx_token_null_toggle(*p->token);
                bh_hash_delete(OnyxAstNode*, parser->identifiers, p->token->token);
index 270deba3891d343752716857bbda03ac24af2d11..3dfd782ca3ed87769c71826992b1dc697ea12801 100644 (file)
@@ -109,6 +109,8 @@ extern OnyxTypeInfo builtin_types[];
 typedef enum OnyxAstFlags {
        // Top-level flags
        ONYX_AST_FLAG_EXPORTED   = BH_BIT(1),
+       ONYX_AST_FLAG_LVAL               = BH_BIT(2), // set on locals; parse_symbol_statement requires it on an assignment target
+       ONYX_AST_FLAG_CONST              = BH_BIT(3), // set for `name: const ...` locals; assigning to one is reported as ASSIGN_CONST
 } OnyxAstFlags;
 
 struct OnyxAstNodeLocal {
index d4f55aca369b666df37b161cd4c8f124c0aee8cf..8e3b4d853e127be8ea706c280dd25ccf32822a0a 100644 (file)
@@ -1,6 +1,7 @@
 /* This is a comment */
 
-log :: proc (a i32, b i32) -> i32 ---;
+/* Currently the "foreign" keyword doesn't do anything */
+foreign "console" log :: proc (a i32, b i32) -> i32 ---;
 
 export add :: proc (a i32, b i32) -> i32 {
        /* More comments */
@@ -8,9 +9,11 @@ export add :: proc (a i32, b i32) -> i32 {
 };
 
 export mul :: proc (a i32, b i32) -> i32 {
-       c: i32 = a - b;
-       d := a + 2;
+       c: const i32 = a - b;
+
+       /*  Don't love this syntax, but it's easy to parse so whatever
+               Inferred type, but constant */
+       d: const = a + 2;
 
-       e: i32;
        return c * d;
 };