From: Brendan Hansen Date: Sat, 23 May 2020 20:07:14 +0000 (-0500) Subject: More parser progress; added notion of 'const' X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=4d510c01f0aacf45cecf1a2d9853f3d0d08e159f;p=onyx.git More parser progress; added notion of 'const' --- diff --git a/onyx b/onyx index 7759fcd9..2c67a977 100755 Binary files a/onyx and b/onyx differ diff --git a/onyx.c b/onyx.c index ebda7f6a..cd94fdc3 100644 --- a/onyx.c +++ b/onyx.c @@ -25,13 +25,15 @@ int main(int argc, char *argv[]) { onyx_lex_tokens(&tokenizer); bh_arr(OnyxToken) token_arr = tokenizer.tokens; - // bh_printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr)); +#if 0 + bh_printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr)); - // for (OnyxToken* it = token_arr; !bh_arr_end(token_arr, it); it++) { - // onyx_token_null_toggle(*it); - // bh_printf("%s (%s:%l:%l)\n", onyx_get_token_type_name(it->type), it->pos.filename, it->pos.line, it->pos.column); - // onyx_token_null_toggle(*it); - // } + for (OnyxToken* it = token_arr; !bh_arr_end(token_arr, it); it++) { + onyx_token_null_toggle(*it); + bh_printf("%s (%s:%l:%l)\n", onyx_get_token_type_name(it->type), it->pos.filename, it->pos.line, it->pos.column); + onyx_token_null_toggle(*it); + } +#endif bh_arena msg_arena; bh_arena_init(&msg_arena, alloc, 4096); @@ -47,8 +49,13 @@ int main(int argc, char *argv[]) { OnyxParser parser = onyx_parser_create(ast_alloc, &tokenizer, &msgs); OnyxAstNode* program = onyx_parse(&parser); - onyx_message_print(&msgs); - onyx_ast_print(program); + // NOTE: if there are errors, assume the parse tree was generated wrong, + // even if it may have still been generated correctly. + if (onyx_message_has_errors(&msgs)) { + onyx_message_print(&msgs); + } else { + onyx_ast_print(program); + } bh_file_contents_delete(&fc); onyx_tokenizer_free(&tokenizer); diff --git a/onyxlex.c b/onyxlex.c index ca877d6c..b7bf7a54 100644 --- a/onyxlex.c +++ b/onyxlex.c @@ -15,6 +15,7 @@ static const char* onyx_token_type_names[] = { "for", //"TOKEN_TYPE_KEYWORD_FOR", "do", //"TOKEN_TYPE_KEYWORD_DO", "return", //"TOKEN_TYPE_KEYWORD_RETURN", + "const", //"TOKEN_TYPE_KEYWORD_CONST", "foreign", //"TOKEN_TYPE_KEYWORD_FOREIGN", "proc", //"TOKEN_TYPE_KEYWORD_PROC", "global", //"TOKEN_TYPE_KEYWORD_GLOBAL", @@ -155,6 +156,7 @@ OnyxToken* onyx_get_token(OnyxTokenizer* tokenizer) { LITERAL_TOKEN("foreign", TOKEN_TYPE_KEYWORD_FOREIGN); LITERAL_TOKEN("for", TOKEN_TYPE_KEYWORD_FOR); LITERAL_TOKEN("return", TOKEN_TYPE_KEYWORD_RETURN); + LITERAL_TOKEN("const", TOKEN_TYPE_KEYWORD_CONST); LITERAL_TOKEN("do", TOKEN_TYPE_KEYWORD_DO); LITERAL_TOKEN("proc", TOKEN_TYPE_KEYWORD_PROC); LITERAL_TOKEN("global", TOKEN_TYPE_KEYWORD_GLOBAL); diff --git a/onyxlex.h b/onyxlex.h index 1fbce3a9..a64b0ec6 100644 --- a/onyxlex.h +++ b/onyxlex.h @@ -17,6 +17,7 @@ typedef enum OnyxTokenType { TOKEN_TYPE_KEYWORD_FOR, TOKEN_TYPE_KEYWORD_DO, TOKEN_TYPE_KEYWORD_RETURN, + TOKEN_TYPE_KEYWORD_CONST, TOKEN_TYPE_KEYWORD_FOREIGN, TOKEN_TYPE_KEYWORD_PROC, TOKEN_TYPE_KEYWORD_GLOBAL, diff --git a/onyxmsgs.c b/onyxmsgs.c index 53b35975..fae03d85 100644 --- a/onyxmsgs.c +++ b/onyxmsgs.c @@ -4,7 +4,10 @@ static const char* msg_formats[] = { "expected token '%s', got '%s'", "unexpected token '%s'", - "unknown type '%s'" + "unknown type '%s'", + "expected lval '%s'", + "attempt to assign to constant '%s'", + "unknown symbol '%s'", }; void onyx_message_add(OnyxMessages* msgs, OnyxMessageType type, OnyxFilePos pos, ...) { @@ -38,6 +41,10 @@ void onyx_message_print(OnyxMessages* msgs) { } } +b32 onyx_message_has_errors(OnyxMessages* msgs) { + return msgs->first != NULL; +} + void onyx_message_create(bh_allocator allocator, OnyxMessages* msgs) { msgs->allocator = allocator; msgs->first = NULL; diff --git a/onyxmsgs.h b/onyxmsgs.h index 1a2ae502..81c667b2 100644 --- a/onyxmsgs.h +++ b/onyxmsgs.h @@ -12,10 +12,13 @@ typedef enum OnyxMessageType { ONYX_MESSAGE_TYPE_EXPECTED_TOKEN, ONYX_MESSAGE_TYPE_UNEXPECTED_TOKEN, ONYX_MESSAGE_TYPE_UNKNOWN_TYPE, + ONYX_MESSAGE_TYPE_NOT_LVAL, + ONYX_MESSAGE_TYPE_ASSIGN_CONST, + ONYX_MESSAGE_TYPE_UNKNOWN_SYMBOL, ONYX_MESSAGE_TYPE_COUNT, } OnyxMessageType; - + typedef struct OnyxMessage { OnyxMessageType type; OnyxFilePos pos; @@ -31,6 +34,7 @@ typedef struct OnyxMessages { void onyx_message_add(OnyxMessages* msgs, OnyxMessageType type, OnyxFilePos pos, ...); void onyx_message_print(OnyxMessages* msgs); +b32 onyx_message_has_errors(OnyxMessages* msgs); void onyx_message_create(bh_allocator allocator, OnyxMessages* msgs); #endif diff --git a/onyxparser.c b/onyxparser.c index 475d38fc..020c88d3 100644 --- a/onyxparser.c +++ b/onyxparser.c @@ -72,12 +72,12 @@ static b32 is_terminating_token(OnyxTokenType token_type); static OnyxToken* expect(OnyxParser* parser, OnyxTokenType token_type); static OnyxAstNodeScope* enter_scope(OnyxParser* parser); static OnyxAstNodeScope* leave_scope(OnyxParser* parser); -static void insert_identifier(OnyxParser* parser, OnyxAstNodeLocal* local); +static void insert_local(OnyxParser* parser, OnyxAstNodeLocal* local); static OnyxAstNode* parse_factor(OnyxParser* parser); static OnyxAstNode* parse_bin_op(OnyxParser* parser, OnyxAstNode* left); static OnyxAstNode* parse_expression(OnyxParser* parser); static OnyxAstNode* parse_if_stmt(OnyxParser* parser); -static b32 parse_expression_statement(OnyxParser* parser, OnyxAstNode** ret); +static b32 parse_symbol_statement(OnyxParser* parser, OnyxAstNode** ret); static OnyxAstNode* parse_return_statement(OnyxParser* parser); static OnyxAstNodeBlock* parse_block(OnyxParser* parser, b32 belongs_to_function); static OnyxAstNode* parse_statement(OnyxParser* parser); @@ -112,6 +112,12 @@ static b32 is_terminating_token(OnyxTokenType token_type) { } } +static void find_token(OnyxParser* parser, OnyxTokenType token_type) { + while (parser->curr_token->type != token_type && !is_terminating_token(parser->curr_token->type)) { + parser_next_token(parser); + } +} + // Advances to next token no matter what static OnyxToken* expect(OnyxParser* parser, OnyxTokenType token_type) { OnyxToken* token = parser->curr_token; @@ -168,7 +174,7 @@ static OnyxAstNode* lookup_identifier(OnyxParser* parser, OnyxToken* token) { return ident; } -static void insert_identifier(OnyxParser* parser, OnyxAstNodeLocal* local) { +static void insert_local(OnyxParser* parser, OnyxAstNodeLocal* local) { OnyxAstNodeScope* scope = parser->curr_scope; local->prev_local = scope->last_local; scope->last_local = local; @@ -263,7 +269,7 @@ static OnyxAstNode* parse_if_stmt(OnyxParser* parser) { // Returns 1 if the symbol was consumed. Returns 0 otherwise // ret is set to the statement to insert -static b32 parse_expression_statement(OnyxParser* parser, OnyxAstNode** ret) { +static b32 parse_symbol_statement(OnyxParser* parser, OnyxAstNode** ret) { if (parser->curr_token->type != TOKEN_TYPE_SYMBOL) return 0; OnyxToken* symbol = expect(parser, TOKEN_TYPE_SYMBOL); @@ -272,6 +278,13 @@ static b32 parse_expression_statement(OnyxParser* parser, OnyxAstNode** ret) { case TOKEN_TYPE_SYM_COLON: { parser_next_token(parser); OnyxTypeInfo* type = &builtin_types[ONYX_TYPE_INFO_KIND_UNKNOWN]; + u32 flags = ONYX_AST_FLAG_LVAL; + + // NOTE: var: const ... + if (parser->curr_token->type == TOKEN_TYPE_KEYWORD_CONST) { + parser_next_token(parser); + flags |= ONYX_AST_FLAG_CONST; + } // NOTE: var: type if (parser->curr_token->type == TOKEN_TYPE_SYMBOL) { @@ -281,8 +294,9 @@ static b32 parse_expression_statement(OnyxParser* parser, OnyxAstNode** ret) { OnyxAstNodeLocal* local = (OnyxAstNodeLocal*) onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_LOCAL); local->token = symbol; local->type = type; + local->flags |= flags; - insert_identifier(parser, local); + insert_local(parser, local); if (parser->curr_token->type == TOKEN_TYPE_SYM_EQUALS) { parser_next_token(parser); @@ -302,15 +316,36 @@ static b32 parse_expression_statement(OnyxParser* parser, OnyxAstNode** ret) { OnyxAstNode* lval = lookup_identifier(parser, symbol); + if (lval != NULL && lval->flags & ONYX_AST_FLAG_LVAL && (lval->flags & ONYX_AST_FLAG_CONST) == 0) { + OnyxAstNode* rval = parse_expression(parser); + OnyxAstNode* assignment = onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_ASSIGNMENT); + assignment->right = rval; + assignment->left = lval; + *ret = assignment; + return 1; + } + + onyx_token_null_toggle(*symbol); if (lval == NULL) { - // TODO: error handling + onyx_message_add(parser->msgs, + ONYX_MESSAGE_TYPE_UNKNOWN_SYMBOL, + symbol->pos, symbol->token); + } + + else if ((lval->flags & ONYX_AST_FLAG_LVAL) == 0) { + onyx_message_add(parser->msgs, + ONYX_MESSAGE_TYPE_NOT_LVAL, + symbol->pos, symbol->token); } - OnyxAstNode* rval = parse_expression(parser); - OnyxAstNode* assignment = onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_ASSIGNMENT); - assignment->right = rval; - assignment->left = lval; - *ret = assignment; + else if (lval->flags & ONYX_AST_FLAG_CONST) { + onyx_message_add(parser->msgs, + ONYX_MESSAGE_TYPE_ASSIGN_CONST, + symbol->pos, symbol->token); + } + onyx_token_null_toggle(*symbol); + + find_token(parser, TOKEN_TYPE_SYM_SEMICOLON); return 1; } @@ -351,7 +386,7 @@ static OnyxAstNode* parse_statement(OnyxParser* parser) { case TOKEN_TYPE_SYMBOL: { OnyxAstNode* ret = NULL; - if (parse_expression_statement(parser, &ret)) return ret; + if (parse_symbol_statement(parser, &ret)) return ret; // fallthrough } @@ -404,6 +439,8 @@ static OnyxAstNodeBlock* parse_block(OnyxParser* parser, b32 belongs_to_function parser->curr_token->pos, onyx_get_token_type_name(TOKEN_TYPE_SYM_SEMICOLON), onyx_get_token_type_name(parser->curr_token->type)); + + find_token(parser, TOKEN_TYPE_SYM_SEMICOLON); } parser_next_token(parser); } @@ -491,7 +528,8 @@ static OnyxAstNodeFuncDef* parse_function_definition(OnyxParser* parser) { OnyxTypeInfo* return_type = parse_type(parser); func_def->return_type = return_type; - // TODO: Add params to parser.identifiers + // BUG: if a param has the same name as a global or function, that global/function + // will no longer be in scope after the function body ends for (OnyxAstNodeParam* p = func_def->params; p != NULL; p = p->next) { onyx_token_null_toggle(*p->token); bh_hash_put(OnyxAstNode*, parser->identifiers, p->token->token, (OnyxAstNode*) p); @@ -500,7 +538,6 @@ static OnyxAstNodeFuncDef* parse_function_definition(OnyxParser* parser) { func_def->body = parse_block(parser, 1); - // TODO: Remove params from parser.identifiers for (OnyxAstNodeParam* p = func_def->params; p != NULL; p = p->next) { onyx_token_null_toggle(*p->token); bh_hash_delete(OnyxAstNode*, parser->identifiers, p->token->token); diff --git a/onyxparser.h b/onyxparser.h index 270deba3..3dfd782c 100644 --- a/onyxparser.h +++ b/onyxparser.h @@ -109,6 +109,8 @@ extern OnyxTypeInfo builtin_types[]; typedef enum OnyxAstFlags { // Top-level flags ONYX_AST_FLAG_EXPORTED = BH_BIT(1), + ONYX_AST_FLAG_LVAL = BH_BIT(2), + ONYX_AST_FLAG_CONST = BH_BIT(3), } OnyxAstFlags; struct OnyxAstNodeLocal { diff --git a/progs/minimal.onyx b/progs/minimal.onyx index d4f55aca..8e3b4d85 100644 --- a/progs/minimal.onyx +++ b/progs/minimal.onyx @@ -1,6 +1,7 @@ /* This is a comment */ -log :: proc (a i32, b i32) -> i32 ---; +/* Currently the "foreign" keyword doesn't do anything */ +foreign "console" log :: proc (a i32, b i32) -> i32 ---; export add :: proc (a i32, b i32) -> i32 { /* More comments */ @@ -8,9 +9,11 @@ export add :: proc (a i32, b i32) -> i32 { }; export mul :: proc (a i32, b i32) -> i32 { - c: i32 = a - b; - d := a + 2; + c: const i32 = a - b; + + /* Don't love this syntax, but it's easy to parse so whatever + Inferred type, but constant */ + d: const = a + 2; - e: i32; return c * d; };