From: Brendan Hansen Date: Mon, 5 Feb 2024 22:57:26 +0000 (-0600) Subject: compiler changes for optional semicolons X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=5a95c2fc45c7d86aa2b3c623fdb91a478de67854;p=onyx.git compiler changes for optional semicolons --- diff --git a/compiler/include/lex.h b/compiler/include/lex.h index 01711e88..2e721c77 100644 --- a/compiler/include/lex.h +++ b/compiler/include/lex.h @@ -84,6 +84,8 @@ typedef enum TokenType { Token_Type_Literal_True, Token_Type_Literal_False, + Token_Type_Inserted_Semicolon, + Token_Type_Count, } TokenType; @@ -112,6 +114,8 @@ typedef struct OnyxTokenizer { u64 line_number; bh_arr(OnyxToken) tokens; + + b32 insert_semicolon: 1; } OnyxTokenizer; const char *token_type_name(TokenType tkn_type); diff --git a/compiler/src/lex.c b/compiler/src/lex.c index 01380167..e4c23663 100644 --- a/compiler/src/lex.c +++ b/compiler/src/lex.c @@ -78,6 +78,8 @@ static const char* token_type_names[] = { "true", "false", + "inserted semicolon", + "TOKEN_TYPE_COUNT" }; @@ -155,8 +157,20 @@ OnyxToken* onyx_get_token(OnyxTokenizer* tokenizer) { if (tokenizer->curr == tokenizer->end) break; switch (*tokenizer->curr) { - case ' ': case '\n': + if (tokenizer->insert_semicolon) { + OnyxToken semicolon_token; + semicolon_token.type = Token_Type_Inserted_Semicolon; + semicolon_token.text = "; "; + semicolon_token.length = 1; + semicolon_token.pos.line_start = tokenizer->line_start; + semicolon_token.pos.filename = tokenizer->filename; + semicolon_token.pos.line = tokenizer->line_number; + semicolon_token.pos.column = (u16)(tokenizer->curr - tokenizer->line_start) + 1; + bh_arr_push(tokenizer->tokens, semicolon_token); + tokenizer->insert_semicolon = 0; + } + case ' ': case '\t': case '\r': INCREMENT_CURR_TOKEN(tokenizer); @@ -502,6 +516,33 @@ token_parsed: tk.pos.length = (u16) tk.length; bh_arr_push(tokenizer->tokens, tk); + switch ((u32) tk.type) { + case Token_Type_Comment: + break; + + case Token_Type_Symbol: + case Token_Type_Keyword_Break: + case Token_Type_Keyword_Continue: + case Token_Type_Keyword_Fallthrough: + case Token_Type_Keyword_Return: + case Token_Type_Literal_String: + case Token_Type_Literal_True: + case Token_Type_Literal_False: + case Token_Type_Literal_Integer: + case Token_Type_Literal_Float: + case Token_Type_Literal_Char: + case Token_Type_Empty_Block: + case '?': + case ')': + case '}': + case ']': + tokenizer->insert_semicolon = 1; + break; + + default: + tokenizer->insert_semicolon = 0; + } + return &tokenizer->tokens[bh_arr_length(tokenizer->tokens) - 1]; } @@ -516,6 +557,8 @@ OnyxTokenizer onyx_tokenizer_create(bh_allocator allocator, bh_file_contents *fc .line_number = 1, .line_start = fc->data, .tokens = NULL, + + .insert_semicolon = 0, }; bh_arr_new(allocator, tknizer.tokens, 1 << 12); diff --git a/compiler/src/parser.c b/compiler/src/parser.c index bd37b449..9a2d1c68 100644 --- a/compiler/src/parser.c +++ b/compiler/src/parser.c @@ -126,8 +126,21 @@ static OnyxToken* expect_token(OnyxParser* parser, TokenType token_type) { if (parser->hit_unexpected_token) return NULL; OnyxToken* token = parser->curr; + if (token_type == ';' && token->type == Token_Type_End_Stream) { + return token; + } + consume_token(parser); + if (token->type == Token_Type_Inserted_Semicolon) { + if (token_type == ';' || token_type == ',') { + return token; + } else { + token = parser->curr; + consume_token(parser); + } + } + if (token->type != token_type) { onyx_report_error(token->pos, Error_Critical, "expected token '%s', got '%s'.", token_type_name(token_type), token_name(token)); parser->hit_unexpected_token = 1; @@ -142,6 +155,11 @@ static OnyxToken* expect_token(OnyxParser* parser, TokenType token_type) { static b32 consume_token_if_next(OnyxParser* parser, TokenType token_type) { if (parser->hit_unexpected_token) return 0; + if (parser->curr->type == Token_Type_Inserted_Semicolon && token_type == ';') { + consume_token(parser); + return 1; + } + if (parser->curr->type == token_type) { consume_token(parser); return 1; @@ -151,7 +169,13 @@ static b32 consume_token_if_next(OnyxParser* parser, TokenType token_type) { } static void consume_tokens(OnyxParser* parser, i32 n) { - fori (i, 0, n) consume_token(parser); + fori (i, 0, n) { + if (parser->curr->type == Token_Type_Inserted_Semicolon) { + i--; + } + + consume_token(parser); + } } static b32 next_tokens_are(OnyxParser* parser, i32 n, ...) { @@ -159,11 +183,20 @@ static b32 next_tokens_are(OnyxParser* parser, i32 n, ...) { va_start(va, n); i32 matched = 1; + i32 skipped = 0; // BUG: This does not take into consideration comments that can occur between any tokens. fori (i, 0, n) { TokenType expected_type = va_arg(va, TokenType); - if (peek_token(i)->type != expected_type) { + OnyxToken *peeked_token = peek_token(i + skipped); + + // if (peeked_token->type == Token_Type_Inserted_Semicolon) { + // i--; + // skipped += 1; + // continue; + // } + + if (peeked_token->type != expected_type) { matched = 0; break; } @@ -411,6 +444,8 @@ static AstCall* parse_function_call(OnyxParser *parser, AstTyped *callee) { static AstTyped* parse_factor(OnyxParser* parser) { AstTyped* retval = NULL; + consume_token_if_next(parser, Token_Type_Inserted_Semicolon); + switch ((u16) parser->curr->type) { case '(': { if (parse_possible_function_definition(parser, &retval)) { @@ -1257,6 +1292,7 @@ static AstIfWhile* parse_if_stmt(OnyxParser* parser) { } AstBlock* true_stmt = parse_block(parser, 1, NULL); + consume_token_if_next(parser, ';'); if_node->initialization = initialization_or_cond; if_node->cond = cond; @@ -1278,6 +1314,8 @@ static AstIfWhile* parse_if_stmt(OnyxParser* parser) { if_node->false_stmt = (AstBlock *) elseif_node; if_node = elseif_node; + + consume_token_if_next(parser, ';'); } if (consume_token_if_next(parser, Token_Type_Keyword_Else)) { @@ -3294,6 +3332,7 @@ static AstIf* parse_static_if_stmt(OnyxParser* parser, b32 parse_block_as_statem if (parser->hit_unexpected_token) return static_if_node; parse_top_level_statement(parser); + consume_token_if_next(parser, ';'); } } @@ -3312,6 +3351,7 @@ static AstIf* parse_static_if_stmt(OnyxParser* parser, b32 parse_block_as_statem if (parser->hit_unexpected_token) return static_if_node; parse_top_level_statement(parser); + consume_token_if_next(parser, ';'); } } @@ -3880,7 +3920,6 @@ static void parse_top_level_statement(OnyxParser* parser) { default: break; } - expect_token(parser, ';'); return; submit_binding_to_entities: @@ -4080,6 +4119,7 @@ static void parse_top_level_statements_until(OnyxParser* parser, TokenType tt) { if (parser->hit_unexpected_token) break; if (onyx_has_errors()) break; parse_top_level_statement(parser); + consume_token_if_next(parser, ';'); } } @@ -4146,6 +4186,8 @@ void onyx_parse(OnyxParser *parser) { parser->file_scope = scope_create(parser->allocator, parser->package->private_scope, parser->tokenizer->tokens[0].pos); parser->current_scope = parser->file_scope; + consume_token_if_next(parser, ';'); + if (parse_possible_directive(parser, "allow_stale_code") && !parser->package->is_included_somewhere && !context.options->no_stale_code) { @@ -4153,8 +4195,11 @@ void onyx_parse(OnyxParser *parser) { bh_arr_push(parser->alternate_entity_placement_stack, &parser->package->buffered_entities); } + consume_token_if_next(parser, ';'); + while (parse_possible_directive(parser, "package_doc")) { OnyxToken *doc_string = expect_token(parser, Token_Type_Literal_String); + consume_token_if_next(parser, ';'); bh_arr_push(parser->package->doc_strings, doc_string); }