From: Brendan Hansen Date: Tue, 19 May 2020 03:28:31 +0000 (-0500) Subject: Progress on the initial parser #1 X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=1b430c1387f4281c02572c334084482e046aab7b;p=onyx.git Progress on the initial parser #1 --- diff --git a/Makefile b/Makefile index a3c6d82e..b204645f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ OBJ_FILES=\ onyxlex.o \ onyxparser.o \ + onyxmsgs.o \ onyx.o CC=gcc diff --git a/bh.h b/bh.h index b6af9362..6ee6f882 100644 --- a/bh.h +++ b/bh.h @@ -321,6 +321,7 @@ typedef enum bh_file_standard { typedef struct bh_file_contents { bh_allocator allocator; + const char *filename; isize length; void* data; } bh_file_contents; @@ -389,7 +390,7 @@ typedef struct bh__arr { #define bh_arr_last(arr) ((arr)[bh__arrhead(arr)->length - 1]) #define bh_arr_end(arr, i) ((i) >= &(arr)[bh_arr_length(arr)]) -#define bh_arr_new(allocator_, arr, cap) (bh__arr_grow((allocator_), (void**) &arr, sizeof(*(arr)), cap)) +#define bh_arr_new(allocator_, arr, cap) (bh__arr_grow((allocator_), (void**) &(arr), sizeof(*(arr)), cap)) #define bh_arr_free(arr) (bh__arr_free((void**) &(arr))) #define bh_arr_copy(allocator_, arr) (bh__arr_copy((allocator_), (arr), sizeof(*(arr)))) @@ -601,7 +602,7 @@ i64 chars_match(char* ptr1, char* ptr2) { //------------------------------------------------------------------------------------- -// CUSTOM ALLOCATORS IMPLEMENTATION +// CUSTOM ALLOCATORS IMPLEMENTATION //------------------------------------------------------------------------------------- @@ -716,7 +717,7 @@ BH_ALLOCATOR_PROC(bh_arena_allocator_proc) { // Size too large for the arena return NULL; } - + if (alloc_arena->size + size >= alloc_arena->arena_size) { alloc_arena->size = sizeof(ptr); bh__arena_internal* new_arena = (bh__arena_internal *) bh_alloc(alloc_arena->backing, alloc_arena->arena_size); @@ -790,7 +791,7 @@ BH_ALLOCATOR_PROC(bh_scratch_allocator_proc) { if (scratch->curr >= scratch->end) { scratch->curr = scratch->memory; - retval = scratch->curr; + retval = scratch->curr; } } break; @@ -1103,6 +1104,7 @@ i64 bh_file_size(bh_file* file) { bh_file_contents bh_file_read_contents_bh_file(bh_allocator alloc, bh_file* file) { bh_file_contents fc = { .allocator = alloc, + .filename = file->filename, .length = 0, .data = NULL }; @@ -1225,7 +1227,7 @@ void* bh__arr_copy(bh_allocator alloc, void *arr, i32 elemsize) { } void bh__arr_deleten(void **arr, i32 elemsize, i32 index, i32 numelems) { - bh__arr* arrptr = bh__arrhead(*arr); + bh__arr* arrptr = bh__arrhead(*arr); if (index >= arrptr->length) return; // Can't delete past the end of the array if (numelems <= 0) return; // Can't delete nothing @@ -1328,13 +1330,13 @@ found_matching: } b32 bh__hash_has(bh__hash *table, i32 elemsize, char *key) { - u64 index = bh__hash_function(key, 0); + u64 index = bh__hash_function(key, 0); ptr arrptr = table->arrs[index]; if (arrptr == NULL) return 0; i32 len = bh_arr_length(arrptr); - i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE; + i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE; while (len--) { if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) return 1; @@ -1348,11 +1350,11 @@ ptr bh__hash_get(bh__hash *table, i32 elemsize, char *key) { u64 index = bh__hash_function(key, 0); ptr arrptr = table->arrs[index]; - i32 len = bh_arr_length(arrptr); - assert(arrptr != NULL); + if (arrptr == NULL) return NULL; i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE; + i32 len = bh_arr_length(arrptr); while (len--) { if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) { return bh_pointer_add(arrptr, BH__HASH_STORED_KEY_SIZE); @@ -1361,7 +1363,7 @@ ptr bh__hash_get(bh__hash *table, i32 elemsize, char *key) { return bh_pointer_add(arrptr, stride); } - return 0; + return NULL; } void bh__hash_delete(bh__hash *table, i32 elemsize, char *key) { diff --git a/onyx b/onyx index 40a17c46..1cada87b 100755 Binary files a/onyx and b/onyx differ diff --git a/onyx.c b/onyx.c index 42fbb892..8406fc07 100644 --- a/onyx.c +++ b/onyx.c @@ -6,6 +6,7 @@ #include // TODO: Replace with custom lib #include "onyxlex.h" +#include "onyxmsgs.h" #include "onyxparser.h" int main(int argc, char *argv[]) { @@ -21,16 +22,39 @@ int main(int argc, char *argv[]) { bh_file_contents fc = bh_file_read_contents(alloc, &source_file); bh_file_close(&source_file); - bh_arr(OnyxToken) token_arr = onyx_parse_tokens(alloc, &fc); + OnyxTokenizer tokenizer = onyx_tokenizer_create(alloc, &fc); + onyx_parse_tokens(&tokenizer); + bh_arr(OnyxToken) token_arr = tokenizer.tokens; printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr)); for (OnyxToken* it = token_arr; !bh_arr_end(token_arr, it); it++) { - printf("%s '%c' (Line %ld, Col %ld)\n", onyx_get_token_type_name(*it), *(char *)it->token, it->line_number, it->line_column); + onyx_token_null_toggle(*it); + printf("%s '%s' (%s:%ld:%ld)\n", onyx_get_token_type_name(*it), it->token, it->pos.filename, it->pos.line, it->pos.column); + onyx_token_null_toggle(*it); } + bh_arena msg_arena; + bh_arena_init(&msg_arena, alloc, 4096); + bh_allocator msg_alloc = bh_arena_allocator(&msg_arena); + + OnyxMessages msgs; + onyx_message_create(msg_alloc, &msgs); + + bh_arena ast_arena; + bh_arena_init(&ast_arena, alloc, 16 * 1024 * 1024); // 16MB + bh_allocator ast_alloc = bh_arena_allocator(&ast_arena); + + OnyxParser parser = onyx_parser_create(ast_alloc, &tokenizer, &msgs); + OnyxAstNode* program = onyx_parse(&parser); + + onyx_message_print(&msgs); + bh_file_contents_delete(&fc); - bh_arr_free(token_arr); + onyx_tokenizer_free(&tokenizer); + bh_arena_free(&msg_arena); + bh_arena_free(&ast_arena); + return 0; } diff --git a/onyxlex.c b/onyxlex.c index b25256f0..2f790a1b 100644 --- a/onyxlex.c +++ b/onyxlex.c @@ -37,7 +37,6 @@ static const char* onyx_token_type_names[] = { "TOKEN_TYPE_SYM_DOT", "TOKEN_TYPE_SYM_FSLASH", "TOKEN_TYPE_SYM_BSLASH", - "TOKEN_TYPE_SYM_TYPE_SIGNATURE", "TOKEN_TYPE_SYM_COLON", "TOKEN_TYPE_SYM_SEMICOLON", "TOKEN_TYPE_SYM_COMMA", @@ -76,8 +75,8 @@ static b32 token_lit(OnyxTokenizer* tokenizer, OnyxToken* tk, char* lit, OnyxTok tk->type = type; tk->token = tokenizer->curr; tk->length = len; - tk->line_number = tokenizer->line_number; - tk->line_column = (i32)(tokenizer->curr - tokenizer->line_start) + 1; + tk->pos.line = tokenizer->line_number; + tk->pos.column = (i32)(tokenizer->curr - tokenizer->line_start) + 1; tokenizer->curr += len; @@ -90,7 +89,14 @@ const char* onyx_get_token_type_name(OnyxToken tkn) { return onyx_token_type_names[tkn.type]; } -OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) { +void onyx_token_null_toggle(OnyxToken tkn) { + static char backup = 0; + char tmp = tkn.token[tkn.length]; + tkn.token[tkn.length] = backup; + backup = tmp; +} + +OnyxToken* onyx_get_token(OnyxTokenizer* tokenizer) { OnyxToken tk; // Skip whitespace @@ -100,8 +106,9 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) { tk.type = TOKEN_TYPE_UNKNOWN; tk.token = tokenizer->curr; tk.length = 1; - tk.line_number = tokenizer->line_number; - tk.line_column = (i32)(tokenizer->curr - tokenizer->line_start) + 1; + tk.pos.filename = tokenizer->filename; + tk.pos.line = tokenizer->line_number; + tk.pos.column = (i32)(tokenizer->curr - tokenizer->line_start) + 1; if (tokenizer->curr == tokenizer->end) { tk.type = TOKEN_TYPE_END_STREAM; @@ -110,7 +117,7 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) { // Comments if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') { - tokenizer->curr += 2; + tokenizer->curr += 2; tk.type = TOKEN_TYPE_COMMENT; tk.token = tokenizer->curr; u16 layers = 1; @@ -122,7 +129,7 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) { tk.type = TOKEN_TYPE_END_STREAM; break; } - + if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') { layers++; INCREMENT_CURR_TOKEN(tokenizer); @@ -132,7 +139,7 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) { layers--; INCREMENT_CURR_TOKEN(tokenizer); } - } + } INCREMENT_CURR_TOKEN(tokenizer); @@ -168,7 +175,6 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) { LITERAL_TOKEN("%", TOKEN_TYPE_SYM_PERCENT); LITERAL_TOKEN("/", TOKEN_TYPE_SYM_FSLASH); LITERAL_TOKEN("\\", TOKEN_TYPE_SYM_BSLASH); - LITERAL_TOKEN("::", TOKEN_TYPE_SYM_TYPE_SIGNATURE); LITERAL_TOKEN(":", TOKEN_TYPE_SYM_COLON); LITERAL_TOKEN(";", TOKEN_TYPE_SYM_SEMICOLON); LITERAL_TOKEN(",", TOKEN_TYPE_SYM_COMMA); @@ -235,26 +241,35 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) { INCREMENT_CURR_TOKEN(tokenizer); token_parsed: - return tk; + bh_arr_push(tokenizer->tokens, tk); + + return &tokenizer->tokens[bh_arr_length(tokenizer->tokens) - 1]; } -bh_arr(OnyxToken) onyx_parse_tokens(bh_allocator tk_alloc, bh_file_contents *fc) { +OnyxTokenizer onyx_tokenizer_create(bh_allocator allocator, bh_file_contents *fc) { OnyxTokenizer tknizer = { .start = fc->data, .curr = fc->data, .end = fc->data + fc->length, + + .filename = fc->filename, + .line_number = 1, .line_start = fc->data, + .tokens = NULL, }; - bh_arr(OnyxToken) token_arr = NULL; - bh_arr_new(tk_alloc, token_arr, 512); + bh_arr_new(allocator, tknizer.tokens, 512); + return tknizer; +} - OnyxToken tk; - do { - tk = onyx_get_token(&tknizer); - bh_arr_push(token_arr, tk); - } while (tk.type != TOKEN_TYPE_END_STREAM); +void onyx_tokenizer_free(OnyxTokenizer* tokenizer) { + bh_arr_free(tokenizer->tokens); +} - return token_arr; -} \ No newline at end of file +void onyx_parse_tokens(OnyxTokenizer* tokenizer) { + OnyxToken* tk; + do { + tk = onyx_get_token(tokenizer); + } while (tk->type != TOKEN_TYPE_END_STREAM); +} diff --git a/onyxlex.h b/onyxlex.h index 15b19b40..9a3b3f12 100644 --- a/onyxlex.h +++ b/onyxlex.h @@ -3,13 +3,6 @@ #include "bh.h" -typedef struct OnyxTokenizer { - char *start, *curr, *end; - - char* line_start; - u64 line_number; -} OnyxTokenizer; - typedef enum OnyxTokenType { TOKEN_TYPE_UNKNOWN, TOKEN_TYPE_END_STREAM, @@ -46,7 +39,6 @@ typedef enum OnyxTokenType { TOKEN_TYPE_SYM_DOT, TOKEN_TYPE_SYM_FSLASH, TOKEN_TYPE_SYM_BSLASH, - TOKEN_TYPE_SYM_TYPE_SIGNATURE, TOKEN_TYPE_SYM_COLON, TOKEN_TYPE_SYM_SEMICOLON, TOKEN_TYPE_SYM_COMMA, @@ -64,15 +56,34 @@ typedef enum OnyxTokenType { TOKEN_TYPE_COUNT } OnyxTokenType; +typedef struct OnyxFilePos { + const char* filename; + u64 line, column; +} OnyxFilePos; + typedef struct OnyxToken { OnyxTokenType type; isize length; char* token; - u64 line_number, line_column; + OnyxFilePos pos; } OnyxToken; +typedef struct OnyxTokenizer { + char *start, *curr, *end; + + const char* filename; + + char* line_start; + u64 line_number; + + bh_arr(OnyxToken) tokens; +} OnyxTokenizer; + const char* onyx_get_token_type_name(OnyxToken tkn); -OnyxToken onyx_get_token(OnyxTokenizer* tokenizer); -bh_arr(OnyxToken) onyx_parse_tokens(bh_allocator tk_alloc, bh_file_contents *fc); +void onyx_token_null_toggle(OnyxToken tkn); +OnyxToken* onyx_get_token(OnyxTokenizer* tokenizer); +OnyxTokenizer onyx_tokenizer_create(bh_allocator allocator, bh_file_contents *fc); +void onyx_tokenizer_free(OnyxTokenizer* tokenizer); +void onyx_parse_tokens(OnyxTokenizer* tokenizer); -#endif \ No newline at end of file +#endif diff --git a/onyxmsgs.c b/onyxmsgs.c new file mode 100644 index 00000000..9838fa02 --- /dev/null +++ b/onyxmsgs.c @@ -0,0 +1,44 @@ + +#include "onyxmsgs.h" + +static const char* msg_formats[] = { + "expected token '%s'", + "unexpected token '%s'", + "unknown type '%s'" +}; + +void onyx_message_add(OnyxMessages* msgs, OnyxMessageType type, OnyxFilePos pos, ...) { + OnyxMessage* msg = bh_alloc_item(msgs->allocator, OnyxMessage); + msg->type = type; + msg->pos = pos; + + va_list arg_list; + va_start(arg_list, pos); + vsnprintf(msg->text, ONYX_MSG_BUFFER_SIZE, msg_formats[type], arg_list); + va_end(arg_list); + + OnyxMessage** walker = &msgs->first; + while (*walker && (*walker)->pos.line < pos.line) walker = &(*walker)->next; + while (*walker && (*walker)->pos.line == pos.line && (*walker)->pos.column < pos.column) walker = &(*walker)->next; + + msg->next = *walker; + *walker = msg; +} + +void onyx_message_print(OnyxMessages* msgs) { + OnyxMessage* msg = msgs->first; + + while (msg) { + if (msg->pos.filename) { + printf("(%s:%ld:%ld) %s\n", msg->pos.filename, msg->pos.line, msg->pos.column, msg->text); + } else { + printf("(%ld:%ld) %s\n", msg->pos.line, msg->pos.column, msg->text); + } + msg = msg->next; + } +} + +void onyx_message_create(bh_allocator allocator, OnyxMessages* msgs) { + msgs->allocator = allocator; + msgs->first = NULL; +} diff --git a/onyxmsgs.h b/onyxmsgs.h new file mode 100644 index 00000000..db3a22ce --- /dev/null +++ b/onyxmsgs.h @@ -0,0 +1,36 @@ +#ifndef ONYXMSGS_H +#define ONYXMSGS_H + +#include "bh.h" +#include "onyxlex.h" + +#include + +#define ONYX_MSG_BUFFER_SIZE 256 + +typedef enum OnyxMessageType { + ONYX_MESSAGE_TYPE_EXPECTED_TOKEN, + ONYX_MESSAGE_TYPE_UNEXPECTED_TOKEN, + ONYX_MESSAGE_TYPE_UNKNOWN_TYPE, + + ONYX_MESSAGE_TYPE_COUNT, +} OnyxMessageType; + +typedef struct OnyxMessage { + OnyxMessageType type; + OnyxFilePos pos; + struct OnyxMessage* next; + char text[ONYX_MSG_BUFFER_SIZE]; +} OnyxMessage; + +typedef struct OnyxMessages { + bh_allocator allocator; + + OnyxMessage* first; +} OnyxMessages; + +void onyx_message_add(OnyxMessages* msgs, OnyxMessageType type, OnyxFilePos pos, ...); +void onyx_message_print(OnyxMessages* msgs); +void onyx_message_create(bh_allocator allocator, OnyxMessages* msgs); + +#endif \ No newline at end of file diff --git a/onyxparser.c b/onyxparser.c index 8db202c7..002dd6ce 100644 --- a/onyxparser.c +++ b/onyxparser.c @@ -1,4 +1,5 @@ +#include "onyxlex.h" #include "onyxparser.h" struct OnyxTypeInfo builtin_types[] = { @@ -20,4 +21,271 @@ struct OnyxTypeInfo builtin_types[] = { { ONYX_TYPE_INFO_KIND_FLOAT32, 4, "f32", 0, 0, 1, 0 }, { ONYX_TYPE_INFO_KIND_FLOAT64, 8, "f64", 0, 0, 1, 0 }, { ONYX_TYPE_INFO_KIND_SOFT_FLOAT, 8, "sf64", 0, 0, 1, 0 }, -}; \ No newline at end of file + + { 0xffffffff } +}; + +static OnyxAstNode error_node = { { ONYX_AST_NODE_KIND_ERROR, 0, NULL, &builtin_types[0], NULL, NULL, NULL } }; + +static void parser_next_token(OnyxParser* parser) { + parser->prev_token = parser->curr_token; + parser->curr_token++; +} + +static b32 is_terminating_token(OnyxTokenType token_type) { + switch (token_type) { + case TOKEN_TYPE_SYM_SEMICOLON: + case TOKEN_TYPE_CLOSE_BRACE: + case TOKEN_TYPE_OPEN_BRACE: + case TOKEN_TYPE_END_STREAM: + return 1; + default: + return 0; + } +} + +// Advances to next token no matter what +static OnyxToken* expect(OnyxParser* parser, OnyxTokenType token_type) { + OnyxToken* token = parser->curr_token; + if (token->type != token_type) { + onyx_message_add(parser->msgs, ONYX_MESSAGE_TYPE_EXPECTED_TOKEN, token->pos, onyx_get_token_type_name(*token)); + return NULL; + } + + parser_next_token(parser); + return token; +} + +static OnyxAstNode* parse_expression(OnyxParser* parser) { + return &error_node; +} + +static OnyxAstNode* parse_if_stmt(OnyxParser* parser) { + return &error_node; +} + +static OnyxAstNode* parse_block(OnyxParser* parser) { + assert(parser->curr_token->type == TOKEN_TYPE_OPEN_BRACE); + + return &error_node; +} + +static OnyxAstNode* parse_expression_statement(OnyxParser* parser) { + +} + +static OnyxAstNode* parse_return_statement(OnyxParser* parser) { + // Only should get here with a return as the current token + assert(parser->curr_token->type == TOKEN_TYPE_KEYWORD_RETURN); + + OnyxAstNode* expr = NULL; + + OnyxToken* return_token = parser->curr_token; + parser_next_token(parser); + if (parser->curr_token->type != TOKEN_TYPE_SYM_SEMICOLON) { + expr = parse_expression(parser); + + if (expr == &error_node) { + return &error_node; + } + } +} + +static OnyxAstNode* parse_statement(OnyxParser* parser, b32 is_top_level) { + switch (parser->curr_token->type) { + case TOKEN_TYPE_KEYWORD_RETURN: + return parse_return_statement(parser); + + case TOKEN_TYPE_OPEN_BRACE: + return (OnyxAstNode *) parse_block(parser); + + case TOKEN_TYPE_SYMBOL: + case TOKEN_TYPE_OPEN_PAREN: + case TOKEN_TYPE_SYM_PLUS: + case TOKEN_TYPE_SYM_MINUS: + case TOKEN_TYPE_SYM_BANG: + case TOKEN_TYPE_LITERAL_NUMERIC: + case TOKEN_TYPE_LITERAL_STRING: + return parse_expression_statement(parser); + + case TOKEN_TYPE_KEYWORD_IF: + return parse_if_stmt(parser); + + case TOKEN_TYPE_SYM_SEMICOLON: + return NULL; + + default: + printf("ERROR\n"); + parser_next_token(parser); + return NULL; + } +} + +static OnyxTypeInfo* parse_type(OnyxParser* parser) { + OnyxTypeInfo* type_info = &builtin_types[ONYX_TYPE_INFO_KIND_UNKNOWN]; + + OnyxToken* symbol = expect(parser, TOKEN_TYPE_SYMBOL); + if (symbol == NULL) return type_info; + + onyx_token_null_toggle(*symbol); + + if (!bh_hash_has(OnyxAstNode*, parser->identifiers, symbol->token)) { + onyx_message_add(parser->msgs, ONYX_MESSAGE_TYPE_UNKNOWN_TYPE, symbol->pos, symbol->token); + } else { + OnyxAstNode* type_info_node = bh_hash_get(OnyxAstNode*, parser->identifiers, symbol->token); + + if (type_info_node->kind == ONYX_AST_NODE_KIND_TYPE) { + type_info = type_info_node->type; + } + } + + onyx_token_null_toggle(*symbol); + return type_info; +} + +static OnyxAstNodeParam* parse_function_params(OnyxParser* parser) { + expect(parser, TOKEN_TYPE_OPEN_PAREN); + + if (parser->curr_token->type == TOKEN_TYPE_CLOSE_PAREN) { + parser_next_token(parser); + return NULL; + } + + OnyxAstNodeParam* first_param = NULL; + + OnyxAstNodeParam* curr_param = NULL; + OnyxAstNodeParam** walker = NULL; + + OnyxToken* symbol; + while (parser->curr_token->type != TOKEN_TYPE_CLOSE_PAREN) { + if (parser->curr_token->type == TOKEN_TYPE_SYM_COMMA) parser_next_token(parser); + + symbol = expect(parser, TOKEN_TYPE_SYMBOL); + curr_param = (OnyxAstNodeParam *) onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_PARAM); + curr_param->token = symbol; + curr_param->type = parse_type(parser); + + curr_param->next = NULL; + if (first_param == NULL) { + first_param = curr_param; + } else { + (*walker)->next = curr_param; + } + walker = &curr_param; + } + + parser_next_token(parser); // Skip the ) + return first_param; +} + +static OnyxAstNodeFuncDef* parse_function_definition(OnyxParser* parser) { + expect(parser, TOKEN_TYPE_KEYWORD_PROC); + + OnyxAstNodeFuncDef* func_def = (OnyxAstNodeFuncDef *) onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_FUNCDEF); + func_def->param_count = 0; + + OnyxAstNodeParam* params = parse_function_params(parser); + func_def->params = params; + + for (OnyxAstNode* walker = (OnyxAstNode *) params; walker != NULL; walker = walker->next) + func_def->param_count++; + + expect(parser, TOKEN_TYPE_RIGHT_ARROW); + + OnyxTypeInfo* return_type = parse_type(parser); + func_def->return_type = return_type; + + func_def->body = NULL; + return func_def; +} + + +static OnyxAstNode* parse_top_level_statement(OnyxParser* parser) { + switch (parser->curr_token->type) { + case TOKEN_TYPE_KEYWORD_USE: + assert(0); + break; + + case TOKEN_TYPE_KEYWORD_EXPORT: + assert(0); + break; + + case TOKEN_TYPE_SYMBOL: { + OnyxToken* symbol = parser->curr_token; + parser_next_token(parser); + + expect(parser, TOKEN_TYPE_SYM_COLON); + expect(parser, TOKEN_TYPE_SYM_COLON); + + if (parser->curr_token->type == TOKEN_TYPE_KEYWORD_PROC) { + OnyxAstNodeFuncDef* func_def = parse_function_definition(parser); + func_def->token = symbol; + return (OnyxAstNode *) func_def; + + } else if (parser->curr_token->type == TOKEN_TYPE_KEYWORD_STRUCT) { + // Handle struct case + assert(0); + } else { + onyx_message_add(parser->msgs, + ONYX_MESSAGE_TYPE_UNEXPECTED_TOKEN, + parser->curr_token->pos, + onyx_get_token_type_name(*parser->curr_token)); + } + } break; + } + parser_next_token(parser); + return NULL; +} + + + + + + + +OnyxAstNode* onyx_ast_node_new(bh_allocator alloc, OnyxAstNodeKind kind) {\ + OnyxAstNode* node = (OnyxAstNode *) bh_alloc(alloc, sizeof(OnyxAstNode)); + node->kind = kind; + + return node; +} + +OnyxParser onyx_parser_create(bh_allocator alloc, OnyxTokenizer *tokenizer, OnyxMessages* msgs) { + OnyxParser parser; + + bh_hash_init(bh_heap_allocator(), parser.identifiers); + + OnyxTypeInfo* it = &builtin_types[0]; + while (it->kind != 0xffffffff) { + OnyxAstNode* tmp = onyx_ast_node_new(alloc, ONYX_AST_NODE_KIND_TYPE); + tmp->type = it; + bh_hash_put(OnyxAstNode*, parser.identifiers, (char *)it->name, tmp); + it++; + } + + parser.allocator = alloc; + parser.tokenizer = tokenizer; + parser.curr_token = tokenizer->tokens; + parser.prev_token = NULL; + parser.msgs = msgs; + + return parser; +} + +OnyxAstNode* onyx_parse(OnyxParser *parser) { + OnyxAstNode* program = onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_PROGRAM); + + OnyxAstNode** prev_stmt = &program->next; + OnyxAstNode* curr_stmt = NULL; + while (parser->curr_token->type != TOKEN_TYPE_END_STREAM) { + curr_stmt = parse_top_level_statement(parser); + + // Building a linked list of statements down the "next" chain + if (curr_stmt != NULL && curr_stmt != &error_node) { + *prev_stmt = curr_stmt; + prev_stmt = &curr_stmt->next; + } + } + + return program; +} diff --git a/onyxparser.h b/onyxparser.h index c8b2c7e2..5b2731fd 100644 --- a/onyxparser.h +++ b/onyxparser.h @@ -1,50 +1,63 @@ +#ifndef ONYXPARSER_H +#define ONYXPARSER_H + #define BH_NO_STRING #include "bh.h" #include "onyxlex.h" +#include "onyxmsgs.h" + +typedef union OnyxAstNode OnyxAstNode; +typedef struct OnyxAstNodeBlock OnyxAstNodeBlock; +typedef struct OnyxAstNodeParam OnyxAstNodeParam; +typedef struct OnyxAstNodeFuncDef OnyxAstNodeFuncDef; typedef struct OnyxParser { - OnyxTokenizer tokenizer; - OnyxToken *prev; - OnyxToken *curr; + OnyxTokenizer *tokenizer; + OnyxToken *prev_token; + OnyxToken *curr_token; + + bh_hash(OnyxAstNode*) identifiers; + + OnyxMessages *msgs; bh_allocator allocator; } OnyxParser; typedef enum OnyxAstNodeKind { - ONYX_PARSE_NODE_KIND_ERROR, - ONYX_PARSE_NODE_KIND_PROGRAM, - - ONYX_PARSE_NODE_KIND_FUNCDEF, - ONYX_PARSE_NODE_KIND_BLOCK, - ONYX_PARSE_NODE_KIND_SCOPE, - - ONYX_PARSE_NODE_KIND_ADD, - ONYX_PARSE_NODE_KIND_SUB, - ONYX_PARSE_NODE_KIND_MUL, - ONYX_PARSE_NODE_KIND_DIVIDE, - ONYX_PARSE_NODE_KIND_MODULUS, - ONYX_PARSE_NODE_KIND_NEGATE, - - ONYX_PARSE_NODE_KIND_TYPE, - ONYX_PARSE_NODE_KIND_LITERAL, - ONYX_PARSE_NODE_KIND_CAST, - ONYX_PARSE_NODE_KIND_PARAM, - ONYX_PARSE_NODE_KIND_CALL, - ONYX_PARSE_NODE_KIND_RETURN, - - ONYX_PARSE_NODE_KIND_EQUAL, - ONYX_PARSE_NODE_KIND_NOT_EQUAL, - ONYX_PARSE_NODE_KIND_GREATER, - ONYX_PARSE_NODE_KIND_GREATER_EQUAL, - ONYX_PARSE_NODE_KIND_LESS, - ONYX_PARSE_NODE_KIND_LESS_EQUAL, - ONYX_PARSE_NODE_KIND_NOT, - - ONYX_PARSE_NODE_KIND_IF, - ONYX_PARSE_NODE_KIND_LOOP, - - ONYX_PARSE_NODE_KIND_COUNT + ONYX_AST_NODE_KIND_ERROR, + ONYX_AST_NODE_KIND_PROGRAM, + + ONYX_AST_NODE_KIND_FUNCDEF, + ONYX_AST_NODE_KIND_BLOCK, + ONYX_AST_NODE_KIND_SCOPE, + + ONYX_AST_NODE_KIND_ADD, + ONYX_AST_NODE_KIND_SUB, + ONYX_AST_NODE_KIND_MUL, + ONYX_AST_NODE_KIND_DIVIDE, + ONYX_AST_NODE_KIND_MODULUS, + ONYX_AST_NODE_KIND_NEGATE, + + ONYX_AST_NODE_KIND_TYPE, + ONYX_AST_NODE_KIND_LITERAL, + ONYX_AST_NODE_KIND_CAST, + ONYX_AST_NODE_KIND_PARAM, + ONYX_AST_NODE_KIND_CALL, + ONYX_AST_NODE_KIND_RETURN, + + ONYX_AST_NODE_KIND_EQUAL, + ONYX_AST_NODE_KIND_NOT_EQUAL, + ONYX_AST_NODE_KIND_GREATER, + ONYX_AST_NODE_KIND_GREATER_EQUAL, + ONYX_AST_NODE_KIND_LESS, + ONYX_AST_NODE_KIND_LESS_EQUAL, + ONYX_AST_NODE_KIND_NOT, + + ONYX_AST_NODE_KIND_IF, + ONYX_AST_NODE_KIND_LOOP, + + ONYX_AST_NODE_KIND_COUNT } OnyxAstNodeKind; typedef enum OnyxTypeInfoKind { @@ -79,11 +92,6 @@ typedef struct OnyxTypeInfo { extern OnyxTypeInfo builtin_types[]; -typedef union OnyxAstNode OnyxAstNode; -typedef struct OnyxAstNodeBlock OnyxAstNodeBlock; -typedef struct OnyxAstNodeParam OnyxAstNodeParam; -typedef struct OnyxAstNodeFuncDef OnyxAstNodeFuncDef; - typedef enum OnyxAstFlags { ONYX_AST_BLOCK_FLAG_HAS_RETURN = BH_BIT(1), ONYX_AST_BLOCK_FLAG_TOP_LEVEL = BH_BIT(2), @@ -103,9 +111,9 @@ struct OnyxAstNodeBlock { struct OnyxAstNodeParam { OnyxAstNodeKind kind; u32 flags; - OnyxToken *token; + OnyxToken *token; // Symbol name i.e. 'a', 'b' OnyxTypeInfo *type; - OnyxAstNode *next; + OnyxAstNodeParam *next; OnyxAstNode *left; OnyxAstNode *right; }; @@ -118,7 +126,6 @@ struct OnyxAstNodeFuncDef { OnyxAstNodeBlock *body; OnyxAstNodeParam *params; u64 param_count; // Same size as ptr - u64 unused1; }; union OnyxAstNode { @@ -128,15 +135,17 @@ union OnyxAstNode { OnyxAstNodeKind kind; u32 flags; OnyxToken *token; - OnyxTypeInfo* type; + OnyxTypeInfo *type; OnyxAstNode *next; OnyxAstNode *left; OnyxAstNode *right; - } as_node; + }; OnyxAstNodeBlock as_block; }; -ptr onyx_ast_node_new(bh_allocator alloc, OnyxAstNodeKind kind); -OnyxParser onyx_parser_create(bh_allocator alloc, OnyxTokenizer tokenizer); -OnyxAstNode* onyx_parse(OnyxParser parser); \ No newline at end of file +OnyxAstNode* onyx_ast_node_new(bh_allocator alloc, OnyxAstNodeKind kind); +OnyxParser onyx_parser_create(bh_allocator alloc, OnyxTokenizer *tokenizer, OnyxMessages* msgs); +OnyxAstNode* onyx_parse(OnyxParser *parser); + +#endif // #ifndef ONYXPARSER_H \ No newline at end of file diff --git a/progs/minimal.onyx b/progs/minimal.onyx index 7b9209ad..0d4bee4b 100644 --- a/progs/minimal.onyx +++ b/progs/minimal.onyx @@ -1,3 +1,3 @@ add :: proc(a i32, b i32) -> i32 { - return a + b -}; \ No newline at end of file + return a + b; +};