From: Brendan Hansen Date: Mon, 22 Jun 2020 21:43:54 +0000 (-0500) Subject: Added initial type checking X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=89709b7284638a684a9f2a7c882f761c58dff096;p=onyx.git Added initial type checking --- diff --git a/.vimspector.json b/.vimspector.json index a52e2a69..6444b264 100644 --- a/.vimspector.json +++ b/.vimspector.json @@ -6,7 +6,7 @@ "type": "cppdbg", "request": "launch", "program": "${workspaceFolder}/onyx", - "args": ["progs/minimal.onyx"], + "args": ["progs/new_minimal.onyx"], "stopAtEntry": true, "cwd": "${workspaceFolder}", "environment": [], diff --git a/Makefile b/Makefile index 817e8f10..b300bbaf 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ OBJ_FILES=\ build/onyxparser.o \ build/onyxsempass.o \ build/onyxsymres.o \ + build/onyxtypecheck.o \ build/onyxmsgs.o \ build/onyxutils.o \ build/onyxwasm.o \ diff --git a/include/onyxmsgs.h b/include/onyxmsgs.h index b1d0f50d..b6e0a8fb 100644 --- a/include/onyxmsgs.h +++ b/include/onyxmsgs.h @@ -9,6 +9,7 @@ #define ONYX_MSG_BUFFER_SIZE 256 typedef enum OnyxMessageType { + ONYX_MESSAGE_TYPE_LITERAL, ONYX_MESSAGE_TYPE_EXPECTED_TOKEN, ONYX_MESSAGE_TYPE_UNEXPECTED_TOKEN, ONYX_MESSAGE_TYPE_UNKNOWN_TYPE, @@ -20,6 +21,12 @@ typedef enum OnyxMessageType { ONYX_MESSAGE_TYPE_ASSIGNMENT_TYPE_MISMATCH, ONYX_MESSAGE_TYPE_EXPECTED_EXPRESSION, + ONYX_MESSAGE_TYPE_FUNCTION_RETURN_MISMATCH, + ONYX_MESSAGE_TYPE_FUNCTION_PARAM_TYPE_MISMATCH, + + ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE, + ONYX_MESSAGE_TYPE_UNRESOLVED_SYMBOL, + ONYX_MESSAGE_TYPE_COUNT, } OnyxMessageType; diff --git a/include/onyxsempass.h b/include/onyxsempass.h index c2e6bcf7..238e1035 100644 --- a/include/onyxsempass.h +++ b/include/onyxsempass.h @@ -18,14 +18,21 @@ typedef struct OnyxSemPassState { bh_allocator allocator, node_allocator; OnyxMessages *msgs; + // NOTE: Used in symbol resolution phase OnyxAstNodeScope* curr_scope; + // NOTE: Used in type checking phase + OnyxTypeInfo* expected_return_type; + 
bh_table(SemPassSymbol *) symbols; } OnyxSemPassState; // NOTE: Resolving all symbols in the tree void onyx_resolve_symbols(OnyxSemPassState* state, OnyxAstNode* root_node); +// NOTE: Inferring and checking types in the tree +void onyx_type_check(OnyxSemPassState* state, OnyxAstNode* root_node); + // NOTE: Full semantic pass OnyxSemPassState onyx_sempass_create(bh_allocator alloc, bh_allocator node_alloc, OnyxMessages* msgs); void onyx_sempass(OnyxSemPassState* state, OnyxAstNode* root_node); diff --git a/onyx b/onyx index dc020938..06ca0405 100755 Binary files a/onyx and b/onyx differ diff --git a/progs/new_minimal.onyx b/progs/new_minimal.onyx index 3114932b..56c8f0b6 100644 --- a/progs/new_minimal.onyx +++ b/progs/new_minimal.onyx @@ -1,18 +1,24 @@ print :: foreign "host" "print" proc (value i32) --- -diff_square :: proc (a i32, b i32) -> i32 { - // Typechecked - c := a - b; // Mutable - d :: a + b; // Constant +simple_test :: proc { + a: i32 = 5; + b: i64 = 6 as i64; - { - c : i32 = a * 2 + foo(5, 2); - d : i32 = (c + a) * 2; + if a > b { + foo := 123; + print(foo); } - return c * d; + c :: a + foo(); + print(c); } -foo :: proc (a i32, b i32) -> i32 { - return a * 5 + b * 7; +foo :: proc -> i32 { + return 10; +} + +print_nums :: proc (a i32, b i32, c i32) { + print(a); + print(b); + print(c); } diff --git a/src/onyx.c b/src/onyx.c index 788be323..f86095d6 100644 --- a/src/onyx.c +++ b/src/onyx.c @@ -63,6 +63,7 @@ int main(int argc, char *argv[]) { // NOTE: if there are errors, assume the parse tree was generated wrong, // even if it may have still been generated correctly. 
if (onyx_message_has_errors(&msgs)) { + bh_printf("\n\n"); onyx_message_print(&msgs); goto main_exit; } else { diff --git a/src/onyxmsgs.c b/src/onyxmsgs.c index a5f0c7d8..3f87b874 100644 --- a/src/onyxmsgs.c +++ b/src/onyxmsgs.c @@ -2,6 +2,7 @@ #include "onyxmsgs.h" static const char* msg_formats[] = { + "%s", "expected token '%s', got '%s'", "unexpected token '%s'", "unknown type '%s'", @@ -10,8 +11,14 @@ static const char* msg_formats[] = { "unknown symbol '%s'", "redefinition of function '%s'", "mismatched types for binary operator, '%s', '%s'", - "mismatched types on assignment, '%s', '%s'", + "mismatched types on assignment, expected '%s', got '%s'", "expected expression, got '%s'", + + "returning '%s' from function that returns '%s'", + "function '%b' expected type '%s' in position '%d', got '%s'", + + "unable to resolve type for symbol '%b'", + "unable to resolve symbol '%b'", }; void onyx_message_add(OnyxMessages* msgs, OnyxMessageType type, OnyxFilePos pos, ...) { diff --git a/src/onyxparser.c b/src/onyxparser.c index 01dc0fec..6286e4ce 100644 --- a/src/onyxparser.c +++ b/src/onyxparser.c @@ -209,9 +209,8 @@ static OnyxAstNode* parse_factor(OnyxParser* parser) { } // NOTE: Function call - expect(parser, TOKEN_TYPE_OPEN_PAREN); - OnyxAstNodeCall* call_node = (OnyxAstNodeCall *) onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_CALL); + call_node->token = expect(parser, TOKEN_TYPE_OPEN_PAREN); call_node->callee = sym_node; // NOTE: Return type is stored on function definition's type // This may have to change if we want multiple returns @@ -319,6 +318,7 @@ static OnyxAstNode* parse_expression(OnyxParser* parser) { parser_next_token(parser); OnyxAstNode* bin_op = onyx_ast_node_new(parser->allocator, bin_op_kind); + bin_op->token = bin_op_tok; while ( !bh_arr_is_empty(tree_stack) && get_precedence(bh_arr_last(tree_stack)->kind) >= get_precedence(bin_op_kind)) @@ -469,10 +469,9 @@ static b32 parse_symbol_statement(OnyxParser* parser, OnyxAstNode** ret) { 
} static OnyxAstNode* parse_return_statement(OnyxParser* parser) { - expect(parser, TOKEN_TYPE_KEYWORD_RETURN); - OnyxAstNode* return_node = onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_RETURN); - return_node->type = &builtin_types[ONYX_TYPE_INFO_KIND_VOID]; + return_node->token = expect(parser, TOKEN_TYPE_KEYWORD_RETURN); + OnyxAstNode* expr = NULL; if (parser->curr_token->type != TOKEN_TYPE_SYM_SEMICOLON) { diff --git a/src/onyxsempass.c b/src/onyxsempass.c index 7a36627e..d593cb30 100644 --- a/src/onyxsempass.c +++ b/src/onyxsempass.c @@ -19,4 +19,5 @@ OnyxSemPassState onyx_sempass_create(bh_allocator alloc, bh_allocator node_alloc void onyx_sempass(OnyxSemPassState* state, OnyxAstNode* root_node) { onyx_resolve_symbols(state, root_node); + onyx_type_check(state, root_node); } diff --git a/src/onyxsymres.c b/src/onyxsymres.c index edccbecf..6e2b2a2a 100644 --- a/src/onyxsymres.c +++ b/src/onyxsymres.c @@ -64,7 +64,7 @@ static OnyxAstNode* symbol_resolve(OnyxSemPassState* state, OnyxAstNode* symbol) symbol->token->token); onyx_token_null_toggle(*symbol->token); - return NULL; + return symbol; } SemPassSymbol* sp_sym = bh_table_get(SemPassSymbol *, state->symbols, symbol->token->token); @@ -159,7 +159,7 @@ static void symres_expression(OnyxSemPassState* state, OnyxAstNode** expr) { case ONYX_AST_NODE_KIND_CALL: symres_call(state, *expr); break; - case ONYX_AST_NODE_KIND_BLOCK: symres_block(state, &(*expr)->as_block); + case ONYX_AST_NODE_KIND_BLOCK: symres_block(state, &(*expr)->as_block); break; case ONYX_AST_NODE_KIND_SYMBOL: *expr = symbol_resolve(state, *expr); @@ -281,7 +281,6 @@ void onyx_resolve_symbols(OnyxSemPassState* state, OnyxAstNode* root_node) { walker = walker->next; } - // NOTE: First, resolve all symbols walker = root_node; while (walker) { switch (walker->kind) { diff --git a/src/onyxtypecheck.c b/src/onyxtypecheck.c new file mode 100644 index 00000000..28b1f72e --- /dev/null +++ b/src/onyxtypecheck.c @@ -0,0 +1,276 @@ +#define 
BH_DEBUG
+#include "onyxsempass.h"
+
+static void typecheck_function_definition(OnyxSemPassState* state, OnyxAstNodeFuncDef* func);
+static void typecheck_block(OnyxSemPassState* state, OnyxAstNodeBlock* block);
+static void typecheck_statement_chain(OnyxSemPassState* state, OnyxAstNode* start);
+static void typecheck_statement(OnyxSemPassState* state, OnyxAstNode* stmt);
+static void typecheck_assignment(OnyxSemPassState* state, OnyxAstNode* assign);
+static void typecheck_return(OnyxSemPassState* state, OnyxAstNode* retnode);
+static void typecheck_if(OnyxSemPassState* state, OnyxAstNodeIf* ifnode);
+static void typecheck_call(OnyxSemPassState* state, OnyxAstNodeCall* call);
+static void typecheck_expression(OnyxSemPassState* state, OnyxAstNode* expr);
+
+// NOTE: An expression that failed to typecheck can be left with a NULL
+// type, so messages must not read '->name' from it directly.
+static const char* type_name(OnyxTypeInfo* type) {
+    return type != NULL ? type->name : "unknown";
+}
+
+// NOTE: Checks the right hand side, then either infers the type of the
+// left hand side from it or requires both sides to have the same type.
+static void typecheck_assignment(OnyxSemPassState* state, OnyxAstNode* assign) {
+    if (assign->left->kind == ONYX_AST_NODE_KIND_SYMBOL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_UNRESOLVED_SYMBOL,
+                assign->left->token->pos,
+                assign->left->token->token, assign->left->token->length);
+        return;
+    }
+
+    typecheck_expression(state, assign->right);
+
+    // NOTE: Never copy a NULL type onto the left hand side. typecheck_block
+    // reads the type of every local in the scope afterwards.
+    if (assign->right->type == NULL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
+                assign->left->token->pos,
+                assign->left->token->token, assign->left->token->length);
+        return;
+    }
+
+    if (assign->left->type == NULL || !assign->left->type->is_known) {
+        assign->left->type = assign->right->type;
+        return;
+    }
+
+    if (assign->left->type != assign->right->type) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_ASSIGNMENT_TYPE_MISMATCH,
+                assign->token->pos,
+                type_name(assign->left->type), type_name(assign->right->type));
+    }
+}
+
+// NOTE: A returned expression must have the return type of the function
+// being checked. A bare return is rejected when that type has a size.
+static void typecheck_return(OnyxSemPassState* state, OnyxAstNode* retnode) {
+    if (retnode->left == NULL) {
+        if (state->expected_return_type->size > 0) {
+            onyx_message_add(state->msgs,
+                    ONYX_MESSAGE_TYPE_LITERAL,
+                    retnode->token->pos,
+                    "returning from non-void function without value");
+        }
+        return;
+    }
+
+    typecheck_expression(state, retnode->left);
+
+    if (retnode->left->type != state->expected_return_type) {
+        // NOTE: Not every expression is guaranteed to carry a token, so
+        // fall back to the position of the 'return' keyword.
+        OnyxFilePos pos = retnode->left->token != NULL
+            ? retnode->left->token->pos
+            : retnode->token->pos;
+
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_FUNCTION_RETURN_MISMATCH,
+                pos,
+                type_name(retnode->left->type),
+                type_name(state->expected_return_type));
+    }
+}
+
+// NOTE: Checks the statements of both branches of an if statement.
+static void typecheck_if(OnyxSemPassState* state, OnyxAstNodeIf* ifnode) {
+    // TODO: The condition is not visited here yet, so it is neither
+    // typechecked nor required to be a boolean.
+    if (ifnode->true_block) typecheck_statement(state, ifnode->true_block);
+    if (ifnode->false_block) typecheck_statement(state, ifnode->false_block);
+}
+
+// NOTE: Gives the call the return type of its callee, then checks each
+// argument against the formal parameter in the same position.
+static void typecheck_call(OnyxSemPassState* state, OnyxAstNodeCall* call) {
+    OnyxAstNodeFuncDef* callee = (OnyxAstNodeFuncDef *) call->callee;
+
+    if (callee->kind == ONYX_AST_NODE_KIND_SYMBOL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_UNRESOLVED_SYMBOL,
+                callee->token->pos,
+                callee->token->token, callee->token->length);
+        return;
+    }
+
+    call->type = callee->return_type;
+
+    OnyxAstNodeParam* formal_param = callee->params;
+    OnyxAstNode* actual_param = call->arguments;
+
+    i32 arg_pos = 0;
+    while (formal_param != NULL && actual_param != NULL) {
+        typecheck_expression(state, actual_param);
+
+        if (formal_param->type != actual_param->type) {
+            onyx_message_add(state->msgs,
+                    ONYX_MESSAGE_TYPE_FUNCTION_PARAM_TYPE_MISMATCH,
+                    call->token->pos,
+                    callee->token->token, callee->token->length,
+                    type_name(formal_param->type), arg_pos,
+                    type_name(actual_param->type));
+            return;
+        }
+
+        arg_pos++;
+        formal_param = formal_param->next;
+        actual_param = actual_param->next;
+    }
+
+    // NOTE: The loop stops as soon as either list runs out, so at most
+    // one of the two is still non-NULL here.
+    if (formal_param != NULL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_LITERAL,
+                call->token->pos,
+                "too few arguments to function call");
+    } else if (actual_param != NULL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_LITERAL,
+                call->token->pos,
+                "too many arguments to function call");
+    }
+}
+
+// NOTE: Works out the type of an expression and stores it on the node,
+// adding a message when the expression cannot be typed.
+static void typecheck_expression(OnyxSemPassState* state, OnyxAstNode* expr) {
+    switch (expr->kind) {
+        case ONYX_AST_NODE_KIND_ADD:
+        case ONYX_AST_NODE_KIND_MINUS:
+        case ONYX_AST_NODE_KIND_MULTIPLY:
+        case ONYX_AST_NODE_KIND_DIVIDE:
+        case ONYX_AST_NODE_KIND_MODULUS:
+        case ONYX_AST_NODE_KIND_EQUAL:
+        case ONYX_AST_NODE_KIND_NOT_EQUAL:
+        case ONYX_AST_NODE_KIND_LESS:
+        case ONYX_AST_NODE_KIND_LESS_EQUAL:
+        case ONYX_AST_NODE_KIND_GREATER:
+        case ONYX_AST_NODE_KIND_GREATER_EQUAL:
+            // NOTE: Stays unknown unless both operands agree on a type.
+            expr->type = &builtin_types[ONYX_TYPE_INFO_KIND_UNKNOWN];
+
+            typecheck_expression(state, expr->left);
+            typecheck_expression(state, expr->right);
+
+            if (expr->left->type == NULL || expr->right->type == NULL) {
+                onyx_message_add(state->msgs,
+                        ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
+                        expr->token->pos,
+                        NULL, 0);
+                return;
+            }
+
+            if (expr->left->type != expr->right->type) {
+                onyx_message_add(state->msgs,
+                        ONYX_MESSAGE_TYPE_BINOP_MISMATCH_TYPE,
+                        expr->token->pos,
+                        expr->left->type->name,
+                        expr->right->type->name);
+                return;
+            }
+
+            // NOTE: Comparisons take the type of their operands for now.
+            expr->type = expr->left->type;
+            break;
+
+        case ONYX_AST_NODE_KIND_NEGATE:
+            typecheck_expression(state, expr->left);
+            expr->type = expr->left->type;
+            break;
+
+        case ONYX_AST_NODE_KIND_CAST:
+            // NOTE: Do nothing. The resulting type from the cast
+            // is already in the cast expression.
+            break;
+
+        case ONYX_AST_NODE_KIND_CALL:
+            typecheck_call(state, &expr->as_call);
+            break;
+
+        case ONYX_AST_NODE_KIND_BLOCK:
+            typecheck_block(state, &expr->as_block);
+            break;
+
+        case ONYX_AST_NODE_KIND_SYMBOL:
+            // NOTE: A node that is still a symbol here was never resolved.
+            onyx_message_add(state->msgs,
+                    ONYX_MESSAGE_TYPE_UNRESOLVED_SYMBOL,
+                    expr->token->pos,
+                    expr->token->token, expr->token->length);
+            break;
+
+        case ONYX_AST_NODE_KIND_LOCAL:
+        case ONYX_AST_NODE_KIND_PARAM:
+            if (expr->type == NULL || !expr->type->is_known) {
+                onyx_message_add(state->msgs,
+                        ONYX_MESSAGE_TYPE_LITERAL,
+                        expr->token->pos,
+                        "local variable with unknown type");
+            }
+            break;
+
+        case ONYX_AST_NODE_KIND_ARGUMENT:
+            typecheck_expression(state, expr->left);
+            expr->type = expr->left->type;
+            break;
+
+        case ONYX_AST_NODE_KIND_LITERAL:
+            // NOTE: Literal types should have been decided
+            // in the parser (for now).
+            assert(expr->type != NULL && expr->type->is_known);
+            break;
+
+        default:
+            DEBUG_HERE;
+            break;
+    }
+}
+
+// NOTE: Dispatches a statement to its checker. Other kinds are ignored.
+static void typecheck_statement(OnyxSemPassState* state, OnyxAstNode* stmt) {
+    switch (stmt->kind) {
+        case ONYX_AST_NODE_KIND_ASSIGNMENT: typecheck_assignment(state, stmt); break;
+        case ONYX_AST_NODE_KIND_RETURN:     typecheck_return(state, stmt); break;
+        case ONYX_AST_NODE_KIND_IF:         typecheck_if(state, &stmt->as_if); break;
+        case ONYX_AST_NODE_KIND_CALL:       typecheck_call(state, &stmt->as_call); break;
+        case ONYX_AST_NODE_KIND_BLOCK:      typecheck_block(state, &stmt->as_block); break;
+
+        default: break;
+    }
+}
+
+// NOTE: Checks every statement reachable through the 'next' links.
+static void typecheck_statement_chain(OnyxSemPassState* state, OnyxAstNode* start) {
+    while (start) {
+        typecheck_statement(state, start);
+        start = start->next;
+    }
+}
+
+// NOTE: Checks the body of a block, then reports the first local of its
+// scope whose type is still not known.
+static void typecheck_block(OnyxSemPassState* state, OnyxAstNodeBlock* block) {
+    typecheck_statement_chain(state, block->body);
+
+    forll(OnyxAstNodeLocal, local, block->scope->last_local, prev_local) {
+        if (local->type == NULL || !local->type->is_known) {
+            onyx_message_add(state->msgs,
+                    ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
+                    local->token->pos,
+                    local->token->token, local->token->length);
+            return;
+        }
+    }
+}
+
+// NOTE: Requires every parameter type to be known, then checks the body
+// against the return type of the function.
+static void typecheck_function_definition(OnyxSemPassState* state, OnyxAstNodeFuncDef* func) {
+    forll(OnyxAstNodeParam, param, func->params, next) {
+        if (param->type == NULL || !param->type->is_known) {
+            onyx_message_add(state->msgs,
+                    ONYX_MESSAGE_TYPE_LITERAL,
+                    param->token->pos,
+                    "function parameter types must be known");
+            return;
+        }
+    }
+
+    state->expected_return_type = func->return_type;
+    if (func->body) {
+        typecheck_block(state, func->body);
+    }
+}
+
+// NOTE: Entry point of the type checking phase. Only function
+// definitions in the top level chain are checked.
+void onyx_type_check(OnyxSemPassState* state, OnyxAstNode* root_node) {
+    OnyxAstNode* walker = root_node;
+    while (walker) {
+        switch (walker->kind) {
+            case ONYX_AST_NODE_KIND_FUNCDEF:
+                typecheck_function_definition(state, &walker->as_funcdef);
+                break;
+            default: break;
+        }
+
+        walker = walker->next;
+    }
+}