Added initial type checking
author Brendan Hansen <brendan.f.hansen@gmail.com>
Mon, 22 Jun 2020 21:43:54 +0000 (16:43 -0500)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Mon, 22 Jun 2020 21:43:54 +0000 (16:43 -0500)
12 files changed:
.vimspector.json
Makefile
include/onyxmsgs.h
include/onyxsempass.h
onyx
progs/new_minimal.onyx
src/onyx.c
src/onyxmsgs.c
src/onyxparser.c
src/onyxsempass.c
src/onyxsymres.c
src/onyxtypecheck.c [new file with mode: 0644]

index a52e2a695eaad6c9a1d7d36373098d819829dc86..6444b264eff9b2fbb9a8b049af33825b3945fe41 100644 (file)
@@ -6,7 +6,7 @@
                 "type": "cppdbg",
                 "request": "launch",
                 "program": "${workspaceFolder}/onyx",
-                "args": ["progs/minimal.onyx"],
+                "args": ["progs/new_minimal.onyx"],
                 "stopAtEntry": true,
                 "cwd": "${workspaceFolder}",
                 "environment": [],
index 817e8f10999e9bd04b3824ae6609ad919caa0ca9..b300bbaf6197c5211ddffdcb6e023e770e451450 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,7 @@ OBJ_FILES=\
        build/onyxparser.o \
        build/onyxsempass.o \
        build/onyxsymres.o \
+       build/onyxtypecheck.o \
        build/onyxmsgs.o \
        build/onyxutils.o \
        build/onyxwasm.o \
index b1d0f50d8134a13811eecc87f5d0e90ccc41c544..b6e0a8fb45f85fa5646b7703e0bb8e41ed462e74 100644 (file)
@@ -9,6 +9,7 @@
 #define ONYX_MSG_BUFFER_SIZE 256
 
 typedef enum OnyxMessageType {
+    ONYX_MESSAGE_TYPE_LITERAL,
        ONYX_MESSAGE_TYPE_EXPECTED_TOKEN,
        ONYX_MESSAGE_TYPE_UNEXPECTED_TOKEN,
        ONYX_MESSAGE_TYPE_UNKNOWN_TYPE,
@@ -20,6 +21,12 @@ typedef enum OnyxMessageType {
        ONYX_MESSAGE_TYPE_ASSIGNMENT_TYPE_MISMATCH,
        ONYX_MESSAGE_TYPE_EXPECTED_EXPRESSION,
 
+    ONYX_MESSAGE_TYPE_FUNCTION_RETURN_MISMATCH,
+    ONYX_MESSAGE_TYPE_FUNCTION_PARAM_TYPE_MISMATCH,
+
+    ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
+    ONYX_MESSAGE_TYPE_UNRESOLVED_SYMBOL,
+
        ONYX_MESSAGE_TYPE_COUNT,
 } OnyxMessageType;
 
index c2e6bcf77f8a0056c748bfb87878505ebfe11ee5..238e1035a4ee727b37c85ded1353508eaaf573ef 100644 (file)
@@ -18,14 +18,21 @@ typedef struct OnyxSemPassState {
        bh_allocator allocator, node_allocator;
        OnyxMessages *msgs;
 
+    // NOTE: Used in symbol resolution phase
     OnyxAstNodeScope* curr_scope;
 
+    // NOTE: Used in type checking phase
+    OnyxTypeInfo* expected_return_type;
+
     bh_table(SemPassSymbol *) symbols;
 } OnyxSemPassState;
 
 // NOTE: Resolving all symbols in the tree
 void onyx_resolve_symbols(OnyxSemPassState* state, OnyxAstNode* root_node);
 
+// NOTE: Inferring and checking types in the tree
+void onyx_type_check(OnyxSemPassState* state, OnyxAstNode* root_node);
+
 // NOTE: Full semantic pass
 OnyxSemPassState onyx_sempass_create(bh_allocator alloc, bh_allocator node_alloc, OnyxMessages* msgs);
 void onyx_sempass(OnyxSemPassState* state, OnyxAstNode* root_node);
diff --git a/onyx b/onyx
index dc020938d5f77feea550cb5b1c5cf7ab0ea2f26f..06ca04055b3312b7c0578b066db555927aca8fec 100755 (executable)
Binary files a/onyx and b/onyx differ
index 3114932bf56b562a5ebf53800843a8f7fc4364c3..56c8f0b6c2257ea5aafacfe5a242f808bc8f33c2 100644 (file)
@@ -1,18 +1,24 @@
 print :: foreign "host" "print" proc (value i32) ---
 
-diff_square :: proc (a i32, b i32) -> i32 {
-       // Typechecked
-       c := a - b; // Mutable
-       d :: a + b; // Constant
+simple_test :: proc {
+    a: i32 = 5;
+    b: i64 = 6 as i64;
 
-    {
-        c : i32 = a * 2 + foo(5, 2);
-        d : i32 = (c + a) * 2;
+    if a > b {
+        foo := 123;
+        print(foo);
     }
 
-       return c * d;
+    c :: a + foo();
+    print(c);
 }
 
-foo :: proc (a i32, b i32) -> i32 {
-    return a * 5 + b * 7;
+foo :: proc -> i32 {
+    return 10;
+}
+
+print_nums :: proc (a i32, b i32, c i32) {
+    print(a);
+    print(b);
+    print(c);
 }
index 788be3238caf3dc52c4e12264dd0ecec1cd9dffe..f86095d680d0a3d7a356c56a902a753a69994f7f 100644 (file)
@@ -63,6 +63,7 @@ int main(int argc, char *argv[]) {
        // NOTE: if there are errors, assume the parse tree was generated wrong,
        // even if it may have still been generated correctly.
        if (onyx_message_has_errors(&msgs)) {
+        bh_printf("\n\n");
                onyx_message_print(&msgs);
                goto main_exit;
        } else {
index a5f0c7d8e797076558e3958ca9eed63055170b2a..3f87b8743a1454fc63df48ce2df5175156efbcda 100644 (file)
@@ -2,6 +2,7 @@
 #include "onyxmsgs.h"
 
 static const char* msg_formats[] = {
+    "%s",
        "expected token '%s', got '%s'",
        "unexpected token '%s'",
        "unknown type '%s'",
@@ -10,8 +11,14 @@ static const char* msg_formats[] = {
        "unknown symbol '%s'",
        "redefinition of function '%s'",
        "mismatched types for binary operator, '%s', '%s'",
-       "mismatched types on assignment, '%s', '%s'",
+       "mismatched types on assignment, expected '%s', got '%s'",
        "expected expression, got '%s'",
+
+    "returning '%s' from function that returns '%s'",
+    "function '%b' expected type '%s' in position '%d', got '%s'",
+
+    "unable to resolve type for symbol '%b'",
+    "unable to resolve symbol '%b'",
 };
 
 void onyx_message_add(OnyxMessages* msgs, OnyxMessageType type, OnyxFilePos pos, ...) {
index 01dc0fec3b921317697dc50218347413b27dee4b..6286e4ce0ca782a5d898adb1844dfac591646e19 100644 (file)
@@ -209,9 +209,8 @@ static OnyxAstNode* parse_factor(OnyxParser* parser) {
                                }
 
                                // NOTE: Function call
-                               expect(parser, TOKEN_TYPE_OPEN_PAREN);
-
                                OnyxAstNodeCall* call_node = (OnyxAstNodeCall *) onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_CALL);
+                call_node->token = expect(parser, TOKEN_TYPE_OPEN_PAREN);
                                call_node->callee = sym_node;
                                // NOTE: Return type is stored on function definition's type
                                // This may have to change if we want multiple returns
@@ -319,6 +318,7 @@ static OnyxAstNode* parse_expression(OnyxParser* parser) {
             parser_next_token(parser);
 
             OnyxAstNode* bin_op = onyx_ast_node_new(parser->allocator, bin_op_kind);
+            bin_op->token = bin_op_tok;
 
             while ( !bh_arr_is_empty(tree_stack) &&
                     get_precedence(bh_arr_last(tree_stack)->kind) >= get_precedence(bin_op_kind))
@@ -469,10 +469,9 @@ static b32 parse_symbol_statement(OnyxParser* parser, OnyxAstNode** ret) {
 }
 
 static OnyxAstNode* parse_return_statement(OnyxParser* parser) {
-       expect(parser, TOKEN_TYPE_KEYWORD_RETURN);
-
        OnyxAstNode* return_node = onyx_ast_node_new(parser->allocator, ONYX_AST_NODE_KIND_RETURN);
-       return_node->type = &builtin_types[ONYX_TYPE_INFO_KIND_VOID];
+       return_node->token = expect(parser, TOKEN_TYPE_KEYWORD_RETURN);
+
        OnyxAstNode* expr = NULL;
 
        if (parser->curr_token->type != TOKEN_TYPE_SYM_SEMICOLON) {
index 7a36627e7a8778ba608e9efb04519bb1fbc9346e..d593cb30508749ea70b75f61680bb683e73eb323 100644 (file)
@@ -19,4 +19,5 @@ OnyxSemPassState onyx_sempass_create(bh_allocator alloc, bh_allocator node_alloc
 
 void onyx_sempass(OnyxSemPassState* state, OnyxAstNode* root_node) { // NOTE: Runs the semantic phases in order over the tree rooted at root_node
     onyx_resolve_symbols(state, root_node); // NOTE: Phase 1, symbol resolution
+    onyx_type_check(state, root_node); // NOTE: Phase 2, type inference and checking; always runs after symbol resolution
 }
index edccbecf68ad8fcc3dd744ceeedd016780c890e2..6e2b2a2a9bb32e8a344f754e443e241f1fe1596d 100644 (file)
@@ -64,7 +64,7 @@ static OnyxAstNode* symbol_resolve(OnyxSemPassState* state, OnyxAstNode* symbol)
                 symbol->token->token);
 
         onyx_token_null_toggle(*symbol->token);
-        return NULL;
+        return symbol;
     }
 
     SemPassSymbol* sp_sym = bh_table_get(SemPassSymbol *, state->symbols, symbol->token->token);
@@ -159,7 +159,7 @@ static void symres_expression(OnyxSemPassState* state, OnyxAstNode** expr) {
 
         case ONYX_AST_NODE_KIND_CALL: symres_call(state, *expr); break;
 
-        case ONYX_AST_NODE_KIND_BLOCK: symres_block(state, &(*expr)->as_block);
+        case ONYX_AST_NODE_KIND_BLOCK: symres_block(state, &(*expr)->as_block); break;
 
         case ONYX_AST_NODE_KIND_SYMBOL:
             *expr = symbol_resolve(state, *expr);
@@ -281,7 +281,6 @@ void onyx_resolve_symbols(OnyxSemPassState* state, OnyxAstNode* root_node) {
         walker = walker->next;
     }
 
-    // NOTE: First, resolve all symbols
     walker = root_node;
     while (walker) {
         switch (walker->kind) {
diff --git a/src/onyxtypecheck.c b/src/onyxtypecheck.c
new file mode 100644 (file)
index 0000000..28b1f72
--- /dev/null
@@ -0,0 +1,276 @@
+#define BH_DEBUG
+#include "onyxsempass.h"
+
+static void typecheck_function_defintion(OnyxSemPassState* state, OnyxAstNodeFuncDef* func); // NOTE: Forward declarations; these checkers call each other recursively
+static void typecheck_block(OnyxSemPassState* state, OnyxAstNodeBlock* block); // NOTE: Checks the body, then the scope's locals
+static void typecheck_statement_chain(OnyxSemPassState* state, OnyxAstNode* start); // NOTE: Walks the ->next list of statements
+static void typecheck_statement(OnyxSemPassState* state, OnyxAstNode* stmt); // NOTE: Dispatches on stmt->kind
+static void typecheck_assignment(OnyxSemPassState* state, OnyxAstNode* assign); // NOTE: Infers or compares the left/right types
+static void typecheck_return(OnyxSemPassState* state, OnyxAstNode* retnode); // NOTE: Compares against state->expected_return_type
+static void typecheck_if(OnyxSemPassState* state, OnyxAstNodeIf* ifnode); // NOTE: Checks the true and false blocks
+static void typecheck_call(OnyxSemPassState* state, OnyxAstNodeCall* call); // NOTE: Checks argument types and argument count
+static void typecheck_expression(OnyxSemPassState* state, OnyxAstNode* expr); // NOTE: Dispatches on expr->kind
+
+// NOTE: Type checks a single assignment statement.
+//   - A left-hand side that is still a symbol node is reported as an
+//     unresolved symbol and nothing else is checked.
+//   - A left-hand side whose type is not yet known takes the type of the
+//     right-hand expression.
+//   - Otherwise both sides must have exactly the same type, or an
+//     assignment mismatch is reported.
+static void typecheck_assignment(OnyxSemPassState* state, OnyxAstNode* assign) {
+    if (assign->left->kind == ONYX_AST_NODE_KIND_SYMBOL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_UNRESOLVED_SYMBOL,
+                assign->left->token->pos,
+                assign->left->token->token, assign->left->token->length);
+        return;
+    }
+
+    typecheck_expression(state, assign->right);
+
+    // NOTE: typecheck_expression guards against operands whose type is NULL,
+    // so the right-hand side may come back without a type as well. Stop here
+    // so that the NULL is neither copied onto the left-hand side nor
+    // dereferenced for its name in the mismatch message below.
+    if (assign->right->type == NULL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
+                assign->token->pos,
+                NULL, 0);
+        return;
+    }
+
+    // NOTE: Inference: an unknown left-hand type adopts the right-hand type.
+    if (!assign->left->type->is_known) {
+        assign->left->type = assign->right->type;
+        return;
+    }
+
+    if (assign->left->type != assign->right->type) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_ASSIGNMENT_TYPE_MISMATCH,
+                assign->token->pos,
+                assign->left->type->name, assign->right->type->name);
+    }
+}
+
+// NOTE: Type checks a return statement against state->expected_return_type.
+//   - With a value: the value's type must be exactly the expected type.
+//   - Without a value: an error is reported when the expected type has a
+//     non-zero size.
+static void typecheck_return(OnyxSemPassState* state, OnyxAstNode* retnode) {
+    if (retnode->left == NULL) {
+        if (state->expected_return_type->size > 0) {
+            onyx_message_add(state->msgs,
+                    ONYX_MESSAGE_TYPE_LITERAL,
+                    retnode->token->pos,
+                    "returning from non-void function without value");
+        }
+        return;
+    }
+
+    typecheck_expression(state, retnode->left);
+
+    // NOTE: typecheck_expression guards against operands whose type is NULL,
+    // so the returned expression may come back without a type as well.
+    // Report it here instead of dereferencing the NULL type for its name.
+    if (retnode->left->type == NULL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
+                retnode->token->pos,
+                NULL, 0);
+        return;
+    }
+
+    if (retnode->left->type != state->expected_return_type) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_FUNCTION_RETURN_MISMATCH,
+                retnode->left->token->pos,
+                retnode->left->type->name, state->expected_return_type->name);
+    }
+}
+
+// NOTE: Type checks whichever branches of an if statement are present.
+// NOTE(review): the condition expression itself is never passed to
+// typecheck_expression here, so it is neither inferred nor validated.
+static void typecheck_if(OnyxSemPassState* state, OnyxAstNodeIf* ifnode) {
+    // NOTE: Add check for boolean type on condition
+    if (ifnode->true_block != NULL) {
+        typecheck_statement(state, ifnode->true_block);
+    }
+
+    if (ifnode->false_block != NULL) {
+        typecheck_statement(state, ifnode->false_block);
+    }
+}
+
+// NOTE: Type checks a function call.
+//   - A callee that is still a symbol node is reported as an unresolved
+//     symbol and nothing else is checked.
+//   - The call expression takes the callee's return type.
+//   - Each argument is checked against the formal parameter in the same
+//     (zero-based) position; the first mismatch is reported and ends the check.
+//   - Finally the number of arguments must match the number of parameters.
+static void typecheck_call(OnyxSemPassState* state, OnyxAstNodeCall* call) {
+    OnyxAstNodeFuncDef* callee = (OnyxAstNodeFuncDef *) call->callee;
+
+    if (callee->kind == ONYX_AST_NODE_KIND_SYMBOL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_UNRESOLVED_SYMBOL,
+                callee->token->pos,
+                callee->token->token, callee->token->length);
+        return;
+    }
+
+    call->type = callee->return_type;
+
+    OnyxAstNodeParam* formal_param = callee->params;
+    OnyxAstNode* actual_param = call->arguments;
+
+    i32 arg_pos = 0;
+    while (formal_param != NULL && actual_param != NULL) {
+        typecheck_expression(state, actual_param);
+
+        // NOTE: typecheck_expression guards against operands whose type is
+        // NULL, so an argument may come back without a type as well. Report
+        // it here instead of dereferencing the NULL type for its name below.
+        if (actual_param->type == NULL) {
+            onyx_message_add(state->msgs,
+                    ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
+                    call->token->pos,
+                    NULL, 0);
+            return;
+        }
+
+        if (formal_param->type != actual_param->type) {
+            onyx_message_add(state->msgs,
+                    ONYX_MESSAGE_TYPE_FUNCTION_PARAM_TYPE_MISMATCH,
+                    call->token->pos,
+                    callee->token->token, callee->token->length,
+                    formal_param->type->name, arg_pos,
+                    actual_param->type->name);
+            return;
+        }
+
+        arg_pos++;
+        formal_param = formal_param->next;
+        actual_param = actual_param->next;
+    }
+
+    // NOTE: Whichever list still has entries left tells us the direction
+    // of the argument count mismatch.
+    if (formal_param != NULL && actual_param == NULL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_LITERAL,
+                call->token->pos,
+                "too few arguments to function call");
+        return;
+    }
+
+    if (formal_param == NULL && actual_param != NULL) {
+        onyx_message_add(state->msgs,
+                ONYX_MESSAGE_TYPE_LITERAL,
+                call->token->pos,
+                "too many arguments to function call");
+        return;
+    }
+}
+
+// NOTE: Infers and validates the type of an expression node, dispatching on
+// expr->kind. Problems are recorded on state->msgs.
+static void typecheck_expression(OnyxSemPassState* state, OnyxAstNode* expr) {
+    switch (expr->kind) {
+        case ONYX_AST_NODE_KIND_LITERAL:
+            // NOTE: Literal types should have been decided
+            // in the parser (for now).
+            assert(expr->type->is_known);
+            break;
+
+        case ONYX_AST_NODE_KIND_CAST:
+            // NOTE: Do nothing. The resulting type from the cast
+            // is already in the cast expression.
+            break;
+
+        case ONYX_AST_NODE_KIND_SYMBOL:
+            // NOTE: A node that is still a symbol at this point is reported
+            // as unresolved.
+            onyx_message_add(state->msgs,
+                    ONYX_MESSAGE_TYPE_UNRESOLVED_SYMBOL,
+                    expr->token->pos,
+                    expr->token->token, expr->token->length);
+            break;
+
+        case ONYX_AST_NODE_KIND_LOCAL:
+        case ONYX_AST_NODE_KIND_PARAM:
+            if (!expr->type->is_known) {
+                onyx_message_add(state->msgs,
+                        ONYX_MESSAGE_TYPE_LITERAL,
+                        expr->token->pos,
+                        "local variable with unknown type");
+            }
+            break;
+
+        case ONYX_AST_NODE_KIND_CALL:
+            typecheck_call(state, &expr->as_call);
+            break;
+
+        case ONYX_AST_NODE_KIND_BLOCK:
+            typecheck_block(state, &expr->as_block);
+            break;
+
+        // NOTE: These wrappers take on the type of their single operand.
+        case ONYX_AST_NODE_KIND_NEGATE:
+        case ONYX_AST_NODE_KIND_ARGUMENT:
+            typecheck_expression(state, expr->left);
+            expr->type = expr->left->type;
+            break;
+
+        // NOTE: Binary operators require both operands to have the same
+        // type, which also becomes the type of the whole expression.
+        case ONYX_AST_NODE_KIND_ADD:
+        case ONYX_AST_NODE_KIND_MINUS:
+        case ONYX_AST_NODE_KIND_MULTIPLY:
+        case ONYX_AST_NODE_KIND_DIVIDE:
+        case ONYX_AST_NODE_KIND_MODULUS:
+        case ONYX_AST_NODE_KIND_EQUAL:
+        case ONYX_AST_NODE_KIND_NOT_EQUAL:
+        case ONYX_AST_NODE_KIND_LESS:
+        case ONYX_AST_NODE_KIND_LESS_EQUAL:
+        case ONYX_AST_NODE_KIND_GREATER:
+        case ONYX_AST_NODE_KIND_GREATER_EQUAL:
+            // NOTE: Stays unknown unless every check below succeeds.
+            expr->type = &builtin_types[ONYX_TYPE_INFO_KIND_UNKNOWN];
+
+            typecheck_expression(state, expr->left);
+            typecheck_expression(state, expr->right);
+
+            if (expr->left->type == NULL || expr->right->type == NULL) {
+                onyx_message_add(state->msgs,
+                        ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
+                        expr->token->pos,
+                        NULL, 0);
+            } else if (expr->left->type != expr->right->type) {
+                onyx_message_add(state->msgs,
+                        ONYX_MESSAGE_TYPE_BINOP_MISMATCH_TYPE,
+                        expr->token->pos,
+                        expr->left->type->name,
+                        expr->right->type->name);
+            } else {
+                expr->type = expr->left->type;
+            }
+            break;
+
+        default:
+            DEBUG_HERE;
+            break;
+    }
+}
+
+// NOTE: Dispatches a single statement to the checker for its kind.
+// Statement kinds without a checker are ignored.
+static void typecheck_statement(OnyxSemPassState* state, OnyxAstNode* stmt) {
+    switch (stmt->kind) {
+        case ONYX_AST_NODE_KIND_ASSIGNMENT:
+            typecheck_assignment(state, stmt);
+            break;
+
+        case ONYX_AST_NODE_KIND_RETURN:
+            typecheck_return(state, stmt);
+            break;
+
+        case ONYX_AST_NODE_KIND_IF:
+            typecheck_if(state, &stmt->as_if);
+            break;
+
+        case ONYX_AST_NODE_KIND_CALL:
+            typecheck_call(state, &stmt->as_call);
+            break;
+
+        case ONYX_AST_NODE_KIND_BLOCK:
+            typecheck_block(state, &stmt->as_block);
+            break;
+
+        default:
+            break;
+    }
+}
+
+// NOTE: Type checks every statement in the ->next linked list beginning at start.
+static void typecheck_statement_chain(OnyxSemPassState* state, OnyxAstNode* start) {
+    for (OnyxAstNode* stmt = start; stmt != NULL; stmt = stmt->next) {
+        typecheck_statement(state, stmt);
+    }
+}
+
+// NOTE: Type checks the statements of a block, then verifies that the
+// locals of the block's scope ended up with a known type. Only the first
+// local found without a known type is reported.
+static void typecheck_block(OnyxSemPassState* state, OnyxAstNodeBlock* block) {
+    typecheck_statement_chain(state, block->body);
+
+    // NOTE: Locals are walked from last_local through prev_local.
+    forll(OnyxAstNodeLocal, scope_local, block->scope->last_local, prev_local) {
+        if (!scope_local->type->is_known) {
+            onyx_message_add(
+                    state->msgs,
+                    ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
+                    scope_local->token->pos,
+                    scope_local->token->token,
+                    scope_local->token->length);
+            return;
+        }
+    }
+}
+
+// NOTE: Type checks one function definition. Every parameter must have a
+// known type; the first one that does not is reported and the body is not
+// checked. Otherwise the function's return type becomes
+// state->expected_return_type and the body, when present, is checked.
+static void typecheck_function_defintion(OnyxSemPassState* state, OnyxAstNodeFuncDef* func) {
+    forll(OnyxAstNodeParam, formal, func->params, next) {
+        if (!formal->type->is_known) {
+            onyx_message_add(
+                    state->msgs,
+                    ONYX_MESSAGE_TYPE_LITERAL,
+                    formal->token->pos,
+                    "function parameter types must be known");
+            return;
+        }
+    }
+
+    // NOTE: Return statements inside the body are compared against this.
+    state->expected_return_type = func->return_type;
+
+    if (func->body != NULL) {
+        typecheck_block(state, func->body);
+    }
+}
+
+// NOTE: Entry point of the type checking phase. Walks the top-level node
+// list starting at root_node and type checks every function definition;
+// all other top-level node kinds are skipped.
+void onyx_type_check(OnyxSemPassState* state, OnyxAstNode* root_node) {
+    for (OnyxAstNode* node = root_node; node != NULL; node = node->next) {
+        if (node->kind == ONYX_AST_NODE_KIND_FUNCDEF) {
+            typecheck_function_defintion(state, &node->as_funcdef);
+        }
+    }
+}