updates
authorBrendan Hansen <brendan.f.hansen@gmail.com>
Mon, 11 May 2020 14:42:45 +0000 (09:42 -0500)
committerBrendan Hansen <brendan.f.hansen@gmail.com>
Mon, 11 May 2020 14:42:45 +0000 (09:42 -0500)
.vimspector.json
bh.h
onyx
onyx.c
progs/demo.onyx
progs/mvp.onyx

index ee0c1bfc99e7e26786d54e57b0318cd66e9271fd..72e3a7abfa4568b526c68362688bf3ea7ac3706a 100644 (file)
@@ -6,7 +6,7 @@
                 "type": "cppdbg",
                 "request": "launch",
                 "program": "${workspaceFolder}/onyx",
-                "args": ["demo.onyx"],
+                "args": ["progs/mvp.onyx"],
                 "stopAtEntry": true,
                 "cwd": "${workspaceFolder}",
                 "environment": [],
diff --git a/bh.h b/bh.h
index 7eb4fd09ea664d26f0ed585d98bf65d37b925b6a..40dd979f21f44bd56ac7811ef4a9f6c5d04546f2 100644 (file)
--- a/bh.h
+++ b/bh.h
@@ -26,11 +26,11 @@ typedef i32 b32;
 //-------------------------------------------------------------------------------------
 // Better character functions
 //-------------------------------------------------------------------------------------
-inline b32 char_is_alpha(const char a);
-inline b32 char_is_num(const char a);
-inline b32 char_is_alphanum(const char a);
-inline b32 char_is_whitespace(const char a);
-inline b32 char_in_range(const char lo, const char hi, const char a);
+b32 char_is_alpha(const char a);
+b32 char_is_num(const char a);
+b32 char_is_alphanum(const char a);
+b32 char_is_whitespace(const char a);
+b32 char_in_range(const char lo, const char hi, const char a);
 char charset_contains(const char* charset, char ch);
 
 //-------------------------------------------------------------------------------------
@@ -168,23 +168,23 @@ i32 bh_file_contents_delete(bh_file_contents* contents);
 //-------------------------------------------------------------------------------------
 // CHAR FUNCTIONS
 //-------------------------------------------------------------------------------------
-inline b32 char_is_alpha(const char a) {
+b32 char_is_alpha(const char a) {
        return ('a' <= a && a <= 'z') || ('A' <= a && a <= 'Z');
 }
 
-inline b32 char_is_num(const char a) {
+b32 char_is_num(const char a) {
        return ('0' <= a && a <= '9');
 }
 
-inline b32 char_is_alphanum(const char a) {
+b32 char_is_alphanum(const char a) {
        return char_is_alpha(a) || char_is_num(a);
 }
 
-inline b32 char_is_whitespace(const char a) {
+b32 char_is_whitespace(const char a) {
        return charset_contains(" \t\r\n", a);
 }
 
-inline b32 char_in_range(const char lo, const char hi, const char a) {
+b32 char_in_range(const char lo, const char hi, const char a) {
        return lo <= a <= hi;
 }
 
diff --git a/onyx b/onyx
index 2d5a2d93f7c2ed09c2d0dd54a2cc0bc6f90e9a32..b0a2ad6f86daadfc6c9215135d1e8514ea63ec3b 100755 (executable)
Binary files a/onyx and b/onyx differ
diff --git a/onyx.c b/onyx.c
index 970c85c843aad13a2711576da41e4702411dc3c3..9034852ec7a97b2af0518c24a1111ea069fc3b03 100644 (file)
--- a/onyx.c
+++ b/onyx.c
@@ -1,16 +1,20 @@
 #include <stdio.h> // TODO: Replace with custom lib
-#include <stdlib.h> // TODO: Replace with custom lib
 #include "bh.h"
 
 typedef struct Tokenizer {
        char *start, *curr, *end;
+
+       // TODO: Fix the line number and column count
        u64 line_number;
+       u64 line_column;
 } Tokenizer;
 
 typedef enum TokenType {
        TOKEN_TYPE_UNKNOWN,
        TOKEN_TYPE_END_STREAM,
 
+       TOKEN_TYPE_COMMENT,
+
        TOKEN_TYPE_KEYWORD_STRUCT,
        TOKEN_TYPE_KEYWORD_USE,
        TOKEN_TYPE_KEYWORD_EXPORT,
@@ -20,18 +24,33 @@ typedef enum TokenType {
        TOKEN_TYPE_KEYWORD_RETURN,
 
        TOKEN_TYPE_RIGHT_ARROW,
+       TOKEN_TYPE_LEFT_ARROW,
        TOKEN_TYPE_OPEN_PAREN,
        TOKEN_TYPE_CLOSE_PAREN,
        TOKEN_TYPE_OPEN_BRACE,
        TOKEN_TYPE_CLOSE_BRACE,
        TOKEN_TYPE_OPEN_BRACKET,
        TOKEN_TYPE_CLOSE_BRACKET,
+       TOKEN_TYPE_OPEN_ANGLE,
+       TOKEN_TYPE_CLOSE_ANGLE,
+
+       TOKEN_TYPE_SYM_PLUS,
+       TOKEN_TYPE_SYM_MINUS,
+       TOKEN_TYPE_SYM_STAR,
+       TOKEN_TYPE_SYM_PERCENT,
+       TOKEN_TYPE_SYM_FSLASH,
+       TOKEN_TYPE_SYM_BSLASH,
+       TOKEN_TYPE_SYM_COLON,
+       TOKEN_TYPE_SYM_SEMICOLON,
+       TOKEN_TYPE_SYM_COMMA,
+       TOKEN_TYPE_SYM_EQUALS,
+       TOKEN_TYPE_SYM_GRAVE,
+       TOKEN_TYPE_SYM_TILDE,
+       TOKEN_TYPE_SYM_BANG,
 
-       TOKEN_TYPE_OP_ADD,
-       TOKEN_TYPE_OP_SUB,
-       TOKEN_TYPE_OP_MUL,
-       TOKEN_TYPE_OP_DIV,
-       TOKEN_TYPE_OP_MOD,
+       TOKEN_TYPE_SYMBOL,
+       TOKEN_TYPE_LITERAL_STRING,
+       TOKEN_TYPE_LITERAL_NUMERIC,
 
        TOKEN_TYPE_COUNT
 } TokenType;
@@ -40,6 +59,8 @@ static const char* TokenTypeNames[] = {
        "TOKEN_TYPE_UNKNOWN",
        "TOKEN_TYPE_END_STREAM",
 
+       "TOKEN_TYPE_COMMENT",
+
        "TOKEN_TYPE_KEYWORD_STRUCT",
        "TOKEN_TYPE_KEYWORD_USE",
        "TOKEN_TYPE_KEYWORD_EXPORT",
@@ -49,18 +70,33 @@ static const char* TokenTypeNames[] = {
        "TOKEN_TYPE_KEYWORD_RETURN",
 
        "TOKEN_TYPE_RIGHT_ARROW",
+       "TOKEN_TYPE_LEFT_ARROW",
        "TOKEN_TYPE_OPEN_PAREN",
        "TOKEN_TYPE_CLOSE_PAREN",
        "TOKEN_TYPE_OPEN_BRACE",
        "TOKEN_TYPE_CLOSE_BRACE",
        "TOKEN_TYPE_OPEN_BRACKET",
        "TOKEN_TYPE_CLOSE_BRACKET",
+       "TOKEN_TYPE_OPEN_ANGLE",
+       "TOKEN_TYPE_CLOSE_ANGLE",
+
+       "TOKEN_TYPE_SYM_PLUS",
+       "TOKEN_TYPE_SYM_MINUS",
+       "TOKEN_TYPE_SYM_STAR",
+       "TOKEN_TYPE_SYM_PERCENT",
+       "TOKEN_TYPE_SYM_FSLASH",
+       "TOKEN_TYPE_SYM_BSLASH",
+       "TOKEN_TYPE_SYM_COLON",
+       "TOKEN_TYPE_SYM_SEMICOLON",
+       "TOKEN_TYPE_SYM_COMMA",
+       "TOKEN_TYPE_SYM_EQUALS",
+       "TOKEN_TYPE_SYM_GRAVE",
+       "TOKEN_TYPE_SYM_TILDE",
+       "TOKEN_TYPE_SYM_BANG",
 
-       "TOKEN_TYPE_OP_ADD",
-       "TOKEN_TYPE_OP_SUB",
-       "TOKEN_TYPE_OP_MUL",
-       "TOKEN_TYPE_OP_DIV",
-       "TOKEN_TYPE_OP_MOD",
+       "TOKEN_TYPE_SYMBOL",
+       "TOKEN_TYPE_LITERAL_STRING",
+       "TOKEN_TYPE_LITERAL_NUMERIC",
 
        "TOKEN_TYPE_COUNT"
 };
@@ -72,58 +108,172 @@ typedef struct Token {
        u64 line_number, line_column;
 } Token;
 
+#ifndef LITERAL_TOKEN
+#define LITERAL_TOKEN(token, token_type) \
+       if (token_lit(tokenizer, &tk, token, token_type)) goto token_parsed;
+#endif
+
+#ifndef INCREMENT_CURR_TOKEN
+#define INCREMENT_CURR_TOKEN(tkn) { \
+       tkn->curr++; \
+       tkn->line_column++; \
+       if (*tkn->curr == '\n') { \
+               tkn->line_number++; \
+               tkn->line_column = 1; \
+       } \
+}
+#endif
+
 b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) {
        i64 len = chars_match(tokenizer->curr, lit);
        if (len > 0) {
                tk->type = type;
                tk->token = tokenizer->curr;
                tk->length = len;
+
                tokenizer->curr += len;
+               tokenizer->line_column += len;
+
                return 1;
        }
        return 0;
 }
 
 Token get_token(Tokenizer* tokenizer) {
-       #ifndef LITERAL_TOKEN
-       #define LITERAL_TOKEN(token, token_type) \
-               if (token_lit(tokenizer, &tk, token, token_type)) goto token_parsed;
-       #endif
-
        Token tk;
 
+       // Skip whitespace
+       while (tokenizer->curr != tokenizer->end && char_is_whitespace(*tokenizer->curr))
+               INCREMENT_CURR_TOKEN(tokenizer)
+
        tk.type = TOKEN_TYPE_UNKNOWN;
        tk.token = tokenizer->curr;
        tk.length = 1;
-       tk.line_number = 0;
-       tk.line_column = 0;
+       tk.line_number = tokenizer->line_number;
+       tk.line_column = tokenizer->line_column;
 
        if (tokenizer->curr == tokenizer->end) {
                tk.type = TOKEN_TYPE_END_STREAM;
                goto token_parsed;
        }
 
+       // Comments
+       if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') {
+               tokenizer->curr += 2;   
+               tk.type = TOKEN_TYPE_COMMENT;
+               tk.token = tokenizer->curr;
+               u16 layers = 1;
+
+               while (layers >= 1) {
+                       INCREMENT_CURR_TOKEN(tokenizer);
+
+                       if (tokenizer->curr == tokenizer->end) {
+                               tk.type = TOKEN_TYPE_END_STREAM;
+                               break;
+                       }
+                       
+                       if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') {
+                               layers++;
+                               INCREMENT_CURR_TOKEN(tokenizer);
+                       }
+
+                       if (*tokenizer->curr == '*' && *(tokenizer->curr + 1) == '/') {
+                               layers--;
+                               INCREMENT_CURR_TOKEN(tokenizer);
+                       }
+               }       
+
+               INCREMENT_CURR_TOKEN(tokenizer);
+
+               tk.length = tokenizer->curr - tk.token - 2;
+               goto token_parsed;
+       }
+
        LITERAL_TOKEN("struct", TOKEN_TYPE_KEYWORD_STRUCT);
        LITERAL_TOKEN("export", TOKEN_TYPE_KEYWORD_EXPORT);
        LITERAL_TOKEN("use", TOKEN_TYPE_KEYWORD_USE);
        LITERAL_TOKEN("if", TOKEN_TYPE_KEYWORD_IF);
-       LITERAL_TOKEN("else", TOKEN_TYPE_KEYWORD_IF);
+       LITERAL_TOKEN("else", TOKEN_TYPE_KEYWORD_ELSE);
        LITERAL_TOKEN("for", TOKEN_TYPE_KEYWORD_FOR);
        LITERAL_TOKEN("return", TOKEN_TYPE_KEYWORD_RETURN);
        LITERAL_TOKEN("->", TOKEN_TYPE_RIGHT_ARROW);
+       LITERAL_TOKEN("<-", TOKEN_TYPE_LEFT_ARROW);
        LITERAL_TOKEN("(", TOKEN_TYPE_OPEN_PAREN);
        LITERAL_TOKEN(")", TOKEN_TYPE_CLOSE_PAREN);
        LITERAL_TOKEN("{", TOKEN_TYPE_OPEN_BRACE);
        LITERAL_TOKEN("}", TOKEN_TYPE_CLOSE_BRACE);
        LITERAL_TOKEN("[", TOKEN_TYPE_OPEN_BRACKET);
        LITERAL_TOKEN("]", TOKEN_TYPE_CLOSE_BRACKET);
-       LITERAL_TOKEN("+", TOKEN_TYPE_OP_ADD);
-       LITERAL_TOKEN("-", TOKEN_TYPE_OP_SUB);
-       LITERAL_TOKEN("*", TOKEN_TYPE_OP_MUL);
-       LITERAL_TOKEN("/", TOKEN_TYPE_OP_DIV);
-       LITERAL_TOKEN("%", TOKEN_TYPE_OP_MOD);
+       LITERAL_TOKEN("<", TOKEN_TYPE_OPEN_ANGLE);
+       LITERAL_TOKEN(">", TOKEN_TYPE_CLOSE_ANGLE);
+       LITERAL_TOKEN("+", TOKEN_TYPE_SYM_PLUS);
+       LITERAL_TOKEN("-", TOKEN_TYPE_SYM_MINUS);
+       LITERAL_TOKEN("*", TOKEN_TYPE_SYM_STAR);
+       LITERAL_TOKEN("/", TOKEN_TYPE_SYM_FSLASH);
+       LITERAL_TOKEN("%", TOKEN_TYPE_SYM_PERCENT);
+       LITERAL_TOKEN("\\", TOKEN_TYPE_SYM_BSLASH);
+       LITERAL_TOKEN(":", TOKEN_TYPE_SYM_COLON);
+       LITERAL_TOKEN(";", TOKEN_TYPE_SYM_SEMICOLON);
+       LITERAL_TOKEN(",", TOKEN_TYPE_SYM_COMMA);
+       LITERAL_TOKEN("=", TOKEN_TYPE_SYM_EQUALS);
+       LITERAL_TOKEN("`", TOKEN_TYPE_SYM_GRAVE);
+       LITERAL_TOKEN("~", TOKEN_TYPE_SYM_TILDE);
+       LITERAL_TOKEN("!", TOKEN_TYPE_SYM_BANG);
+
+       // Symbols
+       if (char_is_alpha(*tk.token)) {
+               u64 len = 0;
+               while (char_is_alphanum(*tokenizer->curr) || charset_contains("_$", *tokenizer->curr)) {
+                       len++;
+                       INCREMENT_CURR_TOKEN(tokenizer);
+               }
+
+               tk.length = len;
+               tk.type = TOKEN_TYPE_SYMBOL;
+               goto token_parsed;
+       }
+
+       // String literal
+       if (*tk.token == '"') {
+               u64 len = 0;
+               u64 slash_count = 0;
+
+               INCREMENT_CURR_TOKEN(tokenizer);
+
+               while (tokenizer->curr != tokenizer->end && !(*tokenizer->curr == '"' && slash_count == 0)) {
+                       len++;
+
+                       if (*tokenizer->curr == '\\') {
+                               slash_count += 1;
+                               slash_count %= 2;
+                       } else {
+                               slash_count = 0;
+                       }
+
+                       INCREMENT_CURR_TOKEN(tokenizer);
+               }
+
+               INCREMENT_CURR_TOKEN(tokenizer);
+
+               tk.token++;
+               tk.type = TOKEN_TYPE_LITERAL_STRING;
+               tk.length = len;
+               goto token_parsed;
+       }
+
+       // Number literal
+       if (char_is_num(*tokenizer->curr)) {
+               u64 len = 0;
+               while (char_is_num(*tokenizer->curr) || *tokenizer->curr == '.') {
+                       len++;
+                       INCREMENT_CURR_TOKEN(tokenizer);
+               }
+
+               tk.type = TOKEN_TYPE_LITERAL_NUMERIC;
+               tk.length = len;
+       }
 
-       tokenizer->curr++; // Ignore token
+       INCREMENT_CURR_TOKEN(tokenizer);
 
 token_parsed:
        return tk;
@@ -143,8 +293,9 @@ int main(int argc, char *argv[]) {
        Tokenizer tknizer = {
                .start = fc.data,
                .curr = fc.data,
-               .end = fc.data + fc.length,
+               .end = fc.data + fc.length - 1,
                .line_number = 1,
+               .line_column = 1,
        };
 
        Token tk;
@@ -152,7 +303,7 @@ int main(int argc, char *argv[]) {
                tk = get_token(&tknizer);
                char c = *(tk.token + tk.length);
                *(tk.token + tk.length) = '\0';
-               printf("%s: %s\n", TokenTypeNames[tk.type], tk.token);
+               printf("Line %lu, Column %lu: \n%s: %s\n", tk.line_number, tk.line_column, TokenTypeNames[tk.type], tk.token);
                *(tk.token + tk.length) = c;
        } while (tk.type != TOKEN_TYPE_END_STREAM);
 
index 32a37db1518370113d66ebf747aff353ca6193fa..b0d3cf0778ebd3cf87dfc27b3897d6d94508980c 100644 (file)
@@ -7,5 +7,6 @@ use "core"; /* Looks for "core.onyx" in the current directory */
 Foo :: struct { x i32, y i32 };
 
 add :: (a i32, b i32) -> i32 {
-       return a + b;   
-};
\ No newline at end of file
+       return a + b + 1234.56;
+};
+
index 10f24015b969c436d2df8907e5946ef369608b49..cf693c37a22d21bc277072f770143bbdb61a7c41 100644 (file)
@@ -1,11 +1,17 @@
-/* Comments need to be parsed */
+/* Comments need to be parsed
+ /* nested comments /* are /* okay */ */ */
+*/
 
-export add :: (a: i32, b: i32) -> i32 {
+foreign "console" "log" :: (ptr, i32) -> void;
+
+export add :: (a i32, b i32) -> i32 {
        return a + b;
 }
 
-export max :: (a: i32, b: i32) -> i32 {
+export max :: (a i32, b i32) -> i32 {
        /* Curly braces are required */
+       x := "String literal! HERE \\\"Woot Woot\" done";
+
        if a > b {
                return a;
        } else {