From: Brendan Hansen
Date: Mon, 11 May 2020 14:42:45 +0000 (-0500)
Subject: updates
X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=373b31c24884298219f4236676dfef0a1a524e30;p=onyx.git

updates
---

diff --git a/.vimspector.json b/.vimspector.json
index ee0c1bfc..72e3a7ab 100644
--- a/.vimspector.json
+++ b/.vimspector.json
@@ -6,7 +6,7 @@
             "type": "cppdbg",
             "request": "launch",
             "program": "${workspaceFolder}/onyx",
-            "args": ["demo.onyx"],
+            "args": ["progs/mvp.onyx"],
             "stopAtEntry": true,
             "cwd": "${workspaceFolder}",
             "environment": [],
diff --git a/bh.h b/bh.h
index 7eb4fd09..40dd979f 100644
--- a/bh.h
+++ b/bh.h
@@ -26,11 +26,11 @@ typedef i32 b32;
 //-------------------------------------------------------------------------------------
 // Better character functions
 //-------------------------------------------------------------------------------------
-inline b32 char_is_alpha(const char a);
-inline b32 char_is_num(const char a);
-inline b32 char_is_alphanum(const char a);
-inline b32 char_is_whitespace(const char a);
-inline b32 char_in_range(const char lo, const char hi, const char a);
+b32 char_is_alpha(const char a);
+b32 char_is_num(const char a);
+b32 char_is_alphanum(const char a);
+b32 char_is_whitespace(const char a);
+b32 char_in_range(const char lo, const char hi, const char a);
 char charset_contains(const char* charset, char ch);
 
 //-------------------------------------------------------------------------------------
@@ -168,23 +168,23 @@ i32 bh_file_contents_delete(bh_file_contents* contents);
 //-------------------------------------------------------------------------------------
 // CHAR FUNCTIONS
 //-------------------------------------------------------------------------------------
-inline b32 char_is_alpha(const char a) {
+b32 char_is_alpha(const char a) {
     return ('a' <= a && a <= 'z') || ('A' <= a && a <= 'Z');
 }
 
-inline b32 char_is_num(const char a) {
+b32 char_is_num(const char a) {
     return ('0' <= a && a <= '9');
 }
 
-inline b32 char_is_alphanum(const char a) {
+b32 char_is_alphanum(const char a) {
     return char_is_alpha(a) || char_is_num(a);
 }
 
-inline b32 char_is_whitespace(const char a) {
+b32 char_is_whitespace(const char a) {
     return charset_contains(" \t\r\n", a);
 }
 
-inline b32 char_in_range(const char lo, const char hi, const char a) {
+b32 char_in_range(const char lo, const char hi, const char a) {
     return lo <= a <= hi;
 }
 
diff --git a/onyx b/onyx
index 2d5a2d93..b0a2ad6f 100755
Binary files a/onyx and b/onyx differ
diff --git a/onyx.c b/onyx.c
index 970c85c8..9034852e 100644
--- a/onyx.c
+++ b/onyx.c
@@ -1,16 +1,20 @@
 #include <stdio.h> // TODO: Replace with custom lib
-#include // TODO: Replace with custom lib
 #include "bh.h"
 
 typedef struct Tokenizer {
     char *start, *curr, *end;
+
+    // TODO: Fix the line number and column count
     u64 line_number;
+    u64 line_column;
 } Tokenizer;
 
 typedef enum TokenType {
     TOKEN_TYPE_UNKNOWN,
     TOKEN_TYPE_END_STREAM,
 
+    TOKEN_TYPE_COMMENT,
+
     TOKEN_TYPE_KEYWORD_STRUCT,
     TOKEN_TYPE_KEYWORD_USE,
     TOKEN_TYPE_KEYWORD_EXPORT,
@@ -20,18 +24,33 @@ typedef enum TokenType {
     TOKEN_TYPE_KEYWORD_RETURN,
 
     TOKEN_TYPE_RIGHT_ARROW,
+    TOKEN_TYPE_LEFT_ARROW,
     TOKEN_TYPE_OPEN_PAREN,
     TOKEN_TYPE_CLOSE_PAREN,
     TOKEN_TYPE_OPEN_BRACE,
     TOKEN_TYPE_CLOSE_BRACE,
     TOKEN_TYPE_OPEN_BRACKET,
     TOKEN_TYPE_CLOSE_BRACKET,
+    TOKEN_TYPE_OPEN_ANGLE,
+    TOKEN_TYPE_CLOSE_ANGLE,
+
+    TOKEN_TYPE_SYM_PLUS,
+    TOKEN_TYPE_SYM_MINUS,
+    TOKEN_TYPE_SYM_STAR,
+    TOKEN_TYPE_SYM_PERCENT,
+    TOKEN_TYPE_SYM_FSLASH,
+    TOKEN_TYPE_SYM_BSLASH,
+    TOKEN_TYPE_SYM_COLON,
+    TOKEN_TYPE_SYM_SEMICOLON,
+    TOKEN_TYPE_SYM_COMMA,
+    TOKEN_TYPE_SYM_EQUALS,
+    TOKEN_TYPE_SYM_GRAVE,
+    TOKEN_TYPE_SYM_TILDE,
+    TOKEN_TYPE_SYM_BANG,
 
-    TOKEN_TYPE_OP_ADD,
-    TOKEN_TYPE_OP_SUB,
-    TOKEN_TYPE_OP_MUL,
-    TOKEN_TYPE_OP_DIV,
-    TOKEN_TYPE_OP_MOD,
+    TOKEN_TYPE_SYMBOL,
+    TOKEN_TYPE_LITERAL_STRING,
+    TOKEN_TYPE_LITERAL_NUMERIC,
 
     TOKEN_TYPE_COUNT
 } TokenType;
 
@@ -40,6 +59,8 @@ static const char* TokenTypeNames[] = {
     "TOKEN_TYPE_UNKNOWN",
     "TOKEN_TYPE_END_STREAM",
 
+    "TOKEN_TYPE_COMMENT",
+
     "TOKEN_TYPE_KEYWORD_STRUCT",
     "TOKEN_TYPE_KEYWORD_USE",
     "TOKEN_TYPE_KEYWORD_EXPORT",
@@ -49,18 +70,33 @@ static const char* TokenTypeNames[] = {
     "TOKEN_TYPE_KEYWORD_RETURN",
 
     "TOKEN_TYPE_RIGHT_ARROW",
+    "TOKEN_TYPE_LEFT_ARROW",
     "TOKEN_TYPE_OPEN_PAREN",
     "TOKEN_TYPE_CLOSE_PAREN",
     "TOKEN_TYPE_OPEN_BRACE",
     "TOKEN_TYPE_CLOSE_BRACE",
     "TOKEN_TYPE_OPEN_BRACKET",
     "TOKEN_TYPE_CLOSE_BRACKET",
+    "TOKEN_TYPE_OPEN_ANGLE",
+    "TOKEN_TYPE_CLOSE_ANGLE",
+
+    "TOKEN_TYPE_SYM_PLUS",
+    "TOKEN_TYPE_SYM_MINUS",
+    "TOKEN_TYPE_SYM_STAR",
+    "TOKEN_TYPE_SYM_PERCENT",
+    "TOKEN_TYPE_SYM_FSLASH",
+    "TOKEN_TYPE_SYM_BSLASH",
+    "TOKEN_TYPE_SYM_COLON",
+    "TOKEN_TYPE_SYM_SEMICOLON",
+    "TOKEN_TYPE_SYM_COMMA",
+    "TOKEN_TYPE_SYM_EQUALS",
+    "TOKEN_TYPE_SYM_GRAVE",
+    "TOKEN_TYPE_SYM_TILDE",
+    "TOKEN_TYPE_SYM_BANG",
 
-    "TOKEN_TYPE_OP_ADD",
-    "TOKEN_TYPE_OP_SUB",
-    "TOKEN_TYPE_OP_MUL",
-    "TOKEN_TYPE_OP_DIV",
-    "TOKEN_TYPE_OP_MOD",
+    "TOKEN_TYPE_SYMBOL",
+    "TOKEN_TYPE_LITERAL_STRING",
+    "TOKEN_TYPE_LITERAL_NUMERIC",
 
     "TOKEN_TYPE_COUNT"
 };
 
@@ -72,58 +108,172 @@ typedef struct Token {
     u64 line_number, line_column;
 } Token;
 
+#ifndef LITERAL_TOKEN
+#define LITERAL_TOKEN(token, token_type) \
+    if (token_lit(tokenizer, &tk, token, token_type)) goto token_parsed;
+#endif
+
+#ifndef INCREMENT_CURR_TOKEN
+#define INCREMENT_CURR_TOKEN(tkn) { \
+    tkn->curr++; \
+    tkn->line_column++; \
+    if (*tkn->curr == '\n') { \
+        tkn->line_number++; \
+        tkn->line_column = 1; \
+    } \
+}
+#endif
+
 b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) {
     i64 len = chars_match(tokenizer->curr, lit);
     if (len > 0) {
         tk->type = type;
         tk->token = tokenizer->curr;
         tk->length = len;
+        tokenizer->curr += len;
+        tokenizer->line_column += len;
+
         return 1;
     }
     return 0;
 }
 
 Token get_token(Tokenizer* tokenizer) {
-    #ifndef LITERAL_TOKEN
-    #define LITERAL_TOKEN(token, token_type) \
-        if (token_lit(tokenizer, &tk, token, token_type)) goto token_parsed;
-    #endif
-
     Token tk;
 
+    // Skip whitespace
+    while (char_is_whitespace(*tokenizer->curr) && tokenizer->curr != tokenizer->end)
+        INCREMENT_CURR_TOKEN(tokenizer)
+
     tk.type = TOKEN_TYPE_UNKNOWN;
     tk.token = tokenizer->curr;
     tk.length = 1;
-    tk.line_number = 0;
-    tk.line_column = 0;
+    tk.line_number = tokenizer->line_number;
+    tk.line_column = tokenizer->line_column;
 
     if (tokenizer->curr == tokenizer->end) {
         tk.type = TOKEN_TYPE_END_STREAM;
         goto token_parsed;
     }
 
+    // Comments
+    if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') {
+        tokenizer->curr += 2;
+        tk.type = TOKEN_TYPE_COMMENT;
+        tk.token = tokenizer->curr;
+        u16 layers = 1;
+
+        while (layers >= 1) {
+            INCREMENT_CURR_TOKEN(tokenizer);
+
+            if (tokenizer->curr == tokenizer->end) {
+                tk.type = TOKEN_TYPE_END_STREAM;
+                break;
+            }
+
+            if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') {
+                layers++;
+                INCREMENT_CURR_TOKEN(tokenizer);
+            }
+
+            if (*tokenizer->curr == '*' && *(tokenizer->curr + 1) == '/') {
+                layers--;
+                INCREMENT_CURR_TOKEN(tokenizer);
+            }
+        }
+
+        INCREMENT_CURR_TOKEN(tokenizer);
+
+        tk.length = tokenizer->curr - tk.token - 2;
+        goto token_parsed;
+    }
+
     LITERAL_TOKEN("struct", TOKEN_TYPE_KEYWORD_STRUCT);
     LITERAL_TOKEN("export", TOKEN_TYPE_KEYWORD_EXPORT);
     LITERAL_TOKEN("use", TOKEN_TYPE_KEYWORD_USE);
     LITERAL_TOKEN("if", TOKEN_TYPE_KEYWORD_IF);
-    LITERAL_TOKEN("else", TOKEN_TYPE_KEYWORD_IF);
+    LITERAL_TOKEN("else", TOKEN_TYPE_KEYWORD_ELSE);
     LITERAL_TOKEN("for", TOKEN_TYPE_KEYWORD_FOR);
     LITERAL_TOKEN("return", TOKEN_TYPE_KEYWORD_RETURN);
     LITERAL_TOKEN("->", TOKEN_TYPE_RIGHT_ARROW);
+    LITERAL_TOKEN("<-", TOKEN_TYPE_LEFT_ARROW);
     LITERAL_TOKEN("(", TOKEN_TYPE_OPEN_PAREN);
     LITERAL_TOKEN(")", TOKEN_TYPE_CLOSE_PAREN);
     LITERAL_TOKEN("{", TOKEN_TYPE_OPEN_BRACE);
     LITERAL_TOKEN("}", TOKEN_TYPE_CLOSE_BRACE);
     LITERAL_TOKEN("[", TOKEN_TYPE_OPEN_BRACKET);
     LITERAL_TOKEN("]", TOKEN_TYPE_CLOSE_BRACKET);
-    LITERAL_TOKEN("+", TOKEN_TYPE_OP_ADD);
-    LITERAL_TOKEN("-", TOKEN_TYPE_OP_SUB);
-    LITERAL_TOKEN("*", TOKEN_TYPE_OP_MUL);
-    LITERAL_TOKEN("/", TOKEN_TYPE_OP_DIV);
-    LITERAL_TOKEN("%", TOKEN_TYPE_OP_MOD);
+    LITERAL_TOKEN("<", TOKEN_TYPE_OPEN_ANGLE);
+    LITERAL_TOKEN(">", TOKEN_TYPE_CLOSE_ANGLE);
+    LITERAL_TOKEN("+", TOKEN_TYPE_SYM_PLUS);
+    LITERAL_TOKEN("-", TOKEN_TYPE_SYM_MINUS);
+    LITERAL_TOKEN("*", TOKEN_TYPE_SYM_STAR);
+    LITERAL_TOKEN("/", TOKEN_TYPE_SYM_FSLASH);
+    LITERAL_TOKEN("%", TOKEN_TYPE_SYM_PERCENT);
+    LITERAL_TOKEN("\\", TOKEN_TYPE_SYM_BSLASH);
+    LITERAL_TOKEN(":", TOKEN_TYPE_SYM_COLON);
+    LITERAL_TOKEN(";", TOKEN_TYPE_SYM_SEMICOLON);
+    LITERAL_TOKEN(",", TOKEN_TYPE_SYM_COMMA);
+    LITERAL_TOKEN("=", TOKEN_TYPE_SYM_EQUALS);
+    LITERAL_TOKEN("`", TOKEN_TYPE_SYM_GRAVE);
+    LITERAL_TOKEN("~", TOKEN_TYPE_SYM_TILDE);
+    LITERAL_TOKEN("!", TOKEN_TYPE_SYM_BANG);
+
+    // Symbols
+    if (char_is_alpha(*tk.token)) {
+        u64 len = 0;
+        while (char_is_alphanum(*tokenizer->curr) || charset_contains("_$", *tokenizer->curr)) {
+            len++;
+            INCREMENT_CURR_TOKEN(tokenizer);
+        }
+
+        tk.length = len;
+        tk.type = TOKEN_TYPE_SYMBOL;
+        goto token_parsed;
+    }
+
+    // String literal
+    if (*tk.token == '"') {
+        u64 len = 0;
+        u64 slash_count = 0;
+
+        INCREMENT_CURR_TOKEN(tokenizer);
+
+        while (!(*tokenizer->curr == '"' && slash_count == 0)) {
+            len++;
+
+            if (*tokenizer->curr == '\\') {
+                slash_count += 1;
+                slash_count %= 2;
+            } else {
+                slash_count = 0;
+            }
+
+            INCREMENT_CURR_TOKEN(tokenizer);
+        }
+
+        INCREMENT_CURR_TOKEN(tokenizer);
+
+        tk.token++;
+        tk.type = TOKEN_TYPE_LITERAL_STRING;
+        tk.length = len;
+        goto token_parsed;
+    }
+
+    // Number literal
+    if (char_is_num(*tokenizer->curr)) {
+        u64 len = 0;
+        while (char_is_num(*tokenizer->curr) || *tokenizer->curr == '.') {
+            len++;
+            INCREMENT_CURR_TOKEN(tokenizer);
+        }
+
+        tk.type = TOKEN_TYPE_LITERAL_NUMERIC;
+        tk.length = len;
+    }
 
-    tokenizer->curr++; // Ignore token
+    INCREMENT_CURR_TOKEN(tokenizer);
 
 token_parsed:
     return tk;
@@ -143,8 +293,9 @@ int main(int argc, char *argv[]) {
     Tokenizer tknizer = {
         .start = fc.data,
         .curr = fc.data,
-        .end = fc.data + fc.length,
+        .end = fc.data + fc.length - 1,
         .line_number = 1,
+        .line_column = 1,
     };
 
     Token tk;
@@ -152,7 +303,7 @@
         tk = get_token(&tknizer);
         char c = *(tk.token + tk.length);
         *(tk.token + tk.length) = '\0';
-        printf("%s: %s\n", TokenTypeNames[tk.type], tk.token);
+        printf("Line %ld, Column %ld: \n%s: %s\n", tk.line_number, tk.line_column, TokenTypeNames[tk.type], tk.token);
         *(tk.token + tk.length) = c;
     } while (tk.type != TOKEN_TYPE_END_STREAM);
 
diff --git a/progs/demo.onyx b/progs/demo.onyx
index 32a37db1..b0d3cf07 100644
--- a/progs/demo.onyx
+++ b/progs/demo.onyx
@@ -7,5 +7,6 @@ use "core"; /* Looks for "core.onyx" in the current directory */
 Foo :: struct { x i32, y i32 };
 
 add :: (a i32, b i32) -> i32 {
-    return a + b;
-};
\ No newline at end of file
+    return a + b + 1234.56;
+};
+
diff --git a/progs/mvp.onyx b/progs/mvp.onyx
index 10f24015..cf693c37 100644
--- a/progs/mvp.onyx
+++ b/progs/mvp.onyx
@@ -1,11 +1,17 @@
-/* Comments need to be parsed */
+/* Comments need to be parsed
+    /* nested comments /* are /* okay */ */ */
+*/
 
-export add :: (a: i32, b: i32) -> i32 {
+foreign "console" "log" :: (ptr, i32) -> void;
+
+export add :: (a i32, b i32) -> i32 {
     return a + b;
 }
 
-export max :: (a: i32, b: i32) -> i32 {
+export max :: (a i32, b i32) -> i32 {
     /* Curly braces are required */
+    x := "String literal! HERE \\\"Woot Woot\" done";
+
     if a > b {
         return a;
     } else {
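
The nested block-comment handling that get_token() introduces above (a depth counter that grows on "/*" and shrinks on "*/") can be tried in isolation. Below is a minimal standalone C sketch of the same idea; the function name skip_nested_comment and the sample input are illustrative only and are not part of this commit.

// Standalone sketch of the nested block-comment scanning idea used by
// get_token() in the patch above: each "/*" increases a depth counter,
// each "*/" decreases it, and the comment ends when the depth returns to zero.
#include <stdio.h>
#include <string.h>

static const char* skip_nested_comment(const char* curr, const char* end) {
    // Only act when positioned on a comment opener.
    if (curr + 1 >= end || curr[0] != '/' || curr[1] != '*') return curr;

    int depth = 0;
    while (curr + 1 < end) {
        if (curr[0] == '/' && curr[1] == '*') { depth++; curr += 2; continue; }
        if (curr[0] == '*' && curr[1] == '/') {
            depth--;
            curr += 2;
            if (depth == 0) return curr; // outermost comment closed
            continue;
        }
        curr++;
    }
    return end; // unterminated comment: consume the rest of the buffer
}

int main(void) {
    const char* src = "/* outer /* inner */ still outer */ rest";
    const char* after = skip_nested_comment(src, src + strlen(src));
    printf("scanning resumes at: \"%s\"\n", after); // prints: scanning resumes at: " rest"
    return 0;
}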