From: Brendan Hansen Date: Tue, 12 May 2020 23:33:38 +0000 (-0500) Subject: Commiting at end of day. Working on rewriting the bh_string library to include better... X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=11ecfdfd9c5df475250e53a4fca2ceee8c144410;p=onyx.git Commiting at end of day. Working on rewriting the bh_string library to include better support for C-strs --- diff --git a/bh.h b/bh.h index c322aa80..f6574665 100644 --- a/bh.h +++ b/bh.h @@ -1,7 +1,6 @@ #ifndef BH_H #define BH_H -#include #include #include #include @@ -43,15 +42,19 @@ i64 chars_match(char* ptr1, char* ptr2); #define bh_max(a, b) ((a) > (b) ? (a) : (b)) #define bh_min(a, b) ((a) < (b) ? (a) : (b)) #define bh_clamp(v, a, b) (bh_min((b), bh_max((a), (v)))) +#define bh_abs(x) ((x) < 0 ? -(x) : (x)) //------------------------------------------------------------------------------------- // Better strings //------------------------------------------------------------------------------------- -typedef struct bh_string { - u8* data; +typedef struct bh__string { u64 length; u64 capacity; -} bh_string; +} bh__string; + +typedef char bh_string; + +#define bh__stringhead(x) (((bh__string *)(x)) - 1) #define bh_string_new(x) _Generic((x), \ unsigned long: bh_string_new_cap, \ @@ -266,47 +269,52 @@ i64 chars_match(char* ptr1, char* ptr2) { //------------------------------------------------------------------------------------- // STRING IMPLEMENTATION //------------------------------------------------------------------------------------- -bh_string bh_string_new_cap(unsigned long cap) { - bh_string str; - str.data = (u8*) malloc(sizeof(u8) * cap); - str.length = 0; - str.capacity = cap; - return str; +bh_string* bh_string_new_cap(unsigned long cap) { + bh__string* str; + str = (bh__string*) malloc(sizeof(*str) + sizeof(char) * cap + 1); + str[0] = 0; + return str + 1; } -bh_string bh_string_new_str(const char* cstr) { +bh_string* bh_string_new_str(const char* cstr) { const i32 len = strlen(cstr); - bh_string str; + bh__string* str; i32 i; - str.data = (u8*) malloc(sizeof(u8) * len); + str = malloc(sizeof(*str) + sizeof(char) * len + 1); + char* data = (char*) (str + 1); for (i = 0; i < len; i++) { - str.data[i] = cstr[i]; + data[i] = cstr[i]; } - str.length = len; - str.capacity = len; - return str; + data[i] = 0; // Always null terminate the string + + str->length = len; + str->capacity = len; + return str + 1; } -b32 bh_string_delete(bh_string* str) { - free(str->data); +b32 bh_string_delete(bh_string** str) { + bh__string* strptr = bh__stringhead(*str); + free(strptr); str->length = 0; str->capacity = 0; return 1; } -b32 bh_string_ensure_capacity(bh_string* str, u64 cap) { - if (str->capacity >= cap) return 1; +b32 bh_string_grow(bh_string** str, u64 cap) { + bh__string* strptr = bh__stringhead(*str); + if (strptr->capacity >= cap) return 1; + + void* p; + p = realloc(strptr, sizeof(*strptr) + sizeof(char) * cap + 1); - //TODO: This could fail - str->data = (u8*) realloc((void*) str->data, sizeof(u8) * cap); - str->capacity = cap; + strptr->capacity = cap; return 1; } -void bh_string_append_bh_string(bh_string* str1, bh_string* str2) { +void bh_string_append_bh_string(bh_string** str1, bh_string** str2) { if (!bh_string_ensure_capacity(str1, str1->length + str2->length)) return; //TODO: Replace with custom memory management diff --git a/docs/parse_grammar b/docs/parse_grammar new file mode 100644 index 00000000..0a07b3f7 --- /dev/null +++ b/docs/parse_grammar @@ -0,0 +1,72 @@ +Note: ~ is empty +Goal: Design the language to have no ambiguity + + +SOURCE_FILE = TOP_LEVEL_STATEMENT TOKEN_TYPE_SYM_SEMICOLON SOURCE_FILE | ~ + +TOP_LEVEL_STATEMENT + = USE_DECLARATION + | EXPORT_DECLARATION + | FOREIGN_DECLARATION + | TOP_LEVEL_DECLARATION + | COMMENT + +COMMENT = TOKEN_TYPE_COMMENT + +USE_DECLARATION = use TOKEN_TYPE_LITERAL_STRING + +EXPORT_DECLARATION = export TOP_LEVEL_DECLARATION + +TOP_LEVEL_DECLARATION + = FUNCTION_DECLARATION + | STRUCT_DECLARATION + | GLOBAL_DECLARATION + +FUNCTION_DECLARATION = proc TOKEN_TYPE_SYMBOL FUNCTION_TYPE BLOCK + +FUNCTION_TYPE = :: ( FUNCTION_PARAMS ) -> TOKEN_TYPE_SYMBOL + +BLOCK = { STATEMENTS } + +STATEMENTS = STATEMENT ; STATEMENTS | ~ + +STATEMENT + = ASSIGNMENT_STATEMENT + | IF_STATEMENT + | FOR_STATEMENT + | RETURN_STATEMENT + | EXPRESSION + +ASSIGNMENT_STATEMENT = TOKEN_TYPE_SYMBOL = EXPRESSION + +IF_STATEMENT + = if EXPRESSION BLOCK ELSE_IF_STATEMENT ELSE_STATEMENT + +ELSEIF_STATEMENT = TOKEN_TYPE_KEYWORD_ELSEIF EXPRESSION BLOCK ELSEIF_STATEMENT | ~ + +ELSE_STATEMENT = TOKEN_TYPE_KEYWORD_ELSE BLOCK | ~ + +-- This needs to be better +FOR_STATEMENT = for STATEMENT ; EXPRESSION ; STATEMENT BLOCK + +RETURN_STATEMENT = return EXPRESSION + +-- Remove abiguity in implementation +EXPRESSION + = EXPRESSION + EXPRESSION + | EXPRESSION - EXPRESSION + | EXPRESSION * EXPRESSION + | EXPRESSION / EXPRESSION + | EXPRESSION % EXPRESSION + | do BLOCK + | FUNCTION_CALL -- This could have some abiguity with just the symbol + | ( EXPRESSION ) + | TOKEN_TYPE_SYMBOL + +FUNCTION_CALL = TOKEN_TYPE_SYMBOL ( EXPRESSION_LIST ) + +-- Implement just using a loop +COMMA_LIST(T) = T | T , COMMA_LIST(T) + +FUNCTION_PARAMS = COMMA_LIST(TOKEN_TYPE_SYMBOL :: TOKEN_TYPE_SYMBOL) +EXPRESSION_LIST = COMMA_LIST(EXPRESSION) diff --git a/docs/plan b/docs/plan index 70cd353e..666e18db 100644 --- a/docs/plan +++ b/docs/plan @@ -40,14 +40,15 @@ foo :: (a: i32) -> Foo { MVP CODE: -// Comments need to be parsed +/* Comments need to be parsed */ -export add :: (a: i32, b: i32) -> i32 { +export proc add :: (a i32, b i32) -> i32 { return a + b; } -export max :: (a: i32, b: i32) -> i32 { - // Curly braces are required +export proc max :: (a i32, b i32) -> i32 { + /* Curly braces are required */ + if a > b { return a; } else { diff --git a/onyx b/onyx index 690a2c82..e6cb94a2 100755 Binary files a/onyx and b/onyx differ diff --git a/onyx.c b/onyx.c index d4b8f88e..32e38f5b 100644 --- a/onyx.c +++ b/onyx.c @@ -15,7 +15,7 @@ bh_arr(Token) parse_tokens(bh_file_contents *fc) { }; bh_arr(Token) token_arr = NULL; - bh_arr_grow(token_arr, 1024); + bh_arr_grow(token_arr, 512); Token tk; do { @@ -41,6 +41,10 @@ int main(int argc, char *argv[]) { printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr)); + for (Token* it = token_arr; !bh_arr_end(token_arr, it); it++) { + printf("%s\n", get_token_type_name(*it)); + } + bh_file_contents_delete(&fc); bh_arr_free(token_arr); diff --git a/onyxlex.c b/onyxlex.c index 38c54c4c..af930ded 100644 --- a/onyxlex.c +++ b/onyxlex.c @@ -13,8 +13,11 @@ static const char* TokenTypeNames[] = { "TOKEN_TYPE_KEYWORD_IF", "TOKEN_TYPE_KEYWORD_ELSE", "TOKEN_TYPE_KEYWORD_FOR", + "TOKEN_TYPE_KEYWORD_DO", "TOKEN_TYPE_KEYWORD_RETURN", "TOKEN_TYPE_KEYWORD_FOREIGN", + "TOKEN_TYPE_KEYWORD_PROC", + "TOKEN_TYPE_KEYWORD_GLOBAL", "TOKEN_TYPE_RIGHT_ARROW", "TOKEN_TYPE_LEFT_ARROW", @@ -33,6 +36,7 @@ static const char* TokenTypeNames[] = { "TOKEN_TYPE_SYM_PERCENT", "TOKEN_TYPE_SYM_FSLASH", "TOKEN_TYPE_SYM_BSLASH", + "TOKEN_TYPE_SYM_TYPE_SIGNATURE", "TOKEN_TYPE_SYM_COLON", "TOKEN_TYPE_SYM_SEMICOLON", "TOKEN_TYPE_SYM_COMMA", @@ -40,6 +44,8 @@ static const char* TokenTypeNames[] = { "TOKEN_TYPE_SYM_GRAVE", "TOKEN_TYPE_SYM_TILDE", "TOKEN_TYPE_SYM_BANG", + "TOKEN_TYPE_SYM_CARET", + "TOKEN_TYPE_SYM_AMPERSAND", "TOKEN_TYPE_SYMBOL", "TOKEN_TYPE_LITERAL_STRING", @@ -142,6 +148,9 @@ Token get_token(Tokenizer* tokenizer) { LITERAL_TOKEN("foreign", TOKEN_TYPE_KEYWORD_FOREIGN); LITERAL_TOKEN("for", TOKEN_TYPE_KEYWORD_FOR); LITERAL_TOKEN("return", TOKEN_TYPE_KEYWORD_RETURN); + LITERAL_TOKEN("do", TOKEN_TYPE_KEYWORD_DO); + LITERAL_TOKEN("proc", TOKEN_TYPE_KEYWORD_PROC); + LITERAL_TOKEN("global", TOKEN_TYPE_KEYWORD_GLOBAL); LITERAL_TOKEN("->", TOKEN_TYPE_RIGHT_ARROW); LITERAL_TOKEN("<-", TOKEN_TYPE_RIGHT_ARROW); LITERAL_TOKEN("(", TOKEN_TYPE_OPEN_PAREN); @@ -158,6 +167,7 @@ Token get_token(Tokenizer* tokenizer) { LITERAL_TOKEN("/", TOKEN_TYPE_SYM_FSLASH); LITERAL_TOKEN("%", TOKEN_TYPE_SYM_PERCENT); LITERAL_TOKEN("\\", TOKEN_TYPE_SYM_BSLASH); + LITERAL_TOKEN("::", TOKEN_TYPE_SYM_TYPE_SIGNATURE); LITERAL_TOKEN(":", TOKEN_TYPE_SYM_COLON); LITERAL_TOKEN(";", TOKEN_TYPE_SYM_SEMICOLON); LITERAL_TOKEN(",", TOKEN_TYPE_SYM_COMMA); @@ -165,6 +175,8 @@ Token get_token(Tokenizer* tokenizer) { LITERAL_TOKEN("`", TOKEN_TYPE_SYM_GRAVE); LITERAL_TOKEN("~", TOKEN_TYPE_SYM_TILDE); LITERAL_TOKEN("!", TOKEN_TYPE_SYM_BANG); + LITERAL_TOKEN("^", TOKEN_TYPE_SYM_CARET); + LITERAL_TOKEN("&", TOKEN_TYPE_SYM_AMPERSAND); // Symbols if (char_is_alpha(*tk.token)) { diff --git a/onyxlex.h b/onyxlex.h index 8fb15967..608d133b 100644 --- a/onyxlex.h +++ b/onyxlex.h @@ -23,8 +23,11 @@ typedef enum TokenType { TOKEN_TYPE_KEYWORD_IF, TOKEN_TYPE_KEYWORD_ELSE, TOKEN_TYPE_KEYWORD_FOR, + TOKEN_TYPE_KEYWORD_DO, TOKEN_TYPE_KEYWORD_RETURN, TOKEN_TYPE_KEYWORD_FOREIGN, + TOKEN_TYPE_KEYWORD_PROC, + TOKEN_TYPE_KEYWORD_GLOBAL, TOKEN_TYPE_RIGHT_ARROW, TOKEN_TYPE_LEFT_ARROW, @@ -43,6 +46,7 @@ typedef enum TokenType { TOKEN_TYPE_SYM_PERCENT, TOKEN_TYPE_SYM_FSLASH, TOKEN_TYPE_SYM_BSLASH, + TOKEN_TYPE_SYM_TYPE_SIGNATURE, TOKEN_TYPE_SYM_COLON, TOKEN_TYPE_SYM_SEMICOLON, TOKEN_TYPE_SYM_COMMA, @@ -50,6 +54,8 @@ typedef enum TokenType { TOKEN_TYPE_SYM_GRAVE, TOKEN_TYPE_SYM_TILDE, TOKEN_TYPE_SYM_BANG, + TOKEN_TYPE_SYM_CARET, + TOKEN_TYPE_SYM_AMPERSAND, TOKEN_TYPE_SYMBOL, TOKEN_TYPE_LITERAL_STRING, diff --git a/progs/mvp.onyx b/progs/mvp.onyx index cf693c37..1a8d8c00 100644 --- a/progs/mvp.onyx +++ b/progs/mvp.onyx @@ -2,13 +2,13 @@ /* nested comments /* are /* okay */ */ */ */ -foreign "console" "log" :: (ptr, i32) -> void; +foreign proc "console" "log" :: (data ptr, length i32) -> void; -export add :: (a i32, b i32) -> i32 { +export proc add :: (a i32, b i32) -> i32 { return a + b; -} +}; -export max :: (a i32, b i32) -> i32 { +export proc max :: (a i32, b i32) -> i32 { /* Curly braces are required */ x := "String literal! HERE \\\"Woot Woot\" done"; @@ -17,4 +17,4 @@ export max :: (a i32, b i32) -> i32 { } else { return b; } -} \ No newline at end of file +}; \ No newline at end of file