From: Brendan Hansen Date: Mon, 11 May 2020 22:29:00 +0000 (-0500) Subject: Split project up and working on bh_arr X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=094182ca4197f568880ce15fc94c19a796118a7f;p=onyx.git Split project up and working on bh_arr --- diff --git a/Makefile b/Makefile index 2f544b46..187b707f 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,5 @@ OBJ_FILES=\ + onyxlex.o \ onyx.o CC=gcc @@ -10,7 +11,7 @@ FLAGS=-g $(CC) $(FLAGS) -c $< -o $@ $(INCLUDES) onyx: $(OBJ_FILES) - $(CC) $(FLAGS) $< -o $@ $(LIBS) + $(CC) $(FLAGS) $? -o $@ $(LIBS) clean: rm $(OBJ_FILES) 2>&1 >/dev/null diff --git a/bh.h b/bh.h index 40dd979f..16a51c87 100644 --- a/bh.h +++ b/bh.h @@ -1,3 +1,6 @@ +#ifndef BH_H +#define BH_H + #include #include #include @@ -32,6 +35,7 @@ b32 char_is_alphanum(const char a); b32 char_is_whitespace(const char a); b32 char_in_range(const char lo, const char hi, const char a); char charset_contains(const char* charset, char ch); +i64 chars_match(char* ptr1, char* ptr2); //------------------------------------------------------------------------------------- // Better strings @@ -150,15 +154,39 @@ bh_file_contents bh_file_read_contents_bh_file(bh_file* file); bh_file_contents bh_file_read_contents_direct(const char* filename); i32 bh_file_contents_delete(bh_file_contents* contents); +//------------------------------------------------------------------------------------- +// Better arrays +//------------------------------------------------------------------------------------- +typedef struct bh__arr { + i32 length, capacity; +} bh__arr; +#define bh_arr(T) T* +#define bh__arrhead(arr) (((bh__arr *)(arr)) - 1) +#define bh_arr_length(arr) (arr ? bh__arrhead(arr)->length : 0) +#define bh_arr_capacity(arr) (arr ? bh__arrhead(arr)->capacity : 0) +#define bh_arr_valid(arr, i) (arr ? (int)(i) < bh__arrhead(arr)->length : 0) +#define bh_arr_set_length(arr, n) (bh__arr_ensure_capacity((void **) &arr, sizeof(arr[0]), n), bh__arrhead(arr)->length = n) +#define bh_arr_pop(arr) ((arr)[--bh__arrhead(arr)->length]) +#define bh_arr_last(arr) ((arr)[bh__arrhead(arr)->length - 1]) +#define bh_arr_end(arr, i) ((i) >= &(arr)[bh_arr_length(arr)]) +#define bh_arr_new(arr, cap) (bh__arr_ensure_capacity((void**) &arr, sizeof(arr[0]), cap)) +#define bh_arr_free(arr) (bh__arr_free((void**) &arr)) +#define bh_arr_copy(arr) (bh__arr_copy(arr, sizeof(arr[0]))) +#define bh_arr_insert_end(arr, n) ( \ + bh__arr_ensure_capacity((void**) &arr, sizeof(arr[0]), bh_arr_length(arr) + n), \ + bh__arrhead(arr)->length += n) +b32 bh__arr_ensure_capacity(void** arr, int elemsize, int cap); +b32 bh__arr_free(void **arr); +void* bh__arr_copy(void *arr, int elemsize); - - +#ifdef BH_DEFINE +#undef BH_DEFINE //------------------------------------------------------------------------------------- @@ -509,3 +537,51 @@ b32 bh_file_contents_delete(bh_file_contents* contents) { contents->length = 0; return 1; } + +//------------------------------------------------------------------------------------- +// ARRAY IMPLEMENTATION +//------------------------------------------------------------------------------------- + +b32 bh__arr_ensure_capacity(void** arr, int elemsize, int cap) { + bh__arr* arrptr; + + if (*arr == NULL) { + if (cap == 0 && elemsize == 0) return 1; + + arrptr = (bh__arr *) malloc(sizeof(*arrptr) + elemsize * cap); + arrptr->capacity = cap; + arrptr->length = 0; + + } else { + arrptr = bh__arrhead(*arr); + if (arrptr->length > cap) return 1; + + if (arrptr->capacity < cap) { + void* p; + int newcap = arrptr->capacity ? arrptr->capacity : 4; + while (newcap < cap) newcap <<= 1; + + p = realloc(arrptr, sizeof(*arrptr) + elemsize * newcap); + + if (p) { + arrptr = (bh__arr *) p; + arrptr->capacity = newcap; + } else { + return 0; + } + } + } + + *arr = arrptr + 1; + return 1; +} + +b32 bh__arr_free(void **arr) { + bh__arr* arrptr = bh__arrhead(*arr); + free(arrptr); +} + + +#endif // ifdef BH_DEFINE + +#endif // ifndef BH_H diff --git a/onyx b/onyx index b0a2ad6f..6ea581cc 100755 Binary files a/onyx and b/onyx differ diff --git a/onyx.c b/onyx.c index 9034852e..100dad99 100644 --- a/onyx.c +++ b/onyx.c @@ -1,285 +1,33 @@ -#include // TODO: Replace with custom lib +#define BH_DEFINE #include "bh.h" -typedef struct Tokenizer { - char *start, *curr, *end; - - // TODO: Fix the line number and column count - u64 line_number; - u64 line_column; -} Tokenizer; - -typedef enum TokenType { - TOKEN_TYPE_UNKNOWN, - TOKEN_TYPE_END_STREAM, - - TOKEN_TYPE_COMMENT, - - TOKEN_TYPE_KEYWORD_STRUCT, - TOKEN_TYPE_KEYWORD_USE, - TOKEN_TYPE_KEYWORD_EXPORT, - TOKEN_TYPE_KEYWORD_IF, - TOKEN_TYPE_KEYWORD_ELSE, - TOKEN_TYPE_KEYWORD_FOR, - TOKEN_TYPE_KEYWORD_RETURN, - - TOKEN_TYPE_RIGHT_ARROW, - TOKEN_TYPE_LEFT_ARROW, - TOKEN_TYPE_OPEN_PAREN, - TOKEN_TYPE_CLOSE_PAREN, - TOKEN_TYPE_OPEN_BRACE, - TOKEN_TYPE_CLOSE_BRACE, - TOKEN_TYPE_OPEN_BRACKET, - TOKEN_TYPE_CLOSE_BRACKET, - TOKEN_TYPE_OPEN_ANGLE, - TOKEN_TYPE_CLOSE_ANGLE, - - TOKEN_TYPE_SYM_PLUS, - TOKEN_TYPE_SYM_MINUS, - TOKEN_TYPE_SYM_STAR, - TOKEN_TYPE_SYM_PERCENT, - TOKEN_TYPE_SYM_FSLASH, - TOKEN_TYPE_SYM_BSLASH, - TOKEN_TYPE_SYM_COLON, - TOKEN_TYPE_SYM_SEMICOLON, - TOKEN_TYPE_SYM_COMMA, - TOKEN_TYPE_SYM_EQUALS, - TOKEN_TYPE_SYM_GRAVE, - TOKEN_TYPE_SYM_TILDE, - TOKEN_TYPE_SYM_BANG, - - TOKEN_TYPE_SYMBOL, - TOKEN_TYPE_LITERAL_STRING, - TOKEN_TYPE_LITERAL_NUMERIC, - - TOKEN_TYPE_COUNT -} TokenType; - -static const char* TokenTypeNames[] = { - "TOKEN_TYPE_UNKNOWN", - "TOKEN_TYPE_END_STREAM", - - "TOKEN_TYPE_COMMENT", - - "TOKEN_TYPE_KEYWORD_STRUCT", - "TOKEN_TYPE_KEYWORD_USE", - "TOKEN_TYPE_KEYWORD_EXPORT", - "TOKEN_TYPE_KEYWORD_IF", - "TOKEN_TYPE_KEYWORD_ELSE", - "TOKEN_TYPE_KEYWORD_FOR", - "TOKEN_TYPE_KEYWORD_RETURN", - - "TOKEN_TYPE_RIGHT_ARROW", - "TOKEN_TYPE_LEFT_ARROW", - "TOKEN_TYPE_OPEN_PAREN", - "TOKEN_TYPE_CLOSE_PAREN", - "TOKEN_TYPE_OPEN_BRACE", - "TOKEN_TYPE_CLOSE_BRACE", - "TOKEN_TYPE_OPEN_BRACKET", - "TOKEN_TYPE_CLOSE_BRACKET", - "TOKEN_TYPE_OPEN_ANGLE", - "TOKEN_TYPE_CLOSE_ANGLE", - - "TOKEN_TYPE_SYM_PLUS", - "TOKEN_TYPE_SYM_MINUS", - "TOKEN_TYPE_SYM_STAR", - "TOKEN_TYPE_SYM_PERCENT", - "TOKEN_TYPE_SYM_FSLASH", - "TOKEN_TYPE_SYM_BSLASH", - "TOKEN_TYPE_SYM_COLON", - "TOKEN_TYPE_SYM_SEMICOLON", - "TOKEN_TYPE_SYM_COMMA", - "TOKEN_TYPE_SYM_EQUALS", - "TOKEN_TYPE_SYM_GRAVE", - "TOKEN_TYPE_SYM_TILDE", - "TOKEN_TYPE_SYM_BANG", - - "TOKEN_TYPE_SYMBOL", - "TOKEN_TYPE_LITERAL_STRING", - "TOKEN_TYPE_LITERAL_NUMERIC", - - "TOKEN_TYPE_COUNT" -}; - -typedef struct Token { - TokenType type; - char* token; - isize length; - u64 line_number, line_column; -} Token; - -#ifndef LITERAL_TOKEN -#define LITERAL_TOKEN(token, token_type) \ - if (token_lit(tokenizer, &tk, token, token_type)) goto token_parsed; -#endif - -#ifndef INCREMENT_CURR_TOKEN -#define INCREMENT_CURR_TOKEN(tkn) { \ - tkn->curr++; \ - tkn->line_column++; \ - if (*tkn->curr == '\n') { \ - tkn->line_number++; \ - tkn->line_column = 1; \ - } \ -} -#endif - -b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) { - i64 len = chars_match(tokenizer->curr, lit); - if (len > 0) { - tk->type = type; - tk->token = tokenizer->curr; - tk->length = len; - - tokenizer->curr += len; - tokenizer->line_column += len; - - return 1; - } - return 0; -} - -Token get_token(Tokenizer* tokenizer) { - Token tk; - - // Skip whitespace - while (char_is_whitespace(*tokenizer->curr) && tokenizer->curr != tokenizer->end) - INCREMENT_CURR_TOKEN(tokenizer) - - tk.type = TOKEN_TYPE_UNKNOWN; - tk.token = tokenizer->curr; - tk.length = 1; - tk.line_number = tokenizer->line_number; - tk.line_column = tokenizer->line_column; - - if (tokenizer->curr == tokenizer->end) { - tk.type = TOKEN_TYPE_END_STREAM; - goto token_parsed; - } - - // Comments - if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') { - tokenizer->curr += 2; - tk.type = TOKEN_TYPE_COMMENT; - tk.token = tokenizer->curr; - u16 layers = 1; - - while (layers >= 1) { - INCREMENT_CURR_TOKEN(tokenizer); - - if (tokenizer->curr == tokenizer->end) { - tk.type = TOKEN_TYPE_END_STREAM; - break; - } - - if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') { - layers++; - INCREMENT_CURR_TOKEN(tokenizer); - } - - if (*tokenizer->curr == '*' && *(tokenizer->curr + 1) == '/') { - layers--; - INCREMENT_CURR_TOKEN(tokenizer); - } - } - - INCREMENT_CURR_TOKEN(tokenizer); - - tk.length = tokenizer->curr - tk.token - 2; - goto token_parsed; - } - - LITERAL_TOKEN("struct", TOKEN_TYPE_KEYWORD_STRUCT); - LITERAL_TOKEN("export", TOKEN_TYPE_KEYWORD_EXPORT); - LITERAL_TOKEN("use", TOKEN_TYPE_KEYWORD_USE); - LITERAL_TOKEN("if", TOKEN_TYPE_KEYWORD_IF); - LITERAL_TOKEN("else", TOKEN_TYPE_KEYWORD_ELSE); - LITERAL_TOKEN("for", TOKEN_TYPE_KEYWORD_FOR); - LITERAL_TOKEN("return", TOKEN_TYPE_KEYWORD_RETURN); - LITERAL_TOKEN("->", TOKEN_TYPE_RIGHT_ARROW); - LITERAL_TOKEN("<-", TOKEN_TYPE_RIGHT_ARROW); - LITERAL_TOKEN("(", TOKEN_TYPE_OPEN_PAREN); - LITERAL_TOKEN(")", TOKEN_TYPE_CLOSE_PAREN); - LITERAL_TOKEN("{", TOKEN_TYPE_OPEN_BRACE); - LITERAL_TOKEN("}", TOKEN_TYPE_CLOSE_BRACE); - LITERAL_TOKEN("[", TOKEN_TYPE_OPEN_BRACKET); - LITERAL_TOKEN("]", TOKEN_TYPE_CLOSE_BRACKET); - LITERAL_TOKEN("<", TOKEN_TYPE_OPEN_ANGLE); - LITERAL_TOKEN(">", TOKEN_TYPE_CLOSE_ANGLE); - LITERAL_TOKEN("+", TOKEN_TYPE_SYM_PLUS); - LITERAL_TOKEN("-", TOKEN_TYPE_SYM_MINUS); - LITERAL_TOKEN("*", TOKEN_TYPE_SYM_STAR); - LITERAL_TOKEN("/", TOKEN_TYPE_SYM_FSLASH); - LITERAL_TOKEN("%", TOKEN_TYPE_SYM_PERCENT); - LITERAL_TOKEN("\\", TOKEN_TYPE_SYM_BSLASH); - LITERAL_TOKEN(":", TOKEN_TYPE_SYM_COLON); - LITERAL_TOKEN(";", TOKEN_TYPE_SYM_SEMICOLON); - LITERAL_TOKEN(",", TOKEN_TYPE_SYM_COMMA); - LITERAL_TOKEN("=", TOKEN_TYPE_SYM_EQUALS); - LITERAL_TOKEN("`", TOKEN_TYPE_SYM_GRAVE); - LITERAL_TOKEN("~", TOKEN_TYPE_SYM_TILDE); - LITERAL_TOKEN("!", TOKEN_TYPE_SYM_BANG); - - // Symbols - if (char_is_alpha(*tk.token)) { - u64 len = 0; - while (char_is_alphanum(*tokenizer->curr) || charset_contains("_$", *tokenizer->curr)) { - len++; - INCREMENT_CURR_TOKEN(tokenizer); - } - - tk.length = len; - tk.type = TOKEN_TYPE_SYMBOL; - goto token_parsed; - } - - // String literal - if (*tk.token == '"') { - u64 len = 0; - u64 slash_count = 0; - - INCREMENT_CURR_TOKEN(tokenizer); - - while (!(*tokenizer->curr == '"' && slash_count == 0)) { - len++; +#include // TODO: Replace with custom lib - if (*tokenizer->curr == '\\') { - slash_count += 1; - slash_count %= 2; - } else { - slash_count = 0; - } +#include "onyxlex.h" - INCREMENT_CURR_TOKEN(tokenizer); - } +int main(int argc, char const *argv[]) { + bh_arr(int) arr = NULL; // Must initialize to NULL + bh_arr_new(arr, 0); - INCREMENT_CURR_TOKEN(tokenizer); + bh_arr_set_length(arr, 10); + for (int i = 0; i < 10; i++) + arr[i] = i; + printf("Length: %d\nCapacity: %d\n", bh_arr_length(arr), bh_arr_capacity(arr)); - tk.token++; - tk.type = TOKEN_TYPE_LITERAL_STRING; - tk.length = len; - goto token_parsed; - } + bh_arr_set_length(arr, 0); - // Number literal - if (char_is_num(*tokenizer->curr)) { - u64 len = 0; - while (char_is_num(*tokenizer->curr) || *tokenizer->curr == '.') { - len++; - INCREMENT_CURR_TOKEN(tokenizer); - } + printf("Length: %d\nCapacity: %d\n", bh_arr_length(arr), bh_arr_capacity(arr)); - tk.type = TOKEN_TYPE_LITERAL_NUMERIC; - tk.length = len; + for (int* it = arr; !bh_arr_end(arr, it); it++) { + printf("%d ", *it); } - INCREMENT_CURR_TOKEN(tokenizer); + bh_arr_free(arr); -token_parsed: - return tk; + return 0; } -int main(int argc, char *argv[]) { +int main2(int argc, char *argv[]) { bh_file source_file; bh_file_error err = bh_file_open(&source_file, argv[1]); if (err != BH_FILE_ERROR_NONE) { @@ -303,7 +51,7 @@ int main(int argc, char *argv[]) { tk = get_token(&tknizer); char c = *(tk.token + tk.length); *(tk.token + tk.length) = '\0'; - printf("Line %ld, Column %ld: \n%s: %s\n", tk.line_number, tk.line_column, TokenTypeNames[tk.type], tk.token); + printf("Line %ld, Column %ld: \n%s: %s\n", tk.line_number, tk.line_column, get_token_type_name(tk), tk.token); *(tk.token + tk.length) = c; } while (tk.type != TOKEN_TYPE_END_STREAM); diff --git a/onyxlex.c b/onyxlex.c new file mode 100644 index 00000000..a5945683 --- /dev/null +++ b/onyxlex.c @@ -0,0 +1,225 @@ +#include "bh.h" +#include "onyxlex.h" + +static const char* TokenTypeNames[] = { + "TOKEN_TYPE_UNKNOWN", + "TOKEN_TYPE_END_STREAM", + + "TOKEN_TYPE_COMMENT", + + "TOKEN_TYPE_KEYWORD_STRUCT", + "TOKEN_TYPE_KEYWORD_USE", + "TOKEN_TYPE_KEYWORD_EXPORT", + "TOKEN_TYPE_KEYWORD_IF", + "TOKEN_TYPE_KEYWORD_ELSE", + "TOKEN_TYPE_KEYWORD_FOR", + "TOKEN_TYPE_KEYWORD_RETURN", + "TOKEN_TYPE_KEYWORD_FOREIGN", + + "TOKEN_TYPE_RIGHT_ARROW", + "TOKEN_TYPE_LEFT_ARROW", + "TOKEN_TYPE_OPEN_PAREN", + "TOKEN_TYPE_CLOSE_PAREN", + "TOKEN_TYPE_OPEN_BRACE", + "TOKEN_TYPE_CLOSE_BRACE", + "TOKEN_TYPE_OPEN_BRACKET", + "TOKEN_TYPE_CLOSE_BRACKET", + "TOKEN_TYPE_OPEN_ANGLE", + "TOKEN_TYPE_CLOSE_ANGLE", + + "TOKEN_TYPE_SYM_PLUS", + "TOKEN_TYPE_SYM_MINUS", + "TOKEN_TYPE_SYM_STAR", + "TOKEN_TYPE_SYM_PERCENT", + "TOKEN_TYPE_SYM_FSLASH", + "TOKEN_TYPE_SYM_BSLASH", + "TOKEN_TYPE_SYM_COLON", + "TOKEN_TYPE_SYM_SEMICOLON", + "TOKEN_TYPE_SYM_COMMA", + "TOKEN_TYPE_SYM_EQUALS", + "TOKEN_TYPE_SYM_GRAVE", + "TOKEN_TYPE_SYM_TILDE", + "TOKEN_TYPE_SYM_BANG", + + "TOKEN_TYPE_SYMBOL", + "TOKEN_TYPE_LITERAL_STRING", + "TOKEN_TYPE_LITERAL_NUMERIC", + + "TOKEN_TYPE_COUNT" +}; + +#ifndef LITERAL_TOKEN +#define LITERAL_TOKEN(token, token_type) \ + if (token_lit(tokenizer, &tk, token, token_type)) goto token_parsed; +#endif + +#ifndef INCREMENT_CURR_TOKEN +#define INCREMENT_CURR_TOKEN(tkn) { \ + tkn->curr++; \ + tkn->line_column++; \ + if (*tkn->curr == '\n') { \ + tkn->line_number++; \ + tkn->line_column = 1; \ + } \ +} +#endif + +static b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) { + i64 len = chars_match(tokenizer->curr, lit); + if (len > 0) { + tk->type = type; + tk->token = tokenizer->curr; + tk->length = len; + + tokenizer->curr += len; + tokenizer->line_column += len; + + return 1; + } + return 0; +} + +const char* get_token_type_name(Token tkn) { + return TokenTypeNames[tkn.type]; +} + +Token get_token(Tokenizer* tokenizer) { + Token tk; + + // Skip whitespace + while (char_is_whitespace(*tokenizer->curr) && tokenizer->curr != tokenizer->end) + INCREMENT_CURR_TOKEN(tokenizer) + + tk.type = TOKEN_TYPE_UNKNOWN; + tk.token = tokenizer->curr; + tk.length = 1; + tk.line_number = tokenizer->line_number; + tk.line_column = tokenizer->line_column; + + if (tokenizer->curr == tokenizer->end) { + tk.type = TOKEN_TYPE_END_STREAM; + goto token_parsed; + } + + // Comments + if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') { + tokenizer->curr += 2; + tk.type = TOKEN_TYPE_COMMENT; + tk.token = tokenizer->curr; + u16 layers = 1; + + while (layers >= 1) { + INCREMENT_CURR_TOKEN(tokenizer); + + if (tokenizer->curr == tokenizer->end) { + tk.type = TOKEN_TYPE_END_STREAM; + break; + } + + if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') { + layers++; + INCREMENT_CURR_TOKEN(tokenizer); + } + + if (*tokenizer->curr == '*' && *(tokenizer->curr + 1) == '/') { + layers--; + INCREMENT_CURR_TOKEN(tokenizer); + } + } + + INCREMENT_CURR_TOKEN(tokenizer); + + tk.length = tokenizer->curr - tk.token - 2; + goto token_parsed; + } + + LITERAL_TOKEN("struct", TOKEN_TYPE_KEYWORD_STRUCT); + LITERAL_TOKEN("export", TOKEN_TYPE_KEYWORD_EXPORT); + LITERAL_TOKEN("use", TOKEN_TYPE_KEYWORD_USE); + LITERAL_TOKEN("if", TOKEN_TYPE_KEYWORD_IF); + LITERAL_TOKEN("else", TOKEN_TYPE_KEYWORD_ELSE); + LITERAL_TOKEN("foreign", TOKEN_TYPE_KEYWORD_FOREIGN); + LITERAL_TOKEN("for", TOKEN_TYPE_KEYWORD_FOR); + LITERAL_TOKEN("return", TOKEN_TYPE_KEYWORD_RETURN); + LITERAL_TOKEN("->", TOKEN_TYPE_RIGHT_ARROW); + LITERAL_TOKEN("<-", TOKEN_TYPE_RIGHT_ARROW); + LITERAL_TOKEN("(", TOKEN_TYPE_OPEN_PAREN); + LITERAL_TOKEN(")", TOKEN_TYPE_CLOSE_PAREN); + LITERAL_TOKEN("{", TOKEN_TYPE_OPEN_BRACE); + LITERAL_TOKEN("}", TOKEN_TYPE_CLOSE_BRACE); + LITERAL_TOKEN("[", TOKEN_TYPE_OPEN_BRACKET); + LITERAL_TOKEN("]", TOKEN_TYPE_CLOSE_BRACKET); + LITERAL_TOKEN("<", TOKEN_TYPE_OPEN_ANGLE); + LITERAL_TOKEN(">", TOKEN_TYPE_CLOSE_ANGLE); + LITERAL_TOKEN("+", TOKEN_TYPE_SYM_PLUS); + LITERAL_TOKEN("-", TOKEN_TYPE_SYM_MINUS); + LITERAL_TOKEN("*", TOKEN_TYPE_SYM_STAR); + LITERAL_TOKEN("/", TOKEN_TYPE_SYM_FSLASH); + LITERAL_TOKEN("%", TOKEN_TYPE_SYM_PERCENT); + LITERAL_TOKEN("\\", TOKEN_TYPE_SYM_BSLASH); + LITERAL_TOKEN(":", TOKEN_TYPE_SYM_COLON); + LITERAL_TOKEN(";", TOKEN_TYPE_SYM_SEMICOLON); + LITERAL_TOKEN(",", TOKEN_TYPE_SYM_COMMA); + LITERAL_TOKEN("=", TOKEN_TYPE_SYM_EQUALS); + LITERAL_TOKEN("`", TOKEN_TYPE_SYM_GRAVE); + LITERAL_TOKEN("~", TOKEN_TYPE_SYM_TILDE); + LITERAL_TOKEN("!", TOKEN_TYPE_SYM_BANG); + + // Symbols + if (char_is_alpha(*tk.token)) { + u64 len = 0; + while (char_is_alphanum(*tokenizer->curr) || charset_contains("_$", *tokenizer->curr)) { + len++; + INCREMENT_CURR_TOKEN(tokenizer); + } + + tk.length = len; + tk.type = TOKEN_TYPE_SYMBOL; + goto token_parsed; + } + + // String literal + if (*tk.token == '"') { + u64 len = 0; + u64 slash_count = 0; + + INCREMENT_CURR_TOKEN(tokenizer); + + while (!(*tokenizer->curr == '"' && slash_count == 0)) { + len++; + + if (*tokenizer->curr == '\\') { + slash_count += 1; + slash_count %= 2; + } else { + slash_count = 0; + } + + INCREMENT_CURR_TOKEN(tokenizer); + } + + INCREMENT_CURR_TOKEN(tokenizer); + + tk.token++; + tk.type = TOKEN_TYPE_LITERAL_STRING; + tk.length = len; + goto token_parsed; + } + + // Number literal + if (char_is_num(*tokenizer->curr)) { + u64 len = 0; + while (char_is_num(*tokenizer->curr) || *tokenizer->curr == '.') { + len++; + INCREMENT_CURR_TOKEN(tokenizer); + } + + tk.type = TOKEN_TYPE_LITERAL_NUMERIC; + tk.length = len; + } + + INCREMENT_CURR_TOKEN(tokenizer); + +token_parsed: + return tk; +} diff --git a/onyxlex.h b/onyxlex.h new file mode 100644 index 00000000..01dc7fd6 --- /dev/null +++ b/onyxlex.h @@ -0,0 +1,71 @@ +#ifndef ONYXLEX_H +#define ONYXLEX_H + +#include "bh.h" + +typedef struct Tokenizer { + char *start, *curr, *end; + + // TODO: Fix the line number and column count + u64 line_number; + u64 line_column; +} Tokenizer; + +typedef enum TokenType { + TOKEN_TYPE_UNKNOWN, + TOKEN_TYPE_END_STREAM, + + TOKEN_TYPE_COMMENT, + + TOKEN_TYPE_KEYWORD_STRUCT, + TOKEN_TYPE_KEYWORD_USE, + TOKEN_TYPE_KEYWORD_EXPORT, + TOKEN_TYPE_KEYWORD_IF, + TOKEN_TYPE_KEYWORD_ELSE, + TOKEN_TYPE_KEYWORD_FOR, + TOKEN_TYPE_KEYWORD_RETURN, + TOKEN_TYPE_KEYWORD_FOREIGN, + + TOKEN_TYPE_RIGHT_ARROW, + TOKEN_TYPE_LEFT_ARROW, + TOKEN_TYPE_OPEN_PAREN, + TOKEN_TYPE_CLOSE_PAREN, + TOKEN_TYPE_OPEN_BRACE, + TOKEN_TYPE_CLOSE_BRACE, + TOKEN_TYPE_OPEN_BRACKET, + TOKEN_TYPE_CLOSE_BRACKET, + TOKEN_TYPE_OPEN_ANGLE, + TOKEN_TYPE_CLOSE_ANGLE, + + TOKEN_TYPE_SYM_PLUS, + TOKEN_TYPE_SYM_MINUS, + TOKEN_TYPE_SYM_STAR, + TOKEN_TYPE_SYM_PERCENT, + TOKEN_TYPE_SYM_FSLASH, + TOKEN_TYPE_SYM_BSLASH, + TOKEN_TYPE_SYM_COLON, + TOKEN_TYPE_SYM_SEMICOLON, + TOKEN_TYPE_SYM_COMMA, + TOKEN_TYPE_SYM_EQUALS, + TOKEN_TYPE_SYM_GRAVE, + TOKEN_TYPE_SYM_TILDE, + TOKEN_TYPE_SYM_BANG, + + TOKEN_TYPE_SYMBOL, + TOKEN_TYPE_LITERAL_STRING, + TOKEN_TYPE_LITERAL_NUMERIC, + + TOKEN_TYPE_COUNT +} TokenType; + +typedef struct Token { + TokenType type; + char* token; + isize length; + u64 line_number, line_column; +} Token; + +const char* get_token_type_name(Token tkn); +Token get_token(Tokenizer* tokenizer); + +#endif \ No newline at end of file