Split project up and working on bh_arr
author Brendan Hansen <brendan.f.hansen@gmail.com>
Mon, 11 May 2020 22:29:00 +0000 (17:29 -0500)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Mon, 11 May 2020 22:29:00 +0000 (17:29 -0500)
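
Move the tokenizer out of onyx.c into onyxlex.c/onyxlex.h, add an include
guard to bh.h, and start on bh_arr: a stretchy-buffer style dynamic array
whose length and capacity live in a bh__arr header stored just before the
pointer handed back to the user.

Rough usage sketch (it mirrors the temporary test main() in onyx.c below
and uses only the macros added in this commit):

    #define BH_DEFINE          /* pull in the implementations once */
    #include "bh.h"

    int main(void) {
        bh_arr(int) nums = NULL;     /* must start out as NULL */
        bh_arr_new(nums, 4);         /* allocate header + room for 4 ints */

        bh_arr_set_length(nums, 4);  /* grows capacity first if needed */
        for (int i = 0; i < 4; i++) nums[i] = i * i;

        printf("len=%d cap=%d\n", bh_arr_length(nums), bh_arr_capacity(nums));

        bh_arr_free(nums);           /* releases the header allocation */
        return 0;
    }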
Makefile
bh.h
onyx
onyx.c
onyxlex.c [new file with mode: 0644]
onyxlex.h [new file with mode: 0644]

index 2f544b466121dc719beb581160546b0d1aeaa277..187b707fa3b34a86b43ad36bf71128f3b6ffe38c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,5 @@
 OBJ_FILES=\
+       onyxlex.o \
        onyx.o
 
 CC=gcc
@@ -10,7 +11,7 @@ FLAGS=-g
        $(CC) $(FLAGS) -c $< -o $@ $(INCLUDES)
 
 onyx: $(OBJ_FILES)
-       $(CC) $(FLAGS) $< -o $@ $(LIBS)
+       $(CC) $(FLAGS) $^ -o $@ $(LIBS)
 
 clean:
        rm $(OBJ_FILES) 2>&1 >/dev/null
diff --git a/bh.h b/bh.h
index 40dd979f21f44bd56ac7811ef4a9f6c5d04546f2..16a51c87f73b9dba048b42b5d7f5a8ce7d130227 100644 (file)
--- a/bh.h
+++ b/bh.h
@@ -1,3 +1,6 @@
+#ifndef BH_H
+#define BH_H
+
 #include <stdio.h>
 #include <sys/stat.h>
 #include <unistd.h>
@@ -32,6 +35,7 @@ b32 char_is_alphanum(const char a);
 b32 char_is_whitespace(const char a);
 b32 char_in_range(const char lo, const char hi, const char a);
 char charset_contains(const char* charset, char ch);
+i64 chars_match(char* ptr1, char* ptr2);
 
 //-------------------------------------------------------------------------------------
 // Better strings
@@ -150,15 +154,39 @@ bh_file_contents bh_file_read_contents_bh_file(bh_file* file);
 bh_file_contents bh_file_read_contents_direct(const char* filename);
 i32 bh_file_contents_delete(bh_file_contents* contents);
 
+//-------------------------------------------------------------------------------------
+// Better arrays
+//-------------------------------------------------------------------------------------
+typedef struct bh__arr {
+       i32 length, capacity;
+} bh__arr;
 
+#define bh_arr(T)                                      T*
+#define bh__arrhead(arr)                       (((bh__arr *)(arr)) - 1)
 
+#define bh_arr_length(arr)                     (arr ? bh__arrhead(arr)->length : 0)
+#define bh_arr_capacity(arr)           (arr ? bh__arrhead(arr)->capacity : 0)
+#define bh_arr_valid(arr, i)           (arr ? (int)(i) < bh__arrhead(arr)->length : 0)
 
+#define bh_arr_set_length(arr, n)      (bh__arr_ensure_capacity((void **) &arr, sizeof(arr[0]), n), bh__arrhead(arr)->length = n)
 
+#define bh_arr_pop(arr)                                ((arr)[--bh__arrhead(arr)->length])
+#define bh_arr_last(arr)                       ((arr)[bh__arrhead(arr)->length - 1])
+#define bh_arr_end(arr, i)                     ((i) >= &(arr)[bh_arr_length(arr)])
 
+#define bh_arr_new(arr, cap)           (bh__arr_ensure_capacity((void**) &arr, sizeof(arr[0]), cap))
+#define bh_arr_free(arr)                       (bh__arr_free((void**) &arr))
+#define bh_arr_copy(arr)                       (bh__arr_copy(arr, sizeof(arr[0])))
+#define bh_arr_insert_end(arr, n)      ( \
+       bh__arr_ensure_capacity((void**) &arr, sizeof(arr[0]), bh_arr_length(arr) + n), \
+       bh__arrhead(arr)->length += n)
 
+b32 bh__arr_ensure_capacity(void** arr, int elemsize, int cap);
+b32 bh__arr_free(void **arr);
+void* bh__arr_copy(void *arr, int elemsize);
 
-
-
+#ifdef BH_DEFINE
+#undef BH_DEFINE
 
 
 //-------------------------------------------------------------------------------------
@@ -509,3 +537,51 @@ b32 bh_file_contents_delete(bh_file_contents* contents) {
        contents->length = 0;
        return 1;
 }
+
+//-------------------------------------------------------------------------------------
+// ARRAY IMPLEMENTATION
+//-------------------------------------------------------------------------------------
+
+b32 bh__arr_ensure_capacity(void** arr, int elemsize, int cap) {
+       bh__arr* arrptr;
+
+       if (*arr == NULL) {
+               if (cap == 0 && elemsize == 0) return 1;
+
+               arrptr = (bh__arr *) malloc(sizeof(*arrptr) + elemsize * cap);
+               arrptr->capacity = cap;
+               arrptr->length = 0;
+
+       } else {
+               arrptr = bh__arrhead(*arr);
+               if (arrptr->length > cap) return 1;
+
+               if (arrptr->capacity < cap) {
+                       void* p;
+                       int newcap = arrptr->capacity ? arrptr->capacity : 4;
+                       while (newcap < cap) newcap <<= 1;
+
+                       p = realloc(arrptr, sizeof(*arrptr) + elemsize * newcap);
+
+                       if (p) {
+                               arrptr = (bh__arr *) p;
+                               arrptr->capacity = newcap;
+                       } else {
+                               return 0;
+                       }
+               }
+       }
+
+       *arr = arrptr + 1;
+       return 1;
+}
+
+b32 bh__arr_free(void **arr) {
+       if (*arr) free(bh__arrhead(*arr));
+       return 1;
+}
+
+
+#endif // ifdef BH_DEFINE
+
+#endif // ifndef BH_H
diff --git a/onyx b/onyx
index b0a2ad6f86daadfc6c9215135d1e8514ea63ec3b..6ea581cce3958f127854e5c867d1f2b65076bbfa 100755 (executable)
Binary files a/onyx and b/onyx differ
diff --git a/onyx.c b/onyx.c
index 9034852ec7a97b2af0518c24a1111ea069fc3b03..100dad99f2560717e5405ac4c335e8c60f9bb7b6 100644 (file)
--- a/onyx.c
+++ b/onyx.c
-#include <stdio.h> // TODO: Replace with custom lib
+#define BH_DEFINE
 #include "bh.h"
 
-typedef struct Tokenizer {
-       char *start, *curr, *end;
-
-       // TODO: Fix the line number and column count
-       u64 line_number;
-       u64 line_column;
-} Tokenizer;
-
-typedef enum TokenType {
-       TOKEN_TYPE_UNKNOWN,
-       TOKEN_TYPE_END_STREAM,
-
-       TOKEN_TYPE_COMMENT,
-
-       TOKEN_TYPE_KEYWORD_STRUCT,
-       TOKEN_TYPE_KEYWORD_USE,
-       TOKEN_TYPE_KEYWORD_EXPORT,
-       TOKEN_TYPE_KEYWORD_IF,
-       TOKEN_TYPE_KEYWORD_ELSE,
-       TOKEN_TYPE_KEYWORD_FOR,
-       TOKEN_TYPE_KEYWORD_RETURN,
-
-       TOKEN_TYPE_RIGHT_ARROW,
-       TOKEN_TYPE_LEFT_ARROW,
-       TOKEN_TYPE_OPEN_PAREN,
-       TOKEN_TYPE_CLOSE_PAREN,
-       TOKEN_TYPE_OPEN_BRACE,
-       TOKEN_TYPE_CLOSE_BRACE,
-       TOKEN_TYPE_OPEN_BRACKET,
-       TOKEN_TYPE_CLOSE_BRACKET,
-       TOKEN_TYPE_OPEN_ANGLE,
-       TOKEN_TYPE_CLOSE_ANGLE,
-
-       TOKEN_TYPE_SYM_PLUS,
-       TOKEN_TYPE_SYM_MINUS,
-       TOKEN_TYPE_SYM_STAR,
-       TOKEN_TYPE_SYM_PERCENT,
-       TOKEN_TYPE_SYM_FSLASH,
-       TOKEN_TYPE_SYM_BSLASH,
-       TOKEN_TYPE_SYM_COLON,
-       TOKEN_TYPE_SYM_SEMICOLON,
-       TOKEN_TYPE_SYM_COMMA,
-       TOKEN_TYPE_SYM_EQUALS,
-       TOKEN_TYPE_SYM_GRAVE,
-       TOKEN_TYPE_SYM_TILDE,
-       TOKEN_TYPE_SYM_BANG,
-
-       TOKEN_TYPE_SYMBOL,
-       TOKEN_TYPE_LITERAL_STRING,
-       TOKEN_TYPE_LITERAL_NUMERIC,
-
-       TOKEN_TYPE_COUNT
-} TokenType;
-
-static const char* TokenTypeNames[] = {
-       "TOKEN_TYPE_UNKNOWN",
-       "TOKEN_TYPE_END_STREAM",
-
-       "TOKEN_TYPE_COMMENT",
-
-       "TOKEN_TYPE_KEYWORD_STRUCT",
-       "TOKEN_TYPE_KEYWORD_USE",
-       "TOKEN_TYPE_KEYWORD_EXPORT",
-       "TOKEN_TYPE_KEYWORD_IF",
-       "TOKEN_TYPE_KEYWORD_ELSE",
-       "TOKEN_TYPE_KEYWORD_FOR",
-       "TOKEN_TYPE_KEYWORD_RETURN",
-
-       "TOKEN_TYPE_RIGHT_ARROW",
-       "TOKEN_TYPE_LEFT_ARROW",
-       "TOKEN_TYPE_OPEN_PAREN",
-       "TOKEN_TYPE_CLOSE_PAREN",
-       "TOKEN_TYPE_OPEN_BRACE",
-       "TOKEN_TYPE_CLOSE_BRACE",
-       "TOKEN_TYPE_OPEN_BRACKET",
-       "TOKEN_TYPE_CLOSE_BRACKET",
-       "TOKEN_TYPE_OPEN_ANGLE",
-       "TOKEN_TYPE_CLOSE_ANGLE",
-
-       "TOKEN_TYPE_SYM_PLUS",
-       "TOKEN_TYPE_SYM_MINUS",
-       "TOKEN_TYPE_SYM_STAR",
-       "TOKEN_TYPE_SYM_PERCENT",
-       "TOKEN_TYPE_SYM_FSLASH",
-       "TOKEN_TYPE_SYM_BSLASH",
-       "TOKEN_TYPE_SYM_COLON",
-       "TOKEN_TYPE_SYM_SEMICOLON",
-       "TOKEN_TYPE_SYM_COMMA",
-       "TOKEN_TYPE_SYM_EQUALS",
-       "TOKEN_TYPE_SYM_GRAVE",
-       "TOKEN_TYPE_SYM_TILDE",
-       "TOKEN_TYPE_SYM_BANG",
-
-       "TOKEN_TYPE_SYMBOL",
-       "TOKEN_TYPE_LITERAL_STRING",
-       "TOKEN_TYPE_LITERAL_NUMERIC",
-
-       "TOKEN_TYPE_COUNT"
-};
-
-typedef struct Token {
-       TokenType type;
-       char* token;
-       isize length;
-       u64 line_number, line_column;
-} Token;
-
-#ifndef LITERAL_TOKEN
-#define LITERAL_TOKEN(token, token_type) \
-       if (token_lit(tokenizer, &tk, token, token_type)) goto token_parsed;
-#endif
-
-#ifndef INCREMENT_CURR_TOKEN
-#define INCREMENT_CURR_TOKEN(tkn) { \
-       tkn->curr++; \
-       tkn->line_column++; \
-       if (*tkn->curr == '\n') { \
-               tkn->line_number++; \
-               tkn->line_column = 1; \
-       } \
-}
-#endif
-
-b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) {
-       i64 len = chars_match(tokenizer->curr, lit);
-       if (len > 0) {
-               tk->type = type;
-               tk->token = tokenizer->curr;
-               tk->length = len;
-
-               tokenizer->curr += len;
-               tokenizer->line_column += len;
-
-               return 1;
-       }
-       return 0;
-}
-
-Token get_token(Tokenizer* tokenizer) {
-       Token tk;
-
-       // Skip whitespace
-       while (char_is_whitespace(*tokenizer->curr) && tokenizer->curr != tokenizer->end)
-               INCREMENT_CURR_TOKEN(tokenizer)
-
-       tk.type = TOKEN_TYPE_UNKNOWN;
-       tk.token = tokenizer->curr;
-       tk.length = 1;
-       tk.line_number = tokenizer->line_number;
-       tk.line_column = tokenizer->line_column;
-
-       if (tokenizer->curr == tokenizer->end) {
-               tk.type = TOKEN_TYPE_END_STREAM;
-               goto token_parsed;
-       }
-
-       // Comments
-       if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') {
-               tokenizer->curr += 2;   
-               tk.type = TOKEN_TYPE_COMMENT;
-               tk.token = tokenizer->curr;
-               u16 layers = 1;
-
-               while (layers >= 1) {
-                       INCREMENT_CURR_TOKEN(tokenizer);
-
-                       if (tokenizer->curr == tokenizer->end) {
-                               tk.type = TOKEN_TYPE_END_STREAM;
-                               break;
-                       }
-                       
-                       if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') {
-                               layers++;
-                               INCREMENT_CURR_TOKEN(tokenizer);
-                       }
-
-                       if (*tokenizer->curr == '*' && *(tokenizer->curr + 1) == '/') {
-                               layers--;
-                               INCREMENT_CURR_TOKEN(tokenizer);
-                       }
-               }       
-
-               INCREMENT_CURR_TOKEN(tokenizer);
-
-               tk.length = tokenizer->curr - tk.token - 2;
-               goto token_parsed;
-       }
-
-       LITERAL_TOKEN("struct", TOKEN_TYPE_KEYWORD_STRUCT);
-       LITERAL_TOKEN("export", TOKEN_TYPE_KEYWORD_EXPORT);
-       LITERAL_TOKEN("use", TOKEN_TYPE_KEYWORD_USE);
-       LITERAL_TOKEN("if", TOKEN_TYPE_KEYWORD_IF);
-       LITERAL_TOKEN("else", TOKEN_TYPE_KEYWORD_ELSE);
-       LITERAL_TOKEN("for", TOKEN_TYPE_KEYWORD_FOR);
-       LITERAL_TOKEN("return", TOKEN_TYPE_KEYWORD_RETURN);
-       LITERAL_TOKEN("->", TOKEN_TYPE_RIGHT_ARROW);
-       LITERAL_TOKEN("<-", TOKEN_TYPE_RIGHT_ARROW);
-       LITERAL_TOKEN("(", TOKEN_TYPE_OPEN_PAREN);
-       LITERAL_TOKEN(")", TOKEN_TYPE_CLOSE_PAREN);
-       LITERAL_TOKEN("{", TOKEN_TYPE_OPEN_BRACE);
-       LITERAL_TOKEN("}", TOKEN_TYPE_CLOSE_BRACE);
-       LITERAL_TOKEN("[", TOKEN_TYPE_OPEN_BRACKET);
-       LITERAL_TOKEN("]", TOKEN_TYPE_CLOSE_BRACKET);
-       LITERAL_TOKEN("<", TOKEN_TYPE_OPEN_ANGLE);
-       LITERAL_TOKEN(">", TOKEN_TYPE_CLOSE_ANGLE);
-       LITERAL_TOKEN("+", TOKEN_TYPE_SYM_PLUS);
-       LITERAL_TOKEN("-", TOKEN_TYPE_SYM_MINUS);
-       LITERAL_TOKEN("*", TOKEN_TYPE_SYM_STAR);
-       LITERAL_TOKEN("/", TOKEN_TYPE_SYM_FSLASH);
-       LITERAL_TOKEN("%", TOKEN_TYPE_SYM_PERCENT);
-       LITERAL_TOKEN("\\", TOKEN_TYPE_SYM_BSLASH);
-       LITERAL_TOKEN(":", TOKEN_TYPE_SYM_COLON);
-       LITERAL_TOKEN(";", TOKEN_TYPE_SYM_SEMICOLON);
-       LITERAL_TOKEN(",", TOKEN_TYPE_SYM_COMMA);
-       LITERAL_TOKEN("=", TOKEN_TYPE_SYM_EQUALS);
-       LITERAL_TOKEN("`", TOKEN_TYPE_SYM_GRAVE);
-       LITERAL_TOKEN("~", TOKEN_TYPE_SYM_TILDE);
-       LITERAL_TOKEN("!", TOKEN_TYPE_SYM_BANG);
-
-       // Symbols
-       if (char_is_alpha(*tk.token)) {
-               u64 len = 0;
-               while (char_is_alphanum(*tokenizer->curr) || charset_contains("_$", *tokenizer->curr)) {
-                       len++;
-                       INCREMENT_CURR_TOKEN(tokenizer);
-               }
-
-               tk.length = len;
-               tk.type = TOKEN_TYPE_SYMBOL;
-               goto token_parsed;
-       }
-
-       // String literal
-       if (*tk.token == '"') {
-               u64 len = 0;
-               u64 slash_count = 0;
-
-               INCREMENT_CURR_TOKEN(tokenizer);
-
-               while (!(*tokenizer->curr == '"' && slash_count == 0)) {
-                       len++;
+#include <stdio.h> // TODO: Replace with custom lib
 
-                       if (*tokenizer->curr == '\\') {
-                               slash_count += 1;
-                               slash_count %= 2;
-                       } else {
-                               slash_count = 0;
-                       }
+#include "onyxlex.h"
 
-                       INCREMENT_CURR_TOKEN(tokenizer);
-               }
+int main(int argc, char const *argv[]) {
+       bh_arr(int) arr = NULL; // Must initialize to NULL
+       bh_arr_new(arr, 0);
 
-               INCREMENT_CURR_TOKEN(tokenizer);
+       bh_arr_set_length(arr, 10);
+       for (int i = 0; i < 10; i++)
+               arr[i] = i;
+       printf("Length: %d\nCapacity: %d\n", bh_arr_length(arr), bh_arr_capacity(arr));
 
-               tk.token++;
-               tk.type = TOKEN_TYPE_LITERAL_STRING;
-               tk.length = len;
-               goto token_parsed;
-       }
+       bh_arr_set_length(arr, 0);
 
-       // Number literal
-       if (char_is_num(*tokenizer->curr)) {
-               u64 len = 0;
-               while (char_is_num(*tokenizer->curr) || *tokenizer->curr == '.') {
-                       len++;
-                       INCREMENT_CURR_TOKEN(tokenizer);
-               }
+       printf("Length: %d\nCapacity: %d\n", bh_arr_length(arr), bh_arr_capacity(arr));
 
-               tk.type = TOKEN_TYPE_LITERAL_NUMERIC;
-               tk.length = len;
+       for (int* it = arr; !bh_arr_end(arr, it); it++) {
+               printf("%d ", *it);
        }
 
-       INCREMENT_CURR_TOKEN(tokenizer);
+       bh_arr_free(arr);
 
-token_parsed:
-       return tk;
+       return 0;
 }
 
-int main(int argc, char *argv[]) {
+int main2(int argc, char *argv[]) {
        bh_file source_file;
        bh_file_error err = bh_file_open(&source_file, argv[1]);
        if (err != BH_FILE_ERROR_NONE) {
@@ -303,7 +51,7 @@ int main(int argc, char *argv[]) {
                tk = get_token(&tknizer);
                char c = *(tk.token + tk.length);
                *(tk.token + tk.length) = '\0';
-               printf("Line %ld, Column %ld: \n%s: %s\n", tk.line_number, tk.line_column, TokenTypeNames[tk.type], tk.token);
+               printf("Line %ld, Column %ld: \n%s: %s\n", tk.line_number, tk.line_column, get_token_type_name(tk), tk.token);
                *(tk.token + tk.length) = c;
        } while (tk.type != TOKEN_TYPE_END_STREAM);
 
diff --git a/onyxlex.c b/onyxlex.c
new file mode 100644 (file)
index 0000000..a594568
--- /dev/null
+++ b/onyxlex.c
@@ -0,0 +1,225 @@
+#include "bh.h"
+#include "onyxlex.h"
+
+static const char* TokenTypeNames[] = {
+       "TOKEN_TYPE_UNKNOWN",
+       "TOKEN_TYPE_END_STREAM",
+
+       "TOKEN_TYPE_COMMENT",
+
+       "TOKEN_TYPE_KEYWORD_STRUCT",
+       "TOKEN_TYPE_KEYWORD_USE",
+       "TOKEN_TYPE_KEYWORD_EXPORT",
+       "TOKEN_TYPE_KEYWORD_IF",
+       "TOKEN_TYPE_KEYWORD_ELSE",
+       "TOKEN_TYPE_KEYWORD_FOR",
+       "TOKEN_TYPE_KEYWORD_RETURN",
+       "TOKEN_TYPE_KEYWORD_FOREIGN",
+
+       "TOKEN_TYPE_RIGHT_ARROW",
+       "TOKEN_TYPE_LEFT_ARROW",
+       "TOKEN_TYPE_OPEN_PAREN",
+       "TOKEN_TYPE_CLOSE_PAREN",
+       "TOKEN_TYPE_OPEN_BRACE",
+       "TOKEN_TYPE_CLOSE_BRACE",
+       "TOKEN_TYPE_OPEN_BRACKET",
+       "TOKEN_TYPE_CLOSE_BRACKET",
+       "TOKEN_TYPE_OPEN_ANGLE",
+       "TOKEN_TYPE_CLOSE_ANGLE",
+
+       "TOKEN_TYPE_SYM_PLUS",
+       "TOKEN_TYPE_SYM_MINUS",
+       "TOKEN_TYPE_SYM_STAR",
+       "TOKEN_TYPE_SYM_PERCENT",
+       "TOKEN_TYPE_SYM_FSLASH",
+       "TOKEN_TYPE_SYM_BSLASH",
+       "TOKEN_TYPE_SYM_COLON",
+       "TOKEN_TYPE_SYM_SEMICOLON",
+       "TOKEN_TYPE_SYM_COMMA",
+       "TOKEN_TYPE_SYM_EQUALS",
+       "TOKEN_TYPE_SYM_GRAVE",
+       "TOKEN_TYPE_SYM_TILDE",
+       "TOKEN_TYPE_SYM_BANG",
+
+       "TOKEN_TYPE_SYMBOL",
+       "TOKEN_TYPE_LITERAL_STRING",
+       "TOKEN_TYPE_LITERAL_NUMERIC",
+
+       "TOKEN_TYPE_COUNT"
+};
+
+#ifndef LITERAL_TOKEN
+#define LITERAL_TOKEN(token, token_type) \
+       if (token_lit(tokenizer, &tk, token, token_type)) goto token_parsed;
+#endif
+
+#ifndef INCREMENT_CURR_TOKEN
+#define INCREMENT_CURR_TOKEN(tkn) { \
+       tkn->curr++; \
+       tkn->line_column++; \
+       if (*tkn->curr == '\n') { \
+               tkn->line_number++; \
+               tkn->line_column = 1; \
+       } \
+}
+#endif
+
+static b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) {
+       i64 len = chars_match(tokenizer->curr, lit);
+       if (len > 0) {
+               tk->type = type;
+               tk->token = tokenizer->curr;
+               tk->length = len;
+
+               tokenizer->curr += len;
+               tokenizer->line_column += len;
+
+               return 1;
+       }
+       return 0;
+}
+
+const char* get_token_type_name(Token tkn) {
+       return TokenTypeNames[tkn.type];
+}
+
+Token get_token(Tokenizer* tokenizer) {
+       Token tk;
+
+       // Skip whitespace
+       while (char_is_whitespace(*tokenizer->curr) && tokenizer->curr != tokenizer->end)
+               INCREMENT_CURR_TOKEN(tokenizer)
+
+       tk.type = TOKEN_TYPE_UNKNOWN;
+       tk.token = tokenizer->curr;
+       tk.length = 1;
+       tk.line_number = tokenizer->line_number;
+       tk.line_column = tokenizer->line_column;
+
+       if (tokenizer->curr == tokenizer->end) {
+               tk.type = TOKEN_TYPE_END_STREAM;
+               goto token_parsed;
+       }
+
+       // Comments
+       if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') {
+               tokenizer->curr += 2;   
+               tk.type = TOKEN_TYPE_COMMENT;
+               tk.token = tokenizer->curr;
+               u16 layers = 1;
+
+               while (layers >= 1) {
+                       INCREMENT_CURR_TOKEN(tokenizer);
+
+                       if (tokenizer->curr == tokenizer->end) {
+                               tk.type = TOKEN_TYPE_END_STREAM;
+                               break;
+                       }
+                       
+                       if (*tokenizer->curr == '/' && *(tokenizer->curr + 1) == '*') {
+                               layers++;
+                               INCREMENT_CURR_TOKEN(tokenizer);
+                       }
+
+                       if (*tokenizer->curr == '*' && *(tokenizer->curr + 1) == '/') {
+                               layers--;
+                               INCREMENT_CURR_TOKEN(tokenizer);
+                       }
+               }       
+
+               INCREMENT_CURR_TOKEN(tokenizer);
+
+               tk.length = tokenizer->curr - tk.token - 2;
+               goto token_parsed;
+       }
+
+       LITERAL_TOKEN("struct", TOKEN_TYPE_KEYWORD_STRUCT);
+       LITERAL_TOKEN("export", TOKEN_TYPE_KEYWORD_EXPORT);
+       LITERAL_TOKEN("use", TOKEN_TYPE_KEYWORD_USE);
+       LITERAL_TOKEN("if", TOKEN_TYPE_KEYWORD_IF);
+       LITERAL_TOKEN("else", TOKEN_TYPE_KEYWORD_ELSE);
+       LITERAL_TOKEN("foreign", TOKEN_TYPE_KEYWORD_FOREIGN);
+       LITERAL_TOKEN("for", TOKEN_TYPE_KEYWORD_FOR);
+       LITERAL_TOKEN("return", TOKEN_TYPE_KEYWORD_RETURN);
+       LITERAL_TOKEN("->", TOKEN_TYPE_RIGHT_ARROW);
+       LITERAL_TOKEN("<-", TOKEN_TYPE_RIGHT_ARROW);
+       LITERAL_TOKEN("(", TOKEN_TYPE_OPEN_PAREN);
+       LITERAL_TOKEN(")", TOKEN_TYPE_CLOSE_PAREN);
+       LITERAL_TOKEN("{", TOKEN_TYPE_OPEN_BRACE);
+       LITERAL_TOKEN("}", TOKEN_TYPE_CLOSE_BRACE);
+       LITERAL_TOKEN("[", TOKEN_TYPE_OPEN_BRACKET);
+       LITERAL_TOKEN("]", TOKEN_TYPE_CLOSE_BRACKET);
+       LITERAL_TOKEN("<", TOKEN_TYPE_OPEN_ANGLE);
+       LITERAL_TOKEN(">", TOKEN_TYPE_CLOSE_ANGLE);
+       LITERAL_TOKEN("+", TOKEN_TYPE_SYM_PLUS);
+       LITERAL_TOKEN("-", TOKEN_TYPE_SYM_MINUS);
+       LITERAL_TOKEN("*", TOKEN_TYPE_SYM_STAR);
+       LITERAL_TOKEN("/", TOKEN_TYPE_SYM_FSLASH);
+       LITERAL_TOKEN("%", TOKEN_TYPE_SYM_PERCENT);
+       LITERAL_TOKEN("\\", TOKEN_TYPE_SYM_BSLASH);
+       LITERAL_TOKEN(":", TOKEN_TYPE_SYM_COLON);
+       LITERAL_TOKEN(";", TOKEN_TYPE_SYM_SEMICOLON);
+       LITERAL_TOKEN(",", TOKEN_TYPE_SYM_COMMA);
+       LITERAL_TOKEN("=", TOKEN_TYPE_SYM_EQUALS);
+       LITERAL_TOKEN("`", TOKEN_TYPE_SYM_GRAVE);
+       LITERAL_TOKEN("~", TOKEN_TYPE_SYM_TILDE);
+       LITERAL_TOKEN("!", TOKEN_TYPE_SYM_BANG);
+
+       // Symbols
+       if (char_is_alpha(*tk.token)) {
+               u64 len = 0;
+               while (char_is_alphanum(*tokenizer->curr) || charset_contains("_$", *tokenizer->curr)) {
+                       len++;
+                       INCREMENT_CURR_TOKEN(tokenizer);
+               }
+
+               tk.length = len;
+               tk.type = TOKEN_TYPE_SYMBOL;
+               goto token_parsed;
+       }
+
+       // String literal
+       if (*tk.token == '"') {
+               u64 len = 0;
+               u64 slash_count = 0;
+
+               INCREMENT_CURR_TOKEN(tokenizer);
+
+               while (!(*tokenizer->curr == '"' && slash_count == 0)) {
+                       len++;
+
+                       if (*tokenizer->curr == '\\') {
+                               slash_count += 1;
+                               slash_count %= 2;
+                       } else {
+                               slash_count = 0;
+                       }
+
+                       INCREMENT_CURR_TOKEN(tokenizer);
+               }
+
+               INCREMENT_CURR_TOKEN(tokenizer);
+
+               tk.token++;
+               tk.type = TOKEN_TYPE_LITERAL_STRING;
+               tk.length = len;
+               goto token_parsed;
+       }
+
+       // Number literal
+       if (char_is_num(*tokenizer->curr)) {
+               u64 len = 0;
+               while (char_is_num(*tokenizer->curr) || *tokenizer->curr == '.') {
+                       len++;
+                       INCREMENT_CURR_TOKEN(tokenizer);
+               }
+
+               tk.type = TOKEN_TYPE_LITERAL_NUMERIC;
+               tk.length = len;
+       }
+
+       INCREMENT_CURR_TOKEN(tokenizer);
+
+token_parsed:
+       return tk;
+}
diff --git a/onyxlex.h b/onyxlex.h
new file mode 100644 (file)
index 0000000..01dc7fd
--- /dev/null
+++ b/onyxlex.h
@@ -0,0 +1,71 @@
+#ifndef ONYXLEX_H
+#define ONYXLEX_H
+
+#include "bh.h"
+
+typedef struct Tokenizer {
+       char *start, *curr, *end;
+
+       // TODO: Fix the line number and column count
+       u64 line_number;
+       u64 line_column;
+} Tokenizer;
+
+typedef enum TokenType {
+       TOKEN_TYPE_UNKNOWN,
+       TOKEN_TYPE_END_STREAM,
+
+       TOKEN_TYPE_COMMENT,
+
+       TOKEN_TYPE_KEYWORD_STRUCT,
+       TOKEN_TYPE_KEYWORD_USE,
+       TOKEN_TYPE_KEYWORD_EXPORT,
+       TOKEN_TYPE_KEYWORD_IF,
+       TOKEN_TYPE_KEYWORD_ELSE,
+       TOKEN_TYPE_KEYWORD_FOR,
+       TOKEN_TYPE_KEYWORD_RETURN,
+       TOKEN_TYPE_KEYWORD_FOREIGN,
+
+       TOKEN_TYPE_RIGHT_ARROW,
+       TOKEN_TYPE_LEFT_ARROW,
+       TOKEN_TYPE_OPEN_PAREN,
+       TOKEN_TYPE_CLOSE_PAREN,
+       TOKEN_TYPE_OPEN_BRACE,
+       TOKEN_TYPE_CLOSE_BRACE,
+       TOKEN_TYPE_OPEN_BRACKET,
+       TOKEN_TYPE_CLOSE_BRACKET,
+       TOKEN_TYPE_OPEN_ANGLE,
+       TOKEN_TYPE_CLOSE_ANGLE,
+
+       TOKEN_TYPE_SYM_PLUS,
+       TOKEN_TYPE_SYM_MINUS,
+       TOKEN_TYPE_SYM_STAR,
+       TOKEN_TYPE_SYM_PERCENT,
+       TOKEN_TYPE_SYM_FSLASH,
+       TOKEN_TYPE_SYM_BSLASH,
+       TOKEN_TYPE_SYM_COLON,
+       TOKEN_TYPE_SYM_SEMICOLON,
+       TOKEN_TYPE_SYM_COMMA,
+       TOKEN_TYPE_SYM_EQUALS,
+       TOKEN_TYPE_SYM_GRAVE,
+       TOKEN_TYPE_SYM_TILDE,
+       TOKEN_TYPE_SYM_BANG,
+
+       TOKEN_TYPE_SYMBOL,
+       TOKEN_TYPE_LITERAL_STRING,
+       TOKEN_TYPE_LITERAL_NUMERIC,
+
+       TOKEN_TYPE_COUNT
+} TokenType;
+
+typedef struct Token {
+       TokenType type;
+       char* token;
+       isize length;
+       u64 line_number, line_column;
+} Token;
+
+const char* get_token_type_name(Token tkn);
+Token get_token(Tokenizer* tokenizer);
+
+#endif
\ No newline at end of file