Added strings to the language
author Brendan Hansen <brendan.f.hansen@gmail.com>
Sun, 19 Jul 2020 15:48:51 +0000 (10:48 -0500)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Sun, 19 Jul 2020 15:48:51 +0000 (10:48 -0500)
Currently, string literals are represented as null-terminated ^u8 pointers.

include/onyxastnodes.h
include/onyxwasm.h
onyx
progs/arrays.onyx
src/onyx.c
src/onyxchecker.c
src/onyxparser.c
src/onyxsymres.c
src/onyxutils.c
src/onyxwasm.c

index fcd257d48ff6bc8e37b28cb86d743050b32b1bb9..8aec472030a66bd4669cfd7df1158b5af909c943 100644 (file)
@@ -11,6 +11,7 @@ typedef struct AstTyped AstTyped;
 typedef struct AstBinOp AstBinaryOp;
 typedef struct AstUnaryOp AstUnaryOp;
 typedef struct AstNumLit AstNumLit;
+typedef struct AstStrLit AstStrLit;
 typedef struct AstLocal AstLocal;
 typedef struct AstCall AstCall;
 typedef struct AstIntrinsicCall AstIntrinsicCall;
@@ -69,7 +70,8 @@ typedef enum AstKind {
     Ast_Kind_Function_Type,
     Ast_Kind_Type_End,
 
-    Ast_Kind_Literal,
+    Ast_Kind_NumLit,
+    Ast_Kind_StrLit,
     Ast_Kind_Param,
     Ast_Kind_Argument,
     Ast_Kind_Call,
@@ -194,6 +196,7 @@ struct AstTyped AstTyped_members;
 struct AstBinOp         { AstTyped_base; BinaryOp operation; AstTyped *left, *right; };
 struct AstUnaryOp       { AstTyped_base; UnaryOp operation; AstTyped *expr; };
 struct AstNumLit        { AstTyped_base; union { i32 i; i64 l; f32 f; f64 d; } value; };
+struct AstStrLit        { AstTyped_base; u64 addr; };
 struct AstLocal         { AstTyped_base; AstLocal *prev_local; };
 struct AstCall          { AstTyped_base; AstArgument *arguments; u64 arg_count; AstNode *callee; };
 struct AstIntrinsicCall { AstTyped_base; AstArgument *arguments; u64 arg_count; OnyxIntrinsic intrinsic; };
@@ -279,6 +282,7 @@ typedef enum EntityType {
     Entity_Type_Function,
     Entity_Type_Overloaded_Function,
     Entity_Type_Global,
+    Entity_Type_String_Literal,
     Entity_Type_Expression
 } EntityType;
 
@@ -286,10 +290,11 @@ typedef struct Entity {
     EntityType type;
 
     union {
-        AstFunction*           function;
-        AstOverloadedFunction* overloaded_function;
-        AstGlobal*             global;
-        AstTyped*              expr;
+        AstFunction           *function;
+        AstOverloadedFunction *overloaded_function;
+        AstGlobal             *global;
+        AstTyped              *expr;
+        AstStrLit             *strlit;
     };
 } Entity;
 
index fe6aa6b46ac7d7e2bca16c478696ab37f48818e2..863ee1faef700e2f8e73dff8ed395065be9bdbb8 100644 (file)
@@ -221,6 +221,12 @@ typedef enum WasmInstructionType {
     WI_I64_REINTERPRET_F64           = 0xBD,
     WI_F32_REINTERPRET_I32           = 0xBE,
     WI_F64_REINTERPRET_I64           = 0xBF,
+
+    WI_I32_EXTEND_8_S                = 0xC0,
+    WI_I32_EXTEND_16_S               = 0xC1,
+    WI_I64_EXTEND_8_S                = 0xC2,
+    WI_I64_EXTEND_16_S               = 0xC3,
+    WI_I64_EXTEND_32_S               = 0xC4,
 } WasmInstructionType;
 
 typedef union {
@@ -275,6 +281,11 @@ typedef struct WasmImport {
     OnyxToken *mod, *name;
 } WasmImport;
 
+// One chunk of initialized data belonging to a wasm module.
+typedef struct WasmDatum {
+    u32 offset;  // byte offset at which the data is placed
+    u32 length;  // size of `data` in bytes
+    ptr data;    // pointer to the bytes themselves
+} WasmDatum;
+
 typedef struct OnyxWasmModule {
     bh_allocator allocator;
     OnyxMessages* msgs;
@@ -297,6 +308,7 @@ typedef struct OnyxWasmModule {
     bh_table(WasmExport)  exports;
     bh_arr(WasmGlobal)    globals;
     bh_arr(WasmFunc)      funcs;
+    bh_arr(WasmDatum)     data;
 
     u32 next_type_idx;
     u32 export_count;
@@ -304,6 +316,7 @@ typedef struct OnyxWasmModule {
     u32 next_foreign_func_idx;
     u32 next_global_idx;
     u32 next_foreign_global_idx;
+    u32 next_datum_offset;
 } OnyxWasmModule;
 
 OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc, OnyxMessages* msgs);
diff --git a/onyx b/onyx
index 0791ddf04cb41f4c72f037c9e51e4538d381c291..c60bce6eacbfad42fe9f260901f3ad5ef65ef89e 100755 (executable)
Binary files a/onyx and b/onyx differ
index ed01b7219e9089ffa60b5179648abfdc8e6d443d..b10b6debd5500af62991df5a85120c448a10b0af 100644 (file)
@@ -39,8 +39,21 @@ sort :: proc (src: ^i32, len: i32) {
     }
 }
 
+other_str :: "I can't believe this \n actually fricken worked!";
+
+// Exported as "main": prints every byte of other_str as an i32, stopping
+// when the null terminator is reached.
+str_test :: proc #export "main" {
+    // NOTE(review): hello_str is not used below; presumably it only exercises
+    // a string literal bound inside a procedure - confirm.
+    hello_str :: "Hello World!";
+
+    walker := other_str;
+    while walker[0] != 0 as u8 {
+        print(walker[0] as i32);
+
+        // Advance to the next byte: go through i32, add one, and cast back.
+        walker = (walker as i32 + 1) as ^u8;
+    }
+}
+
 // Don't need to bind this function to a symbol
-proc #export "main" {
+proc #export "main2" {
     print(min(10.0, 12.0));
 
     global_arr = 0 as ^i32;
index 028eafc88ed62db82861c93e309f1f723bee5096..5996355357a983a227ed9900d4d33975d18aa75f 100644 (file)
@@ -183,7 +183,7 @@ static void merge_parse_results(CompilerState* compiler_state, ParseResults* res
     bh_arr_each(AstNode *, node, results->nodes_to_process) {
         Entity ent = { Entity_Type_Unknown };
 
-        AstKind nkind    = (*node)->kind;
+        AstKind nkind = (*node)->kind;
         switch (nkind) {
             case Ast_Kind_Function:
                 ent.type     = Entity_Type_Function;
@@ -200,6 +200,11 @@ static void merge_parse_results(CompilerState* compiler_state, ParseResults* res
                 ent.global = (AstGlobal *) *node;
                 break;
 
+            case Ast_Kind_StrLit:
+                ent.type   = Entity_Type_String_Literal;
+                ent.strlit = (AstStrLit *) *node;
+                break;
+
             default:
                 ent.type = Entity_Type_Expression;
                 ent.expr = (AstTyped *) *node;
index e1c215ca8e21253025afba9e804db8d4a133c6fa..abaf0ede4e425a208ed84572ba2adaa8540478b4 100644 (file)
@@ -322,7 +322,7 @@ CHECK(binaryop, AstBinaryOp* binop) {
         onyx_message_add(state->msgs,
                 ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
                 binop->token->pos,
-                NULL, 0);
+                binop->left->token->text, binop->left->token->length);
         return 1;
     }
 
@@ -330,7 +330,7 @@ CHECK(binaryop, AstBinaryOp* binop) {
         onyx_message_add(state->msgs,
                 ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE,
                 binop->token->pos,
-                NULL, 0);
+                binop->right->token->text, binop->right->token->length);
         return 1;
     }
 
@@ -446,12 +446,13 @@ CHECK(expression, AstTyped* expr) {
             expr->type = ((AstArgument *) expr)->value->type;
             break;
 
-        case Ast_Kind_Literal:
+        case Ast_Kind_NumLit:
             // NOTE: Literal types should have been decided
             // in the parser (for now).
             assert(expr->type != NULL);
             break;
 
+        case Ast_Kind_StrLit: break;
         case Ast_Kind_Function: break;
         case Ast_Kind_Overloaded_Function: break;
 
@@ -648,6 +649,8 @@ void onyx_type_check(SemState* state, ProgramInfo* program) {
                 if (check_expression(state, entity->expr)) return;
                 break;
 
+            case Entity_Type_String_Literal: break;
+
             default: DEBUG_HERE; break;
         }
     }
index 641c32d985eb224855f35339f9c7f50c79cb9e4f..d92608519ee37875a285f830a0fa3879baf10937 100644 (file)
@@ -87,7 +87,7 @@ static OnyxToken* expect_token(OnyxParser* parser, TokenType token_type) {
 }
 
 static AstNumLit* parse_numeric_literal(OnyxParser* parser) {
-    AstNumLit* lit_node = make_node(AstNumLit, Ast_Kind_Literal);
+    AstNumLit* lit_node = make_node(AstNumLit, Ast_Kind_NumLit);
     lit_node->token = expect_token(parser, Token_Type_Literal_Numeric);
     lit_node->flags |= Ast_Flag_Comptime;
     lit_node->value.l = 0ll;
@@ -230,25 +230,39 @@ static AstTyped* parse_factor(OnyxParser* parser) {
             retval = (AstTyped *) parse_numeric_literal(parser);
             break;
 
-        case Token_Type_Literal_True:
-            {
-                AstNumLit* bool_node = make_node(AstNumLit, Ast_Kind_Literal);
-                bool_node->type_node = (AstType *) &basic_type_bool;
-                bool_node->token = expect_token(parser, Token_Type_Literal_True);
-                bool_node->value.i = 1;
-                retval = (AstTyped *) bool_node;
-                break;
-            }
+        case Token_Type_Literal_String: {
+            AstPointerType* str_type = make_node(AstPointerType, Ast_Kind_Pointer_Type);
+            str_type->flags |= Basic_Flag_Pointer;
+            str_type->elem = (AstType *) &basic_type_u8;
 
-        case Token_Type_Literal_False:
-            {
-                AstNumLit* bool_node = make_node(AstNumLit, Ast_Kind_Literal);
-                bool_node->type_node = (AstType *) &basic_type_bool;
-                bool_node->token = expect_token(parser, Token_Type_Literal_False);
-                bool_node->value.i = 0;
-                retval = (AstTyped *) bool_node;
-                break;
-            }
+            AstStrLit* str_node = make_node(AstStrLit, Ast_Kind_StrLit);
+            str_node->token     = expect_token(parser, Token_Type_Literal_String);
+            str_node->type_node = (AstType *) str_type;
+            str_node->addr      = 0;
+
+            bh_arr_push(parser->results.nodes_to_process, (AstNode *) str_node);
+
+            retval = (AstTyped *) str_node;
+            break;
+        }
+
+        case Token_Type_Literal_True: {
+            AstNumLit* bool_node = make_node(AstNumLit, Ast_Kind_NumLit);
+            bool_node->type_node = (AstType *) &basic_type_bool;
+            bool_node->token = expect_token(parser, Token_Type_Literal_True);
+            bool_node->value.i = 1;
+            retval = (AstTyped *) bool_node;
+            break;
+        }
+
+        case Token_Type_Literal_False: {
+            AstNumLit* bool_node = make_node(AstNumLit, Ast_Kind_NumLit);
+            bool_node->type_node = (AstType *) &basic_type_bool;
+            bool_node->token = expect_token(parser, Token_Type_Literal_False);
+            bool_node->value.i = 0;
+            retval = (AstTyped *) bool_node;
+            break;
+        }
 
         default:
             onyx_message_add(parser->msgs,
@@ -941,7 +955,8 @@ static AstNode* parse_top_level_statement(OnyxParser* parser) {
                     if (global->exported_name == NULL)
                         global->exported_name = symbol;
 
-                } else if (node->kind != Ast_Kind_Overloaded_Function) {
+                } else if (node->kind != Ast_Kind_Overloaded_Function
+                        && node->kind != Ast_Kind_StrLit) {
                     // HACK
                     bh_arr_push(parser->results.nodes_to_process, (AstNode *) node);
                 }
index 179dc9a1fc3a73af96208c1768cacb152181c41c..5b03638b1bd0b40830f86fbf083967fed59b7822 100644 (file)
@@ -158,12 +158,8 @@ static void symres_expression(SemState* state, AstTyped** expr) {
             symres_expression(state, &((AstBinaryOp *)(*expr))->right);
             break;
 
-        case Ast_Kind_Unary_Op:
-            symres_unaryop(state, (AstUnaryOp **) expr);
-            break;
-
+        case Ast_Kind_Unary_Op: symres_unaryop(state, (AstUnaryOp **) expr); break;
         case Ast_Kind_Call: symres_call(state, (AstCall *) *expr); break;
-
         case Ast_Kind_Block: symres_block(state, (AstBlock *) *expr); break;
 
         case Ast_Kind_Symbol:
@@ -174,7 +170,8 @@ static void symres_expression(SemState* state, AstTyped** expr) {
         case Ast_Kind_Local: break;
 
         case Ast_Kind_Function:
-        case Ast_Kind_Literal:
+        case Ast_Kind_NumLit:
+        case Ast_Kind_StrLit:
             (*expr)->type_node = symres_type(state, (*expr)->type_node);
             break;
 
index 96163fafbd3662dd3e7db18c6c7117bf8fa54fd9..1138f7cc0f461e7d94aac55179f76619cabc2606 100644 (file)
@@ -32,7 +32,8 @@ static const char* ast_node_names[] = {
     "FUNCTION_TYPE",
     "TYPE_END (BAD)"
 
-    "LITERAL",
+    "NUMERIC LITERAL",
+    "STRING LITERAL",
     "PARAM",
     "ARGUMENT",
     "CALL",
index d8040ae78edfe75a1169c525ed1c6b1c6f566a88..7117648d8ee61445b52ff0d6cdf084bf2bac7bbd 100644 (file)
@@ -634,70 +634,62 @@ COMPILE_FUNC(expression, AstTyped* expr) {
     bh_arr(WasmInstruction) code = *pcode;
 
     switch (expr->kind) {
-        case Ast_Kind_Binary_Op:
-            compile_binop(mod, &code, (AstBinaryOp *) expr);
-            break;
-
-        case Ast_Kind_Unary_Op:
-            compile_unaryop(mod, &code, (AstUnaryOp *) expr);
-            break;
-
         case Ast_Kind_Local:
-        case Ast_Kind_Param:
-            {
-                i32 localidx = (i32) bh_imap_get(&mod->local_map, (u64) expr);
+        case Ast_Kind_Param: {
+            i32 localidx = (i32) bh_imap_get(&mod->local_map, (u64) expr);
 
-                WID(WI_LOCAL_GET, localidx);
-                break;
-            }
-
-        case Ast_Kind_Global:
-            {
-                i32 globalidx = (i32) bh_imap_get(&mod->index_map, (u64) expr);
+            WID(WI_LOCAL_GET, localidx);
+            break;
+        }
 
-                WID(WI_GLOBAL_GET, globalidx);
-                break;
-            }
+        case Ast_Kind_Global: {
+            i32 globalidx = (i32) bh_imap_get(&mod->index_map, (u64) expr);
 
-        case Ast_Kind_Literal:
-            {
-                AstNumLit* lit = (AstNumLit *) expr;
-                WasmType lit_type = onyx_type_to_wasm_type(lit->type);
-                WasmInstruction instr = { WI_NOP, 0 };
-
-                if (lit_type == WASM_TYPE_INT32) {
-                    instr.type = WI_I32_CONST;
-                    instr.data.i1 = lit->value.i;
-                } else if (lit_type == WASM_TYPE_INT64) {
-                    instr.type = WI_I64_CONST;
-                    instr.data.l = lit->value.l;
-                } else if (lit_type == WASM_TYPE_FLOAT32) {
-                    instr.type = WI_F32_CONST;
-                    instr.data.f = lit->value.f;
-                } else if (lit_type == WASM_TYPE_FLOAT64) {
-                    instr.type = WI_F64_CONST;
-                    instr.data.d = lit->value.d;
-                }
+            WID(WI_GLOBAL_GET, globalidx);
+            break;
+        }
 
-                bh_arr_push(code, instr);
-                break;
+        case Ast_Kind_NumLit: {
+            AstNumLit* lit = (AstNumLit *) expr;
+            WasmType lit_type = onyx_type_to_wasm_type(lit->type);
+            WasmInstruction instr = { WI_NOP, 0 };
+
+            if (lit_type == WASM_TYPE_INT32) {
+                instr.type = WI_I32_CONST;
+                instr.data.i1 = lit->value.i;
+            } else if (lit_type == WASM_TYPE_INT64) {
+                instr.type = WI_I64_CONST;
+                instr.data.l = lit->value.l;
+            } else if (lit_type == WASM_TYPE_FLOAT32) {
+                instr.type = WI_F32_CONST;
+                instr.data.f = lit->value.f;
+            } else if (lit_type == WASM_TYPE_FLOAT64) {
+                instr.type = WI_F64_CONST;
+                instr.data.d = lit->value.d;
             }
 
-        case Ast_Kind_Block: compile_block(mod, &code, (AstBlock *) expr); break;
-
-        case Ast_Kind_Call:
-            compile_call(mod, &code, (AstCall *) expr);
+            bh_arr_push(code, instr);
             break;
+        }
 
-        case Ast_Kind_Intrinsic_Call:
-            compile_intrinsic_call(mod, &code, (AstIntrinsicCall *) expr);
+        case Ast_Kind_StrLit: {
+            WID(WI_I32_CONST, ((AstStrLit *) expr)->addr);
             break;
+        }
+
+        case Ast_Kind_Block:          compile_block(mod, &code, (AstBlock *) expr); break;
+        case Ast_Kind_Call:           compile_call(mod, &code, (AstCall *) expr); break;
+        case Ast_Kind_Intrinsic_Call: compile_intrinsic_call(mod, &code, (AstIntrinsicCall *) expr); break;
+        case Ast_Kind_Binary_Op:      compile_binop(mod, &code, (AstBinaryOp *) expr); break;
+        case Ast_Kind_Unary_Op:       compile_unaryop(mod, &code, (AstUnaryOp *) expr); break;
 
         case Ast_Kind_Array_Access: {
             AstArrayAccess* aa = (AstArrayAccess *) expr;
-            WID(WI_I32_CONST, aa->elem_size);
             compile_expression(mod, &code, aa->expr);
-            WI(WI_I32_MUL);
+            if (aa->elem_size != 1) {
+                WID(WI_I32_CONST, aa->elem_size);
+                WI(WI_I32_MUL);
+            }
             compile_expression(mod, &code, aa->addr);
             WI(WI_I32_ADD);
 
@@ -981,6 +973,47 @@ static void compile_global(OnyxWasmModule* module, AstGlobal* global) {
     bh_arr_push(module->globals, glob);
 }
 
+// Unescapes the text of a string literal token into a freshly allocated,
+// null-terminated byte buffer, registers that buffer as a new datum at the
+// module's next free data offset, and stores that offset in strlit->addr.
+//
+// Recognised escapes are \n, \t, \r, \v and \e. Any other backslash sequence
+// is copied through unchanged, backslash included. The buffer is not freed
+// here; the pushed datum keeps the pointer.
+static void compile_string_literal(OnyxWasmModule* mod, AstStrLit* strlit) {
+    i8* src = (i8 *) strlit->token->text;
+    i32 len = strlit->token->length;
+
+    // NOTE: Allocating more than necessary. Every escape sequence produces
+    // at most as many bytes as it consumes ('\n' => 0x0a produces fewer), so
+    // the token length plus one byte for the terminator is always enough.
+    i8* strdata = bh_alloc_array(global_heap_allocator, i8, len + 1);
+    i8* des = strdata;
+
+    for (i32 i = 0; i < len; i++) {
+        // NOTE: A backslash that is the very last byte of the token has
+        // nothing to escape. Copy it verbatim instead of reading src[len]
+        // and emitting two bytes, which would overrun the buffer by one
+        // once the terminator is written.
+        if (src[i] != '\\' || i + 1 >= len) {
+            *des++ = src[i];
+            continue;
+        }
+
+        i++;
+        switch (src[i]) {
+        case 'n': *des++ = '\n'; break;
+        case 't': *des++ = '\t'; break;
+        case 'r': *des++ = '\r'; break;
+        case 'v': *des++ = '\v'; break;
+        // NOTE: '\e' is a compiler extension, so ESC is spelled portably.
+        case 'e': *des++ = '\x1b'; break;
+        default:  *des++ = '\\';
+                  *des++ = src[i];
+                  break;
+        }
+    }
+    *des++ = '\0';
+
+    // NOTE: The length includes the null terminator.
+    u32 length = (u32) (des - strdata);
+
+    WasmDatum datum = {
+        .offset = mod->next_datum_offset,
+        .length = length,
+        .data = strdata,
+    };
+
+    strlit->addr = mod->next_datum_offset;
+    mod->next_datum_offset += length;
+
+    bh_arr_push(mod->data, datum);
+}
+
 OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc, OnyxMessages* msgs) {
     OnyxWasmModule module = {
         .allocator = alloc,
@@ -1003,6 +1036,9 @@ OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc, OnyxMessages* msgs) {
         .next_global_idx = 0,
         .next_foreign_global_idx = 0,
 
+        .data = NULL,
+        .next_datum_offset = 0,
+
         .structured_jump_target = NULL,
     };
 
@@ -1010,6 +1046,7 @@ OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc, OnyxMessages* msgs) {
     bh_arr_new(alloc, module.funcs, 4);
     bh_arr_new(alloc, module.imports, 4);
     bh_arr_new(alloc, module.globals, 4);
+    bh_arr_new(alloc, module.data, 4);
 
     // NOTE: 16 is probably needlessly large
     bh_arr_new(global_heap_allocator, module.structured_jump_target, 16);
@@ -1025,7 +1062,6 @@ OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc, OnyxMessages* msgs) {
 }
 
 void onyx_wasm_module_compile(OnyxWasmModule* module, ProgramInfo* program) {
-
     module->next_func_idx   = program->foreign_func_count;
     module->next_global_idx = program->foreign_global_count;
 
@@ -1056,10 +1092,17 @@ void onyx_wasm_module_compile(OnyxWasmModule* module, ProgramInfo* program) {
                 break;
             }
 
+            case Entity_Type_String_Literal: {
+                compile_string_literal(module, (AstStrLit *) entity->strlit);
+            }
+
             default: break;
         }
     }
 
+
+
+
     // NOTE: Then, compile everything
     bh_arr_each(Entity, entity, program->entities) {
         switch (entity->type) {
@@ -1516,6 +1559,41 @@ static i32 output_codesection(OnyxWasmModule* module, bh_buffer* buff) {
     return buff->length - prev_len;
 }
 
+// Appends the data section to `buff`: the section id, the byte size of the
+// section body, and then the body itself (a count followed by one entry per
+// datum in module->data). Returns the number of bytes added to `buff`.
+static i32 output_datasection(OnyxWasmModule* module, bh_buffer* buff) {
+    i32 start_length = buff->length;
+
+    bh_buffer_write_byte(buff, WASM_SECTION_ID_DATA);
+
+    // NOTE: The body is built in a scratch buffer first, because its
+    // encoded size has to be written before its contents.
+    bh_buffer body;
+    bh_buffer_init(&body, buff->allocator, 128);
+
+    i32 encoded_len;
+    u8* encoded = uint_to_uleb128((u64) bh_arr_length(module->data), &encoded_len);
+    bh_buffer_append(&body, encoded, encoded_len);
+
+    bh_arr_each(WasmDatum, datum, module->data) {
+        // NOTE: 0x00 memory index
+        bh_buffer_write_byte(&body, 0x00);
+
+        // NOTE: Offset expression: i32.const <offset>, then the end marker.
+        bh_buffer_write_byte(&body, WI_I32_CONST);
+        encoded = int_to_leb128((i64) datum->offset, &encoded_len);
+        bh_buffer_append(&body, encoded, encoded_len);
+        bh_buffer_write_byte(&body, WI_BLOCK_END);
+
+        // NOTE: Byte count, followed by the bytes.
+        encoded = uint_to_uleb128((u64) datum->length, &encoded_len);
+        bh_buffer_append(&body, encoded, encoded_len);
+
+        // NOTE(review): the upper bound is datum->length - 1, so all `length`
+        // bytes are written only if fori treats its upper bound as
+        // inclusive - confirm against the fori definition.
+        u8* bytes = (u8 *) datum->data;
+        fori (i, 0, datum->length - 1) {
+            bh_buffer_write_byte(&body, bytes[i]);
+        }
+    }
+
+    encoded = uint_to_uleb128((u64) (body.length), &encoded_len);
+    bh_buffer_append(buff, encoded, encoded_len);
+
+    bh_buffer_concat(buff, body);
+    bh_buffer_free(&body);
+
+    return buff->length - start_length;
+}
+
 void onyx_wasm_module_write_to_file(OnyxWasmModule* module, bh_file file) {
     bh_buffer master_buffer;
     bh_buffer_init(&master_buffer, global_heap_allocator, 128);
@@ -1530,6 +1608,7 @@ void onyx_wasm_module_write_to_file(OnyxWasmModule* module, bh_file file) {
     output_exportsection(module, &master_buffer);
     output_startsection(module, &master_buffer);
     output_codesection(module, &master_buffer);
+    output_datasection(module, &master_buffer);
 
     bh_file_write(&file, master_buffer.data, master_buffer.length);
 }