From: Brendan Hansen Date: Sun, 19 Jul 2020 15:48:51 +0000 (-0500) Subject: Added strings to the language X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=9915980b1d6bbe0b7c899be2b570c03ceffd0288;p=onyx.git Added strings to the language Currently they are null-terminated ^u8 --- diff --git a/include/onyxastnodes.h b/include/onyxastnodes.h index fcd257d4..8aec4720 100644 --- a/include/onyxastnodes.h +++ b/include/onyxastnodes.h @@ -11,6 +11,7 @@ typedef struct AstTyped AstTyped; typedef struct AstBinOp AstBinaryOp; typedef struct AstUnaryOp AstUnaryOp; typedef struct AstNumLit AstNumLit; +typedef struct AstStrLit AstStrLit; typedef struct AstLocal AstLocal; typedef struct AstCall AstCall; typedef struct AstIntrinsicCall AstIntrinsicCall; @@ -69,7 +70,8 @@ typedef enum AstKind { Ast_Kind_Function_Type, Ast_Kind_Type_End, - Ast_Kind_Literal, + Ast_Kind_NumLit, + Ast_Kind_StrLit, Ast_Kind_Param, Ast_Kind_Argument, Ast_Kind_Call, @@ -194,6 +196,7 @@ struct AstTyped AstTyped_members; struct AstBinOp { AstTyped_base; BinaryOp operation; AstTyped *left, *right; }; struct AstUnaryOp { AstTyped_base; UnaryOp operation; AstTyped *expr; }; struct AstNumLit { AstTyped_base; union { i32 i; i64 l; f32 f; f64 d; } value; }; +struct AstStrLit { AstTyped_base; u64 addr; }; struct AstLocal { AstTyped_base; AstLocal *prev_local; }; struct AstCall { AstTyped_base; AstArgument *arguments; u64 arg_count; AstNode *callee; }; struct AstIntrinsicCall { AstTyped_base; AstArgument *arguments; u64 arg_count; OnyxIntrinsic intrinsic; }; @@ -279,6 +282,7 @@ typedef enum EntityType { Entity_Type_Function, Entity_Type_Overloaded_Function, Entity_Type_Global, + Entity_Type_String_Literal, Entity_Type_Expression } EntityType; @@ -286,10 +290,11 @@ typedef struct Entity { EntityType type; union { - AstFunction* function; - AstOverloadedFunction* overloaded_function; - AstGlobal* global; - AstTyped* expr; + AstFunction *function; + AstOverloadedFunction *overloaded_function; + AstGlobal *global; + AstTyped *expr; + AstStrLit *strlit; }; } Entity; diff --git a/include/onyxwasm.h b/include/onyxwasm.h index fe6aa6b4..863ee1fa 100644 --- a/include/onyxwasm.h +++ b/include/onyxwasm.h @@ -221,6 +221,12 @@ typedef enum WasmInstructionType { WI_I64_REINTERPRET_F64 = 0xBD, WI_F32_REINTERPRET_I32 = 0xBE, WI_F64_REINTERPRET_I64 = 0xBF, + + WI_I32_EXTEND_8_S = 0xC0, + WI_I32_EXTEND_16_S = 0xC1, + WI_I64_EXTEND_8_S = 0xC2, + WI_I64_EXTEND_16_S = 0xC3, + WI_I64_EXTEND_32_S = 0xC4, } WasmInstructionType; typedef union { @@ -275,6 +281,11 @@ typedef struct WasmImport { OnyxToken *mod, *name; } WasmImport; +typedef struct WasmDatum { + u32 offset, length; + ptr data; +} WasmDatum; + typedef struct OnyxWasmModule { bh_allocator allocator; OnyxMessages* msgs; @@ -297,6 +308,7 @@ typedef struct OnyxWasmModule { bh_table(WasmExport) exports; bh_arr(WasmGlobal) globals; bh_arr(WasmFunc) funcs; + bh_arr(WasmDatum) data; u32 next_type_idx; u32 export_count; @@ -304,6 +316,7 @@ typedef struct OnyxWasmModule { u32 next_foreign_func_idx; u32 next_global_idx; u32 next_foreign_global_idx; + u32 next_datum_offset; } OnyxWasmModule; OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc, OnyxMessages* msgs); diff --git a/onyx b/onyx index 0791ddf0..c60bce6e 100755 Binary files a/onyx and b/onyx differ diff --git a/progs/arrays.onyx b/progs/arrays.onyx index ed01b721..b10b6deb 100644 --- a/progs/arrays.onyx +++ b/progs/arrays.onyx @@ -39,8 +39,21 @@ sort :: proc (src: ^i32, len: i32) { } } +other_str :: "I can't believe this \n actually fricken worked!"; + +str_test :: proc #export "main" { + hello_str :: "Hello World!"; + + walker := other_str; + while walker[0] != 0 as u8 { + print(walker[0] as i32); + + walker = (walker as i32 + 1) as ^u8; + } +} + // Don't need to bind this function to a symbol -proc #export "main" { +proc #export "main2" { print(min(10.0, 12.0)); global_arr = 0 as ^i32; diff --git a/src/onyx.c b/src/onyx.c index 028eafc8..59963553 100644 --- a/src/onyx.c +++ b/src/onyx.c @@ -183,7 +183,7 @@ static void merge_parse_results(CompilerState* compiler_state, ParseResults* res bh_arr_each(AstNode *, node, results->nodes_to_process) { Entity ent = { Entity_Type_Unknown }; - AstKind nkind = (*node)->kind; + AstKind nkind = (*node)->kind; switch (nkind) { case Ast_Kind_Function: ent.type = Entity_Type_Function; @@ -200,6 +200,11 @@ static void merge_parse_results(CompilerState* compiler_state, ParseResults* res ent.global = (AstGlobal *) *node; break; + case Ast_Kind_StrLit: + ent.type = Entity_Type_String_Literal; + ent.strlit = (AstStrLit *) *node; + break; + default: ent.type = Entity_Type_Expression; ent.expr = (AstTyped *) *node; diff --git a/src/onyxchecker.c b/src/onyxchecker.c index e1c215ca..abaf0ede 100644 --- a/src/onyxchecker.c +++ b/src/onyxchecker.c @@ -322,7 +322,7 @@ CHECK(binaryop, AstBinaryOp* binop) { onyx_message_add(state->msgs, ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE, binop->token->pos, - NULL, 0); + binop->left->token->text, binop->left->token->length); return 1; } @@ -330,7 +330,7 @@ CHECK(binaryop, AstBinaryOp* binop) { onyx_message_add(state->msgs, ONYX_MESSAGE_TYPE_UNRESOLVED_TYPE, binop->token->pos, - NULL, 0); + binop->right->token->text, binop->right->token->length); return 1; } @@ -446,12 +446,13 @@ CHECK(expression, AstTyped* expr) { expr->type = ((AstArgument *) expr)->value->type; break; - case Ast_Kind_Literal: + case Ast_Kind_NumLit: // NOTE: Literal types should have been decided // in the parser (for now). assert(expr->type != NULL); break; + case Ast_Kind_StrLit: break; case Ast_Kind_Function: break; case Ast_Kind_Overloaded_Function: break; @@ -648,6 +649,8 @@ void onyx_type_check(SemState* state, ProgramInfo* program) { if (check_expression(state, entity->expr)) return; break; + case Entity_Type_String_Literal: break; + default: DEBUG_HERE; break; } } diff --git a/src/onyxparser.c b/src/onyxparser.c index 641c32d9..d9260851 100644 --- a/src/onyxparser.c +++ b/src/onyxparser.c @@ -87,7 +87,7 @@ static OnyxToken* expect_token(OnyxParser* parser, TokenType token_type) { } static AstNumLit* parse_numeric_literal(OnyxParser* parser) { - AstNumLit* lit_node = make_node(AstNumLit, Ast_Kind_Literal); + AstNumLit* lit_node = make_node(AstNumLit, Ast_Kind_NumLit); lit_node->token = expect_token(parser, Token_Type_Literal_Numeric); lit_node->flags |= Ast_Flag_Comptime; lit_node->value.l = 0ll; @@ -230,25 +230,39 @@ static AstTyped* parse_factor(OnyxParser* parser) { retval = (AstTyped *) parse_numeric_literal(parser); break; - case Token_Type_Literal_True: - { - AstNumLit* bool_node = make_node(AstNumLit, Ast_Kind_Literal); - bool_node->type_node = (AstType *) &basic_type_bool; - bool_node->token = expect_token(parser, Token_Type_Literal_True); - bool_node->value.i = 1; - retval = (AstTyped *) bool_node; - break; - } + case Token_Type_Literal_String: { + AstPointerType* str_type = make_node(AstPointerType, Ast_Kind_Pointer_Type); + str_type->flags |= Basic_Flag_Pointer; + str_type->elem = (AstType *) &basic_type_u8; - case Token_Type_Literal_False: - { - AstNumLit* bool_node = make_node(AstNumLit, Ast_Kind_Literal); - bool_node->type_node = (AstType *) &basic_type_bool; - bool_node->token = expect_token(parser, Token_Type_Literal_False); - bool_node->value.i = 0; - retval = (AstTyped *) bool_node; - break; - } + AstStrLit* str_node = make_node(AstStrLit, Ast_Kind_StrLit); + str_node->token = expect_token(parser, Token_Type_Literal_String); + str_node->type_node = (AstType *) str_type; + str_node->addr = 0; + + bh_arr_push(parser->results.nodes_to_process, (AstNode *) str_node); + + retval = (AstTyped *) str_node; + break; + } + + case Token_Type_Literal_True: { + AstNumLit* bool_node = make_node(AstNumLit, Ast_Kind_NumLit); + bool_node->type_node = (AstType *) &basic_type_bool; + bool_node->token = expect_token(parser, Token_Type_Literal_True); + bool_node->value.i = 1; + retval = (AstTyped *) bool_node; + break; + } + + case Token_Type_Literal_False: { + AstNumLit* bool_node = make_node(AstNumLit, Ast_Kind_NumLit); + bool_node->type_node = (AstType *) &basic_type_bool; + bool_node->token = expect_token(parser, Token_Type_Literal_False); + bool_node->value.i = 0; + retval = (AstTyped *) bool_node; + break; + } default: onyx_message_add(parser->msgs, @@ -941,7 +955,8 @@ static AstNode* parse_top_level_statement(OnyxParser* parser) { if (global->exported_name == NULL) global->exported_name = symbol; - } else if (node->kind != Ast_Kind_Overloaded_Function) { + } else if (node->kind != Ast_Kind_Overloaded_Function + && node->kind != Ast_Kind_StrLit) { // HACK bh_arr_push(parser->results.nodes_to_process, (AstNode *) node); } diff --git a/src/onyxsymres.c b/src/onyxsymres.c index 179dc9a1..5b03638b 100644 --- a/src/onyxsymres.c +++ b/src/onyxsymres.c @@ -158,12 +158,8 @@ static void symres_expression(SemState* state, AstTyped** expr) { symres_expression(state, &((AstBinaryOp *)(*expr))->right); break; - case Ast_Kind_Unary_Op: - symres_unaryop(state, (AstUnaryOp **) expr); - break; - + case Ast_Kind_Unary_Op: symres_unaryop(state, (AstUnaryOp **) expr); break; case Ast_Kind_Call: symres_call(state, (AstCall *) *expr); break; - case Ast_Kind_Block: symres_block(state, (AstBlock *) *expr); break; case Ast_Kind_Symbol: @@ -174,7 +170,8 @@ static void symres_expression(SemState* state, AstTyped** expr) { case Ast_Kind_Local: break; case Ast_Kind_Function: - case Ast_Kind_Literal: + case Ast_Kind_NumLit: + case Ast_Kind_StrLit: (*expr)->type_node = symres_type(state, (*expr)->type_node); break; diff --git a/src/onyxutils.c b/src/onyxutils.c index 96163faf..1138f7cc 100644 --- a/src/onyxutils.c +++ b/src/onyxutils.c @@ -32,7 +32,8 @@ static const char* ast_node_names[] = { "FUNCTION_TYPE", "TYPE_END (BAD)" - "LITERAL", + "NUMERIC LITERAL", + "STRING LITERAL", "PARAM", "ARGUMENT", "CALL", diff --git a/src/onyxwasm.c b/src/onyxwasm.c index d8040ae7..7117648d 100644 --- a/src/onyxwasm.c +++ b/src/onyxwasm.c @@ -634,70 +634,62 @@ COMPILE_FUNC(expression, AstTyped* expr) { bh_arr(WasmInstruction) code = *pcode; switch (expr->kind) { - case Ast_Kind_Binary_Op: - compile_binop(mod, &code, (AstBinaryOp *) expr); - break; - - case Ast_Kind_Unary_Op: - compile_unaryop(mod, &code, (AstUnaryOp *) expr); - break; - case Ast_Kind_Local: - case Ast_Kind_Param: - { - i32 localidx = (i32) bh_imap_get(&mod->local_map, (u64) expr); + case Ast_Kind_Param: { + i32 localidx = (i32) bh_imap_get(&mod->local_map, (u64) expr); - WID(WI_LOCAL_GET, localidx); - break; - } - - case Ast_Kind_Global: - { - i32 globalidx = (i32) bh_imap_get(&mod->index_map, (u64) expr); + WID(WI_LOCAL_GET, localidx); + break; + } - WID(WI_GLOBAL_GET, globalidx); - break; - } + case Ast_Kind_Global: { + i32 globalidx = (i32) bh_imap_get(&mod->index_map, (u64) expr); - case Ast_Kind_Literal: - { - AstNumLit* lit = (AstNumLit *) expr; - WasmType lit_type = onyx_type_to_wasm_type(lit->type); - WasmInstruction instr = { WI_NOP, 0 }; - - if (lit_type == WASM_TYPE_INT32) { - instr.type = WI_I32_CONST; - instr.data.i1 = lit->value.i; - } else if (lit_type == WASM_TYPE_INT64) { - instr.type = WI_I64_CONST; - instr.data.l = lit->value.l; - } else if (lit_type == WASM_TYPE_FLOAT32) { - instr.type = WI_F32_CONST; - instr.data.f = lit->value.f; - } else if (lit_type == WASM_TYPE_FLOAT64) { - instr.type = WI_F64_CONST; - instr.data.d = lit->value.d; - } + WID(WI_GLOBAL_GET, globalidx); + break; + } - bh_arr_push(code, instr); - break; + case Ast_Kind_NumLit: { + AstNumLit* lit = (AstNumLit *) expr; + WasmType lit_type = onyx_type_to_wasm_type(lit->type); + WasmInstruction instr = { WI_NOP, 0 }; + + if (lit_type == WASM_TYPE_INT32) { + instr.type = WI_I32_CONST; + instr.data.i1 = lit->value.i; + } else if (lit_type == WASM_TYPE_INT64) { + instr.type = WI_I64_CONST; + instr.data.l = lit->value.l; + } else if (lit_type == WASM_TYPE_FLOAT32) { + instr.type = WI_F32_CONST; + instr.data.f = lit->value.f; + } else if (lit_type == WASM_TYPE_FLOAT64) { + instr.type = WI_F64_CONST; + instr.data.d = lit->value.d; } - case Ast_Kind_Block: compile_block(mod, &code, (AstBlock *) expr); break; - - case Ast_Kind_Call: - compile_call(mod, &code, (AstCall *) expr); + bh_arr_push(code, instr); break; + } - case Ast_Kind_Intrinsic_Call: - compile_intrinsic_call(mod, &code, (AstIntrinsicCall *) expr); + case Ast_Kind_StrLit: { + WID(WI_I32_CONST, ((AstStrLit *) expr)->addr); break; + } + + case Ast_Kind_Block: compile_block(mod, &code, (AstBlock *) expr); break; + case Ast_Kind_Call: compile_call(mod, &code, (AstCall *) expr); break; + case Ast_Kind_Intrinsic_Call: compile_intrinsic_call(mod, &code, (AstIntrinsicCall *) expr); break; + case Ast_Kind_Binary_Op: compile_binop(mod, &code, (AstBinaryOp *) expr); break; + case Ast_Kind_Unary_Op: compile_unaryop(mod, &code, (AstUnaryOp *) expr); break; case Ast_Kind_Array_Access: { AstArrayAccess* aa = (AstArrayAccess *) expr; - WID(WI_I32_CONST, aa->elem_size); compile_expression(mod, &code, aa->expr); - WI(WI_I32_MUL); + if (aa->elem_size != 1) { + WID(WI_I32_CONST, aa->elem_size); + WI(WI_I32_MUL); + } compile_expression(mod, &code, aa->addr); WI(WI_I32_ADD); @@ -981,6 +973,47 @@ static void compile_global(OnyxWasmModule* module, AstGlobal* global) { bh_arr_push(module->globals, glob); } +static void compile_string_literal(OnyxWasmModule* mod, AstStrLit* strlit) { + + // NOTE: Allocating more than necessary, but there are no cases + // in a string literal that create more bytes than already + // existed. You can create less however ('\n' => 0x0a). + i8* strdata = bh_alloc_array(global_heap_allocator, i8, strlit->token->length + 1); + + i8* src = (i8 *) strlit->token->text; + i8* des = strdata; + for (i32 i = 0, len = strlit->token->length; i < len; i++) { + if (src[i] == '\\') { + i++; + switch (src[i]) { + case 'n': *des++ = '\n'; break; + case 't': *des++ = '\t'; break; + case 'r': *des++ = '\r'; break; + case 'v': *des++ = '\v'; break; + case 'e': *des++ = '\e'; break; + default: *des++ = '\\'; + *des++ = src[i]; + } + } else { + *des++ = src[i]; + } + } + *des++ = '\0'; + + u32 length = (u32) (des - strdata); + + WasmDatum datum = { + .offset = mod->next_datum_offset, + .length = length, + .data = strdata, + }; + + strlit->addr = (u32) mod->next_datum_offset, + mod->next_datum_offset += length; + + bh_arr_push(mod->data, datum); +} + OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc, OnyxMessages* msgs) { OnyxWasmModule module = { .allocator = alloc, @@ -1003,6 +1036,9 @@ OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc, OnyxMessages* msgs) { .next_global_idx = 0, .next_foreign_global_idx = 0, + .data = NULL, + .next_datum_offset = 0, + .structured_jump_target = NULL, }; @@ -1010,6 +1046,7 @@ OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc, OnyxMessages* msgs) { bh_arr_new(alloc, module.funcs, 4); bh_arr_new(alloc, module.imports, 4); bh_arr_new(alloc, module.globals, 4); + bh_arr_new(alloc, module.data, 4); // NOTE: 16 is probably needlessly large bh_arr_new(global_heap_allocator, module.structured_jump_target, 16); @@ -1025,7 +1062,6 @@ OnyxWasmModule onyx_wasm_module_create(bh_allocator alloc, OnyxMessages* msgs) { } void onyx_wasm_module_compile(OnyxWasmModule* module, ProgramInfo* program) { - module->next_func_idx = program->foreign_func_count; module->next_global_idx = program->foreign_global_count; @@ -1056,10 +1092,17 @@ void onyx_wasm_module_compile(OnyxWasmModule* module, ProgramInfo* program) { break; } + case Entity_Type_String_Literal: { + compile_string_literal(module, (AstStrLit *) entity->strlit); + } + default: break; } } + + + // NOTE: Then, compile everything bh_arr_each(Entity, entity, program->entities) { switch (entity->type) { @@ -1516,6 +1559,41 @@ static i32 output_codesection(OnyxWasmModule* module, bh_buffer* buff) { return buff->length - prev_len; } +static i32 output_datasection(OnyxWasmModule* module, bh_buffer* buff) { + i32 prev_len = buff->length; + + bh_buffer_write_byte(buff, WASM_SECTION_ID_DATA); + + bh_buffer vec_buff; + bh_buffer_init(&vec_buff, buff->allocator, 128); + + i32 leb_len; + u8* leb = uint_to_uleb128((u64) bh_arr_length(module->data), &leb_len); + bh_buffer_append(&vec_buff, leb, leb_len); + + bh_arr_each(WasmDatum, datum, module->data) { + // NOTE: 0x00 memory index + bh_buffer_write_byte(&vec_buff, 0x00); + + bh_buffer_write_byte(&vec_buff, WI_I32_CONST); + leb = int_to_leb128((i64) datum->offset, &leb_len); + bh_buffer_append(&vec_buff, leb, leb_len); + bh_buffer_write_byte(&vec_buff, WI_BLOCK_END); + + leb = uint_to_uleb128((u64) datum->length, &leb_len); + bh_buffer_append(&vec_buff, leb, leb_len); + fori (i, 0, datum->length - 1) bh_buffer_write_byte(&vec_buff, ((u8 *) datum->data)[i]); + } + + leb = uint_to_uleb128((u64) (vec_buff.length), &leb_len); + bh_buffer_append(buff, leb, leb_len); + + bh_buffer_concat(buff, vec_buff); + bh_buffer_free(&vec_buff); + + return buff->length - prev_len; +} + void onyx_wasm_module_write_to_file(OnyxWasmModule* module, bh_file file) { bh_buffer master_buffer; bh_buffer_init(&master_buffer, global_heap_allocator, 128); @@ -1530,6 +1608,7 @@ void onyx_wasm_module_write_to_file(OnyxWasmModule* module, bh_file file) { output_exportsection(module, &master_buffer); output_startsection(module, &master_buffer); output_codesection(module, &master_buffer); + output_datasection(module, &master_buffer); bh_file_write(&file, master_buffer.data, master_buffer.length); }