From 5a26feff68cfc3febda691584b951e97e7adf2cf Mon Sep 17 00:00:00 2001 From: Brendan Hansen Date: Wed, 6 May 2020 14:58:54 -0500 Subject: [PATCH] Working on the tokenizer --- bh.h | 89 ++++++++++++++++++------- docs/plan | 56 ++++++++++++++++ onyx | Bin 31664 -> 33824 bytes onyx.c | 171 +++++++++++++++++++++++++++++++++++++++++------- progs/demo.onyx | 16 +++-- progs/mvp.onyx | 14 ++++ 6 files changed, 289 insertions(+), 57 deletions(-) create mode 100644 docs/plan create mode 100644 progs/mvp.onyx diff --git a/bh.h b/bh.h index 16221650..7eb4fd09 100644 --- a/bh.h +++ b/bh.h @@ -21,6 +21,17 @@ typedef signed int i32; typedef signed long i64; typedef signed long long i128; typedef unsigned long isize; +typedef i32 b32; + +//------------------------------------------------------------------------------------- +// Better character functions +//------------------------------------------------------------------------------------- +inline b32 char_is_alpha(const char a); +inline b32 char_is_num(const char a); +inline b32 char_is_alphanum(const char a); +inline b32 char_is_whitespace(const char a); +inline b32 char_in_range(const char lo, const char hi, const char a); +char charset_contains(const char* charset, char ch); //------------------------------------------------------------------------------------- // Better strings @@ -56,8 +67,8 @@ typedef struct bh_string { bh_string bh_string_new_cap(unsigned long cap); bh_string bh_string_new_str(const char* cstr); -i32 bh_string_delete(bh_string* str); -i32 bh_string_ensure_capacity(bh_string* str, u64 cap); +b32 bh_string_delete(bh_string* str); +b32 bh_string_ensure_capacity(bh_string* str, u64 cap); void bh_string_append_bh_string(bh_string* str1, bh_string* str2); void bh_string_append_cstr(bh_string* str1, const char* str2); void bh_string_replace_at_bh_string(bh_string* dest, bh_string* src, u64 offset); @@ -74,7 +85,6 @@ void bh_string_print(bh_string* str); 
//------------------------------------------------------------------------------------- // Better files //------------------------------------------------------------------------------------- - typedef enum bh_file_error { BH_FILE_ERROR_NONE, BH_FILE_ERROR_INVALID @@ -120,10 +130,9 @@ bh_file_error bh_file_create(bh_file* file, char const* filename); bh_file_error bh_file_open(bh_file* file, char const* filename); bh_file_error bh_file_open_mode(bh_file* file, bh_file_mode mode, const char* filename); bh_file_error bh_file_new(bh_file* file, bh_file_descriptor fd, const char* filename); -i32 bh_file_read_at(bh_file* file, i64 offset, void* buffer, isize buff_size, isize* bytes_read); -i32 bh_file_write_at(bh_file* file, i64 offset, void const* buffer, isize buff_size, isize* bytes_wrote); -static i32 bh__file_seek_wrapper(i32 fd, i64 offset, bh_file_whence whence, i64* new_offset); -i32 bh_file_seek(bh_file* file, i64 offset); +b32 bh_file_read_at(bh_file* file, i64 offset, void* buffer, isize buff_size, isize* bytes_read); +b32 bh_file_write_at(bh_file* file, i64 offset, void const* buffer, isize buff_size, isize* bytes_wrote); +static b32 bh__file_seek_wrapper(i32 fd, i64 offset, bh_file_whence whence, i64* new_offset); i64 bh_file_seek_to_end(bh_file* file); i64 bh_file_skip(bh_file* file, i64 bytes); i64 bh_file_tell(bh_file* file); @@ -156,6 +165,44 @@ i32 bh_file_contents_delete(bh_file_contents* contents); // IMPLEMENTATIONS //------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------------- +// CHAR FUNCTIONS +//------------------------------------------------------------------------------------- +inline b32 char_is_alpha(const char a) { + return ('a' <= a && a <= 'z') || ('A' <= a && a <= 'Z'); +} + +inline b32 char_is_num(const char a) { + return ('0' <= a && a <= '9'); +} + +inline b32 char_is_alphanum(const char a) { + return char_is_alpha(a) || 
char_is_num(a); +} + +inline b32 char_is_whitespace(const char a) { + return charset_contains(" \t\r\n", a); +} + +inline b32 char_in_range(const char lo, const char hi, const char a) { + return lo <= a <= hi; +} + +char charset_contains(const char* charset, char ch) { + while (*charset) { + if (*charset == ch) return ch; + charset++; + } + + return 0; +} + +i64 chars_match(char* ptr1, char* ptr2) { + i64 len = 0; + while (*ptr1 == *ptr2) ptr1++, ptr2++, len++; + return *ptr2 == '\0' ? len : 0; +} + //------------------------------------------------------------------------------------- // STRING IMPLEMENTATION //------------------------------------------------------------------------------------- @@ -182,14 +229,14 @@ bh_string bh_string_new_str(const char* cstr) { return str; } -i32 bh_string_delete(bh_string* str) { +b32 bh_string_delete(bh_string* str) { free(str->data); str->length = 0; str->capacity = 0; return 1; } -i32 bh_string_ensure_capacity(bh_string* str, u64 cap) { +b32 bh_string_ensure_capacity(bh_string* str, u64 cap) { if (str->capacity >= cap) return 1; //TODO: This could fail @@ -217,7 +264,7 @@ void bh_string_append_cstr(bh_string* str1, const char* str2) { } void bh_string_replace_at_bh_string(bh_string* dest, bh_string* src, u64 offset) { - if (offset >= dest->length) return; + if (offset > dest->length) return; if (!bh_string_ensure_capacity(dest, offset + src->length)) return; memcpy(dest->data + offset, src->data, src->length); @@ -226,7 +273,7 @@ void bh_string_replace_at_bh_string(bh_string* dest, bh_string* src, u64 offset) } void bh_string_replace_at_cstr(bh_string* dest, const char* src, u64 offset) { - if (offset >= dest->length) return; + if (offset > dest->length) return; const int srclen = strlen(src); if (!bh_string_ensure_capacity(dest, offset + srclen)) return; @@ -253,14 +300,6 @@ void bh_string_insert_at_cstr(bh_string* dest, const char* src, u64 offset) { dest->length += srclen; } -static inline u8 charset_contains(const char* 
charset, char ch) { - while (*charset) { - if (*charset == ch) return *charset; - charset++; - } - - return 0; -} void bh_string_trim_end(bh_string* str, const char* charset) { while (charset_contains(charset, str->data[str->length - 1])) @@ -362,14 +401,14 @@ bh_file_error bh_file_new(bh_file* file, bh_file_descriptor fd, const char* file return BH_FILE_ERROR_NONE; } -i32 bh_file_read_at(bh_file* file, i64 offset, void* buffer, isize buff_size, isize* bytes_read) { +b32 bh_file_read_at(bh_file* file, i64 offset, void* buffer, isize buff_size, isize* bytes_read) { isize res = pread(file->fd, buffer, buff_size, offset); if (res < 0) return 0; if (bytes_read) *bytes_read = res; return 1; } -i32 bh_file_write_at(bh_file* file, i64 offset, void const* buffer, isize buff_size, isize* bytes_wrote) { +b32 bh_file_write_at(bh_file* file, i64 offset, void const* buffer, isize buff_size, isize* bytes_wrote) { isize res; i64 current_offset = 0; bh__file_seek_wrapper(file->fd, offset, BH_FILE_WHENCE_CURRENT, &current_offset); @@ -385,7 +424,7 @@ i32 bh_file_write_at(bh_file* file, i64 offset, void const* buffer, isize buff_s return 1; } -static i32 bh__file_seek_wrapper(i32 fd, i64 offset, bh_file_whence whence, i64* new_offset) { +static b32 bh__file_seek_wrapper(i32 fd, i64 offset, bh_file_whence whence, i64* new_offset) { i64 res = lseek(fd, offset, whence); if (res < 0) return 0; if (new_offset) *new_offset = res; @@ -426,11 +465,11 @@ bh_file_error bh_file_close(bh_file* file) { return err; } -i32 bh_file_read(bh_file* file, void* buffer, isize buff_size) { +b32 bh_file_read(bh_file* file, void* buffer, isize buff_size) { return bh_file_read_at(file, bh_file_tell(file), buffer, buff_size, NULL); } -i32 bh_file_write(bh_file* file, void* buffer, isize buff_size) { +b32 bh_file_write(bh_file* file, void* buffer, isize buff_size) { return bh_file_write_at(file, bh_file_tell(file), buffer, buff_size, NULL); } @@ -465,7 +504,7 @@ bh_file_contents bh_file_read_contents_direct(const 
char* filename) { return fc; } -i32 bh_file_contents_delete(bh_file_contents* contents) { +b32 bh_file_contents_delete(bh_file_contents* contents) { free(contents->data); contents->length = 0; return 1; diff --git a/docs/plan b/docs/plan new file mode 100644 index 00000000..70cd353e --- /dev/null +++ b/docs/plan @@ -0,0 +1,56 @@ +The ONYX Programming Language +----------------------------- + +WHAT: + ONYX is a low-ish level programming language designed for use with + Web-Assembly 32-bit (WASM). It features some advanced features such + as comptime code execution and JS literals for external functions. + +WHY: + ONYX was made to help me learn about compiler design. + +FEATURES: + - Strong type system + - Smart package loading + - Structs and enums + - functions (no anonymous functions) + - Control structures + if, for, switch + - pointers + - inferred typing + - defer + ? polymorphic functions + +EXAMPLE CODE: + +// This is a comment +// This is also the only way to do comments + +use "core"; // Looks for "core.onyx" in the current directory + +Foo :: struct { x: i32, y: i32 }; + +export add :: (a: i32, b: i32) -> i32 { + return a + b; +}; + +foo :: (a: i32) -> Foo { + return Foo { x = a, y = 0 }; +} + +MVP CODE: + +// Comments need to be parsed + +export add :: (a: i32, b: i32) -> i32 { + return a + b; +} + +export max :: (a: i32, b: i32) -> i32 { + // Curly braces are required + if a > b { + return a; + } else { + return b; + } +} \ No newline at end of file diff --git a/onyx b/onyx index 272ceb6aab5bb2c5260ac4a5b1ecf709eec5ac1a..2d5a2d93f7c2ed09c2d0dd54a2cc0bc6f90e9a32 100755 GIT binary patch literal 33824 zcmeHwdw5jUx%b{PS;-tBU;+sVm~g4$5RyqUl1ySU!^MhF zB7RH*nzYnfOO?}e+WJ*(Ij!ZPiZxyesJ&q8v9YCX>QPTR?@@Ya04cU|`E*-1R- zJkR%h=Z}vKnRl)Cw%+xwcU|_{YtQWUm9Dhux~49>wp62>H%nlOB;nZSWdfi`Td1Ys zbCNbuOGZA4;{?4(08qu%U`vD!iMIoi+@q3ih2tcjg33aIBsbi3B~?%q)Z8LZa*UEP zZ-v28f=@w3H_9s%bj<==oC2?)S&n?uw?xqU$`av0asaMJB~)_c4wAbf<*rCM1$(6< 
z6jc5spU~fn(w|ni0yMf5Bwo5I-j38#`p9)WI|$w?%TrLvt%Drd`Cq-1+tUGYo#+F!)!2_u|ib%>iPldMz9VPfi_* zf5$NRhGFof!{A$n!S@31#h>-s48$ZW68W3(m!;)s`^X`@mo*fI3$#F>EmB>x zJy6rIJy2I2YSdbT)s2nKHCkJwwJ{jh8ry=w?OIc?sitMO)}%6T^`ljZv)EN-0 zLo$lKV?)oe<4^sgQAni;5*K~y;G!<2#(^VU51^YRX-6c^6AEQek9kRSqr{i;?G&|dnCTcg8!<-_ge7Z zllVRh{;b4bwBY|B@mDPPze~JkiN6f$aJU>6{B((TTkuOIKF5Nulz6WNzg^;ISnywv z_(BW*L5W{z!5^3SA`AW{iT7LZzmxb13;r(>zsZ6hkoc_@eAacsKMfZAWQp%77jzo) z`Cu=yTCWK&r+7~3GvR4WR@X%nK3gFHubA*ON2zO{w1?ItG{2}TN49$k5Y%7PBTyk-dO(uN0QUbWu zghxj=t_Bm{WneX}#e~l^;dhzvZWDf=37=)cA2i{InrBR`SCdygaMc4>J@EgP2QFof z|H$9|uEXEK-oP5yf1o>J8#w20f6?&*_rt*aH-R3Q`W8MjClnz?`39n0=pDc{^|zF# zsp`Txp8pl)X)3+Y&GRo)o~En|$9evF%F|SJ;TX?9MR}T{E_Cwz4=7Jl(}jIJ|6R({ zlysqm=f6pLnu;!L<@tvwPgC@T3ZDNGJUNRFC{=mpD zE!DKkoyg`E5tfZ0u;@x;BwQN?ja=&-=*^t454yD%Bn1Bj6zD(9{p_Fl4}a{U|GB^3 z4S^) zWu9}O_m?e7-Clpd%&2jV$@>`Y(MKL!T;F&`DtM@0y{@1nn> ztcP^@JE~xU?vJfQXagN9rqX&|685#fTLgt?p|Bg+Zc@1X(}97Q?|g^vy#GPU|14oPU@hCBgff@0M)*a~Ys*s;~b z0Zx-wz^^&g6I?V^)#7jFEB zyU_4YPon=GL+HbQ8x16OQaZJ93$ zK1+!*`V*tX>VEF*KVSLJ;`K)Kh6CN1_v{ySLq&{Wq>7;-DdwnRJ$_L!WJh$AA-X^0 zOfloa$s{ckXQbnuu;(l z8}9?CBL>PI*-jn4x7vF%+pnbW+!*WjANCvoKiPT=tEi478;0-0BahIEN; zwMZ00bN^%{im+bnAAyMS4D~Ii==@EeAFhE`L+TYp$Y4pSk+l*?1sjZwf>HzsWZrD&@iXlwB+eQ zm>s6UfdpQCiW&rkwGu1UZ;|g@bUaeT)=||Bo1pU-;8=P2O(zhlppTL0X|+@=WY%K>}5{bPIHMFo0ko{W|C@S&>=3nd__;}8Vh}6NZxX61F1v|L{fr(`&TtRwK9Ia$gMWD;bX#C^lIPp37QT>vK0e?(e~s>G@fad1R=UBqt#YN8RAro9L2oZ zX@vP9roPV>&-yI!(^m05bdHOv`c%NLsIt`Ys6Qa@6qV=c6M}}|poIg``BLK`t^DD7 z!>&T!DV5^5Q_xY*^zG&LLAZTSpBBX=O~F=mrGFjQ*xrp>kTZQ7-ZNXl!%>ymBb>n- zhgkoKF9f+k7cn57<}t#V4SR}U&sl3gbzrIny^v zxS^mbIerlu)A)Iypd_W`BdLXBK;sHHw;Gm1ZXrzi}*8Cw~C*l-dy z95;KYZ%+3aUpE;HUB~%-01ee=eCJ39U=OFA@%3`Zd3brmi#w_3qv?<*-V5(x5bz)7 zH;%`DOalUDuRr5{jGXfU+=`Xlfd|Yyn4pN~=aT1D$rCm6(EbQ`j!T{=Bu}N8hxSOo z(AfBsGK+-K&Yy&32^Cwcx=@-&%wXuk$L-;+FtB~PiDhxTy5 zb4c=pB+o1}5AExK=XS~ClRT+r9?*!#FL@?Po(mi;+Z}Z#o@tWjQ?Y_SaMsL2k0HR5 zBzfMDJl`?%&?5=(ya~I=KR=Q@_nLX=aT0jCB~KUUiH3}JpvOv}e^b)$5cEfob( 
zK46^0o}&L2xj*!UcROKp?3ugx(Db7g0CR7pMNY>7!us30QIH$MR;L_4!dOYge23ci z9MU3lEnFS!iEGIh^B+Vkz@L1Y>H9GZ;6ufQUvp29sxy6F8X;r8V;HF(73$CQY2sGz zQQ*<=?g3}7xP3Y3>)@lFK=5I_st21_ zv;t25DKtO@TRID%BMEcFX>0_3geVs#zy4?P8NY>xI^4wgIu6<(+kAN@1!58tw-_}1br z#kUm)%zKDb-V3N4LB6kS>?Bi_uU=O@aMc4>J#f_nS3Pjm16Mt8)dT<29?)^>&Ff6f zz#-LIoFBz!b2LJLj-}%IHrA4N;O|cSQMz41N~^y^NPk7<9|o)}U*lUFsN7uP3skLL zv$lNWTFr#=tz8kgxpKX)xXg@QN^ZQJh>3)oDQ-Z`SIXTXE_+5^W7@vzKZ!w3*uHwLRJ`+7@k=He0(!E6}ED zD{(YBSnG{6d+EHhw+=b))V5S@YTFGQ;()?f`;D7$lXVT^@6hiD1`Z*8=nn$}5x@g~ z92j^K@EO3@0MmO12IzUn7C;YnIX(cK2UzeY#3A54fDyptHwOle03HH-5^(OnLmu!= zz{`L?cnk8_h55qUkOzDNuo5u#4&(tJ13UuwI^dIl)&Bu`!21C&1D*i%V5etiALIeQ z30Mhu888BP0=HI20MpPvo&*d5z6SUr;AOx`xJmV3hi4DqJis3SRsy~U7y&GN7xI8H zz$XE7{{nfy1AvzSe+cNoshI_Tg*@QzvCXy#uoSz|cLAOPd=XHcLDlbEuj#wo`q=ao zM<@J8csh&khQA8{wG}C}$DvKZ-#q+X`Q5+(KalEjuXN?C&P?0s*r(kz=EiI0PMJh} zBzr6VUV{8WLKM5)_u7_cB)6deWQl$k{{94dIiNwet>z3wKZ-v$+L<4XRr0n{gMNy{ zKz|T0F8^hNejex_gMNdV9*vj(3GlxLy~<3tRT%Q);0FidYJUQKk3nAy`t6|Kn?V19 zL8o_Q&Vf!lJaPNJY0&QieIw%Y`w8^#8uX)}zXSTY1o|rm{S@dAAfA7jK!4ewp9kHC z_}-L2zuBOF0{R}%k0;O{Gw9>c4>Hgnc!MhcZ{cL#ev3iR1Ra|e@%q)p>9>MD2J{Xy z-FB~`|1QuEfSzxrhvM=_LH{!7*pi6r-xjBz0{tP-pEJ{Krwsk)LBA3GcsTn$0evOt z>l5V54Eb^B$Gbp(K7sy>L0=5|7>o#EVbJdaeF?_N#sqr3K|cz54D{jR;}qy$1-(5%{%$Vs1@1iPAA$ZI z)YB;6*KW6e!*1}m30eOL#`?51fVkt_4+J-o3pPis56#t5LP_wmNb zyrSdDXfvT4G&bW>V`7m^U}?&)LP@6whje`;0NytjSB``@E|hRdF}#>0(3tA^#ZRUq zrPjSE5@}8{PmF7oPX<`XI+y{~`1I2) z2B{xUns{FMRjU1e4%B<))`AzvI9Ms+1_|pV+#%t;5itXgzNLD9-k((OORD!H)%%d@ z{YUk_qk6wlz0auLUsUfas`nGs`-tlOL-oGls^!aX@Xjc1jf8f2OX*E^@1p#<`GvFR z%oGXjTGT@XuL!|hdPI!tIrCeE0>wW|fXBKAmqQS1#nGf|H73@~hN3Te`uBcX^baCQ=xb37Gv zJWX6XcLvb0f$cbAFQ)Y~kiS>Rd~zZ*#5!?zoednJkbx8<~^N za`M|&ZdhaVHeKsM(I8pyZTXQj|)=ADiV$`wQ4p9818E;vhmgH5l2IK_Noe z`N*VQgjp$$T1x&9Mx{8Kk>tEzRJ_@mtCf_+Ty*#)g*UeArzH19j&Us{bsZx8Yp^)R zqz8dYe;w&p@RzA2cXP>a0GoUcNf#-1j2%N%yMuinjfkKf$C*CHr#tHv7~`B^%XIQ; zP#C#>4R54Qaq+ZH>Fangt_v_F^?H$B1IE-Fd3r>2p{8eyE&*mlbnbPUo;j+Brx(oA 
zbT{uPBSM7tc*xr1Z769(h=?Oc^3bn>Rr+YbxQ!TdGKj}<0v|d0bob8you^=@J|mgZ znvlD^6HDKef%Q4!6v51-h( zC9-~t{SUaLQ?rqPUTb%PHG2|1WI1)N z?IcVenrA9psC(!P4c_A?CjEB%N&s6H8Qz#dPH}{x+pUM)-vwc0U9F}!kK9Sp&R$}~V zFi5!Y^NzhxFP(drgE|8WAwGSN{Xaykak!IR>-4H_JtcaU~^egoT6Jv*Q#&65nybk8d=Im6=skISE!5fP|17mNNvMX*=1 zkvdz6{W@qH{VJvJVL=pg#}rJfQ8_gbvfbk%4tv+Tcjx>TcH8b5Lp|8u z)qD4--5}Twq*5r^yY}tPrRNZ~d)*ZJ_O4Iv&aJD_Y=^Qb3hZ5-dq+J)?DwZr3){Pn z?ahsB*K7|ssAjgVqA?4nf{d$804*27UlS8l*T;9m-lwU;8&in?Wv^i3y9Y?>NAD+l zPmzB%j-#|uz!qMBz3cd1z6`eQ)2GYIq6?ELqpu~YHLL?xuabJ(g?i|x+2L z>GE?G?ojv+Ep|fFpfJi<1j8%ZPuC3!~ zZx#6kG$fI=d9=x>zb1699Hg_~(D|COQ|f$8pPr!eHGRIJ^IPQ9SM*Hk3|Ueq-|XXd z-h_K5QTJE$X|nEK>OT6>D0AXOPzc}nEUHZ=$u3|;A8qUS_as^TzO?%#O@?PA)3x|ANpCBsZ z7bSmPpD9W%5<&ZQeaQ#17mKL_u5TfA_O3_w3S*9{UbuC_FQ}T| zP{vWOB-y;hFj60$)5Yg`!wjI$|i0CgGKZo3Q zMBgAxJ1I^3iZZQ1o9rbc?i1Z3O&T%F@X!{+i2L*@!b1(bsqWL4&}zljRV#+M`}8sz z=6EmGQ8~R$s9Zfp=z7jj`JAD$O}|D(P=yd#LMt#^SEbO|rk4wy{t35GJ)))_IEsAU zEXtTH%b0GI!CcCB&H7Ynw$RzEUoUi`2d<=E91=P{$Q8oz!GE)%BcxAPcJd)Tq%RUW zDkfBsitT!)u)9&4{Cx_z3Ze6Msk6hd>ls65g+5#9+(&h*&{xoc4t1aKI_X@e>RLO2 zm$_c&3qqQJrmUo)WR;$Llhhw?uD?M4Bs^&*@?yG&2clTwomt0reE zsb%^#N@~h4C@|*h$xU+ihE`*A=|{g{@Xyz$3I0vvPYZVG-3o7WU7e;c(34lAY#KvD zl*<&kMs3QUX!I-8lSfMJn}l*`f6mZes85mB2S>gkswc0Mds4qNL{dszk$Zr+;^Yjv-f0H)lUTWlQJ^75>gZsXr?^Q!zwmwUGQ|Qaq zmn7Jmt*=b5ccjVQH%Wi0p6r$Szhmg1h)p8lk5s)d!QNE8IKkdjeYIilr;yimJ^3NI z2Y9cc?+(&O-%SDXbZPJSCXqM#MGcu_)6>J;zm3||ZxO?X$vINpenZ{&pe~22`|w(6 zT@Kgw;Sy<_Hu_&EbpOT|X)nkfo%rnWHz*j#WBzX{c*oULw?JU7sLfVww1awOA6F2Q z3T`kgn260Hp`g#`Gowo>oZm>Jp7WmEOnT6e*b7XKiz4uinbblw3-AS|{##k&(p+xQ z@6t&D)^@oF|MPPF;MYc1k?gBn`!T6~ouR#(?BO+db=D$au~HP;ex=*!`K``O7N z7#qgbG=O&wZF*q%hg_CE)s>xQca3#r=_69y=#jQjn%#9n+Kp+8(rByAK1EAQTM7=` z;bkJJX-s0!u*uUj;z=O_B^=qh-IbOmq;(*$4HQ?l5W=Z(?YI%l26ZvUv~*oB$tM1c ziBMPoKG!^x;7z&2lsOSryQkqJOA;BGD2OAV*AcfmI|+s?O(Uz~#&~8x=Qt=I39Afd zvTBsMG#iGs(fRsKxythCxh2`;ouZrvmgSO_Im~1nvFAeGP)LX|Y4Gd_#NpUfgEb3* zGY*mojCn>usLY^(#>;|uMM>`jbIl3il_eV-6Z27X)Ojd%lVs@z^W>pR|Ej3wltgNR 
z3#J-0#L4vB1lqJjnsT1hI(=9wTDBW5vul4xO4mPjWzaYUuVLdy(+i?)t@Nd+1@$#G z1^719z=C{*_O|y64=D96~HNEMi7WFY!(r4u3>q&+fKs#j!#=1!?s!y zOWw>1=d$d}EbS?lv4&-~GiN)~9gnbr1~%dnOMi-G7qPUnByEc@Y{MkkVHSb+SsH=W z0=b`Iqb<3XGuDD zGjpG1dGyvW~eax0tI%^iU4bL*b?`7F_`Kpj;<&-OAkdJJA#L>8zlT*$_pJ0yga? zN{s;*2rscU%USXh%oSzWq;opH#L}b8{seOtS`%rLSr+0`#aZTM=8Qt$8dl81l2V(| z)f|N^a|fGco1IPiXBEUh7xJ|EGh+%%b3DvaLyXn2)HN(?m(;(pkY&Bi(h&IQ4x8Dw zLgsQj0skVZOGO>w$R~(&dm$ny`5k6!N1Hy)*lKcT3Uz)0;4fodat09JBmUbQpJVBc z%?P|G8zl%Vny9jH&igF+ZPMt3Mtu#N;rQHB2IeKgT-y>iaAm zBTYM8_#5U~&9YXryv59k&O+5{XTHLxnC%^w12s03LPJP9TZHPP@w1_x+?QlyYwSO} zD<3YYhfB~aqb#S3`Y^^F6q^b(*}a)fFGR4WzRN~MS=xRoIIF##W!}Rk!NvPo+Gb`) z0NrAnlKtQP??5{{-eFk?M|9R<#yu7wUWS^z%cFJ>u}t7m)~SdIH@ z&cZntGklkE5?XG$K=Pavphg^0Btxj<-Ei?OJg5n?Ir%MH3TEs`^Y&mk&{Eyn7Od3* zZ4Edxy)BCGcP=Rp;LD-62O{Pp*QUeq@(e!lfj>YGDKIXC%w?<=Q5Z@-< zmSBxKluu{l19gqn^=(>Bw6!%Djs$Q|+!l;z_}1yx?ZMVIa~;eF>Z#cL!KzA5%OeowI z#P?xWV&(k+~ zgTmd~R=p#rT8DO9sljTRYlAgAz>mfWw9)r>o2qP_=l5y&+Ua$LiJ&si>K%8 zCT8^lMZ9|a`@!+GTf-BK9+DJ^MC1W(4Trcn zzM*v+1=6S$O0b|d*cimO*Ygk#N1L{phA(OP@~ZWCktX477sMC6!eWe7G=|nR(5Omx z4B-gA{k{!fUf)t(JKx+kDr^V8CM4cvF?>&7s1KhQjX`~%yRNzw!}g8{eL230Xw(QH zr{Qmh=zM$_H8mEDrFUtdT#VE_)ONN~Y{i?qZF{IC9tbqP^T~ma05r^MXwv#fgVCxt zR~D~bQH*z_O4e3XRT_pS#+R&**#)Ah#H&fFy771vZDHW~4-xQ?2-OAywbhYo4Iw&* zKCXj%gW(;a*59)zpJu*`g7?e1*p6TP@JsSgRV6DoXdMgMG|r3tt;KbufZSRk$|TR?`~7lrH8B(FbWfkoVe(k9T#Isevod zcLa0mPD49K7&+CvB08-`(>&bPf-wLMiZL1^F|K0XvB^3OsDqZ#0<| zpVJLfp()y`ra<^IVu%|ZrM#+=dJ3=Qf_bQCAR5N&YPE=DG(4@U_^?L18lQF1FIPN@ zEe`92)uHiwJ7$_4+aK!OXd*tzIRBc6XPP?qnut%<)cM#%e2S*dktX6D;+vDrNTlsJ zy{gW&nn`vo6-Um@7?HNqXuPQ>H6NTC@c(smT8 zeuv0RwQE`0#kppT;vZrBE{K_G$MGU{4mlC;(S|zjYR54e^*a!W{CHwgVaABGT^p?( z8xkM?eHgQpU6aqL%t)l|nta+f2rnOA4#KNb)rkbVCZD+{qM=`&5isFt`!Gp+Z@`4& zN$g$^iO2RXqC7*Zz@rKCMbBmkzt4i7A@S;*xsq9mgpSkG2j#;7vV*QN{OLoqR|I&{ zugKLfAhG`ckoFG+rDy^^`32KS3x;B{?qeA445+YtS?0Q@7U9qEAq zE<7aR*G(KRGbXT3@*j{Pq>jS`fcKjCRPqUokKQ5r#pk6S>jPwZ=alTRK9qbH_RFV0 zN;QYy1HT*kThzlPN&S)3V|}#yvc$h9l|3l=7ZKhzcwC}$z(bWwCtruczkvDH^0Re(J1O~B 
zc}0cVWxbvsM*nYsm#mUgfOn)G>v0@91e0YOJ?NajeVBL{#qk-M^$0)#@L4$As^%9w zbKuuf$-hoEntH@YPw~mm)+0u@Nqp~AAs>`_=qWJqTTkH7DV3q(;meZWdamv?40B^V z?6k!5ze@jj<+xfP%RMgjSRWd{JWRQN1fKL;`%C{Y`~ywykBRWg)+$|b+`S@u?mw?1Y6yu@3N)Eyi~|3i}hm@Hhm`*Df4o)bDL@z%o} zbU2slWj#djCyBS7aU9@y*?1-}0s}AUu^wHVAo00g!JIE;u9tZ0VT*OZQ-86xFP%Om z{no=u&A?05QnLUt$^Wt>s?)DumU!!7u73u8Qi2ANc^3GgejksH~|;9s#`3#=+FUshZiC||kq zW?yBXvUpjkkF@e%`m1S+@@nzl`70_?KlDex>8Ea}#Esx*{lcH(OxWQtZmVFv+CDXK zS|G@G0t`fe-|35+-#C)?{OFz>f7rT6G|&3uh;iz?!P_|emlER~=IZfH6BNlez~#-b zk>#5g{I?Vng$MmkVj?+S1JmA^frRg=^0XO*Z##+aP{+2{JA}XP)VXS8b)J8+F=;qixLq-M?D5U$`^O+nJ_tq0mkY4%+1sKR+2S z+w9`R4JEl@X}AM@DRGAY?DQ*_iMj%TZEbDRW#Y#w6X|>pA+9qXb9~EQxs7it#JR-I zxBMl{M1654xw#le47;IA_2j>~Ih-ofqXq6^+M$Z;RV`#fnV)fhkqaeyQ)0!>cn&I8 zv?Dz(P||862fxMat4~G01&-gPACB-d9j3oX7 zX`->>;Z!0{$@488Ex&Dd6E;l&Bdr29s1!Dlf~_rDei-{f`8cYQzbzWV5Br8{C8~H? z$?QmVy~b$`)ol%0e(mls^a+R&hEsOnxl;&xk>*SQwANr_H3`U6OJhXK=e;0565NF} z%?qG4^C2xih<4)hLPITNRZd8VDM3i6G!$1ig=$c0Gj!o2pBEsMA?USyOioRhaA0h3 zTNDqzTC2nLn8{=|6s~Jl>FRBGO0z@dXg^$~X&C*#a?wr#ZeegwC@!^+rJ!|3fq2c= zWMps%$&k1HuJjZ$GtyZ0BwP!SGs~-eF$KK|+=Jy;fR0~rp!W^=<-j_XE(KNjSZ45x z?%&PVT4b=BW!SIw*%Wk3#mV5Ix+{5{CF9oyd{U9Be6=5^;75YZ__X?uUN@k3!Iiw) zw^Q(1DXHvNd z@{qAoq2yKmAxrscKT|=x0g!N6?LP)8m9PA-?n@Q?l0gt^tn!aq)44aB6=_{eHWZU*#428DwxbZpf>B*^5g^TRdJw zE}mENJs?o*$Zl`!n_ZFey|Vr)zLcD5$3H=qSd_fl=k1XjS~Ea1UrJuVcR)1D8~edU zLaNg&jI@$d@NdAG<<-8*u_7UOP5|T6YotWt0|3&c>{q`V+)^y)s-7aJX9u@ZFVf^>x~%dGrM$JC+X6!9o0jJ%O5O}DCUQYXNXWO)tGLw)5!czi~HzANf0u$vCd4vR`Nk}FT2npmtP=g1O z7L92r!}ftSdLy5;Ds4sMZK%7(oFwyo-ub?n zwfB1bd#$zCew;G{_e#gV0@_vyc6C#pbtI{A0Oyes|xlH7WnudCfUE zEcqm9pmgv?`AME|q*T{8JYh#l#Z}wgB)(HQV0}K?AwBPy%O_eLjQRRvzaK{=f6&fY zF!}Ie#y&rUZpjkj%1f7s%J@k(x7M4JAbR8Tru`W@tbNu7Uw3{MHGMsQZ;SQy=KlrZIbqhX~=4 z_;+$Kzakc;yd>RmRa{G%D8*kDdy=MzRnBB-*H@y>xg+A}SHYn0ri_!8dG|Z|;?F-w1n9dTq*lY;dd{a@^lKNqWH=Olg<=>PBVt|Jrd$2f=81L-a&EReJu9FWX5_U zB;=tlc}Vo6uIJmurPLmNS#0$j&f;=|0OMCX7^!2nKuk-arWKV{K0?R!G{o}swQ#N0(!`D}Q&ewB+ 
zke)z#dy3!c-uo77n?9n_;PCazYf;j^zBxgvv7R$pCWjYeWNo+)R_Q2bhAY2;Gd(n= z8-{(o^riM|sKYysQlFo9K`a&&*Ia zy&nu_+M#a&+9gJRE$H%^hDLu~Mq6`6^G<(LMm>5N*{xA5yRmMe)<6Qe z$C5|ucoN!A{UsROgZ5KH!C*D$JZKr4n}4|*49HRzL|`#^Kw zgFR@~dny3XH~_{F=n6!~A{c7{%>aEBv;g$L2LJ$l2Xr6kf)4=zdOzqX&`&{!Kv#VP zdzrB=&r}1U<|+O)D=W1C z8JqzaRuqX%jIjy$&c=80B{6mOMD;6(Zy|o6-Elx#7HgGO#zbN_^n%ri?^GoI$QZdW z<^A9vg?!;?dAlao>`#J!67mhBe~~ejU4}x}7=^v2!Zq;MVa5-Pk?%L=z9^t&!QsmDpVeEyebXJ~3m#bXx>=uRV~9#~TuHx+sxiIDfw6 z%M@9QFJQr30Jsng-fJCyC{@W)Q#Bm zFg!jso`_P4`5z4SG**>6LE|#I%!NKu3^Xj~oeRLQ*dp(Nnk6Fgdms@u3%5NCQ;QT? z1rIFT`iRz(DS2{)+|bmM!Jte zFDi=QrrD#eIeOTd)KlxJ9a(N6pY+K~|ju;Z)kl-Hyj49k*Y@XMJ*Vt$L| z_oP~=j_9tl=U|j1I!JlaO8iPmpMepP^izyyOX`4T9Fq#kdXwckSr3mGSK6z{*abFA zdKSMt>E{H&BvRtlmW2>YCop+@b1bF8_5&~+T&;6FhDLII9pnDw?PxOP3|Z9M&w<#O zM0$LSr2{1KN3b^{Yq0+goTMFOk@OK|B&ifq3`k0RhvgYaY>5y!c(cO-o}R>AHkEwT z)7xTGsroPqDe_j!J1|OVL0fXken`F^haY^jeG`=Tt3YyA!ZpcdCk;#B$EtHS%p_MT z6@Vo$pt_bpAi1L`GAU5t_P4Y!Db+z!VF{d6-SxFh%19)iBsEaVQjbE9zry9sP&jLh zsO{w1h=b7cc*wt3_zdbufvuQ!GEeQq5C0`kpiYfpshts;Wh{(5S~h>l^C+E`zzNm+ zD2;ZK>)xq>f;70pU*ftyvXg{As1!ZFP^EoEwiy`gCVCT+QVzmEAF z9c1{7ICeOMsX`|!G}oGEU86vJWZ6N;^s7t@x9a`9+>xAL`Q zORGVfrMA$Rv{=b9^$MlARxheW9xWqGeOad%`QNbC^IXfv>EZbFp=mbD z3puxyWkiw}W%?@lwc&;KWTJWT2h)o(K0(jXR&BP*cs?B-QlL)X@XGiq+VHf+e3dei zr$c_@)&o1O6Q=V1`O%@s^7zzH8I_Qyc|4zvL@BUd2hHPabWoqyMst}zY9T&M3vr%4 z$RCYCCK-e9{!Vj@c|6rEJ(cGCvNRnFt| zZX?SYqEvlteZ#54mYe8vVi_B+1@ZL+!x^Ur{`H*D+)WswoPCa3B z#l${4bBv#73&;3*_9oNM$6?H0wI&)^2TZKEIK*5-3;fkds^O@!Uh9XJk;_4j{3fPJ zODHn<7>Q3>1Bp6HmkGAg1Uq?y;h2tfGT(5Fr4&>8pVzK;iwuC!frI_04+wg3Js+y{ z@wGKOVKGaOrJAKYM(!Nb5F`UZd!1chwi@`(gPtR7{>E#dt$W3?y(E^hU;QdSz4k~W)}Ox=)C9!(Itrz*0*c6SYDPX z4wa?N_82lbB=gSJl}$z(7&cg&jCApCnI~Z!49BoDfR|!>XsMeci<#nvjhm2>C}WAKB8KZs4G7t3O6P@LOqxw?0Hr(urcVE$rLG+AB*5z zq*Z*wlZ1EE?AY{&<;D3j&z4uul#|45n{s%%_}-=mCKS$;ZEwhtGh}O-;;K=_Nl~>Z@8_va+~p)taK> zs*P()))%v?k~LKwEe&n{Dt}WQ!=6JWQdQsL_v4CF)!5M5CMH*Bh^p!o(O11aee)e{ zV0W}Mw~e3`uU)%lZPn^EtBb|Nnx*3I8fV<*tyQRqhNdl5JMr5leqS@uMI&vlsombb 
z6QtG8TJLDBYQpVTtl9j26bX0Kv~2gch~2f5#m{SZi)Zgh78P|_<3mnw^cAmOR$R4g z!`ijQtJgCOK`~XeZg1Ero~@gB*k35p+f&3ozf<)47m1Jkvz00YwY#CExk)UopBx>s zX!3W6N9)In59@CfGq)s*=sjs-{gz&Fup?Cjx2%k;-CEPq>TeS}x4s~<8ZJruB89Z= zc6t@Id0S3u=+91LDQLvP;@PQO@yxb^{E*1po@{Zk0iuKT+oQPjRHTXy77?A z!d>^!khNYPQST(4W01wuOEDhF+5{W zla>4uq#1aXnwWdmun&J$`ZM_Pk`w=(kB#kpvUguMtio?$DdPN2XGu2pe~L5wA#Q=e zKbN6n`VHt3gCBardD-A^qS?x&1C`LiUsgPhtCxeh*zqtAkBY!9r<-1DkyZHV`9s2V zzX&PWhF8FAB=!Hvunm8e^B033e&3>}G#V@XX%Ckf9QXQCk(4Uy2ZA(7aYdUT~*uM)K=5b)GE^2orBBT3%Ph;_akCqN9W+n9r#ls z`p!S_!J)e{B>uKI-RTsryIoPCe>kw&FTDFTDcd0=H76>xQ(*63s4nO2qOfzE7*HMY zp{+DTi^OSlM*OKfTQbCv5AdN*6}ZfD?lgsANHDwAtM b?pr@QA#@9h#-yC?o1MhHhO diff --git a/onyx.c b/onyx.c index 60d8ea96..970c85c8 100644 --- a/onyx.c +++ b/onyx.c @@ -2,41 +2,162 @@ #include // TODO: Replace with custom lib #include "bh.h" +typedef struct Tokenizer { + char *start, *curr, *end; + u64 line_number; +} Tokenizer; + +typedef enum TokenType { + TOKEN_TYPE_UNKNOWN, + TOKEN_TYPE_END_STREAM, + + TOKEN_TYPE_KEYWORD_STRUCT, + TOKEN_TYPE_KEYWORD_USE, + TOKEN_TYPE_KEYWORD_EXPORT, + TOKEN_TYPE_KEYWORD_IF, + TOKEN_TYPE_KEYWORD_ELSE, + TOKEN_TYPE_KEYWORD_FOR, + TOKEN_TYPE_KEYWORD_RETURN, + + TOKEN_TYPE_RIGHT_ARROW, + TOKEN_TYPE_OPEN_PAREN, + TOKEN_TYPE_CLOSE_PAREN, + TOKEN_TYPE_OPEN_BRACE, + TOKEN_TYPE_CLOSE_BRACE, + TOKEN_TYPE_OPEN_BRACKET, + TOKEN_TYPE_CLOSE_BRACKET, + + TOKEN_TYPE_OP_ADD, + TOKEN_TYPE_OP_SUB, + TOKEN_TYPE_OP_MUL, + TOKEN_TYPE_OP_DIV, + TOKEN_TYPE_OP_MOD, + + TOKEN_TYPE_COUNT +} TokenType; + +static const char* TokenTypeNames[] = { + "TOKEN_TYPE_UNKNOWN", + "TOKEN_TYPE_END_STREAM", + + "TOKEN_TYPE_KEYWORD_STRUCT", + "TOKEN_TYPE_KEYWORD_USE", + "TOKEN_TYPE_KEYWORD_EXPORT", + "TOKEN_TYPE_KEYWORD_IF", + "TOKEN_TYPE_KEYWORD_ELSE", + "TOKEN_TYPE_KEYWORD_FOR", + "TOKEN_TYPE_KEYWORD_RETURN", + + "TOKEN_TYPE_RIGHT_ARROW", + "TOKEN_TYPE_OPEN_PAREN", + "TOKEN_TYPE_CLOSE_PAREN", + "TOKEN_TYPE_OPEN_BRACE", + "TOKEN_TYPE_CLOSE_BRACE", + "TOKEN_TYPE_OPEN_BRACKET", + 
"TOKEN_TYPE_CLOSE_BRACKET", + + "TOKEN_TYPE_OP_ADD", + "TOKEN_TYPE_OP_SUB", + "TOKEN_TYPE_OP_MUL", + "TOKEN_TYPE_OP_DIV", + "TOKEN_TYPE_OP_MOD", + + "TOKEN_TYPE_COUNT" +}; + +typedef struct Token { + TokenType type; + char* token; + isize length; + u64 line_number, line_column; +} Token; + +b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) { + i64 len = chars_match(tokenizer->curr, lit); + if (len > 0) { + tk->type = type; + tk->token = tokenizer->curr; + tk->length = len; + tokenizer->curr += len; + return 1; + } + return 0; +} + +Token get_token(Tokenizer* tokenizer) { + #ifndef LITERAL_TOKEN + #define LITERAL_TOKEN(token, token_type) \ + if (token_lit(tokenizer, &tk, token, token_type)) goto token_parsed; + #endif + + Token tk; + + tk.type = TOKEN_TYPE_UNKNOWN; + tk.token = tokenizer->curr; + tk.length = 1; + tk.line_number = 0; + tk.line_column = 0; + + if (tokenizer->curr == tokenizer->end) { + tk.type = TOKEN_TYPE_END_STREAM; + goto token_parsed; + } + + LITERAL_TOKEN("struct", TOKEN_TYPE_KEYWORD_STRUCT); + LITERAL_TOKEN("export", TOKEN_TYPE_KEYWORD_EXPORT); + LITERAL_TOKEN("use", TOKEN_TYPE_KEYWORD_USE); + LITERAL_TOKEN("if", TOKEN_TYPE_KEYWORD_IF); + LITERAL_TOKEN("else", TOKEN_TYPE_KEYWORD_IF); + LITERAL_TOKEN("for", TOKEN_TYPE_KEYWORD_FOR); + LITERAL_TOKEN("return", TOKEN_TYPE_KEYWORD_RETURN); + LITERAL_TOKEN("->", TOKEN_TYPE_RIGHT_ARROW); + LITERAL_TOKEN("(", TOKEN_TYPE_OPEN_PAREN); + LITERAL_TOKEN(")", TOKEN_TYPE_CLOSE_PAREN); + LITERAL_TOKEN("{", TOKEN_TYPE_OPEN_BRACE); + LITERAL_TOKEN("}", TOKEN_TYPE_CLOSE_BRACE); + LITERAL_TOKEN("[", TOKEN_TYPE_OPEN_BRACKET); + LITERAL_TOKEN("]", TOKEN_TYPE_CLOSE_BRACKET); + LITERAL_TOKEN("+", TOKEN_TYPE_OP_ADD); + LITERAL_TOKEN("-", TOKEN_TYPE_OP_SUB); + LITERAL_TOKEN("*", TOKEN_TYPE_OP_MUL); + LITERAL_TOKEN("/", TOKEN_TYPE_OP_DIV); + LITERAL_TOKEN("%", TOKEN_TYPE_OP_MOD); + + tokenizer->curr++; // Ignore token + +token_parsed: + return tk; +} + int main(int argc, char *argv[]) { - bh_file 
demofile; - bh_file_error err = bh_file_open(&demofile, argv[1]); + bh_file source_file; + bh_file_error err = bh_file_open(&source_file, argv[1]); if (err != BH_FILE_ERROR_NONE) { fprintf(stderr, "Failed to open file %s\n", argv[1]); return EXIT_FAILURE; } - bh_file_contents fc = bh_file_read_contents(&demofile); - printf("%ld: %s\n", fc.length, fc.data); + bh_file_contents fc = bh_file_read_contents(&source_file); + bh_file_close(&source_file); - bh_file_contents_delete(&fc); - bh_file_close(&demofile); + Tokenizer tknizer = { + .start = fc.data, + .curr = fc.data, + .end = fc.data + fc.length, + .line_number = 1, + }; - // bh_string test_str = bh_string_new(256); - // bh_string world_str = bh_string_new("World FOO Bar test\n"); + Token tk; + do { + tk = get_token(&tknizer); + char c = *(tk.token + tk.length); + *(tk.token + tk.length) = '\0'; + printf("%s: %s\n", TokenTypeNames[tk.type], tk.token); + *(tk.token + tk.length) = c; + } while (tk.type != TOKEN_TYPE_END_STREAM); - // bh_string_append(&test_str, "Hello Frank!\n"); - // bh_string_replace_at(&test_str, &world_str, 6); - // bh_string_replace_at(&test_str, "Hola ", 0); - // bh_string_insert_at(&test_str, "World", 3); - // bh_string_print(&test_str); - // bh_string trim_str = bh_string_new("abcdeTesting words herezzzz\n \t"); - // bh_string_print(&trim_str); - // bh_string_trim_begin(&trim_str, "abcde"); - // bh_string_print(&trim_str); - // bh_string_trim_end_space(&trim_str); - // bh_string_print(&trim_str); - - // bh_string_delete(&test_str); - // bh_string_delete(&world_str); - // bh_string_delete(&trim_str); - - // bh_string file_contents = bh_file_read_contents("path"); + bh_file_contents_delete(&fc); return 0; } diff --git a/progs/demo.onyx b/progs/demo.onyx index ee02d76b..32a37db1 100644 --- a/progs/demo.onyx +++ b/progs/demo.onyx @@ -1,9 +1,11 @@ -use "core"; +/* This is a comment +This is also the only way to do comments +*/ -Foo :: struct { - x, y i32; -} +use "core"; /* Looks for "core.onyx" 
in the current directory */ -main :: (argc i32, argv []*u8) int { - print("Hello World!"); -} +Foo :: struct { x i32, y i32 }; + +add :: (a i32, b i32) -> i32 { + return a + b; +}; \ No newline at end of file diff --git a/progs/mvp.onyx b/progs/mvp.onyx new file mode 100644 index 00000000..10f24015 --- /dev/null +++ b/progs/mvp.onyx @@ -0,0 +1,14 @@ +/* Comments need to be parsed */ + +export add :: (a: i32, b: i32) -> i32 { + return a + b; +} + +export max :: (a: i32, b: i32) -> i32 { + /* Curly braces are required */ + if a > b { + return a; + } else { + return b; + } +} \ No newline at end of file -- 2.25.1