From c1bea102df3f2907f345b89ff0f66f5055ac4767 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sun, 18 Aug 2019 06:25:52 +0200 Subject: Add extern funcs, parameter registers, fix asm_rm RSP bug --- README.md | 4 +- doc/Documentation.md | 42 +++- executor/executor.h | 33 +-- executor/x86_64/asm.c | 32 ++- executor/x86_64/asm.h | 1 + executor/x86_64/executor.c | 93 ++++++-- include/ast.h | 6 +- include/bytecode/bytecode.h | 66 +++--- include/defs.h | 1 + include/program.h | 28 ++- include/ssa/ssa.h | 27 ++- include/std/hash_map.h | 2 +- src/ast.c | 72 +++++- src/bytecode/bytecode.c | 107 ++++++--- src/compiler.c | 59 ++--- src/parser.c | 38 +++- src/program.c | 225 ++++++++++++++----- src/ssa/ssa.c | 262 ++++++++++++++-------- src/std/arena_allocator.c | 3 +- src/std/hash_map.c | 10 +- src/tokenizer.c | 1 + tests/bytecode.amal | 9 +- tests/errors/extern_closure_one_return_value.amal | 1 + tests/errors/too_long_var_name.amal | 1 + tests/main.c | 22 ++ 25 files changed, 832 insertions(+), 313 deletions(-) create mode 100644 tests/errors/extern_closure_one_return_value.amal create mode 100644 tests/errors/too_long_var_name.amal diff --git a/README.md b/README.md index ed64fd6..4a1f0b5 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,9 @@ to make work on little endian as little as possible, meaning it would be a small Verify all members of an extern struct are extern as well. Verify all parameters are of extern types for extern functions.\ Verify all code execution paths in a function return a value, if the function excepts return values.\ Show compile error if the result of a function call is ignored.\ -Show compile error if function result type and assigned to variable have different types. 
+Show compile error if function result type and assigned to variable have different types.\ +Show compile error if variables are assigned to but not used.\ +Push arguments in reverse order (right-to-left, cdecl) (in program.c, since on windows we will need to support stdcall which is left-to-right). ## Urgent Simplify src/compiler.c, it's pretty complex with the thread work done right now. The thread work should be put in a thread dispatch file that only handles thread job dispatching. diff --git a/doc/Documentation.md b/doc/Documentation.md index 3f9bd1a..a046057 100644 --- a/doc/Documentation.md +++ b/doc/Documentation.md @@ -1,22 +1,25 @@ # Opcode -Variable length opcodes. Sizes range from 1 to 4 bytes. +Variable length opcodes. Sizes range from 1 to 5 bytes. ## Instruction formats Instructions can be in 7 different formats: 1. 1 byte: Opcode(u8) -2. 2 bytes: Opcode(u8) + register(u8) -3. 3 bytes: Opcode(u8) + register(u8) + register(u8) +2. 2 bytes: Opcode(u8) + register(i8) +3. 3 bytes: Opcode(u8) + register(i8) + register(i8) 4. 3 bytes:\ 4.1 Opcode(u8) + intermediate(u16)\ 4.2 Opcode(u8) + data(u16)\ 4.3 Opcode(u8) + offset(i16)\ -4.4 Opcode(u8) + num_reg(u16)\ -4.5 Opcode(u8) + register(u8) + num_args(u8) -5. 4 bytes: Opcode(u8) + register(u8) + register(u8) + register(u8) +4.4 Opcode(u8) + register(i8) + num_args(u8) +5. 4 bytes: Opcode(u8) + register(i8) + register(i8) + register(i8) 6. 4 bytes:\ -6.1 Opcode(u8) + register(u8) + offset(i16)\ -6.2 Opcode(u8) + register(u8) + intermediate(u16)\ -6.3 Opcode(u8) + register(u8) + data(u16) -7. 4 bytes: Opcode(u8) + index(u16) + num_args(u8) +6.1 Opcode(u8) + register(i8) + offset(i16)\ +6.2 Opcode(u8) + register(i8) + intermediate(u16)\ +6.3 Opcode(u8) + register(i8) + data(u16)\ +6.4 Opcode(u8) + num_param_reg(u8) + num_local_var_reg(u16) +7. 5 bytes: Opcode(u8) + index(u16) + num_args(u8) + register(i8) +## Registers +Registers have a range of 128. 
Local variables start from register 0 and increment while parameters start from -1 +and decrement. # Compiler flow (Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\ @@ -26,7 +29,7 @@ and writing it to a file, which is an IO bottlenecked operation and it won't ben and may even lose performance because of it. # Bytecode -The layout of the full bytecode is: Header (Intermediates Strings Functions Instructions)* +The layout of the full bytecode is: Header (Intermediates Strings Functions External_Functions Instructions)* # Bytecode header ## Header layout @@ -63,7 +66,7 @@ The versions in the header only changes for every release, not every change. ## String |Type|Field|Description | |----|----|----------------------------------------------------------------------------------------| -|u16 |Size|The size of the string, in bytes. | +|u16 |Size|The size of the string, in bytes. Excluding the null-terminate character. | |u8* |Data|The data of the string, where the size is defined by @Size. Strings are null-terminated.| # Bytecode functions @@ -72,6 +75,21 @@ The versions in the header only changes for every release, not every change. |----|-------------------|---------------------------------| |u16 |Number of functions|The number of internal functions.| +# Bytecode external functions +## External functions layout +|Type |Field |Description | +|------------------|------------------|-----------------------------------------------------------------------------------------| +|u16 |num_extern_func |The number of external functions. | +|u32 |extern_funcs_size |The size of the external functions section, in bytes. 
| +|External function*|External functions|Multiple external functions, where the number of functions is defined by @num_extern_func| + +## External function +|Type|Field |Description | +|----|--------|-----------------------------------------------------------------------------------------------------| +|u8 |num_args|The number of arguments the functions has. | +|u8 |name_len|The length of the external function name, in bytes. Excluding the null-terminate character. | +|u8* |name |The name of the external function, where the size is defined by @name_len. Names are null-terminated.| + # Bytecode instructions ## Instructions layout |Type |Field |Description | diff --git a/executor/executor.h b/executor/executor.h index 5be9abc..979784b 100644 --- a/executor/executor.h +++ b/executor/executor.h @@ -25,25 +25,26 @@ CHECK_RESULT int amal_executor_instructions_start(amal_executor *self, u16 num_f CHECK_RESULT int amal_executor_instructions_end(amal_executor *self); CHECK_RESULT int amal_exec_nop(amal_executor *self); -CHECK_RESULT int amal_exec_setz(amal_executor *self, u8 dst_reg); -CHECK_RESULT int amal_exec_mov(amal_executor *self, u8 dst_reg, u8 src_reg); -CHECK_RESULT int amal_exec_movi(amal_executor *self, u8 dst_reg, i64 imm); -CHECK_RESULT int amal_exec_movd(amal_executor *self, u8 dst_reg, BufferView data); -CHECK_RESULT int amal_exec_add(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2); -CHECK_RESULT int amal_exec_sub(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2); -CHECK_RESULT int amal_exec_imul(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2); -CHECK_RESULT int amal_exec_mul(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2); -CHECK_RESULT int amal_exec_idiv(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2); -CHECK_RESULT int amal_exec_div(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2); -CHECK_RESULT int amal_exec_push(amal_executor *self, u8 reg); +CHECK_RESULT int amal_exec_setz(amal_executor 
*self, i8 dst_reg); +CHECK_RESULT int amal_exec_mov(amal_executor *self, i8 dst_reg, i8 src_reg); +CHECK_RESULT int amal_exec_movi(amal_executor *self, i8 dst_reg, i64 imm); +CHECK_RESULT int amal_exec_movd(amal_executor *self, i8 dst_reg, BufferView data); +CHECK_RESULT int amal_exec_add(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2); +CHECK_RESULT int amal_exec_sub(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2); +CHECK_RESULT int amal_exec_imul(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2); +CHECK_RESULT int amal_exec_mul(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2); +CHECK_RESULT int amal_exec_idiv(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2); +CHECK_RESULT int amal_exec_div(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2); +CHECK_RESULT int amal_exec_push(amal_executor *self, i8 reg); CHECK_RESULT int amal_exec_pushi(amal_executor *self, i64 imm); CHECK_RESULT int amal_exec_pushd(amal_executor *self, BufferView data); -CHECK_RESULT int amal_exec_call(amal_executor *self, u16 func_index, u8 num_args); -/*CHECK_RESULT int amal_exec_callr(u8 dst_reg, BufferView data);*/ -CHECK_RESULT int amal_exec_cmp(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2); -CHECK_RESULT int amal_exec_jz(amal_executor *self, u8 dst_reg, i16 offset); +CHECK_RESULT int amal_exec_call(amal_executor *self, u16 func_index, u8 num_args, i8 dst_reg); +/*CHECK_RESULT int amal_exec_callr(i8 dst_reg, BufferView data);*/ +CHECK_RESULT int amal_exec_calle(amal_executor *self, void *func, u8 num_args, i8 dst_reg); +CHECK_RESULT int amal_exec_cmp(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2); +CHECK_RESULT int amal_exec_jz(amal_executor *self, i8 dst_reg, i16 offset); CHECK_RESULT int amal_exec_jmp(amal_executor *self, i16 offset); -CHECK_RESULT int amal_exec_ret(amal_executor *self, u8 reg); +CHECK_RESULT int amal_exec_ret(amal_executor *self, i8 reg); CHECK_RESULT int amal_exec_func_start(amal_executor 
*self, u16 num_regs); CHECK_RESULT int amal_exec_func_end(amal_executor *self); diff --git a/executor/x86_64/asm.c b/executor/x86_64/asm.c index f032538..606e539 100644 --- a/executor/x86_64/asm.c +++ b/executor/x86_64/asm.c @@ -21,6 +21,7 @@ static usize ins_start_offset = 0; static void asm_debug_str_append(const char *str) { const usize len = strlen(str); am_memcpy(asm_debug_str_buffer + asm_debug_str_buffer_index, str, len); + asm_debug_str_buffer[asm_debug_str_buffer_index + len] = '\0'; /* Terminate directly after the copied bytes so no stale byte is left in between */ asm_debug_str_buffer_index += len; } @@ -101,12 +102,14 @@ static const char* asm_ptr_to_string(AsmPtr *self) { asm_debug_str_append(" * "); asm_debug_str_append_num(1 << self->scale); } - if(self->disp < 0) - asm_debug_str_append(" - "); - else - asm_debug_str_append(" + "); - asm_debug_str_append_num(abs(self->disp)); - asm_debug_str_append("]\0"); + if(self->disp != 0) { + if(self->disp < 0) + asm_debug_str_append(" - "); + else + asm_debug_str_append(" + "); + asm_debug_str_append_num(abs(self->disp)); + } + asm_debug_str_append("]"); return buf; } #else @@ -279,6 +282,9 @@ static void asm_rm(Asm *self, AsmPtr *mem, Reg64 reg) { disp_bytes = 4; } + /* Scale is not valid when index is RSP */ + assert(mem->index != RSP || mem->scale == 0); + #ifdef DEBUG if(mem->scale != 0 && mem->scale != 2 && mem->scale != 4 && mem->scale != 8) { amal_log_error("Invalid scale %d, expected 0, 2, 4, or 8", mem->scale); @@ -307,7 +313,11 @@ static void asm_rm(Asm *self, AsmPtr *mem, Reg64 reg) { rm_byte = 0x80 + mem->base; disp_bytes = 4; } + *self->code_it++ = (reg << 3) | rm_byte; + /* RSP requires SIB byte when displacement is not 0 */ + if(mem->base == RSP) + *self->code_it++ = 0x24; } am_memcpy(self->code_it, &mem->disp, disp_bytes); @@ -444,6 +454,16 @@ int asm_popr(Asm *self, Reg64 reg) { return 0; } +int asm_callr(Asm *self, Reg64 reg) { + ins_start(self); + return_if_error(asm_ensure_capacity(self, 3)); + *self->code_it++ = REX_W; + *self->code_it++ = 0xFF; + asm_rr(self, reg, 
0x2); + ins_end(self, "call %s", reg64_to_str(reg)); + return 0; +} + /* Note: This is sometimes called with @relative 0 (will print call -5), in which case it's most likely a dummy call until the relative position is later changed with @asm_override_call_rel32. TODO: Update the ins_end debug print to take that into account somehow diff --git a/executor/x86_64/asm.h b/executor/x86_64/asm.h index 7e5ac67..51f2d84 100644 --- a/executor/x86_64/asm.h +++ b/executor/x86_64/asm.h @@ -80,6 +80,7 @@ CHECK_RESULT int asm_idiv_rr(Asm *self, Reg64 src); CHECK_RESULT int asm_pushr(Asm *self, Reg64 reg); CHECK_RESULT int asm_popr(Asm *self, Reg64 reg); +CHECK_RESULT int asm_callr(Asm *self, Reg64 reg); /* In x86 assembly, the @relative position starts from the next instruction. This offset shouldn't be calculated by the caller and is instead managed diff --git a/executor/x86_64/executor.c b/executor/x86_64/executor.c index b7aa91f..ebe848d 100644 --- a/executor/x86_64/executor.c +++ b/executor/x86_64/executor.c @@ -28,7 +28,16 @@ typedef struct { amal_executor_impl *impl; \ impl = (amal_executor_impl*)self; -#define get_register_stack_offset(reg) -(i32)(reg * (int)sizeof(usize) + (int)sizeof(usize)) +/* + @reg will be a positive value when accessing local variables, in which case the first + local variable is located at -sizeof(usize) and the next one is at -(2 * sizeof(usize)). + @reg will be a negative value starting at -1 when accessing parameters. + The first parameter is located at 3*sizeof(usize) and the next one is at 4*sizeof(usize). + Parameter starts at 3*sizeof(usize) because offset 0 is the return address, offset 1*sizeof(usize) is the + saved RBP and 2*sizeof(usize) is saved RBX. +*/ +#define get_register_stack_offset(reg) \ + (reg >= 0 ? (i32)(-reg * (int)sizeof(usize) - sizeof(usize)) : (i32)(-reg * (int)sizeof(usize) + 2*sizeof(usize))) static i64 abs_i64(i64 value) { return value >= 0 ? 
value : -value; @@ -88,14 +97,14 @@ int amal_exec_nop(amal_executor *self) { return asm_nop(&impl->asm); } -int amal_exec_setz(amal_executor *self, u8 dst_reg) { +int amal_exec_setz(amal_executor *self, i8 dst_reg) { AsmPtr dst; IMPL asm_ptr_init_disp(&dst, RBP, get_register_stack_offset(dst_reg)); return asm_mov_mi(&impl->asm, &dst, 0); } -int amal_exec_mov(amal_executor *self, u8 dst_reg, u8 src_reg) { +int amal_exec_mov(amal_executor *self, i8 dst_reg, i8 src_reg) { AsmPtr ptr; IMPL @@ -106,7 +115,7 @@ int amal_exec_mov(amal_executor *self, u8 dst_reg, u8 src_reg) { return asm_mov_mr(&impl->asm, &ptr, RAX); } -int amal_exec_movi(amal_executor *self, u8 dst_reg, i64 imm) { +int amal_exec_movi(amal_executor *self, i8 dst_reg, i64 imm) { IMPL /* TODO: if @number is a float then use float instructions */ if(abs_i64(imm) <= INT32_MAX) { @@ -122,7 +131,7 @@ int amal_exec_movi(amal_executor *self, u8 dst_reg, i64 imm) { return 0; } -int amal_exec_movd(amal_executor *self, u8 dst_reg, BufferView data) { +int amal_exec_movd(amal_executor *self, i8 dst_reg, BufferView data) { AsmPtr dst; IMPL @@ -131,7 +140,7 @@ int amal_exec_movd(amal_executor *self, u8 dst_reg, BufferView data) { return asm_mov_mr(&impl->asm, &dst, RAX); } -int amal_exec_add(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { +int amal_exec_add(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2) { AsmPtr dst; AsmPtr reg1; AsmPtr reg2; @@ -147,7 +156,7 @@ int amal_exec_add(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { return asm_mov_mr(&impl->asm, &dst, RAX); } -int amal_exec_sub(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { +int amal_exec_sub(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2) { AsmPtr dst; AsmPtr reg1; AsmPtr reg2; @@ -163,7 +172,7 @@ int amal_exec_sub(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { return asm_mov_mr(&impl->asm, &dst, RAX); } -int amal_exec_imul(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) 
{ +int amal_exec_imul(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2) { AsmPtr dst; AsmPtr reg1; AsmPtr reg2; @@ -179,7 +188,7 @@ int amal_exec_imul(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { return asm_mov_mr(&impl->asm, &dst, RAX); } -int amal_exec_mul(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { +int amal_exec_mul(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2) { (void)self; (void)dst_reg; (void)src_reg1; @@ -203,7 +212,7 @@ int amal_exec_mul(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { return 0; } -int amal_exec_idiv(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { +int amal_exec_idiv(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2) { AsmPtr dst; AsmPtr reg1; AsmPtr reg2; @@ -216,11 +225,12 @@ int amal_exec_idiv(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { return_if_error(asm_mov_rm(&impl->asm, RAX, &reg1)); return_if_error(asm_mov_rm(&impl->asm, RCX, &reg2)); return_if_error(asm_cqo(&impl->asm)); + /* TODO: Preserve RDX if needed, since it's also used as a parameter in system-v x86_64 abi */ return_if_error(asm_idiv_rr(&impl->asm, RCX)); return asm_mov_mr(&impl->asm, &dst, RAX); } -int amal_exec_div(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { +int amal_exec_div(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2) { (void)self; (void)dst_reg; (void)src_reg1; @@ -230,7 +240,7 @@ int amal_exec_div(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { return 0; } -int amal_exec_push(amal_executor *self, u8 reg) { +int amal_exec_push(amal_executor *self, i8 reg) { AsmPtr reg_ptr; IMPL @@ -255,7 +265,7 @@ int amal_exec_pushd(amal_executor *self, BufferView data) { return 0; } -int amal_exec_call(amal_executor *self, u16 func_index, u8 num_args) { +int amal_exec_call(amal_executor *self, u16 func_index, u8 num_args, i8 dst_reg) { isize asm_size; IMPL /* TODO: Preserve necessary registers before call? 
*/ @@ -275,18 +285,65 @@ int amal_exec_call(amal_executor *self, u16 func_index, u8 num_args) { return_if_error(asm_call_rel32(&impl->asm, 0)); } + { + AsmPtr dst; + asm_ptr_init_disp(&dst, RBP, get_register_stack_offset(dst_reg)); + /* TODO: Make this work when result is not stored in RAX (multiple return results) */ + return_if_error(asm_mov_mr(&impl->asm, &dst, RAX)); + } + if(num_args > 0) + return asm_add_rm64_imm(&impl->asm, RSP, num_args * sizeof(isize)); + return 0; +} + +const Reg64 SYS_V_PARAM_REGS[] = { RDI, RSI, RDX, RCX }; + +/* + TODO: Make argument passing work for different calling conventions and different ABI. + This currently assumes x86_64 system v abi. + System-V ABI parameters: + RDI, RSI, RDX, RCX, R8, R9, XMM0–7. + The rest are passed in the stack. +*/ +/* TODO: Make this work when function returns something else than a POD */ +int amal_exec_calle(amal_executor *self, void *func, u8 num_args, i8 dst_reg) { + AsmPtr dst; + IMPL + + /* TODO: Support R and XMM registers so more than 4 arguments can be used for functions (SYS_V_PARAM_REGS only holds 4 registers) */ + assert(num_args < 5); + { + /* + TODO: Do this directly in @PUSH instruction instead. For now we copy + the pushed data to the registers that need to be set for the specific abi for parameters + */ + int i; + AsmPtr src; + asm_ptr_init_disp(&src, RSP, 0); + for(i = num_args - 1; i >= 0; --i) { + return_if_error(asm_mov_rm(&impl->asm, SYS_V_PARAM_REGS[i], &src)); + src.disp += 0x8; + } + } + + /* TODO: Preserve necessary registers before call? 
*/ + /* TODO: This assumes all arguments are isize */ + return_if_error(asm_mov_ri(&impl->asm, RAX, (intptr_t)func)); + return_if_error(asm_callr(&impl->asm, RAX)); + asm_ptr_init_disp(&dst, RBP, get_register_stack_offset(dst_reg)); + return_if_error(asm_mov_mr(&impl->asm, &dst, RAX)); if(num_args > 0) return asm_add_rm64_imm(&impl->asm, RSP, num_args * sizeof(isize)); return 0; } /* -int amal_exec_callr(u8 dst_reg, BufferView data) { +int amal_exec_callr(i8 dst_reg, BufferView data) { } */ -int amal_exec_cmp(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { +int amal_exec_cmp(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2) { (void)self; (void)dst_reg; (void)src_reg1; @@ -296,7 +353,7 @@ int amal_exec_cmp(amal_executor *self, u8 dst_reg, u8 src_reg1, u8 src_reg2) { return 0; } -int amal_exec_jz(amal_executor *self, u8 dst_reg, i16 offset) { +int amal_exec_jz(amal_executor *self, i8 dst_reg, i16 offset) { (void)self; (void)dst_reg; (void)offset; @@ -313,12 +370,12 @@ int amal_exec_jmp(amal_executor *self, i16 offset) { return 0; } -int amal_exec_ret(amal_executor *self, u8 reg) { +int amal_exec_ret(amal_executor *self, i8 reg) { AsmPtr ret_reg; IMPL asm_ptr_init_disp(&ret_reg, RBP, get_register_stack_offset(reg)); - /* Result is returned in RAX register. TODO: Make this work for larger data */ + /* Result is returned in RAX register. 
TODO: Make this work when returning more than one result */ return_if_error(asm_mov_rm(&impl->asm, RAX, &ret_reg)); return amal_exec_func_end(self); } diff --git a/include/ast.h b/include/ast.h index 1198a98..bdab6c3 100644 --- a/include/ast.h +++ b/include/ast.h @@ -20,7 +20,6 @@ typedef struct Ast Ast; typedef struct FunctionParameter FunctionParameter; -typedef struct FunctionSignature FunctionSignature; typedef struct FunctionCall FunctionCall; typedef struct StructDecl StructDecl; typedef struct StructField StructField; @@ -87,6 +86,7 @@ typedef enum { typedef struct { union { + void *data; LhsExpr *lhs_expr; FunctionSignature *func_sig; } value; @@ -107,6 +107,7 @@ typedef enum { typedef struct { union { + void *data; LhsExpr *lhs_expr; FunctionParameter *func_param; } value; @@ -218,6 +219,7 @@ struct LhsExpr { BufferView var_name; VariableType type; nullable Ast *rhs_expr; + u16 extern_index; /* Index to extern func, extern variable, etc; if applicable */ }; #define LHS_EXPR_IS_EXTERN(expr) ((expr)->decl_flags & DECL_FLAG_EXTERN) @@ -297,6 +299,8 @@ CHECK_RESULT int function_signature_init(FunctionSignature *self, ArenaAllocator CHECK_RESULT int function_signature_add_parameter(FunctionSignature *self, const FunctionParameter *param); /* Adds a copy of @return_type to the function signature return type list */ CHECK_RESULT int function_signature_add_return_type(FunctionSignature *self, const VariableType *return_type); +CHECK_RESULT bool function_signature_equals(FunctionSignature *self, FunctionSignature *other); + void function_parameter_init(FunctionParameter *self); CHECK_RESULT int funcdecl_init(FunctionDecl *self, FunctionSignature *signature, Scope *parent, ArenaAllocator *allocator); CHECK_RESULT int funccall_init(FunctionCall *self, BufferView name, ArenaAllocator *allocator); diff --git a/include/bytecode/bytecode.h b/include/bytecode/bytecode.h index adad291..1e4c35d 100644 --- a/include/bytecode/bytecode.h +++ b/include/bytecode/bytecode.h @@ 
-9,48 +9,52 @@ #include /*doc(Opcode) - Variable length opcodes. Sizes range from 1 to 4 bytes. + Variable length opcodes. Sizes range from 1 to 5 bytes. # Instruction formats Instructions can be in 7 different formats: 1. 1 byte: Opcode(u8) - 2. 2 bytes: Opcode(u8) + register(u8) - 3. 3 bytes: Opcode(u8) + register(u8) + register(u8) + 2. 2 bytes: Opcode(u8) + register(i8) + 3. 3 bytes: Opcode(u8) + register(i8) + register(i8) 4. 3 bytes:\ 4.1 Opcode(u8) + intermediate(u16)\ 4.2 Opcode(u8) + data(u16)\ 4.3 Opcode(u8) + offset(i16)\ - 4.4 Opcode(u8) + num_reg(u16)\ - 4.5 Opcode(u8) + register(u8) + num_args(u8) - 5. 4 bytes: Opcode(u8) + register(u8) + register(u8) + register(u8) + 4.4 Opcode(u8) + register(i8) + num_args(u8) + 5. 4 bytes: Opcode(u8) + register(i8) + register(i8) + register(i8) 6. 4 bytes:\ - 6.1 Opcode(u8) + register(u8) + offset(i16)\ - 6.2 Opcode(u8) + register(u8) + intermediate(u16)\ - 6.3 Opcode(u8) + register(u8) + data(u16) - 7. 4 bytes: Opcode(u8) + index(u16) + num_args(u8) + 6.1 Opcode(u8) + register(i8) + offset(i16)\ + 6.2 Opcode(u8) + register(i8) + intermediate(u16)\ + 6.3 Opcode(u8) + register(i8) + data(u16)\ + 6.4 Opcode(u8) + num_param_reg(u8) + num_local_var_reg(u16) + 7. 5 bytes: Opcode(u8) + index(u16) + num_args(u8) + register(i8) + # Registers + Registers have a range of 128. Local variables start from register 0 and increment while parameters start from -1 + and decrement. */ typedef enum { AMAL_OP_NOP, /* No operation (do nothing). 
This can be used for patching code */ - AMAL_OP_SETZ, /* setz reg - Set register value to 0 */ - AMAL_OP_MOV, /* mov dst, src - Move src register to dst register */ - AMAL_OP_MOVI, /* movi dst, src - Move src intermediate to dst register */ - AMAL_OP_MOVD, /* movd dst, src - Move src data to dst register */ - AMAL_OP_ADD, /* add dst, reg1, reg2 - Add reg1 and reg2 and put the result in dst */ - AMAL_OP_SUB, /* sub dst, reg1, reg2 - Substract reg2 from reg1 and put the result in dst */ - AMAL_OP_IMUL, /* imul dst, reg1, reg2 - Signed multiplication */ - AMAL_OP_MUL, /* mul dst, reg1, reg2 - Unsigned multiplication */ - AMAL_OP_IDIV, /* idiv dst, reg1, reg2 - Signed division */ - AMAL_OP_DIV, /* div dst, reg1, reg2 - Unsigned division */ - AMAL_OP_PUSH, /* push reg - Push register onto stack */ - AMAL_OP_PUSHI, /* pushi int - Push intermediate onto stack */ - AMAL_OP_PUSHD, /* pushd data - Push data onto stack */ - AMAL_OP_CALL, /* call fi, num_args - Call a function using function index (fi) and num_args arguments. fi is u16 and num_args is u8 */ - AMAL_OP_CALLR, /* callr reg, num_args - Call a function using a register. Used for function pointers. num_args is u8 */ - AMAL_OP_CMP, /* cmp dst, reg1, reg2 - Set dst to 1 if reg1 equals reg2, otherwise set it to 0 */ - AMAL_OP_JZ, /* jz reg, offset - Jump to offset if reg is zero. offset is i16 */ - AMAL_OP_JMP, /* jmp offset - Unconditional jump to offset. offset is i16 */ - AMAL_OP_RET, /* ret reg - Return from the function with reg result */ - AMAL_OP_FUNC_START, /* func_start num_reg - Start of a function which has num_reg registers allocated. num_reg is a u16 */ - AMAL_OP_FUNC_END /* func_end - End of a function. 
Implementation should do a ret here */ + AMAL_OP_SETZ, /* setz reg - Set register value to 0 */ + AMAL_OP_MOV, /* mov dst, src - Move src register to dst register */ + AMAL_OP_MOVI, /* movi dst, src - Move src intermediate to dst register */ + AMAL_OP_MOVD, /* movd dst, src - Move src data to dst register */ + AMAL_OP_ADD, /* add dst, reg1, reg2 - Add reg1 and reg2 and put the result in dst */ + AMAL_OP_SUB, /* sub dst, reg1, reg2 - Substract reg2 from reg1 and put the result in dst */ + AMAL_OP_IMUL, /* imul dst, reg1, reg2 - Signed multiplication */ + AMAL_OP_MUL, /* mul dst, reg1, reg2 - Unsigned multiplication */ + AMAL_OP_IDIV, /* idiv dst, reg1, reg2 - Signed division */ + AMAL_OP_DIV, /* div dst, reg1, reg2 - Unsigned division */ + AMAL_OP_PUSH, /* push reg - Push register onto stack */ + AMAL_OP_PUSHI, /* pushi int - Push intermediate onto stack */ + AMAL_OP_PUSHD, /* pushd data - Push data onto stack */ + AMAL_OP_CALL, /* call fi, num_args, dst - Call a function using function index (fi) and num_args arguments. The result is stored in register dst. fi is u16 and num_args is u8 */ + AMAL_OP_CALLR, /* callr reg, num_args - Call a function using a register. Used for function pointers. num_args is u8 */ + AMAL_OP_CALLE, /* calle efi, num_args, dst - Call an extern function using extern function index (efi) and num_args arguments. The result is stored in register dst. efi is u16 and num_args is u8 */ + AMAL_OP_CMP, /* cmp dst, reg1, reg2 - Set dst to 1 if reg1 equals reg2, otherwise set it to 0 */ + AMAL_OP_JZ, /* jz reg, offset - Jump to offset if reg is zero. offset is i16 */ + AMAL_OP_JMP, /* jmp offset - Unconditional jump to offset. offset is i16 */ + AMAL_OP_RET, /* ret reg - Return from the function with reg result */ + AMAL_OP_FUNC_START, /* func_start num_param_reg, num_local_var_reg - Start of a function which has num_param_reg parameter registers allocated and num_local_var_reg local variable registers allocated. 
num_param_reg is u8 and num_local_var_reg is u16 */ + AMAL_OP_FUNC_END /* func_end - End of a function. Implementation should do a ret here */ } AmalOpcode; typedef u8 AmalOpcodeType; diff --git a/include/defs.h b/include/defs.h index d7f6692..8cd9d39 100644 --- a/include/defs.h +++ b/include/defs.h @@ -7,5 +7,6 @@ typedef struct Parser Parser; typedef struct Scope Scope; typedef struct FileScopeReference FileScopeReference; typedef struct FunctionDecl FunctionDecl; +typedef struct FunctionSignature FunctionSignature; #endif diff --git a/include/program.h b/include/program.h index 6603c63..543b38d 100644 --- a/include/program.h +++ b/include/program.h @@ -2,9 +2,12 @@ #define AMAL_PROGRAM_H #include "std/buffer.h" +#include "std/hash_map.h" +#include "std/arena_allocator.h" #include "bytecode/bytecode.h" #include "../executor/executor.h" +/* TODO: Remove all these errors and move program decoding and execution to another process (crash sandbox) */ #define AMAL_PROGRAM_OK 0 #define AMAL_PROGRAM_ERR -1 #define AMAL_PROGRAM_INVALID_HEADER -16 @@ -14,7 +17,7 @@ #define AMAL_PROGRAM_INVALID_INTERMEDIATES_SIZE -5 #define AMAL_PROGRAM_INVALID_STRINGS -6 #define AMAL_PROGRAM_INVALID_STRINGS_SIZE -7 -#define AMAL_PROGRAM_STRING_ALLOC_FAILURE -8 +#define AMAL_PROGRAM_ALLOC_FAILURE -8 #define AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE -9 #define AMAL_PROGRAM_INVALID_INSTRUCTION -10 #define AMAL_PROGRAM_INSTRUCTION_INVALID_INTERMEDIATE_INDEX -11 @@ -23,6 +26,10 @@ #define AMAL_PROGRAM_INSTRUCTION_STACK_OOM -14 #define AMAL_PROGRAM_INSTRUCTION_ILLEGAL_JUMP_TARGET -15 #define AMAL_PROGRAM_INVALID_FUNCTIONS -16 +#define AMAL_PROGRAM_INVALID_EXTERNAL_FUNCTIONS -17 +#define AMAL_PROGRAM_INVALID_EXTERNAL_FUNCTIONS_SIZE -18 +#define AMAL_PROGRAM_INSTRUCTION_INVALID_EXTERN_FUNC_INDEX -19 +#define AMAL_PROGRAM_NO_SUCH_EXTERNAL_FUNCTION -20 #define AMAL_PROGRAM_MAGIC_NUMBER (u32)0xdec05eba #define AMAL_PROGRAM_MAJOR_VERSION 1 @@ -31,21 +38,36 @@ #define AMAL_PROGRAM_NUM_REGISTERS 256 
+#define AMAL_PROGRAM_ARGS_SIZE_VARARGS -1 + +typedef struct { + void *func; + int args_byte_size; /* -1 if varargs (AMAL_PROGRAM_ARGS_SIZE_VARARGS) */ +} ProgramExternFunc; + typedef struct { Buffer/*<...>*/ data; u32 *string_indices; - char *intermediates_start; /* Reference inside @data */ - char *strings_start; /* Reference inside @data */ + u32 *extern_func_indices; + u8 *intermediates_start; /* Reference inside @data */ + u8 *strings_start; /* Reference inside @data */ + u8 *extern_funcs_start; /* Reference inside @data */ usize read_index; u16 num_intermediates; u16 num_strings; u16 num_functions; + u16 num_extern_functions; + + ArenaAllocator allocator; /* Owned. Used by @extern_funcs_map */ + HashMapType(BufferView, ProgramExternFunc) extern_funcs_map; } amal_program; CHECK_RESULT int amal_program_init(amal_program *self); void amal_program_deinit(amal_program *self); +CHECK_RESULT int amal_program_add_extern_func(amal_program *self, BufferView name, void *func_ptr, int args_byte_size); + CHECK_RESULT int amal_program_append_bytecode(amal_program *self, Bytecode *bytecode); CHECK_RESULT int amal_program_run(amal_program *self); CHECK_RESULT int amal_program_save(amal_program *self, const char *filepath); diff --git a/include/ssa/ssa.h b/include/ssa/ssa.h index 21d7f40..b2c8065 100644 --- a/include/ssa/ssa.h +++ b/include/ssa/ssa.h @@ -8,6 +8,8 @@ #include +#define SSA_ERR_EXTERN_FUNC_SIG_MISMATCH -20 + typedef enum { SSA_ASSIGN_INTER, SSA_ASSIGN_STRING, @@ -23,6 +25,7 @@ typedef enum { SSA_FUNC_END, SSA_PUSH, SSA_CALL, + SSA_CALL_EXTERN, SSA_JUMP_ZERO, SSA_JUMP, SSA_RET @@ -41,10 +44,16 @@ typedef struct { SsaNumberType type; } SsaNumber; +typedef struct { + FunctionSignature *func_sig; + BufferView name; +} SsaExternFunc; + typedef i16 JumpOffset; -typedef u16 SsaRegister; +typedef i16 SsaRegister; typedef u16 SsaIntermediateIndex; typedef u16 SsaStringIndex; +typedef u16 SsaExternFuncIndex; typedef u16 SsaFuncIndex; typedef struct { @@ -53,10 +62,15 @@ 
typedef struct { Buffer/*SsaNumber*/ intermediates; HashMapType(BufferView, SsaStringIndex) strings_map; Buffer/*BufferView*/ strings; + HashMapType(BufferView, SsaExternFuncIndex) extern_funcs_map; + Buffer/*SsaExternFunc*/ extern_funcs; SsaIntermediateIndex intermediate_counter; SsaStringIndex string_counter; + SsaExternFuncIndex extern_func_counter; SsaRegister reg_counter; + SsaRegister param_counter; SsaFuncIndex func_counter; + Parser *parser; /* Borrowed */ } Ssa; typedef struct { @@ -72,7 +86,8 @@ typedef struct { typedef struct { SsaFuncIndex func_index; - u16 num_registers; + u16 num_params_regs; + u16 num_local_vars_regs; } SsaInsFuncStart; typedef struct { @@ -81,6 +96,12 @@ typedef struct { FunctionDecl *func_decl; } SsaInsFuncCall; +typedef struct { + u8 num_args; + SsaRegister result; + SsaExternFuncIndex extern_func_index; +} SsaInsFuncCallExtern; + typedef struct { SsaRegister condition_reg; JumpOffset jump_offset; @@ -97,7 +118,7 @@ SsaNumber create_ssa_float(f64 value); SsaNumber ssa_get_intermediate(Ssa *self, SsaIntermediateIndex index); BufferView ssa_get_string(Ssa *self, SsaStringIndex index); -CHECK_RESULT int ssa_init(Ssa *self, ArenaAllocator *allocator); +CHECK_RESULT int ssa_init(Ssa *self, Parser *parser); typedef struct { jmp_buf env; diff --git a/include/std/hash_map.h b/include/std/hash_map.h index 020748b..d789db4 100644 --- a/include/std/hash_map.h +++ b/include/std/hash_map.h @@ -23,7 +23,7 @@ struct HashMap { #define HashMapType(key_type, value_type) __attribute__((annotate(#key_type", "#value_type))) HashMap -CHECK_RESULT int hash_map_init(HashMap *self, ArenaAllocator *allocator, usize value_type_size, HashMapCompare compare_func, HashMapHash hash_func); +CHECK_RESULT int hash_map_init(HashMap *self, ArenaAllocator *allocator, usize value_type_size, HashMapCompare key_compare_func, HashMapHash key_hash_func); /* Not thread-safe. Expected @value size to be @self->value_type_size. 
diff --git a/src/ast.c b/src/ast.c index 413f77e..1a44ce1 100644 --- a/src/ast.c +++ b/src/ast.c @@ -21,7 +21,7 @@ static BufferView variable_type_get_name(VariableType *self); static void scope_named_object_init(ScopeNamedObject *self) { self->type = NAMED_OBJECT_NONE; - self->value.lhs_expr = NULL; + self->value.data = NULL; self->resolve_data = NULL; } @@ -75,13 +75,23 @@ static BufferView ast_resolved_type_get_name(AstResolvedType *self) { } static void ast_resolved_type_init(AstResolvedType *self) { - self->value.lhs_expr = NULL; + self->value.data = NULL; self->type = RESOLVED_TYPE_NONE; } static bool ast_resolved_type_equals(AstResolvedType *self, AstResolvedType *other) { - /* This should be fine, without checking AstResolvedType type, since they are only equal if the types are equal as well */ - return self->value.lhs_expr == other->value.lhs_expr; + if(self->type != other->type) + return bool_false; + + switch(self->type) { + case RESOLVED_TYPE_NONE: + return bool_true; + case RESOLVED_TYPE_LHS_EXPR: + return self->value.lhs_expr == other->value.lhs_expr; + case RESOLVED_TYPE_FUNC_SIG: + return function_signature_equals(self->value.func_sig, other->value.func_sig); + } + return bool_false; } static AstResolvedType lhs_expr_get_resolved_type(LhsExpr *self, AstCompilerContext *context) { @@ -214,6 +224,59 @@ int function_signature_add_return_type(FunctionSignature *self, const VariableTy return buffer_append(&self->return_types, &return_type, sizeof(return_type)); } +static CHECK_RESULT bool function_parameter_equals(FunctionParameter *self, FunctionParameter *other) { + /* It's fine if the name of the parameter is different. 
Only the type matters */ + return ast_resolved_type_equals(&self->resolve_data.type, &other->resolve_data.type); +} + +static CHECK_RESULT bool function_parameters_equals(Buffer *func_params, Buffer *other_func_params) { + FunctionParameter *func_param, *func_param_end; + FunctionParameter *other_func_param, *other_func_param_end; + + func_param = buffer_begin(func_params); + func_param_end = buffer_end(func_params); + other_func_param = buffer_begin(other_func_params); + other_func_param_end = buffer_end(other_func_params); + /* Different number of arguments */ + if(func_param_end - func_param != other_func_param_end - other_func_param) + return bool_false; + + for(; func_param != func_param_end; ++func_param, ++other_func_param) { + if(!function_parameter_equals(func_param, other_func_param)) + return bool_false; + } + return bool_true; +} + +static CHECK_RESULT bool function_return_type_equals(FunctionReturnType *self, FunctionReturnType *other) { + return ast_resolved_type_equals(&self->resolved_type, &other->resolved_type); +} + +static CHECK_RESULT bool function_return_types_equals(Buffer *func_return_types, Buffer *other_func_return_types) { + FunctionReturnType *func_return_type, *func_return_type_end; + FunctionReturnType *other_func_return_type, *other_func_return_type_end; + + func_return_type = buffer_begin(func_return_types); + func_return_type_end = buffer_end(func_return_types); + other_func_return_type = buffer_begin(other_func_return_types); + other_func_return_type_end = buffer_end(other_func_return_types); + /* Different number of arguments */ + if(func_return_type_end - func_return_type != other_func_return_type_end - other_func_return_type) + return bool_false; + + for(; func_return_type != func_return_type_end; ++func_return_type, ++other_func_return_type) { + if(!function_return_type_equals(func_return_type, other_func_return_type)) + return bool_false; + } + return bool_true; +} + +bool function_signature_equals(FunctionSignature *self, 
FunctionSignature *other) { + if(!function_parameters_equals(&self->parameters, &other->parameters)) + return bool_false; + return function_return_types_equals(&self->return_types, &other->return_types); +} + void function_parameter_init(FunctionParameter *self) { self->name = create_buffer_view_null(); self->type.type = VARIABLE_TYPE_NONE; @@ -256,6 +319,7 @@ void lhsexpr_init(LhsExpr *self, DeclFlag decl_flag, BufferView var_name) { self->type.value.variable = NULL; self->var_name = var_name; self->rhs_expr = NULL; + self->extern_index = 0; } void assignmentexpr_init(AssignmentExpr *self, Ast *lhs_expr, Ast *rhs_expr) { diff --git a/src/bytecode/bytecode.c b/src/bytecode/bytecode.c index 8a41900..edc8dba 100644 --- a/src/bytecode/bytecode.c +++ b/src/bytecode/bytecode.c @@ -3,6 +3,7 @@ #include "../../include/std/log.h" #include "../../include/ssa/ssa.h" #include "../../include/parser.h" +#include "../../include/ast.h" #include "../../include/compiler.h" #include #include /* TODO: Remove this */ @@ -37,8 +38,9 @@ static CHECK_RESULT usize ssa_extract_form2(u8 *instruction_data, SsaInsForm2 *r static CHECK_RESULT usize ssa_extract_func_start(u8 *instruction_data, SsaInsFuncStart *result) { am_memcpy(&result->func_index, instruction_data, sizeof(result->func_index)); - am_memcpy(&result->num_registers, instruction_data + sizeof(result->func_index), sizeof(result->num_registers)); - return sizeof(result->func_index) + sizeof(result->num_registers); + am_memcpy(&result->num_params_regs, instruction_data + sizeof(result->func_index), sizeof(result->num_params_regs)); + am_memcpy(&result->num_local_vars_regs, instruction_data + sizeof(result->func_index) + sizeof(result->num_params_regs), sizeof(result->num_local_vars_regs)); + return sizeof(result->func_index) + sizeof(result->num_params_regs) + sizeof(result->num_local_vars_regs); } static CHECK_RESULT usize ssa_extract_func_call(u8 *instruction_data, SsaInsFuncCall *result) { @@ -48,6 +50,13 @@ static CHECK_RESULT 
usize ssa_extract_func_call(u8 *instruction_data, SsaInsFunc return sizeof(u8) + sizeof(result->result) + sizeof(result->func_decl); } +static CHECK_RESULT usize ssa_extract_func_call_extern(u8 *instruction_data, SsaInsFuncCallExtern *result) { + result->num_args = instruction_data[0]; + am_memcpy(&result->result, instruction_data + 1, sizeof(result->result)); + am_memcpy(&result->extern_func_index, instruction_data + 1 + sizeof(result->result), sizeof(result->extern_func_index)); + return sizeof(u8) + sizeof(result->result) + sizeof(result->extern_func_index); +} + static CHECK_RESULT usize ssa_extract_jump_zero(u8 *instruction_data, SsaInsJumpZero *result) { am_memcpy(&result->condition_reg, instruction_data, sizeof(result->condition_reg)); am_memcpy(&result->jump_offset, instruction_data + sizeof(result->condition_reg), sizeof(result->jump_offset)); @@ -107,7 +116,7 @@ static void add_strings(BytecodeCompilerContext *self) { # String |Type|Field|Description | |----|----|----------------------------------------------------------------------------------------| - |u16 |Size|The size of the string, in bytes. | + |u16 |Size|The size of the string, in bytes. Excluding the null-terminate character. | |u8* |Data|The data of the string, where the size is defined by @Size. 
Strings are null-terminated.| */ @@ -115,18 +124,12 @@ static void add_strings(BytecodeCompilerContext *self) { Buffer *instructions; BufferView *string; BufferView *strings_end; - u16 num_strings; u32 strings_size; ssa = self->parser->ssa; instructions = &self->bytecode.data; string = buffer_begin(&ssa->strings); strings_end = buffer_end(&ssa->strings); - if(strings_end - string > UINT16_MAX) { - amal_log_error("Too many strings in the program"); - throw(-1); - } - num_strings = strings_end - string; strings_size = 0; for(; string != strings_end; ++string) { @@ -135,10 +138,10 @@ static void add_strings(BytecodeCompilerContext *self) { string = buffer_begin(&ssa->strings); throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + strings_size)); - throw_if_error(buffer_append(instructions, &num_strings, sizeof(u16))); + throw_if_error(buffer_append(instructions, &ssa->string_counter, sizeof(u16))); throw_if_error(buffer_append(instructions, &strings_size, sizeof(u32))); for(; string != strings_end; ++string) { - char null_s = '\0'; + const char null_s = '\0'; throw_if_error(buffer_append(instructions, &string->size, sizeof(u16))); throw_if_error(buffer_append(instructions, string->data, string->size)); throw_if_error(buffer_append(instructions, &null_s, sizeof(char))); @@ -157,6 +160,55 @@ static void add_functions(BytecodeCompilerContext *self) { throw_if_error(buffer_append(&self->bytecode.data, &self->parser->ssa->func_counter, sizeof(u16))); } +static void add_extern_functions(BytecodeCompilerContext *self) { + /*doc(Bytecode external functions) + # External functions layout + |Type |Field |Description | + |------------------|------------------|-----------------------------------------------------------------------------------------| + |u16 |num_extern_func |The number of external functions. | + |u32 |extern_funcs_size |The size of the external functions section, in bytes. 
| + |External function*|External functions|Multiple external functions, where the number of functions is defined by @num_extern_func| + + # External function + |Type|Field |Description | + |----|--------|-----------------------------------------------------------------------------------------------------| + |u8 |num_args|The number of arguments the functions has. | + |u8 |name_len|The length of the external function name, in bytes. Excluding the null-terminate character. | + |u8* |name |The name of the external function, where the size is defined by @name_len. Names are null-terminated.| + */ + + Ssa *ssa; + Buffer *instructions; + SsaExternFunc *extern_func, *extern_func_end; + u32 extern_funcs_size; + + ssa = self->parser->ssa; + instructions = &self->bytecode.data; + extern_func = buffer_begin(&ssa->extern_funcs); + extern_func_end = buffer_end(&ssa->extern_funcs); + extern_funcs_size = 0; + + for(; extern_func != extern_func_end; ++extern_func) { + extern_funcs_size += sizeof(u8) + sizeof(u8) + extern_func->name.size + 1; /* +1 for null-termination of string */ + } + extern_func = buffer_begin(&ssa->extern_funcs); + + throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + extern_funcs_size)); + throw_if_error(buffer_append(instructions, &ssa->extern_func_counter, sizeof(u16))); + throw_if_error(buffer_append(instructions, &extern_funcs_size, sizeof(u32))); + for(; extern_func != extern_func_end; ++extern_func) { + const char null_s = '\0'; + u8 num_args; + num_args = buffer_get_size(&extern_func->func_sig->parameters, FunctionParameter); + throw_if_error(buffer_append(instructions, &num_args, sizeof(num_args))); + throw_if_error(buffer_append(instructions, &extern_func->name.size, sizeof(u8))); + throw_if_error(buffer_append(instructions, extern_func->name.data, extern_func->name.size)); + throw_if_error(buffer_append(instructions, &null_s, sizeof(char))); + } + + assert(sizeof(SsaExternFuncIndex) == sizeof(u16) && "Program decoder needs to be 
updated since size of extern func index has changed"); +} + static void add_ins1(BytecodeCompilerContext *self, AmalOpcode opcode, const char *fmt) { throw_if_error(buffer_append(&self->bytecode.data, &opcode, sizeof(AmalOpcodeType))); fprintf(stderr, fmt); @@ -164,7 +216,7 @@ static void add_ins1(BytecodeCompilerContext *self, AmalOpcode opcode, const cha } -static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, u8 reg, const char *fmt) { +static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, i8 reg, const char *fmt) { Buffer *instructions; size_t index; instructions = &self->bytecode.data; @@ -177,7 +229,7 @@ static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, u8 reg, c fputc('\n', stderr); } -static void add_ins3(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 src_reg, const char *fmt) { +static void add_ins3(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_reg, i8 src_reg, const char *fmt) { Buffer *instructions; size_t index; instructions = &self->bytecode.data; @@ -204,7 +256,7 @@ static void add_ins4(BytecodeCompilerContext *self, AmalOpcode opcode, u16 data, fputc('\n', stderr); } -static void add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 reg1, u8 reg2, const char *fmt) { +static void add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_reg, i8 reg1, i8 reg2, const char *fmt) { Buffer *instructions; size_t index; instructions = &self->bytecode.data; @@ -219,7 +271,7 @@ static void add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_re fputc('\n', stderr); } -static void add_ins6(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u16 data, const char *fmt) { +static void add_ins6(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_reg, u16 data, const char *fmt) { Buffer *instructions; size_t index; instructions = &self->bytecode.data; @@ -233,17 +285,18 @@ static void add_ins6(BytecodeCompilerContext *self, 
AmalOpcode opcode, u8 dst_re fputc('\n', stderr); } -static void add_ins7(BytecodeCompilerContext *self, AmalOpcode opcode, u16 idx, u8 arg, const char *fmt) { +static void add_ins7(BytecodeCompilerContext *self, AmalOpcode opcode, u16 idx, i8 num_args, i8 dst_reg, const char *fmt) { Buffer *instructions; size_t index; instructions = &self->bytecode.data; index = instructions->size; - throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(idx) + sizeof(arg))); + throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(idx) + sizeof(num_args) + sizeof(dst_reg))); instructions->data[index] = opcode; memcpy(instructions->data + index + sizeof(AmalOpcodeType), &idx, sizeof(idx)); - instructions->data[index + sizeof(AmalOpcodeType) + sizeof(idx)] = arg; - fprintf(stderr, fmt, idx, arg); + instructions->data[index + sizeof(AmalOpcodeType) + sizeof(idx)] = num_args; + instructions->data[index + sizeof(AmalOpcodeType) + sizeof(idx) + sizeof(num_args)] = dst_reg; + fprintf(stderr, fmt, idx, num_args, dst_reg); fputc('\n', stderr); } @@ -283,6 +336,7 @@ static void add_instructions(BytecodeCompilerContext *self) { SsaInsForm2 ssa_ins_form2; SsaInsFuncStart ssa_ins_func_start; SsaInsFuncCall ssa_ins_func_call; + SsaInsFuncCallExtern ssa_ins_func_call_extern; SsaInsJumpZero ssa_ins_jump_zero; SsaInsJump ssa_ins_jump; @@ -490,7 +544,7 @@ static void add_instructions(BytecodeCompilerContext *self) { } case SSA_FUNC_START: { instruction += ssa_extract_func_start(instruction, &ssa_ins_func_start); - add_ins4(self, AMAL_OP_FUNC_START, ssa_ins_func_start.num_registers, "func_start %u"); + add_ins6(self, AMAL_OP_FUNC_START, ssa_ins_func_start.num_params_regs, ssa_ins_func_start.num_local_vars_regs, "func_start %d, %u"); break; } case SSA_FUNC_END: { @@ -505,10 +559,6 @@ static void add_instructions(BytecodeCompilerContext *self) { break; } case SSA_CALL: { - /* - TODO: Pass return register to function. 
The register should be a pointer that - has the size of the function return values so the return values can fit in it. - */ /* TODO: Using ssa_func_index only works correctly if the function was defined in the same file as the function call. To make this work with calling functions in other files, @@ -518,8 +568,12 @@ static void add_instructions(BytecodeCompilerContext *self) { is defined as the size of all previous files' number of functions. */ instruction += ssa_extract_func_call(instruction, &ssa_ins_func_call); - add_ins7(self, AMAL_OP_CALL, ssa_ins_func_call.func_decl->ssa_func_index, ssa_ins_func_call.num_args, "call %d, %d"); - assert(bool_false && "TODO: Assign function result (RAX for x86_64) to ssa_ins_func_call.result reg"); + add_ins7(self, AMAL_OP_CALL, ssa_ins_func_call.func_decl->ssa_func_index, ssa_ins_func_call.num_args, ssa_ins_func_call.result, "call f%d, %d, r%d"); + break; + } + case SSA_CALL_EXTERN: { + instruction += ssa_extract_func_call_extern(instruction, &ssa_ins_func_call_extern); + add_ins7(self, AMAL_OP_CALLE, ssa_ins_func_call_extern.extern_func_index, ssa_ins_func_call_extern.num_args, ssa_ins_func_call_extern.result, "calle ef%d, %d, r%d"); break; } case SSA_JUMP_ZERO: { @@ -558,5 +612,6 @@ void generate_bytecode_from_ssa(BytecodeCompilerContext *self) { add_intermediates(self); add_strings(self); add_functions(self); + add_extern_functions(self); add_instructions(self); } diff --git a/src/compiler.c b/src/compiler.c index 228cb74..803515b 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -273,7 +273,7 @@ static CHECK_RESULT int thread_generate_ssa(Parser *parser) { int result; return_if_error(arena_allocator_alloc(parser->allocator, sizeof(Ssa), (void**)&compiler_context.ssa)); - return_if_error(ssa_init(compiler_context.ssa, parser->allocator)); + return_if_error(ssa_init(compiler_context.ssa, parser)); compiler_context.compiler = parser->compiler; parser->ssa = compiler_context.ssa; amal_log_debug("Generating SSA for file: 
%.*s", parser->tokenizer.code_name.size, parser->tokenizer.code_name.data); @@ -549,6 +549,36 @@ int amal_compiler_load_file(amal_compiler_options *options, amal_program *progra return result; } +/* TODO: Verify main func has correct signature */ +static CHECK_RESULT int validate_main_func(FileScopeReference *main_file_scope, LhsExpr **main_func) { + const BufferView main_func_name = { "main", 4 }; + LhsExpr *main_func_expr; + + main_func_expr = structdecl_get_field_by_name(&main_file_scope->parser->struct_decl, main_func_name); + if(!main_func_expr) { + amal_log_error("main function missing from start file \"%.*s\"", main_file_scope->canonical_path.size, main_file_scope->canonical_path.data); + return AMAL_COMPILER_ERR; + } + *main_func = main_func_expr; + + if(!main_func_expr->rhs_expr || main_func_expr->rhs_expr->type != AST_FUNCTION_DECL) { + amal_log_error("main exists in start file \"%.*s\" but it's not an non-extern function", main_file_scope->canonical_path.size, main_file_scope->canonical_path.data); + return AMAL_COMPILER_ERR; + } + + if(!LHS_EXPR_IS_CONST(main_func_expr)) { + amal_log_error("main function in start file \"%.*s\" has to be const", main_file_scope->canonical_path.size, main_file_scope->canonical_path.data); + return AMAL_COMPILER_ERR; + } + + if(LHS_EXPR_IS_EXTERN(main_func_expr)) { + amal_log_error("main function in start file \"%.*s\" can't be declared as extern", main_file_scope->canonical_path.size, main_file_scope->canonical_path.data); + return AMAL_COMPILER_ERR; + } + + return 0; +} + int amal_compiler_internal_load_file(amal_compiler *self, const char *filepath, FileScopeReference **file_scope) { int result; ParserThreadData *parser_thread_data; @@ -587,39 +617,18 @@ int amal_compiler_internal_load_file(amal_compiler *self, const char *filepath, and writing it to a file, which is an IO bottlenecked operation and it won't benefit from multithreading and may even lose performance because of it. 
*/ - const BufferView main_func_name = { "main", 4 }; - LhsExpr *main_func_expr; + LhsExpr *main_func; return_if_error(amal_compiler_load_file_join_threads(self)); assert(amal_compiler_check_all_threads_done(self)); amal_log_info("Finished parsing all files, resolving AST"); - main_func_expr = structdecl_get_field_by_name(&(*file_scope)->parser->struct_decl, main_func_name); - if(!main_func_expr) { - amal_log_error("main function missing from start file \"%.*s\"", (*file_scope)->canonical_path.size, (*file_scope)->canonical_path.data); - return AMAL_COMPILER_ERR; - } - - if(!main_func_expr->rhs_expr || main_func_expr->rhs_expr->type != AST_FUNCTION_DECL) { - amal_log_error("main exists in start file \"%.*s\" but it's not an non-extern function", (*file_scope)->canonical_path.size, (*file_scope)->canonical_path.data); - return AMAL_COMPILER_ERR; - } - - if(!LHS_EXPR_IS_CONST(main_func_expr)) { - amal_log_error("main function in start file \"%.*s\" has to be const", (*file_scope)->canonical_path.size, (*file_scope)->canonical_path.data); - return AMAL_COMPILER_ERR; - } - - if(LHS_EXPR_IS_EXTERN(main_func_expr)) { - amal_log_error("main function in start file \"%.*s\" can't be declared as extern", (*file_scope)->canonical_path.size, (*file_scope)->canonical_path.data); - return AMAL_COMPILER_ERR; - } - + return_if_error(validate_main_func(*file_scope, &main_func)); /* The main function is the start file needs to be exported, so it's accessible in the program execution to find the entry (main) function. 
*/ - main_func_expr->decl_flags |= DECL_FLAG_EXPORT; + main_func->decl_flags |= DECL_FLAG_EXPORT; return_if_error(amal_compiler_dispatch_generic(self, THREAD_WORK_RESOLVE_AST)); assert(amal_compiler_check_all_threads_done(self)); diff --git a/src/parser.c b/src/parser.c index 33e9146..cdf7c5b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -20,6 +20,8 @@ do { \ throw(return_if_result); \ } while(0) +#define VAR_MAX_LEN UINT8_MAX + static CHECK_RESULT Ast* parser_parse_rhs(Parser *self); static CHECK_RESULT Ast* parser_parse_body(Parser *self); static CHECK_RESULT Ast* parser_parse_struct_body(Parser *self); @@ -193,21 +195,27 @@ static void parser_parse_function_parameters(Parser *self, FunctionSignature *fu FUNC_RETURN_TYPES = VAR_TYPE (',' VAR_TYPE)* */ static void parser_parse_function_return_types(Parser *self, FunctionSignature *func_sig) { + int return_type_index; + return_type_index = 0; for(;;) { VariableType var_type; bool match; parser_parse_var_type(self, &var_type); if(var_type.type == VARIABLE_TYPE_NONE) { + /* If function has no return types */ + if(return_type_index == 0) + return; self->error = tokenizer_create_error(&self->tokenizer, tokenizer_get_error_index(&self->tokenizer), - "Expected closure return type"); + "Expected type or closure signature"); throw(PARSER_UNEXPECTED_TOKEN); } throw_if_error(function_signature_add_return_type(func_sig, &var_type)); throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_COMMA, &match)); if(!match) return; + ++return_type_index; } } @@ -256,14 +264,9 @@ void parser_parse_var_type(Parser *self, VariableType *result) { return; } - result->type = VARIABLE_TYPE_SIGNATURE; result->value.signature = parser_parse_function_signature(self); - if(!result->value.signature) { - self->error = tokenizer_create_error(&self->tokenizer, - tokenizer_get_code_reference_index(&self->tokenizer, self->tokenizer.value.identifier.data), - "Expected type or closure signature"); - throw(PARSER_UNEXPECTED_TOKEN); - } + 
if(result->value.signature) + result->type = VARIABLE_TYPE_SIGNATURE; } /* @@ -280,6 +283,12 @@ void parser_parse_var_type_def(Parser *self, VariableType *result) { } parser_parse_var_type(self, result); + if(result->type == VARIABLE_TYPE_NONE) { + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_error_index(&self->tokenizer), + "Expected type or closure signature"); + throw(PARSER_UNEXPECTED_TOKEN); + } } /* @@ -339,6 +348,13 @@ static CHECK_RESULT LhsExpr* parser_parse_declaration_lhs(Parser *self) { throw_if_error(arena_allocator_alloc(self->allocator, sizeof(LhsExpr), (void**)&result)); lhsexpr_init(result, decl_flag, var_name); + if(var_name.size > VAR_MAX_LEN) { + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_code_reference_index(&self->tokenizer, var_name.data), + "Variable name can't be longer than %d", VAR_MAX_LEN); + throw(PARSER_ERR); + } + parser_parse_var_type_def(self, &result->type); return result; } @@ -741,6 +757,12 @@ static Ast* parser_parse_lhs_rhs(Parser *self, LhsExpr *lhs_expr) { func_decl = parser_parse_closure(self); if(func_decl) { + if(buffer_get_size(&func_decl->signature->return_types, FunctionReturnType) > 1 && LHS_EXPR_IS_EXTERN(lhs_expr)) { + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_code_reference_index(&self->tokenizer, lhs_expr->var_name.data), + "Extern closure can only have one return value"); + throw(PARSER_ERR); + } throw_if_error(ast_create(self->allocator, func_decl, AST_FUNCTION_DECL, &result)); func_decl->lhs_expr = lhs_expr; return result; diff --git a/src/program.c b/src/program.c index a73cf72..9ec1b41 100644 --- a/src/program.c +++ b/src/program.c @@ -1,5 +1,6 @@ #include "../include/program.h" #include "../include/std/mem.h" +#include "../include/std/hash.h" #include "../include/std/alloc.h" #include "../include/std/log.h" #include "../include/std/buffer_view.h" @@ -26,7 +27,7 @@ typedef struct { } Number; /*doc(Bytecode) -The layout of the 
full bytecode is: Header (Intermediates Strings Functions Instructions)* +The layout of the full bytecode is: Header (Intermediates Strings Functions External_Functions Instructions)* */ static CHECK_RESULT int amal_program_append_header(amal_program *self) { @@ -58,13 +59,18 @@ static CHECK_RESULT int amal_program_append_header(amal_program *self) { int amal_program_init(amal_program *self) { ignore_result_int(buffer_init(&self->data, NULL)); self->string_indices = NULL; + self->extern_func_indices = NULL; self->intermediates_start = NULL; self->strings_start = NULL; + self->extern_funcs_start = NULL; self->read_index = 0; self->num_intermediates = 0; self->num_strings = 0; self->num_functions = 0; + self->num_extern_functions = 0; + cleanup_if_error(arena_allocator_init(&self->allocator)); + cleanup_if_error(hash_map_init(&self->extern_funcs_map, &self->allocator, sizeof(ProgramExternFunc), hash_map_compare_string, amal_hash_string)); cleanup_if_error(amal_program_append_header(self)); return 0; @@ -74,9 +80,51 @@ int amal_program_init(amal_program *self) { } void amal_program_deinit(amal_program *self) { - buffer_deinit(&self->data); + arena_allocator_deinit(&self->allocator); + am_free(self->extern_func_indices); am_free(self->string_indices); - self->string_indices = NULL; + if(self->data.data) + buffer_deinit(&self->data); +} + +int amal_program_add_extern_func(amal_program *self, BufferView name, void *func_ptr, int args_byte_size) { + ProgramExternFunc extern_func; + extern_func.func = func_ptr; + extern_func.args_byte_size = args_byte_size; + return hash_map_insert(&self->extern_funcs_map, name, &extern_func); +} + +static CHECK_RESULT int amal_program_get_extern_func_by_index(amal_program *self, u16 index, ProgramExternFunc *result) { + u8 *extern_func_ptr; + u8 num_args; + u8 func_name_len; + BufferView func_name; + + if(index >= self->num_extern_functions) { + result->func = NULL; + result->args_byte_size = 0; + amal_log_error("Extern func index index 
%ld is out of range (%ld)", index, self->num_extern_functions); + return AMAL_PROGRAM_INSTRUCTION_INVALID_EXTERN_FUNC_INDEX; + } + + extern_func_ptr = self->extern_funcs_start + self->extern_func_indices[index]; + + am_memcpy(&num_args, extern_func_ptr, sizeof(num_args)); + am_memcpy(&func_name_len, extern_func_ptr + sizeof(num_args), sizeof(func_name_len)); + func_name.size = func_name_len; + func_name.data = (const char*)(extern_func_ptr + sizeof(num_args) + sizeof(func_name_len)); + + if(!hash_map_get(&self->extern_funcs_map, func_name, result)) { + amal_log_error("No such extern function: %.*s", func_name.size, func_name.data); + return AMAL_PROGRAM_NO_SUCH_EXTERNAL_FUNCTION; + } + + /* TODO: This assumes all arguments are of size sizeof(isize) */ + if(result->args_byte_size != -1 && result->args_byte_size != num_args * (int)sizeof(isize)) { + amal_log_error("Extern function %.*s was registered to take %d byte(s), but the program says it takes %d byte(s)", func_name.size, func_name.data, result->args_byte_size, num_args * sizeof(isize)); + return AMAL_PROGRAM_NO_SUCH_EXTERNAL_FUNCTION; + } + return 0; } int amal_program_append_bytecode(amal_program *self, Bytecode *bytecode) { @@ -119,6 +167,14 @@ static CHECK_RESULT int amal_program_read_header(amal_program *self) { return AMAL_PROGRAM_OK; } +static CHECK_RESULT bool amal_program_read_advance(amal_program *self, void *output, usize bytes_to_read) { + if(bytes_left_to_read(self) < bytes_to_read) + return bool_false; + am_memcpy(output, self->data.data + self->read_index, bytes_to_read); + self->read_index += bytes_to_read; + return bool_true; +} + static CHECK_RESULT int amal_program_read_intermediates(amal_program *self) { u32 intermediates_size; /*u32 read_end;*/ @@ -136,7 +192,7 @@ static CHECK_RESULT int amal_program_read_intermediates(amal_program *self) { return AMAL_PROGRAM_INVALID_INTERMEDIATES_SIZE; } - self->intermediates_start = &self->data.data[self->read_index]; + self->intermediates_start = 
(u8*)(self->data.data + self->read_index); self->num_intermediates = intermediates_size / (sizeof(u8) + sizeof(u64)); self->read_index += intermediates_size; @@ -144,61 +200,98 @@ static CHECK_RESULT int amal_program_read_intermediates(amal_program *self) { } static CHECK_RESULT int amal_program_read_strings(amal_program *self) { - u16 num_strings; u32 strings_size; - u32 read_start; - u32 read_end; u32 *string_index_ptr; - if(bytes_left_to_read(self) < sizeof(num_strings)) + if(!amal_program_read_advance(self, &self->num_strings, sizeof(u16))) return AMAL_PROGRAM_INVALID_STRINGS; - am_memcpy(&num_strings, &self->data.data[self->read_index], sizeof(num_strings)); - self->read_index += sizeof(num_strings); - self->num_strings = num_strings; - - if(am_malloc(sizeof(u32) * num_strings, (void**)&self->string_indices) != 0) - return AMAL_PROGRAM_STRING_ALLOC_FAILURE; - string_index_ptr = self->string_indices; - - if(bytes_left_to_read(self) < sizeof(strings_size)) + if(!amal_program_read_advance(self, &strings_size, sizeof(strings_size))) return AMAL_PROGRAM_INVALID_STRINGS; - am_memcpy(&strings_size, &self->data.data[self->read_index], sizeof(strings_size)); - self->read_index += sizeof(strings_size); - if(bytes_left_to_read(self) < strings_size) return AMAL_PROGRAM_INVALID_STRINGS_SIZE; - read_start = self->read_index; - read_end = read_start + strings_size; - self->strings_start = &self->data.data[self->read_index]; - while(self->read_index < read_end) { - u16 string_size; + if(am_malloc(sizeof(u32) * self->num_strings, (void**)&self->string_indices) != 0) + return AMAL_PROGRAM_ALLOC_FAILURE; + string_index_ptr = self->string_indices; + + { + const u32 read_start = self->read_index; + const u32 read_end = read_start + strings_size; + self->strings_start = (u8*)(self->data.data + self->read_index); + while(self->read_index < read_end) { + u16 string_size; - if(bytes_left_to_read(self) < sizeof(string_size)) - return AMAL_PROGRAM_INVALID_STRINGS; + 
if(bytes_left_to_read(self) < sizeof(string_size)) + return AMAL_PROGRAM_INVALID_STRINGS; - *string_index_ptr = self->read_index - read_start; - ++string_index_ptr; - am_memcpy(&string_size, &self->data.data[self->read_index], sizeof(string_size)); - self->read_index += sizeof(string_size); + *string_index_ptr = self->read_index - read_start; + ++string_index_ptr; + am_memcpy(&string_size, &self->data.data[self->read_index], sizeof(string_size)); + self->read_index += sizeof(string_size); - if(bytes_left_to_read(self) < string_size) - return AMAL_PROGRAM_INVALID_STRINGS; + /* +1 to skip null-termination character */ + if(bytes_left_to_read(self) < string_size + 1U) + return AMAL_PROGRAM_INVALID_STRINGS; - self->read_index += string_size + 1; /* +1 to skip null-termination character */ + self->read_index += string_size + 1; /* +1 to skip null-termination character */ + } + assert(self->read_index == read_end); } - assert(self->read_index == read_end); return AMAL_PROGRAM_OK; } static CHECK_RESULT int amal_program_read_functions(amal_program *self) { - if(bytes_left_to_read(self) < sizeof(u16)) + if(!amal_program_read_advance(self, &self->num_functions, sizeof(u16))) return AMAL_PROGRAM_INVALID_FUNCTIONS; - am_memcpy(&self->num_functions, &self->data.data[self->read_index], sizeof(u16)); - self->read_index += sizeof(u16); + return AMAL_PROGRAM_OK; +} + +static CHECK_RESULT int amal_program_read_external_functions(amal_program *self) { + u32 extern_funcs_size; + u32 *extern_func_index_ptr; + + if(!amal_program_read_advance(self, &self->num_extern_functions, sizeof(u16))) + return AMAL_PROGRAM_INVALID_EXTERNAL_FUNCTIONS; + + if(!amal_program_read_advance(self, &extern_funcs_size, sizeof(extern_funcs_size))) + return AMAL_PROGRAM_INVALID_EXTERNAL_FUNCTIONS; + + if(bytes_left_to_read(self) < extern_funcs_size) + return AMAL_PROGRAM_INVALID_EXTERNAL_FUNCTIONS_SIZE; + + if(am_malloc(sizeof(u32) * self->num_extern_functions, (void**)&self->extern_func_indices) != 0) + 
return AMAL_PROGRAM_ALLOC_FAILURE; + extern_func_index_ptr = self->extern_func_indices; + + { + const u32 read_start = self->read_index; + const u32 read_end = read_start + extern_funcs_size; + self->extern_funcs_start = (u8*)(self->data.data + self->read_index); + while(self->read_index < read_end) { + u8 num_args; + u8 func_name_size; + + if(bytes_left_to_read(self) < sizeof(num_args) + sizeof(func_name_size)) + return AMAL_PROGRAM_INVALID_EXTERNAL_FUNCTIONS; + + *extern_func_index_ptr = self->read_index - read_start; + ++extern_func_index_ptr; + num_args = self->data.data[self->read_index]; + func_name_size = self->data.data[self->read_index + sizeof(num_args)]; + self->read_index += sizeof(num_args) + sizeof(func_name_size); + + /* +1 to skip null-termination character */ + if(bytes_left_to_read(self) < func_name_size + 1U) + return AMAL_PROGRAM_INVALID_STRINGS; + + self->read_index += func_name_size + 1; /* +1 to skip null-termination character */ + } + assert(self->read_index == read_end); + } + return AMAL_PROGRAM_OK; } @@ -212,7 +305,7 @@ static CHECK_RESULT int amal_program_get_intermediate_by_index(amal_program *sel } static CHECK_RESULT int amal_program_get_data_by_index(amal_program *self, u16 index, BufferView *result) { - char *str_ptr; + u8 *str_ptr; if(index >= self->num_strings) { amal_log_error("Data index %ld is out of range (%ld)", index, self->num_strings); @@ -221,7 +314,7 @@ static CHECK_RESULT int amal_program_get_data_by_index(amal_program *self, u16 i str_ptr = self->strings_start + self->string_indices[index]; am_memcpy(&result->size, str_ptr, sizeof(u16)); - result->data = str_ptr + sizeof(u16); + result->data = (const char*)(str_ptr + sizeof(u16)); return 0; } @@ -236,12 +329,9 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ (void)inside_func; func_counter = 0; - if(bytes_left_to_read(self) < sizeof(instructions_size)) + if(!amal_program_read_advance(self, &instructions_size, 
sizeof(instructions_size))) return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE; - am_memcpy(&instructions_size, &self->data.data[self->read_index], sizeof(instructions_size)); - self->read_index += sizeof(instructions_size); - if(bytes_left_to_read(self) < instructions_size) return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE; @@ -278,7 +368,7 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ u16 intermediate_index; Number number; - am_memcpy(&intermediate_index, &self->data.data[self->read_index + sizeof(u8)], sizeof(intermediate_index)); + am_memcpy(&intermediate_index, &self->data.data[self->read_index + sizeof(i8)], sizeof(intermediate_index)); return_if_error(amal_program_get_intermediate_by_index(self, intermediate_index, &number)); return_if_error(amal_exec_movi(executor, self->data.data[self->read_index], number.value.integer)); @@ -289,7 +379,7 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ u16 data_index; BufferView data_ptr; - am_memcpy(&data_index, &self->data.data[self->read_index + sizeof(u8)], sizeof(data_index)); + am_memcpy(&data_index, &self->data.data[self->read_index + sizeof(i8)], sizeof(data_index)); return_if_error(amal_program_get_data_by_index(self, data_index, &data_ptr)); return_if_error(amal_exec_movd(executor, self->data.data[self->read_index], data_ptr)); @@ -355,16 +445,34 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ case AMAL_OP_CALL: { u16 func_index; u8 num_args; + i8 dst_reg; am_memcpy(&func_index, self->data.data + self->read_index, sizeof(func_index)); - am_memcpy(&num_args, self->data.data + self->read_index + sizeof(func_index), sizeof(num_args)); - return_if_error(amal_exec_call(executor, func_index, num_args)); - self->read_index += 3; + num_args = self->data.data[self->read_index + sizeof(func_index)]; + dst_reg = self->data.data[self->read_index + sizeof(func_index) + sizeof(num_args)]; + 
return_if_error(amal_exec_call(executor, func_index, num_args, dst_reg)); + self->read_index += 4; break; } case AMAL_OP_CALLR: assert(bool_false && "TODO: Implement CALLR"); self->read_index += 2; break; + case AMAL_OP_CALLE: { + u16 extern_func_index; + u8 num_args; + i8 dst_reg; + am_memcpy(&extern_func_index, self->data.data + self->read_index, sizeof(extern_func_index)); + num_args = self->data.data[self->read_index + sizeof(extern_func_index)]; + dst_reg = self->data.data[self->read_index + sizeof(extern_func_index) + sizeof(num_args)]; + + { + ProgramExternFunc extern_func; + return_if_error(amal_program_get_extern_func_by_index(self, extern_func_index, &extern_func)); + return_if_error(amal_exec_calle(executor, extern_func.func, num_args, dst_reg)); + } + self->read_index += 4; + break; + } case AMAL_OP_CMP: { return_if_error(amal_exec_cmp(executor, self->data.data[self->read_index], self->data.data[self->read_index + 1], self->data.data[self->read_index + 2])); self->read_index += 3; @@ -372,7 +480,7 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ } case AMAL_OP_JZ: { i16 jump_offset; - am_memcpy(&jump_offset, &self->data.data[self->read_index + sizeof(u8)], sizeof(jump_offset)); + am_memcpy(&jump_offset, &self->data.data[self->read_index + sizeof(i8)], sizeof(jump_offset)); return_if_error(amal_exec_jz(executor, self->data.data[self->read_index], jump_offset)); self->read_index += 3; break; @@ -385,21 +493,25 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ break; } case AMAL_OP_RET: { - const u8 reg = self->data.data[self->read_index]; + const i8 reg = self->data.data[self->read_index]; return_if_error(amal_exec_ret(executor, reg)); self->read_index += 1; break; } case AMAL_OP_FUNC_START: { - u16 func_num_registers; + u8 func_num_param_regs; + u16 func_num_local_var_regs; + assert(!inside_func); inside_func = bool_true; assert(func_counter < self->num_functions); ++func_counter; - 
am_memcpy(&func_num_registers, &self->data.data[self->read_index], sizeof(func_num_registers)); - return_if_error(amal_exec_func_start(executor, func_num_registers)); - self->read_index += 2; + func_num_param_regs = self->data.data[self->read_index]; + (void)func_num_param_regs; + am_memcpy(&func_num_local_var_regs, self->data.data + self->read_index + sizeof(func_num_param_regs), sizeof(func_num_local_var_regs)); + return_if_error(amal_exec_func_start(executor, func_num_local_var_regs)); + self->read_index += 3; break; } case AMAL_OP_FUNC_END: { @@ -420,7 +532,7 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ int amal_program_run(amal_program *self) { int result; amal_executor *executor; - result = 0; + result = AMAL_PROGRAM_ERR; return_if_error(amal_executor_init(&executor)); cleanup_if_error(amal_program_read_header(self)); @@ -428,6 +540,7 @@ int amal_program_run(amal_program *self) { cleanup_if_error(amal_program_read_intermediates(self)); cleanup_if_error(amal_program_read_strings(self)); cleanup_if_error(amal_program_read_functions(self)); + cleanup_if_error(amal_program_read_external_functions(self)); cleanup_if_error(amal_program_read_instructions(self, executor)); } result = amal_executor_run(executor); diff --git a/src/ssa/ssa.c b/src/ssa/ssa.c index 0aabc15..01c4f0f 100644 --- a/src/ssa/ssa.c +++ b/src/ssa/ssa.c @@ -5,8 +5,15 @@ #include "../../include/std/thread.h" #include "../../include/ast.h" #include "../../include/compiler.h" +#include "../../include/parser.h" #include +/* + TODO: Instead of using memcpy to copy data to the ssa, make it cleaner by + defining all the data in structs and copying the structs. Even if it takes more space, + it might even be faster. 
+*/ + #define throw(result) do { longjmp(context->env, (result)); } while(0) #define throw_if_error(result) \ do { \ @@ -20,13 +27,9 @@ do { \ #define MAX_STRING_LENGTH UINT16_MAX static int compare_number(const void *a, const void *b) { - const SsaNumber *lhs; - const SsaNumber *rhs; - lhs = a; - rhs = b; - if(rhs->type == lhs->type && rhs->value.integer == lhs->value.integer) - return 0; - return 1; + const SsaNumber *lhs = a; + const SsaNumber *rhs = b; + return (rhs->type == lhs->type && rhs->value.integer == lhs->value.integer); } static usize hash_number(const u8 *data, usize size) { @@ -50,23 +53,31 @@ SsaNumber create_ssa_float(f64 value) { return result; } -int ssa_init(Ssa *self, ArenaAllocator *allocator) { - return_if_error(buffer_init(&self->instructions, allocator)); - return_if_error(hash_map_init(&self->intermediates_map, allocator, sizeof(SsaIntermediateIndex), compare_number, hash_number)); - return_if_error(buffer_init(&self->intermediates, allocator)); - return_if_error(hash_map_init(&self->strings_map, allocator, sizeof(SsaStringIndex), hash_map_compare_string, amal_hash_string)); - return_if_error(buffer_init(&self->strings, allocator)); +int ssa_init(Ssa *self, Parser *parser) { + return_if_error(buffer_init(&self->instructions, parser->allocator)); + return_if_error(hash_map_init(&self->intermediates_map, parser->allocator, sizeof(SsaIntermediateIndex), compare_number, hash_number)); + return_if_error(buffer_init(&self->intermediates, parser->allocator)); + return_if_error(hash_map_init(&self->strings_map, parser->allocator, sizeof(SsaStringIndex), hash_map_compare_string, amal_hash_string)); + return_if_error(buffer_init(&self->strings, parser->allocator)); + return_if_error(hash_map_init(&self->extern_funcs_map, parser->allocator, sizeof(SsaExternFuncIndex), hash_map_compare_string, amal_hash_string)); + return_if_error(buffer_init(&self->extern_funcs, parser->allocator)); self->intermediate_counter = 0; self->string_counter = 0; + 
self->extern_func_counter = 0; self->reg_counter = 0; + self->param_counter = 0; self->func_counter = 0; + self->parser = parser; return 0; } static CHECK_RESULT int ssa_get_unique_reg(Ssa *self, SsaRegister *result) { + assert(result); /* Overflow */ - if(self->reg_counter + 1 < self->reg_counter) + if((u16)self->reg_counter + self->param_counter + 1 > INT16_MAX) { + amal_log_error("Ssa too many registers!"); return -1; + } *result = self->reg_counter++; return 0; } @@ -97,8 +108,10 @@ static CHECK_RESULT int ssa_try_add_intermediate(Ssa *self, SsaNumber number, Ss return 0; /* Overflow */ - if(self->intermediate_counter + 1 < self->intermediate_counter) + if(self->intermediate_counter + 1 <= self->intermediate_counter) { + amal_log_error("Ssa too many intermediates!"); return -1; + } *result_index = self->intermediate_counter; ++self->intermediate_counter; @@ -126,8 +139,10 @@ static CHECK_RESULT int ssa_try_add_string(Ssa *self, BufferView str, SsaStringI return 0; /* Overflow */ - if(self->string_counter + 1 < self->string_counter) + if(self->string_counter + 1 <= self->string_counter) { + amal_log_error("Ssa too many strings!"); return -1; + } if(str.size > MAX_STRING_LENGTH) { amal_log_error("String \"%.*s\" is longer than %d\n", str.size, str.data, MAX_STRING_LENGTH); @@ -141,10 +156,44 @@ static CHECK_RESULT int ssa_try_add_string(Ssa *self, BufferView str, SsaStringI return hash_map_insert(&self->strings_map, str, result_index); } -static CHECK_RESULT int ssa_add_ins_form1(Ssa *self, SsaInstruction ins_type, SsaRegister lhs, u16 rhs) { - usize index; - index = self->instructions.size; +/* + TODO: Right now this has the same scope as a file. 
This should be global, otherwise you could define multiple + extern func with the same name but different signature as long as they are defined in different files +*/ +static CHECK_RESULT int ssa_try_add_extern_func(Ssa *self, FunctionSignature *func_sig, BufferView name, SsaExternFuncIndex *result_index, BufferView *existing_func) { + bool exists; + assert(result_index); + assert(existing_func); + + exists = hash_map_get(&self->extern_funcs_map, name, result_index); + if(exists) { + const SsaExternFunc *existing_extern_func = buffer_get(&self->extern_funcs, *result_index, sizeof(SsaExternFunc)); + *existing_func = existing_extern_func->name; + if(!function_signature_equals(func_sig, existing_extern_func->func_sig)) + return SSA_ERR_EXTERN_FUNC_SIG_MISMATCH; + return 0; + } + + /* Overflow */ + if(self->extern_func_counter + 1 <= self->extern_func_counter) { + amal_log_error("Ssa too many extern closures!"); + return -1; + } + + *result_index = self->extern_func_counter; + ++self->extern_func_counter; + amal_log_debug("ef%u = \"%.*s\"", *result_index, name.size, name.data); + { + SsaExternFunc extern_func; + extern_func.func_sig = func_sig; + extern_func.name = name; + return_if_error(buffer_append(&self->extern_funcs, &extern_func, sizeof(extern_func))); + return hash_map_insert(&self->extern_funcs_map, name, result_index); + } +} +static CHECK_RESULT int ssa_add_ins_form1(Ssa *self, SsaInstruction ins_type, SsaRegister lhs, u16 rhs) { + const usize index = self->instructions.size; return_if_error(buffer_append_empty(&self->instructions, sizeof(u8) + sizeof(SsaRegister) + sizeof(u16))); self->instructions.data[index + 0] = ins_type; am_memcpy(self->instructions.data + index + 1, &lhs, sizeof(lhs)); @@ -165,40 +214,33 @@ static const char* binop_type_to_string(SsaInstruction binop_type) { } static CHECK_RESULT int ssa_add_ins_form2(Ssa *self, SsaInstruction ins_type, SsaRegister lhs, SsaRegister rhs, SsaRegister *result) { - usize index; - index = 
self->instructions.size; - - /* Overflow */ - if(self->reg_counter + 1 < self->reg_counter) - return -1; - - assert(result); + const usize index = self->instructions.size; + return_if_error(ssa_get_unique_reg(self, result)); return_if_error(buffer_append_empty(&self->instructions, sizeof(u8) + sizeof(SsaRegister) + sizeof(SsaRegister) + sizeof(SsaRegister))); - *result = self->reg_counter++; self->instructions.data[index + 0] = ins_type; am_memcpy(self->instructions.data + index + 1, result, sizeof(SsaRegister)); am_memcpy(self->instructions.data + index + 3, &lhs, sizeof(lhs)); am_memcpy(self->instructions.data + index + 5, &rhs, sizeof(rhs)); - amal_log_debug("r%u = r%u %s r%u", *result, lhs, binop_type_to_string(ins_type), rhs); + amal_log_debug("r%d = r%d %s r%d", *result, lhs, binop_type_to_string(ins_type), rhs); return 0; } static CHECK_RESULT int ssa_ins_assign_inter(Ssa *self, SsaRegister dest, SsaNumber number) { SsaIntermediateIndex index; return_if_error(ssa_try_add_intermediate(self, number, &index)); - amal_log_debug("r%u = i%u", dest, index); + amal_log_debug("r%d = i%u", dest, index); return ssa_add_ins_form1(self, SSA_ASSIGN_INTER, dest, index); } static CHECK_RESULT int ssa_ins_assign_string(Ssa *self, SsaRegister dest, BufferView str) { SsaStringIndex index; return_if_error(ssa_try_add_string(self, str, &index)); - amal_log_debug("r%u = s%u", dest, index); + amal_log_debug("r%d = s%u", dest, index); return ssa_add_ins_form1(self, SSA_ASSIGN_STRING, dest, index); } static CHECK_RESULT int ssa_ins_assign_reg(Ssa *self, SsaRegister dest, SsaRegister src) { - amal_log_debug("r%u = r%u", dest, src); + amal_log_debug("r%d = r%d", dest, src); return ssa_add_ins_form1(self, SSA_ASSIGN_REG, dest, src); } @@ -207,79 +249,82 @@ static CHECK_RESULT int ssa_ins_binop(Ssa *self, SsaInstruction binop_type, SsaR return ssa_add_ins_form2(self, binop_type, lhs, rhs, result); } -static CHECK_RESULT int ssa_ins_func_start(Ssa *self, SsaFuncIndex *result, usize 
*func_metadata_index) { - usize index; - index = self->instructions.size; +static CHECK_RESULT int ssa_ins_func_start(Ssa *self, SsaRegister num_reg_params, SsaFuncIndex *result, usize *func_metadata_index) { + const usize index = self->instructions.size; /* Overflow */ - if(self->func_counter + 1 < self->func_counter) + if(self->func_counter + 1 <= self->func_counter) { + amal_log_error("Ssa too many closures!"); return -1; + } - return_if_error(buffer_append_empty(&self->instructions, sizeof(u8) + sizeof(SsaFuncIndex) + sizeof(u16))); + return_if_error(buffer_append_empty(&self->instructions, sizeof(u8) + sizeof(SsaFuncIndex) + sizeof(SsaRegister) + sizeof(u16))); *result = self->func_counter++; self->instructions.data[index + 0] = SSA_FUNC_START; am_memcpy(self->instructions.data + index + 1, result, sizeof(SsaFuncIndex)); - *func_metadata_index = index + 1 + sizeof(SsaFuncIndex); + am_memcpy(self->instructions.data + index + 1 + sizeof(SsaFuncIndex), &num_reg_params, sizeof(SsaRegister)); + *func_metadata_index = index + 1 + sizeof(SsaFuncIndex) + sizeof(num_reg_params); /* No need to add data to instructions.data here, it can contain undefined data until we set it (@ the caller) */ - amal_log_debug("FUNC_START f%u", *result); + amal_log_debug("FUNC_START f%u, %d", *result, num_reg_params); return 0; } static CHECK_RESULT int ssa_ins_func_end(Ssa *self) { - u8 ins; - ins = SSA_FUNC_END; + const u8 ins = SSA_FUNC_END; amal_log_debug("FUNC_END"); return buffer_append(&self->instructions, &ins, 1); } static CHECK_RESULT int ssa_ins_push(Ssa *self, SsaRegister reg) { - usize index; - index = self->instructions.size; - + const usize index = self->instructions.size; return_if_error(buffer_append_empty(&self->instructions, sizeof(u8) + sizeof(SsaRegister))); self->instructions.data[index + 0] = SSA_PUSH; am_memcpy(self->instructions.data + index + 1, &reg, sizeof(SsaRegister)); - amal_log_debug("PUSH r%u", reg); + amal_log_debug("PUSH r%d", reg); return 0; } static
CHECK_RESULT int ssa_ins_call(Ssa *self, FunctionDecl *func_decl, u8 num_args, SsaRegister *result) { - usize index; - index = self->instructions.size; - - /* Overflow */ - if(self->reg_counter + 1 < self->reg_counter) - return -1; - + const usize index = self->instructions.size; + return_if_error(ssa_get_unique_reg(self, result)); return_if_error(buffer_append_empty(&self->instructions, sizeof(u8) + sizeof(u8) + sizeof(SsaRegister) + sizeof(func_decl))); - *result = self->reg_counter++; self->instructions.data[index + 0] = SSA_CALL; self->instructions.data[index + 1] = num_args; am_memcpy(self->instructions.data + index + 2, result, sizeof(SsaRegister)); am_memcpy(self->instructions.data + index + 2 + sizeof(SsaRegister), &func_decl, sizeof(func_decl)); - amal_log_debug("r%u = CALL %d, %p", *result, num_args, func_decl); + amal_log_debug("r%d = CALL %d, %p", *result, num_args, func_decl); + return 0; +} + +static CHECK_RESULT int ssa_ins_call_extern(Ssa *self, SsaExternFuncIndex extern_func_index, u8 num_args, SsaRegister *result) { + const usize index = self->instructions.size; + assert(extern_func_index < self->extern_func_counter); + return_if_error(ssa_get_unique_reg(self, result)); + return_if_error(buffer_append_empty(&self->instructions, sizeof(u8) + sizeof(u8) + sizeof(SsaRegister) + sizeof(extern_func_index))); + self->instructions.data[index + 0] = SSA_CALL_EXTERN; + self->instructions.data[index + 1] = num_args; + am_memcpy(self->instructions.data + index + 2, result, sizeof(SsaRegister)); + am_memcpy(self->instructions.data + index + 2 + sizeof(SsaRegister), &extern_func_index, sizeof(extern_func_index)); + amal_log_debug("r%d = CALL_EXTERN %d, %d", *result, num_args, extern_func_index); return 0; } static CHECK_RESULT int ssa_ins_jumpzero(Ssa *self, SsaRegister condition_reg, JumpOffset jump_offset) { - usize index; - index = self->instructions.size; + const usize index = self->instructions.size; 
return_if_error(buffer_append_empty(&self->instructions, sizeof(u8) + sizeof(SsaRegister) + sizeof(JumpOffset))); self->instructions.data[index + 0] = SSA_JUMP_ZERO; am_memcpy(self->instructions.data + index + 1, &condition_reg, sizeof(SsaRegister)); am_memcpy(self->instructions.data + index + 1 + sizeof(SsaRegister), &jump_offset, sizeof(JumpOffset)); if(jump_offset == 0) - amal_log_debug("JUMP_ZERO r%u, DUMMY", condition_reg); + amal_log_debug("JUMP_ZERO r%d, DUMMY", condition_reg); else - amal_log_debug("JUMP_ZERO r%u, %d", condition_reg, jump_offset); + amal_log_debug("JUMP_ZERO r%d, %d", condition_reg, jump_offset); return 0; } static CHECK_RESULT int ssa_ins_jump(Ssa *self, JumpOffset jump_offset) { - usize index; - index = self->instructions.size; - + const usize index = self->instructions.size; return_if_error(buffer_append_empty(&self->instructions, sizeof(u8) + sizeof(JumpOffset))); self->instructions.data[index + 0] = SSA_JUMP; am_memcpy(self->instructions.data + index + 1, &jump_offset, sizeof(JumpOffset)); @@ -288,9 +333,7 @@ static CHECK_RESULT int ssa_ins_jump(Ssa *self, JumpOffset jump_offset) { } static CHECK_RESULT int ssa_ins_return(Ssa *self, SsaRegister reg) { - usize index; - index = self->instructions.size; - + const usize index = self->instructions.size; return_if_error(buffer_append_empty(&self->instructions, sizeof(u8) + sizeof(SsaRegister))); self->instructions.data[index + 0] = SSA_RET; am_memcpy(self->instructions.data + index + 1, &reg, sizeof(SsaRegister)); @@ -359,9 +402,34 @@ static CHECK_RESULT SsaRegister number_generate_ssa(Number *self, SsaCompilerCon static CHECK_RESULT SsaRegister lhsexpr_extern_generate_ssa(LhsExpr *self, SsaCompilerContext *context) { /* TODO: SsaRegister should be extended to include static and extern data */ - (void)self; - (void)context; - amal_log_error("TODO: Implement lhsexpr_extern_generate_ssa"); + if(self->type.type == VARIABLE_TYPE_SIGNATURE) { + int err; + BufferView existing_func; + + err =
ssa_try_add_extern_func(context->ssa, self->type.value.signature, self->var_name, &self->extern_index, &existing_func); + if(err == SSA_ERR_EXTERN_FUNC_SIG_MISMATCH) { + Tokenizer *tokenizer; + tokenizer = &context->ssa->parser->tokenizer; + tokenizer_print_error(tokenizer, tokenizer_get_code_reference_index(tokenizer, self->var_name.data), + "Extern closure defined here with the name %.*s doesn't match extern closure with the same name defined in another location", + self->var_name.size, self->var_name.data); + /* + TODO: This wont work right now since the other location might belong to another parser. + There should be a function to get a parser from a code reference (loop all tokens and check code range). + Then the parsers that belong to scopes can also be removed. This is fine, since the lookup is only done on error. + */ + #if 0 + tokenizer_print_error(tokenizer, tokenizer_get_code_reference_index(tokenizer, self->var_name.data), + "Extern closure defined here with the name %.*s doesn't match extern closure with the same name defined in another location", + self->var_name.size, self->var_name.data); + #endif + throw(err); + } + if(err != 0) + throw(err); + } else { + assert(bool_false && "TODO: Implement lhsexpr_extern_generate_ssa for other data than functions"); + } return 0; } @@ -385,12 +453,7 @@ static CHECK_RESULT SsaRegister lhsexpr_generate_ssa(LhsExpr *self, AstResolveDa /*assert(bool_false);*/ return 0; } - throw_if_error(ssa_get_unique_reg(context->ssa, &reg)); - if(reg == rhs_reg) { - amal_log_error("rhs_expr is same as reg..
rhs type: %d", rhs_expr->type); - } - assert(reg != rhs_reg); - throw_if_error(ssa_ins_assign_reg(context->ssa, reg, rhs_reg)); + return rhs_reg; } else { /* TODO: Do not assign if we dont want default value */ if(resolve_data->type.type == RESOLVED_TYPE_LHS_EXPR) { @@ -426,6 +489,12 @@ static CHECK_RESULT SsaRegister function_parameter_generate_ssa(FunctionParamete return self->resolve_data.ssa_reg; throw_if_error(ssa_get_unique_reg(context->ssa, &reg)); + /* Parameters start at -1 and decrement */ + if((u16)reg - 1 >= (u16)reg) { + amal_log_error("Ssa too many parameters!"); + throw(-1); + } + reg = -1 - reg; self->resolve_data.status = AST_SSA_RESOLVED; self->resolve_data.ssa_reg = reg; return reg; @@ -453,9 +522,11 @@ static CHECK_RESULT SsaRegister funcdecl_generate_ssa(FunctionDecl *self, SsaCom that is reset after function end */ SsaRegister prev_reg_counter; + SsaRegister prev_param_counter; usize func_metadata_index; - int num_params; + prev_reg_counter = context->ssa->reg_counter; + prev_param_counter = context->ssa->param_counter; context->ssa->reg_counter = 0; /* @@ -463,44 +534,49 @@ static CHECK_RESULT SsaRegister funcdecl_generate_ssa(FunctionDecl *self, SsaCom This way we can know if a register access is for a parameter or not by checking the number */ function_signature_generate_params_ssa(self->signature, context); - num_params = buffer_get_size(&self->signature->parameters, FunctionParameter); + context->ssa->param_counter = context->ssa->reg_counter; + context->ssa->reg_counter = 0; amal_log_debug("SSA funcdecl %p", self); - throw_if_error(ssa_ins_func_start(context->ssa, &self->ssa_func_index, &func_metadata_index)); + throw_if_error(ssa_ins_func_start(context->ssa, context->ssa->param_counter, &self->ssa_func_index, &func_metadata_index)); scope_generate_ssa(&self->body, context); throw_if_error(ssa_ins_func_end(context->ssa)); /* Add the number of registers used to the function metadata (FUNC_START) */ - context->ssa->reg_counter -= num_params;
- am_memcpy(&context->ssa->instructions.data[func_metadata_index], &context->ssa->reg_counter, sizeof(u16)); + am_memcpy(context->ssa->instructions.data + func_metadata_index, &context->ssa->reg_counter, sizeof(u16)); + + context->ssa->param_counter = prev_param_counter; context->ssa->reg_counter = prev_reg_counter; return 0; } static CHECK_RESULT SsaRegister funccall_generate_ssa(FunctionCall *self, AstResolveData *resolve_data, SsaCompilerContext *context) { - Ast **ast; - Ast **ast_end; SsaRegister reg; FunctionDecl *func_decl; - - ast = buffer_begin(&self->args); - ast_end = buffer_end(&self->args); - for(; ast != ast_end; ++ast) { - SsaRegister arg_reg; - arg_reg = ast_generate_ssa(*ast, context); - throw_if_error(ssa_ins_push(context->ssa, arg_reg)); + LhsExpr *func_lhs_expr; + + { + Ast **arg; + Ast **arg_end; + arg = buffer_begin(&self->args); + arg_end = buffer_end(&self->args); + for(; arg != arg_end; ++arg) { + SsaRegister arg_reg; + arg_reg = ast_generate_ssa(*arg, context); + throw_if_error(ssa_ins_push(context->ssa, arg_reg)); + } } func_decl = resolve_data->type.value.func_sig->func_decl; assert(resolve_data->type.type == RESOLVED_TYPE_FUNC_SIG); - assert(func_decl && "TODO: Implement function call for anonymous closures"); - if(func_decl && func_decl->lhs_expr && LHS_EXPR_IS_EXTERN(func_decl->lhs_expr)) { - amal_log_error("TODO: Implement extern function call (extern function %.*s was called)", self->func.name.size, self->func.name.data); - reg = 0; - assert(bool_false && "TODO: Implement extern function call!"); + func_lhs_expr = NULL; + if(self->func.resolved_var.type == NAMED_OBJECT_LHS_EXPR) + func_lhs_expr = self->func.resolved_var.value.lhs_expr; + + if(func_lhs_expr && LHS_EXPR_IS_EXTERN(func_lhs_expr)) { + throw_if_error(ssa_ins_call_extern(context->ssa, func_lhs_expr->extern_index, buffer_get_size(&self->args, Ast*), &reg)); + } else { /* rhs wont be null here because only extern variable can't have rhs */ - amal_log_debug("SSA funccall %.*s,
func index ptr: %p", self->func.name.size, self->func.name.data, func_decl); throw_if_error(ssa_ins_call(context->ssa, func_decl, buffer_get_size(&self->args, Ast*), &reg)); } diff --git a/src/std/arena_allocator.c b/src/std/arena_allocator.c index 73111dd..4934925 100644 --- a/src/std/arena_allocator.c +++ b/src/std/arena_allocator.c @@ -25,9 +25,10 @@ void arena_allocator_node_deinit(ArenaAllocatorNode *self) { } int arena_allocator_init(ArenaAllocator *self) { + ignore_result_int(buffer_init(&self->mems, NULL)); return_if_error(arena_allocator_node_init(&self->head)); self->current = &self->head; - return buffer_init(&self->mems, NULL); + return 0; } static void arena_allocator_deinit_buffers(ArenaAllocator *self) { diff --git a/src/std/hash_map.c b/src/std/hash_map.c index c2e42c1..2b29f2e 100644 --- a/src/std/hash_map.c +++ b/src/std/hash_map.c @@ -74,14 +74,14 @@ static void* bucket_node_get_value(HashMapBucketNode *self) { } int hash_map_init(HashMap *self, ArenaAllocator *allocator, usize value_type_size, - HashMapCompare compare_func, HashMapHash hash_func) { - assert(compare_func); - assert(hash_func); + HashMapCompare key_compare_func, HashMapHash key_hash_func) { + assert(key_compare_func); + assert(key_hash_func); self->allocator = allocator; self->value_type_size = value_type_size; self->num_elements = 0; - self->compare_func = compare_func; - self->hash_func = hash_func; + self->compare_func = key_compare_func; + self->hash_func = key_hash_func; return_if_error(buffer_init(&self->buckets, self->allocator)); assert(self->buckets.size == 0); return_if_error(buffer_append_empty(&self->buckets, sizeof(HashMapBucket) * HASH_MAP_INITIAL_SIZE)); diff --git a/src/tokenizer.c b/src/tokenizer.c index 1586691..556a20b 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -91,6 +91,7 @@ static CHECK_RESULT int find_end_of_string(BufferView buf, int index) { } /* TODO: Optimize string to integer and string to float */ +/* TODO: Correctly check multiplication
overflow */ #define I64_OVERFLOW_ERROR -1 static CHECK_RESULT int string_to_integer_unchecked(BufferView str, i64 *result) { diff --git a/tests/bytecode.amal b/tests/bytecode.amal index dd9bd3d..096f921 100644 --- a/tests/bytecode.amal +++ b/tests/bytecode.amal @@ -1,4 +1,5 @@ -extern const printf: fn; +extern const print_extern: fn; +extern const print_extern_num: fn(num: i32); const main = fn { var value = 23; @@ -6,9 +7,8 @@ const main = fn { const value2: i64 = 23; const value3 = 2 + 5 - 1 * 10 / 2; const str_value = "hello, world"; - //printf(); print(); - const result = print_num(value3); + const result = print_num(1337); } const print = fn { @@ -16,5 +16,8 @@ const print = fn { } const print_num = fn(num: i32) i32 { + print_extern(); + print_extern_num(num); + print_extern_num(8080); return num; } \ No newline at end of file diff --git a/tests/errors/extern_closure_one_return_value.amal b/tests/errors/extern_closure_one_return_value.amal new file mode 100644 index 0000000..982897f --- /dev/null +++ b/tests/errors/extern_closure_one_return_value.amal @@ -0,0 +1 @@ +extern const func: fn() i32, i32; \ No newline at end of file diff --git a/tests/errors/too_long_var_name.amal b/tests/errors/too_long_var_name.amal new file mode 100644 index 0000000..eff8ff1 --- /dev/null +++ b/tests/errors/too_long_var_name.amal @@ -0,0 +1 @@ +extern const veryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongname: fn; \ No newline at end of file diff --git a/tests/main.c b/tests/main.c index 3336c46..05725f7 100644 --- a/tests/main.c +++ b/tests/main.c @@ -131,6 +131,16 @@ static CHECK_RESULT int get_thread_count_env_var(int *thread_count) { return 0; } +static int print_extern() { + printf("hello from amalgam extern func, print_extern!\n"); + return 0; +} + +static int 
print_extern_num(i64 num) { + printf("hello from amalgam extern func, print_extern_num, value: %ld!\n", num); + return 0; +} + static void test_load(const char *filepath) { amal_compiler_options options; amal_program program; @@ -146,6 +156,16 @@ static void test_load(const char *filepath) { fprintf(stderr, "Failed to initialize amal program\n"); FAIL_TEST(full_path); } + + if(amal_program_add_extern_func(&program, create_buffer_view("print_extern", 12), print_extern, 0) != 0) { + fprintf(stderr, "Unexpected error (alloc failure)\n"); + FAIL_TEST(full_path); + } + if(amal_program_add_extern_func(&program, create_buffer_view("print_extern_num", 16), print_extern_num, sizeof(i64)) != 0) { + fprintf(stderr, "Unexpected error (alloc failure)\n"); + FAIL_TEST(full_path); + } + result = amal_compiler_load_file(&options, &program, filepath); if(result != AMAL_COMPILER_OK) { fprintf(stderr, "Failed to load file %s, result: %d\n", full_path, result); @@ -252,6 +272,8 @@ static void run_all_tests() { " ^\n"); test_load_error("tests/errors/no_main_func.amal", NULL); test_load_error("tests/errors/closure_duplicate_param_name.amal", "TODO: Add expected error here"); + test_load_error("tests/errors/extern_closure_one_return_value.amal", "TODO: Add expected error here"); + test_load_error("tests/errors/too_long_var_name.amal", "TODO: Add expected error here"); } /* TODO: Restrict variables in global scope to const */ -- cgit v1.2.3