From 16aaaa19a3ef4220726007d3e644ced0c9e06513 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Mon, 9 Sep 2019 01:08:34 +0200 Subject: Allow referencing code in imported file (right now for function calls, allow calling a function in another file) --- README.md | 47 +++++--- doc/Documentation.md | 94 +++++++++++----- doc/IMPLEMENTATION.md | 21 ---- executor/executor.h | 3 +- executor/x86_64/asm.c | 34 +++--- executor/x86_64/asm.h | 8 +- executor/x86_64/executor.c | 77 +++++-------- include/ast.h | 34 ++++-- include/bytecode/bytecode.h | 46 +++++++- include/defs.h | 3 + include/parser.h | 10 +- include/program.h | 11 ++ include/ssa/ssa.h | 18 ++- include/std/buffer.h | 2 + include/std/buffer_view.h | 2 +- include/std/hash_map.h | 1 + include/std/log.h | 2 +- include/std/mem.h | 2 +- include/std/misc.h | 8 +- include/std/thread.h | 4 +- src/ast.c | 149 +++++++++++++++++------- src/bytecode/bytecode.c | 268 +++++++++++++++++++++++++++++++++----------- src/compiler.c | 93 +++++++++++---- src/parser.c | 91 ++++++++++----- src/program.c | 238 +++++++++++++++++++++++++++++++++------ src/ssa/ssa.c | 99 +++++++++++----- src/std/buffer.c | 13 +++ src/std/buffer_view.c | 2 +- src/std/hash_map.c | 32 ++++++ src/std/log.c | 4 +- src/std/mem.c | 3 +- src/std/thread.c | 8 +- std/io.amal | 5 + tests/bytecode.amal | 17 ++- tests/main.c | 6 +- 35 files changed, 1054 insertions(+), 401 deletions(-) delete mode 100644 doc/IMPLEMENTATION.md create mode 100644 std/io.amal diff --git a/README.md b/README.md index 2760c93..2668852 100644 --- a/README.md +++ b/README.md @@ -9,32 +9,45 @@ Files have to be in utf-8 format and can optionally have utf-8 BOM. ## Important Amalgam is not ready to be used yet. # Fast compilation -Every stage of the compiler is multithreaded and data copy is kept to a minimal, for example tokenization +Every stage of the compiler runs concurrently, each compiling different files. 
Memory copying is kept to a minimum, for example tokenization is done without storing tokens in a list. Almost all allocation is done using an arena allocator that is only cleaned up once (when the compiler is finished), and the data is allocated sequentially. +Locks are only used in one place in the whole compilation stage, during @import statements. # Dependencies Right now only the C standard library (C89) is required for a release build. In the future dependency on the C standard library might be removed and then amalgam would have 0 dependencies.\ python2 is needed for tests to run additional code analyzis. +# Limits +Amalgam places limits on code for performance reasons. These are the limits: + +* One file can't have more than 254 imports, have more than 2^16 functions or use more than 2^16 functions. +* Every function can only use up to 2^16 registers and parameters (combined). +* Every function can only have up to 128 parameters and 128 return values. +* Exported and external functions can only have 0 or 1 return values, as that is what C supports. + # TODO -Build with -nostdlib and replace use of libc with syscalls (on linux).\ -Don't parse files unless the variable they are assigned to (with @import) is used. This is useful when only using small parts of a library. +* Build with -nostdlib and replace use of libc with syscalls (on linux). +* Don't parse files unless the variable they are assigned to (with @import) is used. This is useful when only using small parts of a library. This could be done checking if an AST expression is referenced before evaluating it. There would then need to be a compile option that compiles everything even if not referenced, since another user of the program/library may use the functions that are not used in your program -and they might have compile-issues.\ -Align machine code to word boundary for the start of functions. 
No need to pad with NOP, as functions return before the padding.\ -Use const to cleanup ANSI C style variable declarations, since const allows you to declare and assign variables on the same line.\ -Make the bytecode work with big endian. On a big endian machine, the bytecode should be converted to little endian -to make work on little endian as little as possible, meaning it would be a small penality to use big endian.\ -Verify all members of an extern struct are extern as well. Verify all parameters are of extern types for extern functions.\ -Verify all code execution paths in a function return a value, if the function excepts return values.\ -Show compile error if the result of a function call is ignored.\ -Show compile error if function result type and assigned to variable have different types.\ -Show compile error if variables are assigned to but not used.\ -Push arguments in reverse order (right-to-left, cdecl) (in program.c, since on windows we will need to support stdcall which is left-to-right).\ -After tokenizing files, unload file data to disk if possible.\ -Parallelize program decoding, if there is an advantage to doing it.\ -Logical AND binop should skip expressions the moment the result becomes false. +and they might have compile-issues. +* Align machine code to word boundary for the start of functions. No need to pad with NOP, as functions return before the padding. +* Use const to cleanup ANSI C style variable declarations, since const allows you to declare and assign variables on the same line. +* Make the bytecode work with big endian. On a big endian machine, the bytecode should be converted to little endian +to make work on little endian as little as possible, meaning it would be a small penalty to use big endian. +* Verify all members of an extern struct are extern as well. Verify all parameters are of extern types for extern functions. +* Verify all code execution paths in a function return a value, if the function expects return values. 
+* Show compile error if the result of a function call is ignored. +* Show compile error if variables are assigned to but not used. +* Push arguments in reverse order (right-to-left, cdecl) (in program.c, since on windows we will need to support stdcall which is left-to-right). +* After tokenizing files, unload file data to disk if possible. +* Parallelize program decoding, if there is an advantage to doing it. +* Logical AND binop should skip expressions the moment the result becomes false. +* Reorder functions and imports by how they are used. Functions and imports that are used in some files +should be in the bytecode before the files that they are used from, to reduce deferred function calls. +* Document all limits. +* To remove some overhead that external variables have on LhsExpr, make an ExternLhsExpr type. +* Make function calls work for functions that return no value or return multiple values. # Documents Documents are located under doc. The file doc/Documentation.md is generated from source files by running doc/doc_extract.py but there is no need to run this script unless you are modifying documentation in the source. diff --git a/doc/Documentation.md b/doc/Documentation.md index adb0bef..ca579a2 100644 --- a/doc/Documentation.md +++ b/doc/Documentation.md @@ -16,11 +16,14 @@ Instructions can be in 7 different formats: 6.2 Opcode(u8) + register(i8) + intermediate(u16)\ 6.3 Opcode(u8) + register(i8) + data(u16)\ 6.4 Opcode(u8) + flags(u8) + num_local_var_reg(u16) -7. 5 bytes: Opcode(u8) + index(u16) + num_args(u8) + register(i8) +7. 5 bytes: Opcode(u8) + index(u8) + index(u16) + num_args(u8) ## Registers Registers have a range of 128. Local variables start from register 0 and increment while parameters start from -1 and decrement. Registers have the scope of functions and reset after instructions reach a new function (AMAL_OP_FUNC_START). 
+If import index for call and calle is 0, then that means the function resides in the same file the function call +is being called from. Which means that import index 1 is actually import index 0 into the import list. + # Compiler flow (Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\ Each step except the last is done using multiple threads in parallel and the output of each step is used @@ -29,7 +32,9 @@ and writing it to a file, which is an IO bottlenecked operation and it won't ben and may even lose performance because of it. # Bytecode -The layout of the full bytecode is: Header (Intermediates Strings Functions External_Functions Exported_Functions Instructions)* +The layout of the full bytecode is: Header (X Intermediates X Strings X Functions X External_Functions X Exported_Functions X Imports X Instructions)*\ +Where the X is a magic number to make it easier to find errors while decoding the bytecode.\ +The value of the magic number is @AMAL_BYTECODE_SECTION_MAGIC_NUMBER # Bytecode header ## Header layout @@ -44,10 +49,10 @@ The versions in the header only changes for every release, not every change. # Bytecode intermediates ## Intermediates layout -|Type |Field |Description | -|------------|------------------|-------------------------------------------------------------------------------| -|u32 |Intermediates size|The size of the intermediates section, in bytes. | -|Intermediate|Intermediate data |Multiple intermediates, where the total size is defined by @Intermediates size.| +|Type |Field |Description | +|--------------|------------------|-------------------------------------------------------------------------------| +|u32 |Intermediates size|The size of all intermediates, in bytes. 
| +|Intermediate[]|Intermediate data |Multiple intermediates, where the total size is defined by @Intermediates size.| ## Intermediate |Type|Field|Description | @@ -57,11 +62,11 @@ The versions in the header only changes for every release, not every change. # Bytecode strings ## Strings layout -|Type |Field |Description | -|------|-----------------|------------------------------------------------------------------| -|u16 |Number of strings|The number of strings. | -|u32 |Strings size |The size of the strings section, in bytes. | -|String|Strings data |Multiple strings, where the total size is defined by @Strings size| +|Type |Field |Description | +|--------|-----------------|------------------------------------------------------------------| +|u16 |Number of strings|The number of strings. | +|u32 |Strings size |The size of all strings, in bytes. | +|String[]|Strings data |Multiple strings, where the total size is defined by @Strings size| ## String |Type|Field|Description | @@ -70,33 +75,46 @@ The versions in the header only changes for every release, not every change. |u8* |Data|The data of the string, where the size is defined by @Size. Strings are null-terminated.| # Bytecode functions -## Internal functions layout -|Type|Field |Description | -|----|-------------------|---------------------------------| -|u16 |Number of functions|The number of internal functions.| +## Functions layout +|Type |Field |Description | +|----------|----------|--------------------------------------------------------------------------------------| +|u16 |num_funcs |The number of non-extern functions. | +|u32 |funcs_size|The size of all functions, in bytes. 
| +|Function[]|Functions |Multiple non-extern functions, where the number of functions is defined by @num_funcs.| + +## Function +|Type|Field |Description | +|----|-------------------------|------------------------------------------------------------------------------------------------------------------------| +|u32 |func_offset |The offset in the program code (machine code) where the function starts. Is always 0 until the program has been started.| +|u8 |num_params |The number of parameters. | +|u32 |params_num_pointers |The number of pointers in the parameters. | +|u32 |params_fixed_size |The size of all non-pointer type parameters, in bytes. | +|u8 |num_return_types |The number of return values. | +|u32 |return_types_num_pointers|The number of pointers in the return types. | +|u32 |return_types_fixed_size |The size of all non-pointer type return types, in bytes. | # Bytecode external functions ## External functions layout -|Type |Field |Description | -|-----------------|------------------|-----------------------------------------------------------------------------------------| -|u16 |num_extern_func |The number of external functions. | -|u32 |extern_funcs_size |The size of the external functions section, in bytes. | -|External function|External functions|Multiple external functions, where the number of functions is defined by @num_extern_func| +|Type |Field |Description | +|-------------------|------------------|-----------------------------------------------------------------------------------------| +|u16 |num_extern_func |The number of external functions. | +|u32 |extern_funcs_size |The size of all external functions, in bytes. 
| +|External function[]|External functions|Multiple external functions, where the number of functions is defined by @num_extern_func| ## External function -|Type|Field |Description | -|----|--------|-----------------------------------------------------------------------------------------------------| -|u8 |num_args|The number of arguments the functions has. | -|u8 |name_len|The length of the external function name, in bytes. Excluding the null-terminate character. | -|u8* |name |The name of the external function, where the size is defined by @name_len. Names are null-terminated.| +|Type|Field |Description | +|----|----------|-----------------------------------------------------------------------------------------------------| +|u8 |num_params|The number of parameters the functions has. | +|u8 |name_len |The length of the external function name, in bytes. Excluding the null-terminate character. | +|u8[]|name |The name of the external function, where the size is defined by @name_len. Names are null-terminated.| # Bytecode exported functions ## Exported functions layout -|Type |Field |Description | -|-----------------|------------------|-----------------------------------------------------------------------------------------| -|u16 |num_export_func |The number of exported functions. | -|u32 |export_funcs_size |The size of the exported functions section, in bytes. | -|Exported function|Exported functions|Multiple exported functions, where the number of functions is defined by @num_export_func| +|Type |Field |Description | +|-------------------|------------------|-----------------------------------------------------------------------------------------| +|u16 |num_export_func |The number of exported functions. | +|u32 |export_funcs_size |The size of all exported functions, in bytes. 
| +|Exported function[]|Exported functions|Multiple exported functions, where the number of functions is defined by @num_export_func| ## Exported function |Type|Field |Description | @@ -104,7 +122,21 @@ The versions in the header only changes for every release, not every change. |u32 |instruction_offset|The offset in the instruction data where the exported function is defined. Is always 0 until the program has been started.| |u8 |num_args |The number of arguments the functions has. | |u8 |name_len |The length of the exported function name, in bytes. Excluding the null-terminate character. | -|u8* |name |The name of the exported function, where the size is defined by @name_len. Names are null-terminated. | +|u8[]|name |The name of the exported function, where the size is defined by @name_len. Names are null-terminated. | + +# Bytecode imports +## Imports layout +|Type |Field |Description | +|--------|------------|-------------------------------------------------------------------------| +|u8 |num_imports |The number of imports. | +|u32 |imports_size|The size of all imports, in bytes. | +|Import[]|Import |Multiple imports, where the number of imports is defined by @num_imports.| + +## Import +|Type|Field |Description | +|----|---------------------|----------------------------------------------------------------------------------------| +|u32 |function_index |The index in the bytecode where function header begins for the imported file. | +|u32 |extern_function_index|The index in the bytecode where the extern function header begins for the imported file.| # Bytecode instructions ## Instructions layout diff --git a/doc/IMPLEMENTATION.md b/doc/IMPLEMENTATION.md deleted file mode 100644 index 387c6eb..0000000 --- a/doc/IMPLEMENTATION.md +++ /dev/null @@ -1,21 +0,0 @@ -# Goal -1. In the first stage the parser parses multiple files at the same time using multiple threads. 
-The tokenization should be done without storing the tokens in a list (streaming) but AST needs to be stored in a list -because the compiler needs to support out of order declarations. -2. In the second stage the ast is handled using multiple threads. In this stage, variables, parameters -and types are defined and resolved and if a type is defined after there is a reference to it, -then the compiler first resolves that type. There are flags set to make sure there aren't recursive dependencies. -3. In the third stage the resolved ast is used to create SSA form (static single assignment form). If optimization is -enabled then then some inlining for ast is done by copying ast from functions to the places they are called from -before the SSA is created. -4. In the fourth stage the SSA form is used to create the bytecode. If optimization is enabled then the SSA form -is optimized before creating the bytecode. -5. If optimization is enabled then the bytecode is optimized. - -# Progress -1. Parsing using multiple threads is done, but the parser is not finished. -2. Resolving ast using multiple threads is done, but the ast resolver is not finished. -3. Generating ssa using multiple threads is done, but the ssa generator is not finished. -4. Generating bytecode using multiple threads is done, but the bytecode generator is not finished. -Currently it generates C code. -5. Not started. 
\ No newline at end of file diff --git a/executor/executor.h b/executor/executor.h index a0be5c9..743500c 100644 --- a/executor/executor.h +++ b/executor/executor.h @@ -39,7 +39,8 @@ CHECK_RESULT int amal_exec_div(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 CHECK_RESULT int amal_exec_push(amal_executor *self, i8 reg); CHECK_RESULT int amal_exec_pushi(amal_executor *self, i64 imm); CHECK_RESULT int amal_exec_pushd(amal_executor *self, BufferView data); -CHECK_RESULT int amal_exec_call(amal_executor *self, u16 func_index, u8 num_args, i8 dst_reg); +CHECK_RESULT int amal_exec_call(amal_executor *self, u32 code_offset, u8 num_args, i8 dst_reg); +void amal_exec_call_overwrite(amal_executor *self, u32 call_code_offset, i32 new_target_rel32); /*CHECK_RESULT int amal_exec_callr(i8 dst_reg, BufferView data);*/ CHECK_RESULT int amal_exec_calle(amal_executor *self, void *func, u8 num_args, i8 dst_reg); CHECK_RESULT int amal_exec_cmp(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2); diff --git a/executor/x86_64/asm.c b/executor/x86_64/asm.c index c2b00ef..e29130e 100644 --- a/executor/x86_64/asm.c +++ b/executor/x86_64/asm.c @@ -218,24 +218,22 @@ int asm_execute(Asm *self, u32 offset) { /*asm_print_code_hex(self);*/ /* TODO: Verify if this is valid on all platforms. According to ISO C standard it isn't? 
*/ - *(void**)(&func) = self->code + offset; + *(void**)(&func) = (u8*)self->code + offset; func(); return 0; } /* TODO: See how this can be optimized */ int asm_ensure_capacity(Asm *self, usize size) { - usize current_offset; - current_offset = (u8*)self->code_it - (u8*)self->code; + usize current_offset = (u8*)self->code_it - (u8*)self->code; if(current_offset + size > self->allocated_size) { - void *new_mem; - usize new_size; - new_size = self->allocated_size + am_pagesize(); - new_mem = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if(self->code == MAP_FAILED) + usize new_size = self->allocated_size + am_pagesize(); + void *new_mem = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if(new_mem == MAP_FAILED) return -errno; am_memcpy(new_mem, self->code, self->allocated_size); + munmap(self->code, self->allocated_size); self->code = new_mem; self->allocated_size = new_size; self->code_it = (u8*)self->code + current_offset; @@ -435,7 +433,7 @@ void asm_callr(Asm *self, Reg64 reg) { /* Note: This is sometimes called with @relative 0 (will print call -5), in which case it's most likely a dummy call until the relative position - is later changed with @asm_override_call_rel32. TODO: Update the ins_end debug print to take that into account somehow + is later changed with @asm_overwrite_call_rel32. 
TODO: Update the ins_end debug print to take that into account somehow */ void asm_call_rel32(Asm *self, i32 relative) { ins_start(self); @@ -446,8 +444,8 @@ void asm_call_rel32(Asm *self, i32 relative) { ins_end(self, "call 0x%x", relative); } -void asm_override_call_rel32(Asm *self, u32 asm_index, i32 new_relative) { - assert(*(u8*)(self->code + asm_index) == 0xE8); +void asm_overwrite_call_rel32(Asm *self, u32 asm_index, i32 new_relative) { + assert(*((u8*)self->code + asm_index) == 0xE8); new_relative -= 5; /* In x86, the relative position starts from the next instruction */ am_memcpy((u8*)self->code + asm_index + 1, &new_relative, sizeof(new_relative)); } @@ -480,7 +478,7 @@ void asm_sete_r(Asm *self, Reg64 dst) { /* Note: This is sometimes called with @relative INT32_MAX-(2 or 6) (will print jz 0x7ffffff9), in which case it's most likely a dummy - jump until the relative position is later changed with @asm_override_jcc_rel32. + jump until the relative position is later changed with @asm_overwrite_jcc_rel32. 
TODO: Update the ins_end debug print to take that into account somehow */ void asm_jz(Asm *self, i32 relative) { @@ -503,17 +501,17 @@ void asm_jz(Asm *self, i32 relative) { ins_end(self, "jz 0x%x", relative); } -void asm_override_jcc_rel32(Asm *self, u32 asm_index, i32 new_relative) { +void asm_overwrite_jcc_rel32(Asm *self, u32 asm_index, i32 new_relative) { /* +2 because rel32 variant of the jump instruction opcode is 2 bytes */ - assert(*(u8*)(self->code + asm_index) == 0x0F); - assert(*(u8*)(self->code + asm_index + 1) == 0x84); + assert(*((u8*)self->code + asm_index) == 0x0F); + assert(*((u8*)self->code + asm_index + 1) == 0x84); new_relative -= 6; /* In x86, the relative position starts from the next instruction */ am_memcpy((u8*)self->code + asm_index + 2, &new_relative, sizeof(new_relative)); } /* Note: This is sometimes called with @relative INT32_MAX-(2 or 5) (will print jmp 0x7ffffffa), in which case it's most likely a dummy - jump until the relative position is later changed with @asm_override_jmp_rel32. + jump until the relative position is later changed with @asm_overwrite_jmp_rel32. 
TODO: Update the ins_end debug print to take that into account somehow */ void asm_jmp(Asm *self, i32 relative) { @@ -535,9 +533,9 @@ void asm_jmp(Asm *self, i32 relative) { ins_end(self, "jmp 0x%x", relative); } -void asm_override_jmp_rel32(Asm *self, u32 asm_index, i32 new_relative) { +void asm_overwrite_jmp_rel32(Asm *self, u32 asm_index, i32 new_relative) { /* +1 to skip instruction opcode */ - assert(*(u8*)(self->code + asm_index) == 0xE9); + assert(*((u8*)self->code + asm_index) == 0xE9); new_relative -= 5; /* In x86, the relative position starts from the next instruction */ am_memcpy((u8*)self->code + asm_index + 1, &new_relative, sizeof(new_relative)); } diff --git a/executor/x86_64/asm.h b/executor/x86_64/asm.h index a3f1b5a..6ac74f4 100644 --- a/executor/x86_64/asm.h +++ b/executor/x86_64/asm.h @@ -88,7 +88,7 @@ void asm_callr(Asm *self, Reg64 reg); by this asm library itself. */ void asm_call_rel32(Asm *self, i32 relative); -void asm_override_call_rel32(Asm *self, u32 asm_index, i32 new_relative); +void asm_overwrite_call_rel32(Asm *self, u32 asm_index, i32 new_relative); void asm_cmp_rm(Asm *self, Reg64 reg1, AsmPtr *reg2); /* @@ -104,15 +104,15 @@ void asm_sete_r(Asm *self, Reg64 dst); by this asm library itself. */ void asm_jz(Asm *self, i32 relative); -/* Override conditional jump target */ -void asm_override_jcc_rel32(Asm *self, u32 asm_index, i32 new_relative); +/* Overwrite conditional jump target */ +void asm_overwrite_jcc_rel32(Asm *self, u32 asm_index, i32 new_relative); /* In x86 assembly, the @relative position starts from the next instruction. This offset shouldn't be calculated by the caller and is instead managed by this asm library itself. 
*/ void asm_jmp(Asm *self, i32 relative); -void asm_override_jmp_rel32(Asm *self, u32 asm_index, i32 new_relative); +void asm_overwrite_jmp_rel32(Asm *self, u32 asm_index, i32 new_relative); diff --git a/executor/x86_64/executor.c b/executor/x86_64/executor.c index 9083e14..c442da8 100644 --- a/executor/x86_64/executor.c +++ b/executor/x86_64/executor.c @@ -1,6 +1,7 @@ #include "../executor.h" #include "../../include/std/alloc.h" #include "../../include/std/buffer.h" +#include "../../include/std/log.h" #include "asm.h" #include @@ -17,11 +18,6 @@ */ #define MAX_LABELS 128 -typedef struct { - u32 asm_index; - u16 func_index; -} CallDefer; - typedef struct { u32 asm_index; u16 target_label; @@ -30,18 +26,17 @@ typedef struct { typedef struct { Asm asm; - usize *function_indices; - u16 num_functions; u16 func_counter; - Buffer/*CallDefer*/ call_defer; Buffer/*JumpDefer*/ jump_defer; u32 label_asm_index[MAX_LABELS]; int label_counter; } amal_executor_impl; +#define ASM_ENSURE_CAPACITY return_if_error(asm_ensure_capacity(&impl->asm, 256)); + #define IMPL_START \ amal_executor_impl *impl = (amal_executor_impl*)self; \ - return_if_error(asm_ensure_capacity(&impl->asm, 256)); + ASM_ENSURE_CAPACITY /* @reg will be a positive value when accessing local variables, in which case the first @@ -64,10 +59,7 @@ int amal_executor_init(amal_executor **self) { impl = (amal_executor_impl**)self; *impl = NULL; return_if_error(am_malloc(sizeof(amal_executor_impl), (void**)impl)); - (*impl)->function_indices = NULL; - (*impl)->num_functions = 0; (*impl)->func_counter = 0; - ignore_result_int(buffer_init(&(*impl)->call_defer, NULL)); ignore_result_int(buffer_init(&(*impl)->jump_defer, NULL)); (*impl)->label_counter = 0; return asm_init(&(*impl)->asm); @@ -76,8 +68,6 @@ int amal_executor_init(amal_executor **self) { void amal_executor_deinit(amal_executor *self) { amal_executor_impl *impl = (amal_executor_impl*)self; buffer_deinit(&impl->jump_defer); - buffer_deinit(&impl->call_defer); - 
am_free(impl->function_indices); asm_deinit(&impl->asm); am_free(impl); } @@ -93,26 +83,13 @@ u32 amal_exec_get_code_offset(amal_executor *self) { } int amal_executor_instructions_start(amal_executor *self, u16 num_functions) { - amal_executor_impl *impl = (amal_executor_impl*)self; - return_if_error(am_realloc(impl->function_indices, num_functions * sizeof(usize), (void**)&impl->function_indices)); - impl->num_functions = num_functions; + (void)self; + (void)num_functions; return 0; } int amal_executor_instructions_end(amal_executor *self) { amal_executor_impl *impl = (amal_executor_impl*)self; - CallDefer *call_defer = buffer_begin(&impl->call_defer); - CallDefer *call_defer_end = buffer_end(&impl->call_defer); - for(; call_defer != call_defer_end; ++call_defer) { - i32 func_offset; - if(call_defer->func_index >= impl->num_functions) { - amal_log_error("Program attempted to call a function that doesn't exist (index %u, while there are only %u functions)", call_defer->func_index, impl->num_functions); - return -1; - } - func_offset = (isize)impl->function_indices[call_defer->func_index] - (isize)call_defer->asm_index; - asm_override_call_rel32(&impl->asm, call_defer->asm_index, func_offset); - } - buffer_clear(&impl->call_defer); impl->func_counter = 0; return 0; } @@ -299,25 +276,16 @@ int amal_exec_pushd(amal_executor *self, BufferView data) { return 0; } -int amal_exec_call(amal_executor *self, u16 func_index, u8 num_args, i8 dst_reg) { - IMPL_START +int amal_exec_call(amal_executor *self, u32 code_offset, u8 num_args, i8 dst_reg) { + amal_executor_impl *impl = (amal_executor_impl*)self; /* TODO: Preserve necessary registers before call? */ /* TODO: This assumes all arguments are isize */ /* Do the function call */ isize asm_offset = asm_get_size(&impl->asm); - if(func_index < impl->func_counter) { - asm_call_rel32(&impl->asm, (isize)impl->function_indices[func_index] - asm_offset); - } else { - /* - The location of the function has not been defined yet. 
Use call instruction with dummy data and change - the location once the location to the function is known - */ - CallDefer call_defer; - call_defer.asm_index = asm_offset; - call_defer.func_index = func_index; - return_if_error(buffer_append(&impl->call_defer, &call_defer, sizeof(call_defer))); - asm_call_rel32(&impl->asm, 0); - } + ASM_ENSURE_CAPACITY + + assert(code_offset < asm_offset); + asm_call_rel32(&impl->asm, (isize)code_offset - asm_offset); /* Handle function result and cleanup */ { @@ -331,6 +299,11 @@ int amal_exec_call(amal_executor *self, u16 func_index, u8 num_args, i8 dst_reg) return 0; } +void amal_exec_call_overwrite(amal_executor *self, u32 call_code_offset, i32 new_target_rel32) { + amal_executor_impl *impl = (amal_executor_impl*)self; + asm_overwrite_call_rel32(&impl->asm, call_code_offset, new_target_rel32); +} + const Reg64 SYS_V_PARAM_REGS[] = { RDI, RSI, RDX, RCX }; /* @@ -379,9 +352,9 @@ int amal_exec_callr(i8 dst_reg, BufferView data) { */ int amal_exec_cmp(amal_executor *self, i8 dst_reg, i8 src_reg1, i8 src_reg2) { + AsmPtr dst, src1, src2; IMPL_START - AsmPtr dst, src1, src2; asm_ptr_init_disp(&dst, RBP, get_register_stack_offset(dst_reg)); asm_ptr_init_disp(&src1, RBP, get_register_stack_offset(src_reg1)); asm_ptr_init_disp(&src2, RBP, get_register_stack_offset(src_reg2)); @@ -424,8 +397,9 @@ int amal_exec_jz(amal_executor *self, i8 reg, u16 target_label) { } int amal_exec_jmp(amal_executor *self, u16 target_label) { - IMPL_START + amal_executor_impl *impl = (amal_executor_impl*)self; u32 asm_offset = asm_get_size(&impl->asm); + ASM_ENSURE_CAPACITY if(target_label < impl->label_counter) { asm_jmp(&impl->asm, (i32)impl->label_asm_index[target_label] - (i32)asm_offset); return 0; @@ -466,7 +440,6 @@ int amal_exec_func_start(amal_executor *self, u16 num_regs) { 64-bit Linux,BSD,Mac: RBX, RBP, R12-R15 */ IMPL_START - impl->function_indices[impl->func_counter++] = asm_get_size(&impl->asm); asm_pushr(&impl->asm, RBX); asm_pushr(&impl->asm, 
RBP); asm_mov_rr(&impl->asm, RBP, RSP); @@ -475,10 +448,10 @@ int amal_exec_func_start(amal_executor *self, u16 num_regs) { } int amal_exec_func_end(amal_executor *self) { - IMPL_START - + amal_executor_impl *impl = (amal_executor_impl*)self; JumpDefer *jump_defer = buffer_begin(&impl->jump_defer); JumpDefer *jump_defer_end = buffer_end(&impl->jump_defer); + ASM_ENSURE_CAPACITY for(; jump_defer != jump_defer_end; ++jump_defer) { i32 jump_offset; if(jump_defer->target_label >= impl->label_counter) { @@ -487,9 +460,9 @@ int amal_exec_func_end(amal_executor *self) { } jump_offset = (isize)impl->label_asm_index[jump_defer->target_label] - (isize)jump_defer->asm_index; if(jump_defer->condition) - asm_override_jcc_rel32(&impl->asm, jump_defer->asm_index, jump_offset); + asm_overwrite_jcc_rel32(&impl->asm, jump_defer->asm_index, jump_offset); else - asm_override_jmp_rel32(&impl->asm, jump_defer->asm_index, jump_offset); + asm_overwrite_jmp_rel32(&impl->asm, jump_defer->asm_index, jump_offset); } buffer_clear(&impl->jump_defer); impl->label_counter = 0; @@ -502,7 +475,7 @@ int amal_exec_func_end(amal_executor *self) { } int amal_exec_label(amal_executor *self) { - IMPL_START + amal_executor_impl *impl = (amal_executor_impl*)self; assert(impl->label_counter < MAX_LABELS); impl->label_asm_index[impl->label_counter++] = asm_get_size(&impl->asm); return 0; diff --git a/include/ast.h b/include/ast.h index edf225b..0c34d0b 100644 --- a/include/ast.h +++ b/include/ast.h @@ -23,7 +23,6 @@ typedef struct FunctionParameter FunctionParameter; typedef struct FunctionCall FunctionCall; typedef struct StructDecl StructDecl; typedef struct StructField StructField; -typedef struct LhsExpr LhsExpr; typedef struct AssignmentExpr AssignmentExpr; typedef struct Import Import; typedef struct String String; @@ -147,10 +146,16 @@ typedef struct { } VariableType; struct FileScopeReference { - Parser *parser; + Parser *parser; /* Borrowed, the parser that belongs to the same file as the 
FileScopeReference */ Buffer canonical_path; }; +struct ParserFileScopeReference { + FileScopeReference *file_scope_ref; + /* Index in the parser import list. Used by bytecode to know which import an expression belongs to */ + int import_index; +}; + struct Variable { BufferView name; ScopeNamedObject resolved_var; @@ -177,7 +182,7 @@ struct FunctionSignature { struct FunctionDecl { LhsExpr *lhs_expr; /* Borrowed from the LhsExpr that owns this FunctionDecl if it exists, otherwise NULL */ - FunctionSignature *signature; + FunctionSignature *signature; /* TODO: Make this a non-pointer type? */ Scope body; SsaFuncIndex ssa_func_index; }; @@ -188,7 +193,15 @@ struct FunctionCall { }; struct StructDecl { + /* + TODO: Instead of having a scope body, use a body with only structdecl. This improves performance + and simplifies the code. + */ Scope body; + /* The number of fields that are pointers or a type that is different on different platforms (i.e isize, usize, etc), recursive */ + u32 fields_num_pointers; + /* The total size of all fields which are of a type that have the same size on all platforms (i.e u8, i32, etc), recursive */ + u32 fields_fixed_size_bytes; }; struct StructField { @@ -232,7 +245,7 @@ struct AssignmentExpr { struct Import { BufferView path; - FileScopeReference *file_scope; + ParserFileScopeReference *file_scope; }; struct String { @@ -278,6 +291,11 @@ struct ReturnExpr { Ast *rhs_expr; }; +typedef struct { + u32 num_pointers; + u32 fixed_size; +} TypeSize; + typedef struct { jmp_buf env; amal_compiler *compiler; /* Borrowed */ @@ -303,6 +321,7 @@ void function_parameter_init(FunctionParameter *self); CHECK_RESULT int funcdecl_init(FunctionDecl *self, FunctionSignature *signature, Scope *parent, ArenaAllocator *allocator); CHECK_RESULT int funccall_init(FunctionCall *self, BufferView name, ArenaAllocator *allocator); CHECK_RESULT int structdecl_init(StructDecl *self, Scope *parent, ArenaAllocator *allocator); +CHECK_RESULT int 
structdecl_add_field(StructDecl *self, StructField *field, ArenaAllocator *allocator); LhsExpr* structdecl_get_field_by_name(StructDecl *self, BufferView field_name); /* Copies @type */ void structfield_init(StructField *self, BufferView name, VariableType *type); @@ -318,14 +337,13 @@ CHECK_RESULT int else_if_statement_init(ElseIfStatement *self, Scope *parent, Ar CHECK_RESULT int while_statement_init(WhileStatement *self, Scope *parent, ArenaAllocator *allocator); void return_expr_init(ReturnExpr *self, Ast *rhs_expr); +TypeSize resolved_type_get_byte_size(AstResolvedType *self); + CHECK_RESULT int scope_init(Scope *self, Scope *parent, ArenaAllocator *allocator); CHECK_RESULT int file_scope_reference_init(FileScopeReference *self, BufferView canonical_path, ArenaAllocator *allocator); CHECK_RESULT int scope_add_child(Scope *self, Ast *child); + /* longjump to compiler env on failure */ void scope_resolve(Scope *self, AstCompilerContext *context); - - -CHECK_RESULT bool resolved_type_is_func_decl(Ast *self); - #endif diff --git a/include/bytecode/bytecode.h b/include/bytecode/bytecode.h index b1b5dda..c495d7a 100644 --- a/include/bytecode/bytecode.h +++ b/include/bytecode/bytecode.h @@ -26,11 +26,15 @@ 6.2 Opcode(u8) + register(i8) + intermediate(u16)\ 6.3 Opcode(u8) + register(i8) + data(u16)\ 6.4 Opcode(u8) + flags(u8) + num_local_var_reg(u16) - 7. 5 bytes: Opcode(u8) + index(u16) + num_args(u8) + register(i8) + 7. 5 bytes: Opcode(u8) + index(u8) + index(u16) + num_args(u8) # Registers Registers have a range of 128. Local variables start from register 0 and increment while parameters start from -1 and decrement. Registers have the scope of functions and reset after instructions reach a new function (AMAL_OP_FUNC_START). + + If import index for call and calle is 0, then that means the function resides in the same file the function call + is being called from. Which means that import index 1 is actually import index 0 into the import list. 
*/ +/* Important: The number of fields in this enum can't exceed 255 */ typedef enum { AMAL_OP_NOP, /* No operation (do nothing). This can be used for patching code */ AMAL_OP_SETZ, /* setz reg - Set register value to 0 */ @@ -46,16 +50,17 @@ typedef enum { AMAL_OP_PUSH, /* push reg - Push register onto stack */ AMAL_OP_PUSHI, /* pushi int - Push intermediate onto stack */ AMAL_OP_PUSHD, /* pushd data - Push data onto stack */ - AMAL_OP_CALL, /* call fi, num_args, dst - Call a function using function index (fi) and num_args arguments. The result is stored in register dst. fi is u16 and num_args is u8 */ + AMAL_OP_PUSH_RET, /* push_ret reg - Push register onto stack as a return value of the next function call */ + AMAL_OP_CALL, /* call ii, fi, num_args - Call a function in imported file (ii, import index) using function index (fi) and num_args arguments. ii is u8, fi is u16 and num_args is u8 */ AMAL_OP_CALLR, /* callr reg, num_args - Call a function using a register. Used for function pointers. num_args is u8 */ - AMAL_OP_CALLE, /* calle efi, num_args, dst - Call an extern function using extern function index (efi) and num_args arguments. The result is stored in register dst. efi is u16 and num_args is u8 */ + AMAL_OP_CALLE, /* calle ii, efi, num_args - Call an extern function in imported file (ii, import index) using extern function index (efi) and num_args arguments. ii is u8, efi is u16 and num_args is u8 */ AMAL_OP_CMP, /* cmp dst, reg1, reg2 - Set dst to 1 if reg1 equals reg2, otherwise set it to 0 */ AMAL_OP_JZ, /* jz reg, label - Jump to label in the current function if reg is zero. label is u16 */ AMAL_OP_JMP, /* jmp label - Unconditional jump to label in the current function. label is u16 */ AMAL_OP_RET, /* ret reg - Return from the function with reg result */ AMAL_OP_FUNC_START, /* func_start flags, num_local_var_reg - Start of a function which has @num_local_var_reg local variable registers allocated and has the flag @flag. 
@flag is u8 and @num_local_var_reg is u16 */ AMAL_OP_FUNC_END, /* func_end - End of a function. Implementation should do a ret here */ - AMAL_OP_LABEL, /* label - Label. This is the target of a jump instruction. Jump instructions only jump to labels in the same function scope */ + AMAL_OP_LABEL /* label - Label. This is the target of a jump instruction. Jump instructions only jump to labels in the same function scope */ } AmalOpcode; #define AMAL_BYTECODE_MAGIC_NUMBER (u32)0xdec05eba @@ -63,6 +68,8 @@ typedef enum { #define AMAL_BYTECODE_MINOR_VERSION 0 #define AMAL_BYTECODE_PATCH_VERSION 0 +#define AMAL_BYTECODE_SECTION_MAGIC_NUMBER (u32)0x004005e4 /* "section\0" in ascii */ + #define AMAL_BYTECODE_NUM_REGISTERS 256 typedef enum { @@ -72,13 +79,40 @@ typedef enum { typedef u8 AmalOpcodeType; +/* TODO: Make sure this pragma pack works on all platforms */ +#pragma pack(push, 1) +typedef struct { + u32 func_offset; + u8 num_params; + u32 params_num_pointers; + u32 params_fixed_size; + + u8 num_return_types; + u32 return_types_num_pointers; + u32 return_types_fixed_size; +} BytecodeHeaderFunction; +#pragma pack(pop) + +/* TODO: Make sure this pragma pack works on all platforms */ +#pragma pack(push, 1) typedef struct { + u32 function_index; + #define parser_index function_index + u32 extern_function_index; +} BytecodeHeaderImport; +#pragma pack(pop) + +struct Bytecode { Buffer/**/ data; -} Bytecode; + usize import_index; /* Reference inside @data where imports start */ + u32 funcs_index; /* Reference inside @data where funcs start */ + u32 extern_funcs_index; /* Reference inside @data where extern funcs start */ + u32 offset; /* Offset that this bytecode starts from in the final program (all bytecodes combined) */ +}; typedef struct { jmp_buf env; - Bytecode bytecode; + Bytecode *bytecode; Parser *parser; /* borrowed */ } BytecodeCompilerContext; diff --git a/include/defs.h b/include/defs.h index 669d9c9..bb96421 100644 --- a/include/defs.h +++ b/include/defs.h @@ 
-5,8 +5,11 @@ typedef struct amal_compiler amal_compiler; typedef struct Parser Parser; typedef struct Scope Scope; typedef struct FileScopeReference FileScopeReference; +typedef struct ParserFileScopeReference ParserFileScopeReference; typedef struct FunctionDecl FunctionDecl; typedef struct FunctionSignature FunctionSignature; +typedef struct LhsExpr LhsExpr; typedef struct Tokenizer Tokenizer; +typedef struct Bytecode Bytecode; #endif diff --git a/include/parser.h b/include/parser.h index 81895e1..ae5cfa6 100644 --- a/include/parser.h +++ b/include/parser.h @@ -4,7 +4,6 @@ #include "std/buffer_view.h" #include "std/arena_allocator.h" #include "std/thread.h" -#include "bytecode/bytecode.h" #include "tokenizer.h" #include "ast.h" #include "defs.h" @@ -36,7 +35,14 @@ struct Parser { TokenizerError error; ErrorContext error_context; jmp_buf parse_env; - Bytecode bytecode; + Bytecode *bytecode; + Buffer/**/ imports; + HashMapType(BufferView, usize) imports_by_name; /* The value is an index inside @imports */ + /* + Index in the compiler's list of parsers. Used by the bytecode's import header + to point to the index that contains references to another file's variables.
+ */ + u32 index; }; CHECK_RESULT int parser_init(Parser *self, amal_compiler *compiler, ArenaAllocator *allocator); diff --git a/include/program.h b/include/program.h index e3a4ac9..b398a3a 100644 --- a/include/program.h +++ b/include/program.h @@ -35,9 +35,13 @@ #define AMAL_PROGRAM_INVALID_EXPORTED_FUNCTIONS_SIZE -23 #define AMAL_PROGRAM_INSTRUCTION_INVALID_EXPORTED_FUNC_INDEX -24 #define AMAL_PROGRAM_NO_MAIN_FUNC -25 +#define AMAL_PROGRAM_INVALID_IMPORTS -26 +#define AMAL_PROGRAM_SECTION_ERROR -27 #define AMAL_PROGRAM_ARGS_SIZE_VARARGS -1 +#define AMAL_PROGRAM_MAX_RETURN_VALUES 128 + typedef struct { void *func; int args_byte_size; /* -1 if varargs (AMAL_PROGRAM_ARGS_SIZE_VARARGS) */ @@ -49,9 +53,11 @@ typedef struct { u32 *extern_func_indices; u8 *intermediates_start; /* Reference inside @data */ u8 *strings_start; /* Reference inside @data */ + u8 *funcs_start; /* Reference inside @data */ u8 *extern_funcs_start; /* Reference inside @data */ u8 *exported_funcs; /* Reference inside @data */ u8 *exported_funcs_end; /* Reference inside @data */ + u8 *imports_start; /* Reference inside @data */ usize read_index; u32 main_func_instruction_offset; @@ -61,9 +67,14 @@ typedef struct { u16 num_functions; u16 num_extern_functions; u16 num_exported_functions; + u8 num_imports; ArenaAllocator allocator; /* Owned. 
Used by @extern_funcs_map */ HashMapType(BufferView, ProgramExternFunc) extern_funcs_map; + /* key=((func_index<<32)&funcs_start), value=Buffer */ + HashMapType(u64, Buffer) deferred_func_calls; + i8 return_values_stack[AMAL_PROGRAM_MAX_RETURN_VALUES]; + int return_value_index; } amal_program; CHECK_RESULT int amal_program_init(amal_program *self); diff --git a/include/ssa/ssa.h b/include/ssa/ssa.h index 9d6949d..0b6501b 100644 --- a/include/ssa/ssa.h +++ b/include/ssa/ssa.h @@ -10,6 +10,7 @@ #define SSA_ERR_EXTERN_FUNC_SIG_MISMATCH -20 +/* Important: The number of fields in this enum can't exceed 255 */ typedef enum { SSA_ASSIGN_INTER, SSA_ASSIGN_STRING, @@ -24,6 +25,7 @@ typedef enum { SSA_FUNC_START, SSA_FUNC_END, SSA_PUSH, + SSA_PUSH_RET, SSA_CALL, SSA_CALL_EXTERN, SSA_JUMP_ZERO, @@ -55,6 +57,10 @@ typedef struct { BufferView name; } SsaExportFunc; +typedef struct { + FunctionSignature *func_sig; +} SsaFunc; + typedef i16 JumpOffset; typedef i16 SsaRegister; typedef u16 SsaIntermediateIndex; @@ -73,13 +79,15 @@ typedef struct { HashMapType(BufferView, SsaExternFuncIndex) extern_funcs_map; Buffer/*SsaExternFunc*/ extern_funcs; Buffer/*SsaExportFunc*/ export_funcs; + Buffer/*SsaFunc*/ funcs; + SsaIntermediateIndex intermediate_counter; SsaStringIndex string_counter; SsaExternFuncIndex extern_func_counter; SsaExportFuncIndex export_func_counter; + SsaFuncIndex func_counter; SsaRegister reg_counter; SsaRegister param_counter; - SsaFuncIndex func_counter; SsaLabelIndex label_counter; Parser *parser; /* Borrowed */ } Ssa; @@ -102,14 +110,14 @@ typedef struct { typedef struct { u8 num_args; - SsaRegister result; FunctionDecl *func_decl; + u8 import_index; } SsaInsFuncCall; typedef struct { u8 num_args; - SsaRegister result; - SsaExternFuncIndex extern_func_index; + LhsExpr *func_decl_lhs; + int import_index; } SsaInsFuncCallExtern; typedef struct { @@ -134,6 +142,8 @@ typedef struct { jmp_buf env; Ssa *ssa; amal_compiler *compiler; + /* 0 if the current scope belongs 
to the file, otherwise ParserFileScopeReference's import_index (the import file that contains the scope) */ + u8 import_index; } SsaCompilerContext; /* longjump to context->env on failure */ diff --git a/include/std/buffer.h b/include/std/buffer.h index d194881..3c2d79c 100644 --- a/include/std/buffer.h +++ b/include/std/buffer.h @@ -32,6 +32,8 @@ void* buffer_get(Buffer *self, usize index, usize type_size); CHECK_RESULT int buffer_pop(Buffer *self, void *data, usize size); /* Set buffer size to 0, doesn't change the capacity */ void buffer_clear(Buffer *self); +/* Also changes size if @new_capacity is less than buffer size */ +CHECK_RESULT int buffer_set_capacity(Buffer *self, usize new_capacity); void* buffer_begin(Buffer *self); void* buffer_end(Buffer *self); usize __buffer_get_size(Buffer *self, usize type_size); diff --git a/include/std/buffer_view.h b/include/std/buffer_view.h index 59a97d3..9c0a722 100644 --- a/include/std/buffer_view.h +++ b/include/std/buffer_view.h @@ -8,7 +8,7 @@ typedef struct { usize size; } BufferView; -BufferView create_buffer_view_null(); +BufferView create_buffer_view_null(void); BufferView create_buffer_view(const char *data, usize size); bool buffer_view_equals(const BufferView *self, const BufferView *other); diff --git a/include/std/hash_map.h b/include/std/hash_map.h index c9e5b51..05755ce 100644 --- a/include/std/hash_map.h +++ b/include/std/hash_map.h @@ -35,6 +35,7 @@ CHECK_RESULT int hash_map_insert(HashMap *self, BufferView key, void *value); If @value is NULL, then the value is not copied and the functions works the same as @hash_map_contains */ CHECK_RESULT bool hash_map_get(HashMap *self, BufferView key, void *value); +CHECK_RESULT bool hash_map_get_ref(HashMap *self, BufferView key, void **value); CHECK_RESULT bool hash_map_contains(HashMap *self, BufferView key); int hash_map_compare_string(const void *a, const void *b); diff --git a/include/std/log.h b/include/std/log.h index cd376c6..12b52e1 100644 --- 
a/include/std/log.h +++ b/include/std/log.h @@ -3,7 +3,7 @@ #include "defs.h" -amal_mutex* amal_log_get_mutex(); +amal_mutex* amal_log_get_mutex(void); void amal_log_debug(const char *fmt, ...); void amal_log_error(const char *fmt, ...); void amal_log_info(const char *fmt, ...); diff --git a/include/std/mem.h b/include/std/mem.h index 2afce0d..1cc06bf 100644 --- a/include/std/mem.h +++ b/include/std/mem.h @@ -9,6 +9,6 @@ int am_memcmp(const void *lhs, const void *rhs, usize size); bool am_memeql(const void *lhs, const void *rhs, usize size); void am_memset(void *dest, int value, usize size); -long am_pagesize(); +long am_pagesize(void); #endif diff --git a/include/std/misc.h b/include/std/misc.h index e89f1eb..a9bd5b6 100644 --- a/include/std/misc.h +++ b/include/std/misc.h @@ -21,8 +21,7 @@ #define return_if_error(result) \ do { \ - int return_if_result; \ - return_if_result = (result); \ + int return_if_result = (result); \ if(return_if_result != 0) { \ return_if_debug_msg; \ return return_if_result; \ @@ -31,6 +30,11 @@ #define cleanup_if_error(result) do { if((result) != 0) { cleanup_if_debug_msg; goto cleanup; } } while(0) +typedef struct { + int result; + void *data; +} ResultMem; + #if defined(__GNUC__) && __GNUC__ >= 4 #define CHECK_RESULT __attribute__ ((warn_unused_result)) #elif defined(_MSC_VER) && _MSC_VER >= 1700 diff --git a/include/std/thread.h b/include/std/thread.h index 2765204..ae8fe39 100644 --- a/include/std/thread.h +++ b/include/std/thread.h @@ -43,9 +43,9 @@ CHECK_RESULT int amal_thread_deinit(amal_thread *self); CHECK_RESULT int amal_thread_detach(amal_thread *self); CHECK_RESULT int amal_thread_join(amal_thread *self, void **result); -bool amal_thread_is_main(); +bool amal_thread_is_main(void); /* Returns 0 if the number of usable threads is unknown */ -int amal_get_usable_thread_count(); +int amal_get_usable_thread_count(void); CHECK_RESULT int amal_mutex_init(amal_mutex *self); void amal_mutex_deinit(amal_mutex *self); diff --git 
a/src/ast.c b/src/ast.c index 76cbf9c..912b9c7 100644 --- a/src/ast.c +++ b/src/ast.c @@ -296,9 +296,17 @@ int funccall_init(FunctionCall *self, BufferView name, ArenaAllocator *allocator } int structdecl_init(StructDecl *self, Scope *parent, ArenaAllocator *allocator) { + self->fields_num_pointers = 0; + self->fields_fixed_size_bytes = 0; return scope_init(&self->body, parent, allocator); } +int structdecl_add_field(StructDecl *self, StructField *field, ArenaAllocator *allocator) { + Ast *body_obj; + return_if_error(ast_create(allocator, field, AST_STRUCT_FIELD, &body_obj)); + return scope_add_child(&self->body, body_obj); +} + LhsExpr* structdecl_get_field_by_name(StructDecl *self, BufferView field_name) { Ast* result; if(!hash_map_get(&self->body.named_objects, field_name, &result)) @@ -431,17 +439,16 @@ void scope_resolve(Scope *self, AstCompilerContext *context) { } static void compiler_print_error(amal_compiler *compiler, const char *ref, const char *fmt, ...) { - Tokenizer *tokenizer = amal_compiler_find_tokenizer_by_code_reference(compiler, ref); + Tokenizer *tokenizer; + va_list args; + va_start(args, fmt); + + tokenizer = amal_compiler_find_tokenizer_by_code_reference(compiler, ref); if(!tokenizer) { amal_log_error("Failed to find tokenizer for code reference %p. Is it an invalid reference?", ref ? 
ref : "(null)"); return; } - - va_list args; - va_start(args, fmt); - tokenizer_print_error_args(tokenizer, - tokenizer_get_code_reference_index(tokenizer, ref), - fmt, args); + tokenizer_print_error_args(tokenizer, tokenizer_get_code_reference_index(tokenizer, ref), fmt, args); va_end(args); } @@ -672,19 +679,20 @@ static void import_resolve(Ast *ast, AstCompilerContext *context) { (void)context; self = ast->value.import; ast->resolve_data.type.type = RESOLVED_TYPE_LHS_EXPR; - ast->resolve_data.type.value.lhs_expr = &self->file_scope->parser->file_decl; + ast->resolve_data.type.value.lhs_expr = &self->file_scope->file_scope_ref->parser->file_decl; } static Scope* lhsexpr_get_scope(LhsExpr *self) { - AstValue value; - value = self->rhs_expr->value; + AstValue value = self->rhs_expr->value; switch(self->rhs_expr->type) { case AST_FUNCTION_DECL: return &value.func_decl->body; case AST_STRUCT_DECL: return &value.struct_decl->body; case AST_IMPORT: - return &value.import->file_scope->parser->struct_decl.body; + /* *import_index = 1 + value.import->file_scope->import_index;*/ + assert(bool_false); + return &value.import->file_scope->file_scope_ref->parser->struct_decl.body; default: break; } @@ -715,25 +723,11 @@ static void funcdecl_resolve(Ast *self, AstCompilerContext *context) { self->resolve_data.type.value.func_sig = self->value.func_decl->signature; } -/* - Dont need to check if @self is resolved, since it will always be partially resolved when called from @funccall_resolve. - Meaning the resolve status wont be set to solved but the resolve type will be set. 
-*/ -bool resolved_type_is_func_decl(Ast *self) { - LhsExpr *resolved_type; - if(self->resolve_data.type.type != RESOLVED_TYPE_LHS_EXPR) - return bool_false; - resolved_type = self->resolve_data.type.value.lhs_expr; - return (resolved_type->rhs_expr && resolved_type->rhs_expr->type == AST_FUNCTION_DECL) || - resolved_type->type.type == VARIABLE_TYPE_SIGNATURE; -} - static void funccall_resolve(Ast *self, AstCompilerContext *context) { - FunctionCall *func_call; Ast **ast; Ast **ast_end; - func_call = self->value.func_call; + FunctionCall *func_call = self->value.func_call; variable_resolve(&func_call->func, context, &self->resolve_data.type); /* Attemping to use call syntax (variable_name ( ) ) with a variable that is not a function */ if(self->resolve_data.type.type != RESOLVED_TYPE_FUNC_SIG) { @@ -753,16 +747,46 @@ static void funccall_resolve(Ast *self, AstCompilerContext *context) { } } +static TypeSize variable_type_get_byte_size(VariableType *self) { + TypeSize type_size; + type_size.num_pointers = 0; + type_size.fixed_size = 0; + switch(self->type) { + case VARIABLE_TYPE_NONE: + break; + case VARIABLE_TYPE_VARIABLE: + type_size = resolved_type_get_byte_size(&self->value.variable->resolved_var.resolve_data->type); + break; + case VARIABLE_TYPE_SIGNATURE: + type_size.num_pointers = 1; + break; + } + return type_size; +} + static void structdecl_resolve(Ast *self, AstCompilerContext *context) { - StructDecl *struct_decl; - struct_decl = self->value.struct_decl; - scope_resolve(&struct_decl->body, context); + StructDecl *struct_decl = self->value.struct_decl; + Scope *body = &struct_decl->body; + scope_resolve(body, context); + + { + /* + Sum the size of all the fields into the struct, so the struct can know its full size + without searching for it. + TODO: Exclude functions, but not function pointers.
+ */ + Ast **ast = buffer_begin(&body->ast_objects); + Ast **ast_end = buffer_end(&body->ast_objects); + for(; ast != ast_end; ++ast) { + TypeSize type_size = variable_type_get_byte_size(&(*ast)->value.struct_field->type); + struct_decl->fields_num_pointers += type_size.num_pointers; + struct_decl->fields_fixed_size_bytes += type_size.fixed_size; + } + } } static void structfield_resolve(Ast *self, AstCompilerContext *context) { - /* TODO: Implement */ - StructField *struct_field; - struct_field = self->value.struct_field; + StructField *struct_field = self->value.struct_field; variable_type_resolve(&struct_field->type, context, &self->resolve_data.type); } @@ -806,12 +830,30 @@ static void binop_resolve_dot_access(Ast *ast, AstCompilerContext *context) { throw(AST_ERR); } - if(self->rhs->resolve_data.type.type != RESOLVED_TYPE_LHS_EXPR || !LHS_EXPR_IS_PUB(self->rhs->resolve_data.type.value.lhs_expr)) { - compiler_print_error(context->compiler, caller_code_ref.data, "Can't access non-public field \"%.*s\"", caller_code_ref.size, caller_code_ref.data); - /* TODO: use tokenizer_print_note, once it has been added */ - /* TODO: Print type */ - compiler_print_error(context->compiler, callee_code_ref.data, "Type was declared non-public here"); - throw(AST_ERR); + { + bool invalid_dot_access = bool_true; + switch(self->rhs->resolve_data.type.type) { + case RESOLVED_TYPE_NONE: + assert(bool_false); + break; + case RESOLVED_TYPE_FUNC_SIG: { + FunctionSignature *func_sig = self->rhs->resolve_data.type.value.func_sig; + if(func_sig->func_decl && func_sig->func_decl->lhs_expr && LHS_EXPR_IS_PUB(func_sig->func_decl->lhs_expr)) + invalid_dot_access = bool_false; + break; + } + case RESOLVED_TYPE_LHS_EXPR: + invalid_dot_access = !LHS_EXPR_IS_PUB(self->rhs->resolve_data.type.value.lhs_expr); + break; + } + + if(invalid_dot_access) { + compiler_print_error(context->compiler, caller_code_ref.data, "Can't access non-public field \"%.*s\"", caller_code_ref.size, caller_code_ref.data); 
+ /* TODO: use tokenizer_print_note, once it has been added */ + /* TODO: Print type */ + compiler_print_error(context->compiler, callee_code_ref.data, "Type was declared non-public here"); + throw(AST_ERR); + } } } @@ -895,6 +937,37 @@ static void return_expr_resolve(ReturnExpr *self, AstCompilerContext *context) { ast_resolve(self->rhs_expr, context); } +TypeSize resolved_type_get_byte_size(AstResolvedType *self) { + TypeSize type_size; + type_size.num_pointers = 0; + type_size.fixed_size = 0; + switch(self->type) { + case RESOLVED_TYPE_NONE: + break; + case RESOLVED_TYPE_LHS_EXPR: { + /* Resolved type until rhs is StructDecl or FunctionSignature */ + LhsExpr *lhs_expr = self->value.lhs_expr; + if(lhs_expr->type.type != VARIABLE_TYPE_NONE) + type_size = variable_type_get_byte_size(&lhs_expr->type); + else { + assert(lhs_expr->rhs_expr); + if(lhs_expr->rhs_expr->type == AST_STRUCT_DECL) { + StructDecl *struct_decl = lhs_expr->rhs_expr->value.struct_decl; + type_size.num_pointers = struct_decl->fields_num_pointers; + type_size.fixed_size = struct_decl->fields_fixed_size_bytes; + } else { + type_size = resolved_type_get_byte_size(&lhs_expr->rhs_expr->resolve_data.type); + } + } + break; + } + case RESOLVED_TYPE_FUNC_SIG: + type_size.num_pointers = 1; + break; + } + return type_size; +} + void ast_resolve(Ast *self, AstCompilerContext *context) { assert(self); assert(context->parser); diff --git a/src/bytecode/bytecode.c b/src/bytecode/bytecode.c index 47d492c..0ee066c 100644 --- a/src/bytecode/bytecode.c +++ b/src/bytecode/bytecode.c @@ -20,11 +20,17 @@ } while(0) int bytecode_init(Bytecode *self, ArenaAllocator *allocator) { + self->import_index = 0; + self->funcs_index = 0; + self->extern_funcs_index = 0; + self->offset = 0; return buffer_init(&self->data, allocator); } /*doc(Bytecode) -The layout of the full bytecode is: Header (Intermediates Strings Functions External_Functions Exported_Functions Instructions)* +The layout of the full bytecode is: Header (X 
Intermediates X Strings X Functions X External_Functions X Exported_Functions X Imports X Instructions)*\ +Where the X is a magic number to make it easier to find errors while decoding the bytecode.\ +The value of the magic number is @AMAL_BYTECODE_SECTION_MAGIC_NUMBER */ CHECK_RESULT int buffer_append_header(Buffer *program_data) { @@ -61,10 +67,10 @@ static CHECK_RESULT usize ssa_extract_data(u8 *instruction_data, void *result, u static void add_intermediates(BytecodeCompilerContext *self) { /*doc(Bytecode intermediates) # Intermediates layout - |Type |Field |Description | - |------------|------------------|-------------------------------------------------------------------------------| - |u32 |Intermediates size|The size of the intermediates section, in bytes. | - |Intermediate|Intermediate data |Multiple intermediates, where the total size is defined by @Intermediates size.| + |Type |Field |Description | + |--------------|------------------|-------------------------------------------------------------------------------| + |u32 |Intermediates size|The size of all intermediates, in bytes. | + |Intermediate[]|Intermediate data |Multiple intermediates, where the total size is defined by @Intermediates size.| # Intermediate |Type|Field|Description | @@ -74,7 +80,7 @@ static void add_intermediates(BytecodeCompilerContext *self) { */ Ssa *ssa = self->parser->ssa; - Buffer *instructions = &self->bytecode.data; + Buffer *instructions = &self->bytecode->data; SsaNumber *intermediate = buffer_begin(&ssa->intermediates); SsaNumber *intermediates_end = buffer_end(&ssa->intermediates); int i = 0; @@ -93,11 +99,11 @@ static void add_intermediates(BytecodeCompilerContext *self) { static void add_strings(BytecodeCompilerContext *self) { /*doc(Bytecode strings) # Strings layout - |Type |Field |Description | - |------|-----------------|------------------------------------------------------------------| - |u16 |Number of strings|The number of strings. 
| - |u32 |Strings size |The size of the strings section, in bytes. | - |String|Strings data |Multiple strings, where the total size is defined by @Strings size| + |Type |Field |Description | + |--------|-----------------|------------------------------------------------------------------| + |u16 |Number of strings|The number of strings. | + |u32 |Strings size |The size of all strings, in bytes. | + |String[]|Strings data |Multiple strings, where the total size is defined by @Strings size| # String |Type|Field|Description | @@ -107,7 +113,7 @@ static void add_strings(BytecodeCompilerContext *self) { */ Ssa *ssa = self->parser->ssa; - Buffer *instructions = &self->bytecode.data; + Buffer *instructions = &self->bytecode->data; BufferView *string = buffer_begin(&ssa->strings); BufferView *strings_end = buffer_end(&ssa->strings); u32 strings_size = 0; @@ -128,36 +134,105 @@ static void add_strings(BytecodeCompilerContext *self) { } } +static TypeSize function_signature_get_params_size(FunctionSignature *self) { + FunctionParameter *param = buffer_begin(&self->parameters); + FunctionParameter *param_end = buffer_end(&self->parameters); + TypeSize params_total_size; + params_total_size.num_pointers = 0; + params_total_size.fixed_size = 0; + for(; param != param_end; ++param) { + TypeSize param_size = resolved_type_get_byte_size(&param->resolve_data.type); + params_total_size.num_pointers += param_size.num_pointers; + params_total_size.fixed_size += param_size.fixed_size; + } + return params_total_size; +} + +static TypeSize function_signature_get_return_types_size(FunctionSignature *self) { + FunctionReturnType *return_type = buffer_begin(&self->return_types); + FunctionReturnType *return_type_end = buffer_end(&self->return_types); + TypeSize return_types_total_size; + return_types_total_size.num_pointers = 0; + return_types_total_size.fixed_size = 0; + for(; return_type != return_type_end; ++return_type) { + TypeSize param_size =
resolved_type_get_byte_size(&return_type->resolved_type); + return_types_total_size.num_pointers += param_size.num_pointers; + return_types_total_size.fixed_size += param_size.fixed_size; + } + return return_types_total_size; +} + static void add_functions(BytecodeCompilerContext *self) { /*doc(Bytecode functions) - # Internal functions layout - |Type|Field |Description | - |----|-------------------|---------------------------------| - |u16 |Number of functions|The number of internal functions.| + # Functions layout + |Type |Field |Description | + |----------|----------|--------------------------------------------------------------------------------------| + |u16 |num_funcs |The number of non-extern functions. | + |u32 |funcs_size|The size of all functions, in bytes. | + |Function[]|Functions |Multiple non-extern functions, where the number of functions is defined by @num_funcs.| + + # Function + |Type|Field |Description | + |----|-------------------------|------------------------------------------------------------------------------------------------------------------------| + |u32 |func_offset |The offset in the program code (machine code) where the function starts. Is always 0 until the program has been started.| + |u8 |num_params |The number of parameters. | + |u32 |params_num_pointers |The number of pointers in the parameters. | + |u32 |params_fixed_size |The size of all non-pointer type parameters, in bytes. | + |u8 |num_return_types |The number of return values. | + |u32 |return_types_num_pointers|The number of pointers in the return types. | + |u32 |return_types_fixed_size |The size of all non-pointer type return types, in bytes. 
| */ - assert(sizeof(SsaFuncIndex) == sizeof(u16) && "Program decoder needs to be updated since size of func index has changed"); - throw_if_error(buffer_append(&self->bytecode.data, &self->parser->ssa->func_counter, sizeof(u16))); + Ssa *ssa = self->parser->ssa; + Buffer *instructions = &self->bytecode->data; + SsaFunc *func = buffer_begin(&ssa->funcs); + SsaFunc *func_end = buffer_end(&ssa->funcs); + u32 funcs_size = ssa->func_counter * sizeof(BytecodeHeaderFunction); + assert(sizeof(BytecodeHeaderFunction) == 22); + + self->bytecode->funcs_index = instructions->size; + amal_log_debug("func index: %d", self->bytecode->funcs_index); + throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + funcs_size)); + throw_if_error(buffer_append(instructions, &ssa->func_counter, sizeof(u16))); + throw_if_error(buffer_append(instructions, &funcs_size, sizeof(u32))); + for(; func != func_end; ++func) { + BytecodeHeaderFunction header_func; + TypeSize params_total_size = function_signature_get_params_size(func->func_sig); + TypeSize return_types_total_size = function_signature_get_return_types_size(func->func_sig); + + header_func.func_offset = 0; + + header_func.num_params = buffer_get_size(&func->func_sig->parameters, FunctionParameter); + header_func.params_num_pointers = params_total_size.num_pointers; + header_func.params_fixed_size = params_total_size.fixed_size; + + header_func.num_return_types = buffer_get_size(&func->func_sig->return_types, FunctionReturnType); + header_func.return_types_num_pointers = return_types_total_size.num_pointers; + header_func.return_types_fixed_size = return_types_total_size.fixed_size; + throw_if_error(buffer_append(instructions, &header_func, sizeof(header_func))); + } + + assert(sizeof(ssa->func_counter) == sizeof(u16) && "Program decoder needs to be updated since size of func index has changed"); } static void add_extern_functions(BytecodeCompilerContext *self) { /*doc(Bytecode external functions) # External functions 
layout - |Type |Field |Description | - |-----------------|------------------|-----------------------------------------------------------------------------------------| - |u16 |num_extern_func |The number of external functions. | - |u32 |extern_funcs_size |The size of the external functions section, in bytes. | - |External function|External functions|Multiple external functions, where the number of functions is defined by @num_extern_func| + |Type |Field |Description | + |-------------------|------------------|-----------------------------------------------------------------------------------------| + |u16 |num_extern_func |The number of external functions. | + |u32 |extern_funcs_size |The size of all external functions, in bytes. | + |External function[]|External functions|Multiple external functions, where the number of functions is defined by @num_extern_func| # External function - |Type|Field |Description | - |----|--------|-----------------------------------------------------------------------------------------------------| - |u8 |num_args|The number of arguments the functions has. | - |u8 |name_len|The length of the external function name, in bytes. Excluding the null-terminate character. | - |u8* |name |The name of the external function, where the size is defined by @name_len. Names are null-terminated.| + |Type|Field |Description | + |----|----------|-----------------------------------------------------------------------------------------------------| + |u8 |num_params|The number of parameters the functions has. | + |u8 |name_len |The length of the external function name, in bytes. Excluding the null-terminate character. | + |u8[]|name |The name of the external function, where the size is defined by @name_len. 
Names are null-terminated.| */ Ssa *ssa = self->parser->ssa; - Buffer *instructions = &self->bytecode.data; + Buffer *instructions = &self->bytecode->data; SsaExternFunc *extern_func = buffer_begin(&ssa->extern_funcs); SsaExternFunc *extern_func_end = buffer_end(&ssa->extern_funcs); u32 extern_funcs_size = 0; @@ -167,14 +242,16 @@ static void add_extern_functions(BytecodeCompilerContext *self) { } extern_func = buffer_begin(&ssa->extern_funcs); + self->bytecode->extern_funcs_index = instructions->size; throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + extern_funcs_size)); throw_if_error(buffer_append(instructions, &ssa->extern_func_counter, sizeof(u16))); throw_if_error(buffer_append(instructions, &extern_funcs_size, sizeof(u32))); for(; extern_func != extern_func_end; ++extern_func) { const char null_s = '\0'; - u8 num_args = buffer_get_size(&extern_func->func_sig->parameters, FunctionParameter); - throw_if_error(buffer_append(instructions, &num_args, sizeof(num_args))); + u8 num_params = buffer_get_size(&extern_func->func_sig->parameters, FunctionParameter); + throw_if_error(buffer_append(instructions, &num_params, sizeof(num_params))); /* TODO: Add namespace to the function name */ + /* u8 is fine, because the max length of a variable is 255 */ throw_if_error(buffer_append(instructions, &extern_func->name.size, sizeof(u8))); throw_if_error(buffer_append(instructions, extern_func->name.data, extern_func->name.size)); throw_if_error(buffer_append(instructions, &null_s, sizeof(char))); @@ -186,11 +263,11 @@ static void add_extern_functions(BytecodeCompilerContext *self) { static void add_export_functions(BytecodeCompilerContext *self) { /*doc(Bytecode exported functions) # Exported functions layout - |Type |Field |Description | - |-----------------|------------------|-----------------------------------------------------------------------------------------| - |u16 |num_export_func |The number of exported functions. 
| - |u32 |export_funcs_size |The size of the exported functions section, in bytes. | - |Exported function|Exported functions|Multiple exported functions, where the number of functions is defined by @num_export_func| + |Type |Field |Description | + |-------------------|------------------|-----------------------------------------------------------------------------------------| + |u16 |num_export_func |The number of exported functions. | + |u32 |export_funcs_size |The size of all exported functions, in bytes. | + |Exported function[]|Exported functions|Multiple exported functions, where the number of functions is defined by @num_export_func| # Exported function |Type|Field |Description | @@ -198,10 +275,10 @@ static void add_export_functions(BytecodeCompilerContext *self) { |u32 |instruction_offset|The offset in the instruction data where the exported function is defined. Is always 0 until the program has been started.| |u8 |num_args |The number of arguments the functions has. | |u8 |name_len |The length of the exported function name, in bytes. Excluding the null-terminate character. | - |u8* |name |The name of the exported function, where the size is defined by @name_len. Names are null-terminated. | + |u8[]|name |The name of the exported function, where the size is defined by @name_len. Names are null-terminated. 
| */ Ssa *ssa = self->parser->ssa; - Buffer *instructions = &self->bytecode.data; + Buffer *instructions = &self->bytecode->data; SsaExportFunc *export_func = buffer_begin(&ssa->export_funcs); SsaExportFunc *export_func_end = buffer_end(&ssa->export_funcs); u32 export_funcs_size = 0; @@ -228,8 +305,51 @@ static void add_export_functions(BytecodeCompilerContext *self) { assert(sizeof(SsaExportFuncIndex) == sizeof(u16) && "Program decoder needs to be updated since size of export func index has changed"); } +static void add_imports(BytecodeCompilerContext *self) { + /*doc(Bytecode imports) + # Imports layout + |Type |Field |Description | + |--------|------------|-------------------------------------------------------------------------| + |u8 |num_imports |The number of imports. | + |u32 |imports_size|The size of all imports, in bytes. | + |Import[]|Import |Multiple imports, where the number of imports is defined by @num_imports.| + + # Import + |Type|Field |Description | + |----|---------------------|----------------------------------------------------------------------------------------| + |u32 |function_index |The index in the bytecode where function header begins for the imported file. 
| + |u32 |extern_function_index|The index in the bytecode where the extern function header begins for the imported file.| + */ + Parser *parser = self->parser; + Buffer *instructions = &self->bytecode->data; + ParserFileScopeReference **import = buffer_begin(&parser->imports); + ParserFileScopeReference **import_end = buffer_end(&parser->imports); + u8 num_imports = 1 + (import_end - import); + u32 imports_size = num_imports * sizeof(BytecodeHeaderImport); + assert(sizeof(BytecodeHeaderImport) == 8); + + self->bytecode->import_index = instructions->size; + throw_if_error(buffer_expand(instructions, sizeof(u8) + sizeof(u32) + imports_size)); + throw_if_error(buffer_append(instructions, &num_imports, sizeof(num_imports))); + throw_if_error(buffer_append(instructions, &imports_size, sizeof(imports_size))); + + /* The first import is always a reference to itself */ + throw_if_error(buffer_append(instructions, &self->bytecode->funcs_index, sizeof(self->bytecode->funcs_index))); + throw_if_error(buffer_append(instructions, &self->bytecode->extern_funcs_index, sizeof(self->bytecode->extern_funcs_index))); + for(; import != import_end; ++import) { + /* + We don't know the index to the functions yet, so first fill them with the parser index that owns them + and after bytecode has been generated for each parser (file), modify these function indices + to point to the parsers function index in the bytecode. 
+ */ + u32 parser_index = (*import)->file_scope_ref->parser->index; + throw_if_error(buffer_append(instructions, &parser_index, sizeof(parser_index))); + throw_if_error(buffer_append(instructions, &parser_index, sizeof(parser_index))); + } +} + static void add_ins1(BytecodeCompilerContext *self, AmalOpcode opcode, const char *fmt) { - throw_if_error(buffer_append(&self->bytecode.data, &opcode, sizeof(AmalOpcodeType))); + throw_if_error(buffer_append(&self->bytecode->data, &opcode, sizeof(AmalOpcodeType))); if(fmt) { fprintf(stderr, fmt); fputc('\n', stderr); @@ -237,7 +357,7 @@ static void add_ins1(BytecodeCompilerContext *self, AmalOpcode opcode, const cha } static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, i8 reg, const char *fmt) { - Buffer *instructions = &self->bytecode.data; + Buffer *instructions = &self->bytecode->data; size_t index = instructions->size; throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(reg))); @@ -248,7 +368,7 @@ static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, i8 reg, c } static void add_ins3(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_reg, i8 src_reg, const char *fmt) { - Buffer *instructions = &self->bytecode.data; + Buffer *instructions = &self->bytecode->data; size_t index = instructions->size; throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(src_reg))); @@ -260,7 +380,7 @@ static void add_ins3(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_re } static void add_ins4(BytecodeCompilerContext *self, AmalOpcode opcode, u16 data, const char *fmt) { - Buffer *instructions = &self->bytecode.data; + Buffer *instructions = &self->bytecode->data; size_t index = instructions->size; throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(data))); @@ -271,7 +391,7 @@ static void add_ins4(BytecodeCompilerContext *self, AmalOpcode opcode, u16 data, } static void 
add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_reg, i8 reg1, i8 reg2, const char *fmt) { - Buffer *instructions = &self->bytecode.data; + Buffer *instructions = &self->bytecode->data; size_t index = instructions->size; throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1) + sizeof(reg2))); @@ -284,7 +404,7 @@ static void add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_re } static void add_ins6(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_reg, u16 data, const char *fmt) { - Buffer *instructions = &self->bytecode.data; + Buffer *instructions = &self->bytecode->data; size_t index = instructions->size; throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(data))); @@ -295,16 +415,16 @@ static void add_ins6(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_re fputc('\n', stderr); } -static void add_ins7(BytecodeCompilerContext *self, AmalOpcode opcode, u16 idx, i8 num_args, i8 dst_reg, const char *fmt) { - Buffer *instructions = &self->bytecode.data; +static void add_ins7(BytecodeCompilerContext *self, AmalOpcode opcode, u8 import_index, u16 func_index, i8 num_args, const char *fmt) { + Buffer *instructions = &self->bytecode->data; size_t index = instructions->size; - throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(idx) + sizeof(num_args) + sizeof(dst_reg))); + throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(import_index) + sizeof(func_index) + sizeof(num_args))); instructions->data[index] = opcode; - memcpy(instructions->data + index + sizeof(AmalOpcodeType), &idx, sizeof(idx)); - instructions->data[index + sizeof(AmalOpcodeType) + sizeof(idx)] = num_args; - instructions->data[index + sizeof(AmalOpcodeType) + sizeof(idx) + sizeof(num_args)] = dst_reg; - fprintf(stderr, fmt, idx, num_args, dst_reg); + memcpy(instructions->data + index + 
sizeof(AmalOpcodeType), &import_index, sizeof(import_index)); + memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(import_index), &func_index, sizeof(func_index)); + memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(import_index) + sizeof(func_index), &num_args, sizeof(num_args)); + fprintf(stderr, fmt, import_index, func_index, num_args); fputc('\n', stderr); } @@ -330,8 +450,8 @@ static void add_instructions(BytecodeCompilerContext *self) { u8 *instructions_end = buffer_end(&ssa->instructions); u16 label_counter = 0; - u32 num_instructions_index = self->bytecode.data.size; - throw_if_error(buffer_append_empty(&self->bytecode.data, sizeof(num_instructions_index))); + u32 num_instructions_index = self->bytecode->data.size; + throw_if_error(buffer_append_empty(&self->bytecode->data, sizeof(num_instructions_index))); /* TODO: Keep all registers under 256 */ while(instruction != instructions_end) { @@ -404,22 +524,21 @@ static void add_instructions(BytecodeCompilerContext *self) { add_ins2(self, AMAL_OP_PUSH, reg, "push r%d"); break; } + case SSA_PUSH_RET: { + SsaRegister reg; + am_memcpy(&reg, instruction, sizeof(SsaRegister)); + instruction += sizeof(SsaRegister); + add_ins2(self, AMAL_OP_PUSH_RET, reg, "push_ret r%d"); + break; + } case SSA_CALL: { - /* - TODO: Using ssa_func_index only works correctly if the function was defined in the same - file as the function call. To make this work with calling functions in other files, - ssa_func_index should also have an offset index or something like that. - So each file has it's own function list with indices and when they need to be combined in the end, - the function indices can be increased by their block index (ssa_func_index + block index), where block index - is defined as the size of all previous files' number of functions. 
- */ instruction += ssa_extract_data(instruction, &ssa_ins_func_call, sizeof(ssa_ins_func_call)); - add_ins7(self, AMAL_OP_CALL, ssa_ins_func_call.func_decl->ssa_func_index, ssa_ins_func_call.num_args, ssa_ins_func_call.result, "call f%d, %d, r%d"); + add_ins7(self, AMAL_OP_CALL, ssa_ins_func_call.import_index, ssa_ins_func_call.func_decl->ssa_func_index, ssa_ins_func_call.num_args, "call f(%d,%d), %d"); break; } case SSA_CALL_EXTERN: { instruction += ssa_extract_data(instruction, &ssa_ins_func_call_extern, sizeof(ssa_ins_func_call_extern)); - add_ins7(self, AMAL_OP_CALLE, ssa_ins_func_call_extern.extern_func_index, ssa_ins_func_call_extern.num_args, ssa_ins_func_call_extern.result, "calle ef%d, %d, r%d"); + add_ins7(self, AMAL_OP_CALLE, ssa_ins_func_call_extern.import_index, ssa_ins_func_call_extern.func_decl_lhs->extern_index, ssa_ins_func_call_extern.num_args, "calle ef(%d,%d), %d"); break; } case SSA_JUMP_ZERO: { @@ -450,16 +569,35 @@ static void add_instructions(BytecodeCompilerContext *self) { /* Prepend instructions with its size */ { /* -sizeof to Remove the count itself from the size of the instructions size */ - const u32 instructions_size = self->bytecode.data.size - num_instructions_index - sizeof(instructions_size); - am_memcpy(self->bytecode.data.data + num_instructions_index, &instructions_size, sizeof(instructions_size)); + const u32 instructions_size = self->bytecode->data.size - num_instructions_index - sizeof(instructions_size); + am_memcpy(self->bytecode->data.data + num_instructions_index, &instructions_size, sizeof(instructions_size)); } } +static void add_section_magic_number(BytecodeCompilerContext *self) { + const u32 section_magic_number = AMAL_BYTECODE_SECTION_MAGIC_NUMBER; + throw_if_error(buffer_append(&self->bytecode->data, &section_magic_number, sizeof(section_magic_number))); +} + void generate_bytecode_from_ssa(BytecodeCompilerContext *self) { + add_section_magic_number(self); add_intermediates(self); + + add_section_magic_number(self); 
add_strings(self); + + add_section_magic_number(self); add_functions(self); + + add_section_magic_number(self); add_extern_functions(self); + + add_section_magic_number(self); add_export_functions(self); + + add_section_magic_number(self); + add_imports(self); + + add_section_magic_number(self); add_instructions(self); } diff --git a/src/compiler.c b/src/compiler.c index dbc1498..8dda1c6 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -25,13 +25,15 @@ static usize strnlen(const char *str, usize max_length) { } /* TODO: Allow to specify size and members? */ -static CHECK_RESULT int create_default_type(amal_compiler *compiler, const char *name, amal_default_type **default_type) { +static CHECK_RESULT int create_default_type(amal_compiler *compiler, const char *name, u32 num_pointers, u32 fixed_size, amal_default_type **default_type) { StructDecl *struct_decl; Ast *expr; LhsExpr *lhs_expr; return_if_error(arena_allocator_alloc(&compiler->allocator, sizeof(StructDecl), (void**)&struct_decl)); return_if_error(structdecl_init(struct_decl, &compiler->root_scope, &compiler->allocator)); + struct_decl->fields_num_pointers = num_pointers; + struct_decl->fields_fixed_size_bytes = fixed_size; return_if_error(arena_allocator_alloc(&compiler->allocator, sizeof(amal_default_type), (void**)default_type)); lhs_expr = &(*default_type)->lhs_expr; @@ -45,20 +47,29 @@ static CHECK_RESULT int create_default_type(amal_compiler *compiler, const char return scope_add_child(&compiler->root_scope, expr); } +static CHECK_RESULT int create_default_type_num_pointers(amal_compiler *compiler, const char *name, u32 num_pointers, amal_default_type **default_type) { + return create_default_type(compiler, name, num_pointers, 0, default_type); +} + +static CHECK_RESULT int create_default_type_fixed_size(amal_compiler *compiler, const char *name, u32 byte_size, amal_default_type **default_type) { + return create_default_type(compiler, name, 0, byte_size, default_type); +} + static CHECK_RESULT int 
init_default_types(amal_compiler *compiler) { - return_if_error(create_default_type(compiler, "i8", &compiler->default_types.i8)); - return_if_error(create_default_type(compiler, "i16", &compiler->default_types.i16)); - return_if_error(create_default_type(compiler, "i32", &compiler->default_types.i32)); - return_if_error(create_default_type(compiler, "i64", &compiler->default_types.i64)); - return_if_error(create_default_type(compiler, "u8", &compiler->default_types.u8)); - return_if_error(create_default_type(compiler, "u16", &compiler->default_types.u16)); - return_if_error(create_default_type(compiler, "u32", &compiler->default_types.u32)); - return_if_error(create_default_type(compiler, "u64", &compiler->default_types.u64)); - return_if_error(create_default_type(compiler, "isize", &compiler->default_types.isize)); - return_if_error(create_default_type(compiler, "usize", &compiler->default_types.usize)); - return_if_error(create_default_type(compiler, "f32", &compiler->default_types.f32)); - return_if_error(create_default_type(compiler, "f64", &compiler->default_types.f64)); - return_if_error(create_default_type(compiler, "str", &compiler->default_types.str)); + return_if_error(create_default_type_fixed_size(compiler, "i8", 1, &compiler->default_types.i8)); + return_if_error(create_default_type_fixed_size(compiler, "i16", 2, &compiler->default_types.i16)); + return_if_error(create_default_type_fixed_size(compiler, "i32", 3, &compiler->default_types.i32)); + return_if_error(create_default_type_fixed_size(compiler, "i64", 4, &compiler->default_types.i64)); + return_if_error(create_default_type_fixed_size(compiler, "u8", 1, &compiler->default_types.u8)); + return_if_error(create_default_type_fixed_size(compiler, "u16", 2, &compiler->default_types.u16)); + return_if_error(create_default_type_fixed_size(compiler, "u32", 3, &compiler->default_types.u32)); + return_if_error(create_default_type_fixed_size(compiler, "u64", 4, &compiler->default_types.u64)); + 
return_if_error(create_default_type_num_pointers(compiler, "isize", 1, &compiler->default_types.isize)); + return_if_error(create_default_type_num_pointers(compiler, "usize", 1, &compiler->default_types.usize)); + return_if_error(create_default_type_fixed_size(compiler, "f32", 4, &compiler->default_types.f32)); + return_if_error(create_default_type_fixed_size(compiler, "f64", 8, &compiler->default_types.f64)); + /* TODO: str should be a struct with the fields @data (ptr) and @size (usize) */ + return_if_error(create_default_type_num_pointers(compiler, "str", 1, &compiler->default_types.str)); compiler->default_types.arithmetic_types[0] = compiler->default_types.i8; compiler->default_types.arithmetic_types[1] = compiler->default_types.u8; @@ -185,6 +196,7 @@ static CHECK_RESULT int amal_compiler_load_in_this_thread(amal_compiler *compile file_scope->parser = parser; return_if_error(parser_parse_file(parser, filepath)); cleanup_if_error(amal_mutex_lock(&compiler->mutex, "amal_compiler_load_in_this_thread, add parser")); + parser->index = buffer_get_size(&compiler->parsers, Parser*); cleanup_if_error(buffer_append(&compiler->parsers, &parser, sizeof(parser))); amal_log_info("Finished parsing %.*s", filepath.size, filepath.data); result = AMAL_COMPILER_OK; @@ -224,6 +236,7 @@ static CHECK_RESULT int thread_generate_ssa(Parser *parser) { return_if_error(arena_allocator_alloc(parser->allocator, sizeof(Ssa), (void**)&compiler_context.ssa)); return_if_error(ssa_init(compiler_context.ssa, parser)); compiler_context.compiler = parser->compiler; + compiler_context.import_index = 0; parser->ssa = compiler_context.ssa; amal_log_debug("Generating SSA for file: %.*s", parser->tokenizer.code_name.size, parser->tokenizer.code_name.data); result = setjmp(compiler_context.env); @@ -238,7 +251,8 @@ static CHECK_RESULT int thread_generate_bytecode(Parser *parser) { BytecodeCompilerContext compiler_context; int result; - return_if_error(bytecode_init(&compiler_context.bytecode, 
parser->allocator)); + return_if_error(arena_allocator_alloc(parser->allocator, sizeof(Bytecode), (void**)&compiler_context.bytecode)); + return_if_error(bytecode_init(compiler_context.bytecode, parser->allocator)); compiler_context.parser = parser; amal_log_debug("Generating bytecode for file: %.*s", parser->tokenizer.code_name.size, parser->tokenizer.code_name.data); result = setjmp(compiler_context.env); @@ -320,14 +334,11 @@ static CHECK_RESULT int amal_compiler_dispatch_generic(amal_compiler *self, Thre } static CHECK_RESULT int amal_compiler_generate_program(amal_compiler *self) { - /* - TODO: Copying the bytecode to the program can be done using multiple threads. - Use self->threads for that. - */ + /* TODO: Copying the bytecode to the program can be done using multiple threads */ Parser **parser = buffer_begin(&self->parsers); Parser **parser_end = buffer_end(&self->parsers); for(; parser != parser_end; ++parser) { - return_if_error(amal_program_append_bytecode(self->program, &(*parser)->bytecode)); + return_if_error(amal_program_append_bytecode(self->program, (*parser)->bytecode)); } return 0; } @@ -419,6 +430,47 @@ static CHECK_RESULT int validate_main_func(FileScopeReference *main_file_scope, return 0; } +static void amal_compiler_parsers_set_bytecode_offsets(amal_compiler *self) { + /* magic number + major version + minor version + patch version */ + u32 offset = sizeof(u32) + sizeof(u8) + sizeof(u8) + sizeof(u8); + Parser **parser = buffer_begin(&self->parsers); + Parser **parser_end = buffer_end(&self->parsers); + for(; parser != parser_end; ++parser) { + (*parser)->bytecode->offset = offset; + offset += (*parser)->bytecode->data.size; + } +} + +/* TODO: Parallelize this? 
*/ +static void amal_compiler_update_import_references(amal_compiler *self) { + Parser **parser_start = buffer_begin(&self->parsers); + Parser **parser = parser_start; + Parser **parser_end = buffer_end(&self->parsers); + amal_compiler_parsers_set_bytecode_offsets(self); + + for(; parser != parser_end; ++parser) { + u8 *import_start = (u8*)(*parser)->bytecode->data.data + (*parser)->bytecode->import_index; + u8 num_imports = *import_start; + /* TODO: Remove these kinds of offset with sizeof. They are prone to hard-to-find bugs after code change */ + BytecodeHeaderImport *header_import = (BytecodeHeaderImport*)(import_start + sizeof(u8) + sizeof(u32)); + BytecodeHeaderImport *header_import_end = header_import + num_imports; + + /* + The first import is the file itself and it already has function index, + but the index is localized to the function itself, so we need to add the offset + to the file itself as well. + */ + header_import->function_index += (*parser)->bytecode->offset; + header_import->extern_function_index += (*parser)->bytecode->offset; + ++header_import; + for(; header_import != header_import_end; ++header_import) { + Parser *imported_parser = *(parser_start + header_import->parser_index); + header_import->function_index = imported_parser->bytecode->offset + imported_parser->bytecode->funcs_index; + header_import->extern_function_index = imported_parser->bytecode->offset + imported_parser->bytecode->extern_funcs_index; + } + } +} + int amal_compiler_internal_load_file(amal_compiler *self, const char *filepath, FileScopeReference **file_scope) { ThreadWorkData thread_work_data; bool main_job; @@ -472,6 +524,7 @@ int amal_compiler_internal_load_file(amal_compiler *self, const char *filepath, amal_log_info("Finished generating SSA"); return_if_error(amal_compiler_dispatch_generic(self, THREAD_WORK_GENERATE_BYTECODE)); + amal_compiler_update_import_references(self); amal_log_info("Finished generating bytecode"); 
return_if_error(amal_compiler_generate_program(self)); diff --git a/src/parser.c b/src/parser.c index df326f0..01c1d9f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -6,6 +6,7 @@ #include "../include/std/mem.h" #include "../include/std/log.h" #include "../include/std/alloc.h" +#include "../include/std/hash.h" #include #include @@ -21,14 +22,16 @@ do { \ } while(0) #define VAR_MAX_LEN UINT8_MAX +#define FUNC_MAX_PARAMS 128 +#define FUNC_MAX_RETURN_TYPES 128 static CHECK_RESULT Ast* parser_parse_rhs(Parser *self); static CHECK_RESULT Ast* parser_parse_body(Parser *self); -static CHECK_RESULT Ast* parser_parse_struct_body(Parser *self); +static CHECK_RESULT StructField* parser_parse_struct_body(Parser *self); static CHECK_RESULT Ast* parser_parse_rhs_binop(Parser *self); static void parser_parse_var_type(Parser *self, VariableType *result); static void parser_parse_var_type_def(Parser *self, VariableType *result); -static void parser_queue_file(Parser *self, BufferView path, FileScopeReference **file_scope); +static void parser_queue_file(Parser *self, BufferView path, ParserFileScopeReference **parser_file_scope); int parser_init(Parser *self, amal_compiler *compiler, ArenaAllocator *allocator) { self->allocator = allocator; @@ -38,12 +41,15 @@ int parser_init(Parser *self, amal_compiler *compiler, ArenaAllocator *allocator self->error.index = 0; self->error.str = NULL; self->error_context = ERROR_CONTEXT_NONE; + self->index = 0; return_if_error(structdecl_init(&self->struct_decl, &compiler->root_scope, allocator)); lhsexpr_init(&self->file_decl, DECL_FLAG_EXTERN | DECL_FLAG_PUB | DECL_FLAG_CONST, create_buffer_view_null()); return_if_error(ast_create(self->allocator, &self->struct_decl, AST_STRUCT_DECL, &self->file_decl.rhs_expr)); + return_if_error(buffer_init(&self->imports, allocator)); + return_if_error(hash_map_init(&self->imports_by_name, allocator, sizeof(usize), hash_map_compare_string, amal_hash_string)); self->current_scope = &self->struct_decl.body; 
self->has_func_parent = bool_false; - am_memset(&self->bytecode, 0, sizeof(self->bytecode)); + self->bytecode = NULL; return PARSER_OK; } @@ -85,27 +91,24 @@ static void parser_parse_body_loop(Parser *self, Scope *scope, Token end_token) /* STRUCT_BODY_LOOP = '{' STRUCT_BODY* '}' */ -static void parser_parse_struct_body_loop(Parser *self, Scope *scope) { +static void parser_parse_struct_body_loop(Parser *self, StructDecl *struct_decl) { int result; throw_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_BRACE)); for(;;) { - Ast *body_obj; + StructField *struct_field; bool is_end_token; throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_BRACE, &is_end_token)); if(is_end_token) break; - body_obj = parser_parse_struct_body(self); - result = scope_add_child(scope, body_obj); + struct_field = parser_parse_struct_body(self); + result = structdecl_add_field(struct_decl, struct_field, self->allocator); if(result == 0) { continue; } else if(result == AST_ERR_DEF_DUP) { - /* TODO: Convert ast type to string for error message */ - BufferView obj_name; - obj_name = ast_get_name(body_obj); - self->error = tokenizer_create_error(&self->tokenizer, - tokenizer_get_code_reference_index(&self->tokenizer, obj_name.data), - "Variable with the name %.*s was declared twice in the struct", obj_name.size, obj_name.data); + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_code_reference_index(&self->tokenizer, struct_field->name.data), + "Variable with the name %.*s was declared twice in the struct", struct_field->name.size, struct_field->name.data); self->error_context = ERROR_CONTEXT_NONE; throw(result); } else { @@ -183,18 +186,32 @@ static void parser_parse_function_return_types(Parser *self, FunctionSignature * if(var_type.type == VARIABLE_TYPE_NONE) { /* If function has no return types */ if(return_type_index == 0) - return; + break; self->error = tokenizer_create_error(&self->tokenizer, - tokenizer_get_error_index(&self->tokenizer), - 
"Expected type or closure signature"); + tokenizer_get_error_index(&self->tokenizer), + "Expected type or closure signature"); throw(PARSER_UNEXPECTED_TOKEN); } throw_if_error(function_signature_add_return_type(func_sig, &var_type)); throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_COMMA, &match)); if(!match) - return; + break; ++return_type_index; } + + if (buffer_get_size(&func_sig->parameters, FunctionParameter) > FUNC_MAX_PARAMS) { + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_error_index(&self->tokenizer), + "A closure can't have more than %d parameters", FUNC_MAX_PARAMS); + throw(PARSER_ERR); + } + + if (buffer_get_size(&func_sig->return_types, FunctionReturnType) > FUNC_MAX_RETURN_TYPES) { + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_error_index(&self->tokenizer), + "A closure can't have more than %d return values", FUNC_MAX_RETURN_TYPES); + throw(PARSER_ERR); + } } /* @@ -399,7 +416,7 @@ static CHECK_RESULT StructDecl* parser_parse_struct_decl(Parser *self) { throw_if_error(structdecl_init(result, self->current_scope, self->allocator)); self->current_scope = &result->body; - parser_parse_struct_body_loop(self, self->current_scope); + parser_parse_struct_body_loop(self, result); self->current_scope = result->body.parent; return result; } @@ -896,8 +913,7 @@ Ast* parser_parse_body(Parser *self) { /* STRUCT_BODY = TOK_IDENTIFIER VAR_TYPE_DEF ';' */ -Ast* parser_parse_struct_body(Parser *self) { - Ast *result; +StructField* parser_parse_struct_body(Parser *self) { BufferView var_name; VariableType var_type; StructField *struct_field; @@ -912,10 +928,9 @@ Ast* parser_parse_struct_body(Parser *self) { throw(PARSER_UNEXPECTED_TOKEN); } throw_if_error(tokenizer_accept(&self->tokenizer, TOK_SEMICOLON)); - throw_if_error(arena_allocator_alloc(self->allocator, sizeof(LhsExpr), (void**)&struct_field)); + throw_if_error(arena_allocator_alloc(self->allocator, sizeof(StructField), (void**)&struct_field)); 
structfield_init(struct_field, var_name, &var_type); - throw_if_error(ast_create(self->allocator, struct_field, AST_STRUCT_FIELD, &result)); - return result; + return struct_field; } /* @@ -974,8 +989,9 @@ static CHECK_RESULT int file_path_join(BufferView directory, BufferView file, ch Path can be path to included library path (or system library path) in which case the path separator is a dot, otherwise the path separator is forward slash '/' */ -void parser_queue_file(Parser *self, BufferView path, FileScopeReference **file_scope) { +void parser_queue_file(Parser *self, BufferView path, ParserFileScopeReference **parser_file_scope) { /* TODO: Parse special path (to include library path with dots) */ + FileScopeReference *file_scope; BufferView file_directory; char *path_relative; int result; @@ -983,7 +999,7 @@ void parser_queue_file(Parser *self, BufferView path, FileScopeReference **file_ file_directory = file_get_parent_directory(self->tokenizer.code_name); throw_if_error(file_path_join(file_directory, path, &path_relative)); /* We want buffer to be null-terminated but null character should not be included for the size */ - result = amal_compiler_internal_load_file(self->compiler, path_relative, file_scope); + result = amal_compiler_internal_load_file(self->compiler, path_relative, &file_scope); if(result != 0) { self->error = tokenizer_create_error(&self->tokenizer, tokenizer_get_code_reference_index(&self->tokenizer, path.data), @@ -992,4 +1008,29 @@ void parser_queue_file(Parser *self, BufferView path, FileScopeReference **file_ throw(result); } am_free(path_relative); + + { + usize parser_file_scope_index; + int num_imports; + BufferView import_path_canonical = create_buffer_view(file_scope->canonical_path.data, file_scope->canonical_path.size); + + if(hash_map_get(&self->imports_by_name, import_path_canonical, &parser_file_scope_index)) { + *parser_file_scope = buffer_get(&self->imports, parser_file_scope_index, sizeof(ParserFileScopeReference*)); + 
return; + } + + num_imports = buffer_get_size(&self->imports, ParserFileScopeReference*); + if(num_imports == 254) { + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_error_index(&self->tokenizer), + "One file can't have more than 255 imports"); + throw(PARSER_ERR); + } + + throw_if_error(arena_allocator_alloc(self->allocator, sizeof(ParserFileScopeReference*), (void**)parser_file_scope)); + (*parser_file_scope)->file_scope_ref = file_scope; + (*parser_file_scope)->import_index = num_imports; + throw_if_error(buffer_append(&self->imports, parser_file_scope, sizeof(ParserFileScopeReference*))); + throw_if_error(hash_map_insert(&self->imports_by_name, import_path_canonical, &parser_file_scope_index)); + } } diff --git a/src/program.c b/src/program.c index eef49b6..a0a2407 100644 --- a/src/program.c +++ b/src/program.c @@ -1,4 +1,5 @@ #include "../include/program.h" + #include "../include/std/mem.h" #include "../include/std/hash.h" #include "../include/std/alloc.h" @@ -29,6 +30,15 @@ typedef struct { NumberUnion value; } Number; +static int hash_map_compare_u64(const void *a, const void *b) { + return *(u64*)a == *(u64*)b; +} + +static usize hash_u64(const u8 *data, usize size) { + (void)size; + return *(u64*)data; +} + int amal_program_init(amal_program *self) { ignore_result_int(buffer_init(&self->data, NULL)); self->string_indices = NULL; @@ -38,16 +48,20 @@ int amal_program_init(amal_program *self) { self->extern_funcs_start = NULL; self->exported_funcs = NULL; self->exported_funcs_end = NULL; + self->imports_start = NULL; self->read_index = 0; - self->main_func_instruction_offset = ~0U; + self->main_func_instruction_offset = ~(u32)0U; self->num_intermediates = 0; self->num_strings = 0; self->num_functions = 0; self->num_extern_functions = 0; self->num_exported_functions = 0; + self->num_imports = 0; + self->return_value_index = 0; cleanup_if_error(arena_allocator_init(&self->allocator)); 
cleanup_if_error(hash_map_init(&self->extern_funcs_map, &self->allocator, sizeof(ProgramExternFunc), hash_map_compare_string, amal_hash_string)); + cleanup_if_error(hash_map_init(&self->deferred_func_calls, &self->allocator, sizeof(Buffer), hash_map_compare_u64, hash_u64)); cleanup_if_error(buffer_append_header(&self->data)); return 0; @@ -60,6 +74,8 @@ void amal_program_deinit(amal_program *self) { arena_allocator_deinit(&self->allocator); am_free(self->extern_func_indices); am_free(self->string_indices); + self->extern_func_indices = NULL; + self->string_indices = NULL; if(self->data.data) buffer_deinit(&self->data); } @@ -75,34 +91,46 @@ int amal_program_add_extern_func(amal_program *self, BufferView name, void *func return hash_map_insert(&self->extern_funcs_map, name, &extern_func); } -static CHECK_RESULT int amal_program_get_extern_func_by_index(amal_program *self, u16 index, ProgramExternFunc *result) { - u8 *extern_func_ptr; - u8 num_args; - u8 func_name_len; - BufferView func_name; - - if(index >= self->num_extern_functions) { - result->func = NULL; - result->args_byte_size = 0; - amal_log_error("Extern func index index %ld is out of range (%ld)", index, self->num_extern_functions); - return AMAL_PROGRAM_INSTRUCTION_INVALID_EXTERN_FUNC_INDEX; +static u8* amal_program_get_extern_funcs_start_by_import_index(amal_program *self, u8 import_index) { + BytecodeHeaderImport *header_import = (BytecodeHeaderImport*)self->imports_start; + header_import += import_index; + return (u8*)self->data.data + header_import->extern_function_index; +} + +typedef struct { + u8 num_params; + BufferView name; +} BytecodeHeaderExternFunction; + +/* TODO: Optimize this */ +static void amal_program_get_header_extern_function_by_index(amal_program *self, u8 import_index, u16 index, BytecodeHeaderExternFunction *result) { + u32 i; + u8 *extern_funcs_start = amal_program_get_extern_funcs_start_by_import_index(self, import_index); + extern_funcs_start += sizeof(u16) + sizeof(u32); + 
for(i = 0; i < (u32)index; ++i) { + u8 name_len = *(extern_funcs_start + sizeof(u8)); + /* +1 for the null-terminated character */ + extern_funcs_start += sizeof(u8) + sizeof(u8) + name_len + 1; } + result->num_params = extern_funcs_start[0]; + result->name.size = extern_funcs_start[1]; + result->name.data = (const char*)extern_funcs_start + sizeof(u8) + sizeof(u8); +} - extern_func_ptr = self->extern_funcs_start + self->extern_func_indices[index]; - am_memcpy(&num_args, extern_func_ptr, sizeof(num_args)); - am_memcpy(&func_name_len, extern_func_ptr + sizeof(num_args), sizeof(func_name_len)); - func_name.size = func_name_len; - func_name.data = (const char*)(extern_func_ptr + sizeof(num_args) + sizeof(func_name_len)); +static CHECK_RESULT int amal_program_get_extern_func_by_index(amal_program *self, u8 import_index, u16 index, ProgramExternFunc *result) { + BytecodeHeaderExternFunction extern_func; + amal_program_get_header_extern_function_by_index(self, import_index, index, &extern_func); - if(!hash_map_get(&self->extern_funcs_map, func_name, result)) { - amal_log_error("No such extern function: %.*s", func_name.size, func_name.data); + if(!hash_map_get(&self->extern_funcs_map, extern_func.name, result)) { + amal_log_error("No such extern function: %.*s", extern_func.name.size, extern_func.name.data); return AMAL_PROGRAM_NO_SUCH_EXTERNAL_FUNCTION; } /* TODO: This assumes all arguments are of size sizeof(isize) */ - if(result->args_byte_size != -1 && result->args_byte_size != num_args * (int)sizeof(isize)) { - amal_log_error("Extern function %.*s was registered to take %d byte(s), but the program says it takes %d byte(s)", func_name.size, func_name.data, result->args_byte_size, num_args * sizeof(isize)); + if(result->args_byte_size != -1 && result->args_byte_size != extern_func.num_params * (int)sizeof(isize)) { + amal_log_error("Extern function %.*s was registered to take %d byte(s), but the program says it takes %d byte(s)", + extern_func.name.size, 
extern_func.name.data, result->args_byte_size, extern_func.num_params * sizeof(isize)); return AMAL_PROGRAM_NO_SUCH_EXTERNAL_FUNCTION; } return 0; @@ -125,7 +153,7 @@ static CHECK_RESULT int amal_program_set_exported_function_instruction_offset_ad num_args = self->exported_funcs[sizeof(instruction_offset)]; func_name_size = self->exported_funcs[sizeof(instruction_offset) + sizeof(num_args)]; self->exported_funcs += sizeof(instruction_offset) + sizeof(num_args) + sizeof(func_name_size); - if(self->main_func_instruction_offset == ~0U && func_name_size == 4 && am_memeql(self->exported_funcs, "main", 4)) + if(self->main_func_instruction_offset == ~(u32)0U && func_name_size == 4 && am_memeql(self->exported_funcs, "main", 4)) self->main_func_instruction_offset = instruction_offset; /* +1 to skip null-termination character */ @@ -191,10 +219,25 @@ static CHECK_RESULT bool amal_program_read_advance(amal_program *self, void *out return bool_true; } +static CHECK_RESULT int amal_program_advance_section_magic_number(amal_program *self) { + u32 magic_number; + if(bytes_left_to_read(self) < sizeof(u32)) + return AMAL_PROGRAM_SECTION_ERROR; + + am_memcpy(&magic_number, self->data.data + self->read_index, sizeof(magic_number)); + if(magic_number != AMAL_BYTECODE_SECTION_MAGIC_NUMBER) + return AMAL_PROGRAM_SECTION_ERROR; + + self->read_index += sizeof(magic_number); + return 0; +} + static CHECK_RESULT int amal_program_read_intermediates(amal_program *self) { u32 intermediates_size; /*u32 read_end;*/ + return_if_error(amal_program_advance_section_magic_number(self)); + if(bytes_left_to_read(self) < sizeof(intermediates_size)) { amal_log_error("Not enough space in program to intermediates size"); return AMAL_PROGRAM_INVALID_INTERMEDIATES; @@ -219,6 +262,8 @@ static CHECK_RESULT int amal_program_read_strings(amal_program *self) { u32 strings_size; u32 *string_index_ptr; + return_if_error(amal_program_advance_section_magic_number(self)); + if(!amal_program_read_advance(self, 
&self->num_strings, sizeof(u16))) return AMAL_PROGRAM_INVALID_STRINGS; @@ -262,8 +307,18 @@ static CHECK_RESULT int amal_program_read_strings(amal_program *self) { } static CHECK_RESULT int amal_program_read_functions(amal_program *self) { + u32 funcs_size; + + return_if_error(amal_program_advance_section_magic_number(self)); + if(!amal_program_read_advance(self, &self->num_functions, sizeof(u16))) return AMAL_PROGRAM_INVALID_FUNCTIONS; + + if(!amal_program_read_advance(self, &funcs_size, sizeof(funcs_size)) || bytes_left_to_read(self) < funcs_size) + return AMAL_PROGRAM_INVALID_FUNCTIONS; + + self->funcs_start = (u8*)(self->data.data + self->read_index); + self->read_index += funcs_size; return AMAL_PROGRAM_OK; } @@ -271,6 +326,8 @@ static CHECK_RESULT int amal_program_read_external_functions(amal_program *self) u32 extern_funcs_size; u32 *extern_func_index_ptr; + return_if_error(amal_program_advance_section_magic_number(self)); + if(!amal_program_read_advance(self, &self->num_extern_functions, sizeof(u16))) return AMAL_PROGRAM_INVALID_EXTERNAL_FUNCTIONS; @@ -291,17 +348,17 @@ static CHECK_RESULT int amal_program_read_external_functions(amal_program *self) const u32 read_end = read_start + extern_funcs_size; self->extern_funcs_start = (u8*)(self->data.data + self->read_index); while(self->read_index < read_end) { - u8 num_args; + u8 num_params; u8 func_name_size; - if(bytes_left_to_read(self) < sizeof(num_args) + sizeof(func_name_size)) + if(bytes_left_to_read(self) < sizeof(num_params) + sizeof(func_name_size)) return AMAL_PROGRAM_INVALID_EXTERNAL_FUNCTIONS; *extern_func_index_ptr = self->read_index - read_start; ++extern_func_index_ptr; - num_args = self->data.data[self->read_index]; - func_name_size = self->data.data[self->read_index + sizeof(num_args)]; - self->read_index += sizeof(num_args) + sizeof(func_name_size); + num_params = self->data.data[self->read_index]; + func_name_size = self->data.data[self->read_index + sizeof(num_params)]; + self->read_index 
+= sizeof(num_params) + sizeof(func_name_size); /* +1 to skip null-termination character */ if(bytes_left_to_read(self) < func_name_size + 1U) @@ -318,6 +375,8 @@ static CHECK_RESULT int amal_program_read_external_functions(amal_program *self) static CHECK_RESULT int amal_program_read_exported_functions(amal_program *self) { u32 export_funcs_size; + return_if_error(amal_program_advance_section_magic_number(self)); + if(!amal_program_read_advance(self, &self->num_exported_functions, sizeof(u16))) return AMAL_PROGRAM_INVALID_EXPORTED_FUNCTIONS; @@ -337,6 +396,22 @@ static CHECK_RESULT int amal_program_read_exported_functions(amal_program *self) return AMAL_PROGRAM_OK; } +static CHECK_RESULT int amal_program_read_imports(amal_program *self) { + u32 imports_size; + + return_if_error(amal_program_advance_section_magic_number(self)); + + if(!amal_program_read_advance(self, &self->num_imports, sizeof(u8))) + return AMAL_PROGRAM_INVALID_IMPORTS; + + if(!amal_program_read_advance(self, &imports_size, sizeof(imports_size)) || bytes_left_to_read(self) < imports_size) + return AMAL_PROGRAM_INVALID_IMPORTS; + + self->imports_start = (u8*)(self->data.data + self->read_index); + self->read_index += imports_size; + return AMAL_PROGRAM_OK; +} + static CHECK_RESULT int amal_program_get_intermediate_by_index(amal_program *self, u16 index, Number *result) { if(index >= self->num_intermediates) return AMAL_PROGRAM_INSTRUCTION_INVALID_INTERMEDIATE_INDEX; @@ -360,6 +435,48 @@ static CHECK_RESULT int amal_program_get_data_by_index(amal_program *self, u16 i return 0; } +static u8* amal_program_get_funcs_start_by_import_index(amal_program *self, u8 import_index) { + BytecodeHeaderImport *header_import = (BytecodeHeaderImport*)self->imports_start; + header_import += import_index; + return (u8*)self->data.data + header_import->function_index; +} + +static BytecodeHeaderFunction* amal_program_get_header_function_by_index(amal_program *self, u8 import_index, u16 index) { + u8 *funcs_start = 
amal_program_get_funcs_start_by_import_index(self, import_index); + BytecodeHeaderFunction *header_func = (BytecodeHeaderFunction*)(funcs_start + sizeof(u16) + sizeof(u32)); + return header_func + index; +} + +static u64 deferred_func_call_get_key(amal_program *self, u8 import_index, u16 func_index) { + BytecodeHeaderImport *header_import = (BytecodeHeaderImport*)self->imports_start; + header_import += import_index; + return ((u64)func_index << 32) | (u64)header_import->function_index; +} + +static CHECK_RESULT int resolve_deferred_func_calls(amal_program *self, amal_executor *executor, u16 func_index) { + u64 key = deferred_func_call_get_key(self, 0, func_index); + BufferView key_mem = create_buffer_view((char*)&key, sizeof(key)); + u32 current_code_offset = amal_exec_get_code_offset(executor); + + Buffer* deferred_func_call_list; + if(!hash_map_get_ref(&self->deferred_func_calls, key_mem, (void**)&deferred_func_call_list)) + return 0; + + { + u32 *code_offset = buffer_begin(deferred_func_call_list); + u32 *code_offset_end = buffer_end(deferred_func_call_list); + for(; code_offset != code_offset_end; ++code_offset) { + amal_exec_call_overwrite(executor, *code_offset, current_code_offset - *code_offset); + } + return buffer_set_capacity(deferred_func_call_list, 0); + } +} + +static void header_func_set_offset(amal_program *self, u16 func_index, u32 code_offset) { + BytecodeHeaderFunction *header_func = ((BytecodeHeaderFunction*)self->funcs_start) + func_index; + header_func->func_offset = code_offset; +} + static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_executor *executor) { u32 instructions_size; u32 read_start; @@ -370,6 +487,9 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ inside_func = bool_false; (void)inside_func; func_counter = 0; + self->return_value_index = 0; + + return_if_error(amal_program_advance_section_magic_number(self)); if(!amal_program_read_advance(self, &instructions_size, 
sizeof(instructions_size))) return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE; @@ -484,14 +604,51 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ self->read_index += 2; break; } + case AMAL_OP_PUSH_RET: { + /* TODO: Validate return value index doesn't go out of bounds? */ + self->return_values_stack[self->return_value_index++] = self->data.data[self->read_index]; + self->read_index += 1; + break; + } case AMAL_OP_CALL: { + u8 import_index; u16 func_index; u8 num_args; + BytecodeHeaderFunction *func_def; i8 dst_reg; - am_memcpy(&func_index, self->data.data + self->read_index, sizeof(func_index)); - num_args = self->data.data[self->read_index + sizeof(func_index)]; - dst_reg = self->data.data[self->read_index + sizeof(func_index) + sizeof(num_args)]; - return_if_error(amal_exec_call(executor, func_index, num_args, dst_reg)); + + am_memcpy(&import_index, self->data.data + self->read_index, sizeof(import_index)); + am_memcpy(&func_index, self->data.data + self->read_index + sizeof(import_index), sizeof(func_index)); + am_memcpy(&num_args, self->data.data + self->read_index + sizeof(import_index) + sizeof(func_index), sizeof(num_args)); + func_def = amal_program_get_header_function_by_index(self, import_index, func_index); + assert(func_def->num_return_types == 1 && "TODO: Support 0 and more than 1 return values"); + assert(self->return_value_index == 1); + dst_reg = self->return_values_stack[0]; + self->return_value_index -= func_def->num_return_types; + + if((char*)func_def < self->data.data + self->read_index) { + return_if_error(amal_exec_call(executor, func_def->func_offset, num_args, dst_reg)); + } else { + /* + The code for the function has not been generated yet (the function is defined after the current location). 
+ Make a dummy call and replace the call target after the function has been generated + */ + u64 key = deferred_func_call_get_key(self, import_index, func_index); + BufferView key_mem = create_buffer_view((char*)&key, sizeof(key)); + u32 code_offset = amal_exec_get_code_offset(executor); + + Buffer* deferred_func_call_list; + if(hash_map_get_ref(&self->deferred_func_calls, key_mem, (void**)&deferred_func_call_list)) + return_if_error(buffer_append(deferred_func_call_list, &code_offset, sizeof(code_offset))); + else { + Buffer new_deferred_call_list; + return_if_error(buffer_init(&new_deferred_call_list, &self->allocator)); + return_if_error(buffer_append(&new_deferred_call_list, &code_offset, sizeof(code_offset))); + return_if_error(hash_map_insert(&self->deferred_func_calls, key_mem, &new_deferred_call_list)); + } + /* Dummy call to offset 0, offset will be replace later when the target function hits AMAL_OP_FUNC_START */ + return_if_error(amal_exec_call(executor, 0, num_args, dst_reg)); + } self->read_index += 4; break; } @@ -500,16 +657,21 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ self->read_index += 2; break; case AMAL_OP_CALLE: { + u8 import_index; u16 extern_func_index; u8 num_args; i8 dst_reg; - am_memcpy(&extern_func_index, self->data.data + self->read_index, sizeof(extern_func_index)); - num_args = self->data.data[self->read_index + sizeof(extern_func_index)]; - dst_reg = self->data.data[self->read_index + sizeof(extern_func_index) + sizeof(num_args)]; + + am_memcpy(&import_index, self->data.data + self->read_index, sizeof(import_index)); + am_memcpy(&extern_func_index, self->data.data + self->read_index + sizeof(import_index), sizeof(extern_func_index)); + am_memcpy(&num_args, self->data.data + self->read_index + sizeof(import_index) + sizeof(extern_func_index), sizeof(num_args)); + assert(self->return_value_index == 1 && "TODO: Support extern functions that don't return any value"); + dst_reg = 
self->return_values_stack[0]; + self->return_value_index = 0; { ProgramExternFunc extern_func; - return_if_error(amal_program_get_extern_func_by_index(self, extern_func_index, &extern_func)); + return_if_error(amal_program_get_extern_func_by_index(self, import_index, extern_func_index, &extern_func)); return_if_error(amal_exec_calle(executor, extern_func.func, num_args, dst_reg)); } self->read_index += 4; @@ -549,6 +711,9 @@ static CHECK_RESULT int amal_program_read_instructions(amal_program *self, amal_ assert(!inside_func); inside_func = bool_true; assert(func_counter < self->num_functions); + + header_func_set_offset(self, func_counter, amal_exec_get_code_offset(executor)); + return_if_error(resolve_deferred_func_calls(self, executor, func_counter)); ++func_counter; func_flags = self->data.data[self->read_index]; @@ -595,9 +760,10 @@ int amal_program_run(amal_program *self) { cleanup_if_error(amal_program_read_functions(self)); cleanup_if_error(amal_program_read_external_functions(self)); cleanup_if_error(amal_program_read_exported_functions(self)); + cleanup_if_error(amal_program_read_imports(self)); cleanup_if_error(amal_program_read_instructions(self, executor)); } - if(self->main_func_instruction_offset == ~0U) { + if(self->main_func_instruction_offset == ~(u32)0U) { amal_log_error("The program is missing a main function"); result = AMAL_PROGRAM_NO_MAIN_FUNC; goto cleanup; diff --git a/src/ssa/ssa.c b/src/ssa/ssa.c index 5c625b2..19aa036 100644 --- a/src/ssa/ssa.c +++ b/src/ssa/ssa.c @@ -26,6 +26,8 @@ do { \ /* Max length of a string that fits in u16 */ #define MAX_STRING_LENGTH UINT16_MAX +static CHECK_RESULT SsaRegister variable_generate_ssa(Variable *self, SsaCompilerContext *context); + static int compare_number(const void *a, const void *b) { const SsaNumber *lhs = a; const SsaNumber *rhs = b; @@ -62,13 +64,14 @@ int ssa_init(Ssa *self, Parser *parser) { return_if_error(hash_map_init(&self->extern_funcs_map, parser->allocator, 
sizeof(SsaExternFuncIndex), hash_map_compare_string, amal_hash_string)); return_if_error(buffer_init(&self->extern_funcs, parser->allocator)); return_if_error(buffer_init(&self->export_funcs, parser->allocator)); + return_if_error(buffer_init(&self->funcs, parser->allocator)); self->intermediate_counter = 0; self->string_counter = 0; self->extern_func_counter = 0; self->export_func_counter = 0; + self->func_counter = 0; self->reg_counter = 0; self->param_counter = 0; - self->func_counter = 0; self->label_counter = 0; self->parser = parser; return 0; @@ -186,7 +189,7 @@ static CHECK_RESULT int ssa_try_add_extern_func(Ssa *self, FunctionSignature *fu *result_index = self->extern_func_counter; ++self->extern_func_counter; - amal_log_debug("ef%u = \"%.*s\"", *result_index, name.size, name.data); + amal_log_debug("extern_func%u = %.*s", *result_index, name.size, name.data); { SsaExternFunc extern_func; extern_func.func_sig = func_sig; @@ -209,7 +212,7 @@ static CHECK_RESULT int ssa_try_add_export_func(Ssa *self, FunctionSignature *fu return -1; } - amal_log_debug("ef%u = \"%.*s\"", self->export_func_counter, name.size, name.data); + amal_log_debug("exported_func%u = %.*s", self->export_func_counter, name.size, name.data); ++self->export_func_counter; { SsaExportFunc export_func; @@ -274,7 +277,7 @@ static CHECK_RESULT int ssa_ins_binop(Ssa *self, SsaInstruction binop_type, SsaR return ssa_add_ins_form2(self, binop_type, lhs, rhs, result); } -static CHECK_RESULT int ssa_ins_func_start(Ssa *self, u8 func_flags, SsaFuncIndex *result, usize *func_metadata_index) { +static CHECK_RESULT int ssa_ins_func_start(Ssa *self, u8 func_flags, FunctionSignature *func_sig, SsaFuncIndex *result, usize *func_metadata_index) { const u8 ins_type = SSA_FUNC_START; SsaInsFuncStart ins_func_start; @@ -285,13 +288,18 @@ static CHECK_RESULT int ssa_ins_func_start(Ssa *self, u8 func_flags, SsaFuncInde } *result = self->func_counter++; + { + SsaFunc func; + func.func_sig = func_sig; + 
return_if_error(buffer_append(&self->funcs, &func, sizeof(func))); + } ins_func_start.flags = func_flags; /* Dont set number of local registers yet. That will be set by @func_metadata_index later when it's known */ /*ins_func_start.num_local_vars_regs = ---*/ return_if_error(buffer_append(&self->instructions, &ins_type, 1)); return_if_error(buffer_append(&self->instructions, &ins_func_start, sizeof(ins_func_start))); *func_metadata_index = self->instructions.size - sizeof(ins_func_start.num_local_vars_regs); - amal_log_debug("FUNC_START f%u", *result); + amal_log_debug("FUNC_START f%u(%d) %d", *result, buffer_get_size(&func_sig->parameters, FunctionParameter), buffer_get_size(&func_sig->return_types, FunctionReturnType)); return 0; } @@ -308,27 +316,31 @@ static CHECK_RESULT int ssa_ins_push(Ssa *self, SsaRegister reg) { return buffer_append(&self->instructions, &reg, sizeof(reg)); } -static CHECK_RESULT int ssa_ins_call(Ssa *self, FunctionDecl *func_decl, u8 num_args, SsaRegister *result) { +static CHECK_RESULT int ssa_ins_push_ret(Ssa *self, SsaRegister reg) { + const u8 ins_type = SSA_PUSH_RET; + amal_log_debug("PUSH RET r%d", reg); + return_if_error(buffer_append(&self->instructions, &ins_type, 1)); + return buffer_append(&self->instructions, &reg, sizeof(reg)); +} + +static CHECK_RESULT int ssa_ins_call(Ssa *self, int import_index, FunctionDecl *func_decl, u8 num_args) { const u8 ins_type = SSA_CALL; SsaInsFuncCall ins_func_call; - return_if_error(ssa_get_unique_reg(self, result)); ins_func_call.num_args = num_args; - ins_func_call.result = *result; ins_func_call.func_decl = func_decl; - amal_log_debug("r%d = CALL %d, %p", *result, num_args, func_decl); + ins_func_call.import_index = import_index; + amal_log_debug("CALL %d, f(%d,%p)", num_args, import_index, func_decl); return_if_error(buffer_append(&self->instructions, &ins_type, 1)); return buffer_append(&self->instructions, &ins_func_call, sizeof(ins_func_call)); } -static CHECK_RESULT int ssa_ins_call_extern(Ssa 
*self, SsaExternFuncIndex extern_func_index, u8 num_args, SsaRegister *result) { +static CHECK_RESULT int ssa_ins_call_extern(Ssa *self, int import_index, LhsExpr *func_decl_lhs, u8 num_args) { const u8 ins_type = SSA_CALL_EXTERN; SsaInsFuncCallExtern ins_func_call_extern; - return_if_error(ssa_get_unique_reg(self, result)); - assert(extern_func_index < self->extern_func_counter); ins_func_call_extern.num_args = num_args; - ins_func_call_extern.result = *result; - ins_func_call_extern.extern_func_index = extern_func_index; - amal_log_debug("r%d = CALL_EXTERN %d, %d", *result, num_args, extern_func_index); + ins_func_call_extern.func_decl_lhs = func_decl_lhs; + ins_func_call_extern.import_index = import_index; + amal_log_debug("CALL_EXTERN %d, ef(%d,%p)", num_args, import_index, func_decl_lhs); return_if_error(buffer_append(&self->instructions, &ins_type, 1)); return buffer_append(&self->instructions, &ins_func_call_extern, sizeof(ins_func_call_extern)); } @@ -608,10 +620,11 @@ static CHECK_RESULT SsaRegister funcdecl_generate_ssa(FunctionDecl *self, SsaCom amal_log_debug("SSA funcdecl %p", self); /* Anonymous closure doesn't have lhs_expr, and neither can it have any flags (extern, export etc) */ if(self->lhs_expr) { + assert(!LHS_EXPR_IS_EXTERN(self->lhs_expr)); if(LHS_EXPR_IS_EXPORT(self->lhs_expr)) func_flags |= FUNC_FLAG_EXPORTED; } - throw_if_error(ssa_ins_func_start(context->ssa, func_flags, &self->ssa_func_index, &func_metadata_index)); + throw_if_error(ssa_ins_func_start(context->ssa, func_flags, self->signature, &self->ssa_func_index, &func_metadata_index)); scope_generate_ssa(&self->body, context); throw_if_error(ssa_ins_func_end(context->ssa)); @@ -624,28 +637,41 @@ static CHECK_RESULT SsaRegister funccall_generate_ssa(FunctionCall *self, AstRes SsaRegister reg; FunctionDecl *func_decl; LhsExpr *func_lhs_expr; + int import_index = context->import_index; + context->import_index = 0; + throw_if_error(ssa_get_unique_reg(context->ssa, &reg)); + + func_decl = 
resolve_data->type.value.func_sig->func_decl; + assert(resolve_data->type.type == RESOLVED_TYPE_FUNC_SIG); + func_lhs_expr = NULL; + if(self->func.resolved_var.type == NAMED_OBJECT_LHS_EXPR) + func_lhs_expr = self->func.resolved_var.value.lhs_expr; + + /* Push return arguments */ + { + /* + TODO: When amalgam supports multiple return types in assignment/declaration, update this to take + all of them into account. Right now it only uses one return type. + It should also take into account the size of the type. + */ + throw_if_error(ssa_ins_push_ret(context->ssa, reg)); + } + /* Push parameter arguments */ { Ast **arg = buffer_begin(&self->args); Ast **arg_end = buffer_end(&self->args); for(; arg != arg_end; ++arg) { - SsaRegister arg_reg; - arg_reg = ast_generate_ssa(*arg, context); + SsaRegister arg_reg = ast_generate_ssa(*arg, context); throw_if_error(ssa_ins_push(context->ssa, arg_reg)); } } - func_decl = resolve_data->type.value.func_sig->func_decl; - assert(resolve_data->type.type == RESOLVED_TYPE_FUNC_SIG); - func_lhs_expr = NULL; - if(self->func.resolved_var.type == NAMED_OBJECT_LHS_EXPR) - func_lhs_expr = self->func.resolved_var.value.lhs_expr; - if(func_lhs_expr && LHS_EXPR_IS_EXTERN(func_lhs_expr)) { - throw_if_error(ssa_ins_call_extern(context->ssa, func_lhs_expr->extern_index, buffer_get_size(&self->args, Ast*), &reg)); + throw_if_error(ssa_ins_call_extern(context->ssa, import_index, func_lhs_expr, buffer_get_size(&self->args, Ast*))); } else { /* rhs wont be null here because only extern variable can't have rhs */ - throw_if_error(ssa_ins_call(context->ssa, func_decl, buffer_get_size(&self->args, Ast*), &reg)); + throw_if_error(ssa_ins_call(context->ssa, import_index, func_decl, buffer_get_size(&self->args, Ast*))); } return reg; @@ -673,7 +699,7 @@ static CHECK_RESULT SsaRegister string_generate_ssa(String *self, SsaCompilerCon return reg; } -static CHECK_RESULT SsaRegister variable_generate_ssa(Variable *self, SsaCompilerContext *context) { +SsaRegister 
variable_generate_ssa(Variable *self, SsaCompilerContext *context) { /* TODO: If resolved_var refers to a variable in another file, use a cross file reference that requires no locking (not yet implemented) */ /* This is not thread-safe:*/ assert(self->resolved_var.type != NAMED_OBJECT_NONE); @@ -699,6 +725,16 @@ static SsaInstruction binop_type_to_ssa_type(BinopType binop_type, amal_default_ return 0; } +/* Returns the import statement for lhs of binop dot expression, where lhs is a variable name */ +static Import* binop_lhs_get_import_or_null(Binop *self) { + if(self->lhs->type == AST_VARIABLE) { + ScopeNamedObject *resolved_var = &self->lhs->value.variable->resolved_var; + if(resolved_var->type == NAMED_OBJECT_LHS_EXPR && resolved_var->value.lhs_expr->rhs_expr && resolved_var->value.lhs_expr->rhs_expr->type == AST_IMPORT) + return resolved_var->value.lhs_expr->rhs_expr->value.import; + } + return NULL; +} + static CHECK_RESULT SsaRegister binop_generate_ssa(Binop *self, SsaCompilerContext *context) { SsaRegister reg; @@ -707,8 +743,12 @@ static CHECK_RESULT SsaRegister binop_generate_ssa(Binop *self, SsaCompilerConte const std = @import("std.amal"); std.printf */ - if(self->type == BINOP_DOT && resolved_type_is_func_decl(self->rhs)) { + if(self->type == BINOP_DOT && self->rhs->resolve_data.type.type == RESOLVED_TYPE_FUNC_SIG) { + Import *lhs_import = binop_lhs_get_import_or_null(self); + if(lhs_import) + context->import_index = 1 + lhs_import->file_scope->import_index; reg = ast_generate_ssa(self->rhs, context); + context->import_index = 0; } else { const SsaRegister lhs_reg = ast_generate_ssa(self->lhs, context); const SsaRegister rhs_reg = ast_generate_ssa(self->rhs, context); @@ -774,9 +814,10 @@ static void while_statement_generate_ssa(WhileStatement *while_stmt, SsaCompiler SsaLabelIndex before_condition_label; SsaLabelIndex skip_body_label; usize jump_after_condition_index; + SsaRegister condition_reg; throw_if_error(ssa_ins_label(context->ssa, 
&before_condition_label)); - SsaRegister condition_reg = ast_generate_ssa(while_stmt->condition, context); + condition_reg = ast_generate_ssa(while_stmt->condition, context); throw_if_error(ssa_ins_jumpzero(context->ssa, condition_reg, 0, &jump_after_condition_index)); scope_generate_ssa(&while_stmt->body, context); diff --git a/src/std/buffer.c b/src/std/buffer.c index a482bb9..021fce8 100644 --- a/src/std/buffer.c +++ b/src/std/buffer.c @@ -94,6 +94,19 @@ void buffer_clear(Buffer *self) { self->size = 0; } +int buffer_set_capacity(Buffer *self, usize new_capacity) { + if(am_realloc(self->data, new_capacity, (void**)&self->data) != ALLOC_OK) + return BUFFER_ALLOC_FAIL; + + self->capacity = new_capacity; + if(self->size < self->capacity) + self->size = self->capacity; + /* Update list of buffers in the allocator with the new address of the buffer data */ + if(self->allocator) + am_memcpy(self->allocator->mems.data + sizeof(void*) * self->allocator_index, &self->data, sizeof(void*)); + return BUFFER_OK; +} + void* buffer_begin(Buffer *self) { return self->data; } diff --git a/src/std/buffer_view.c b/src/std/buffer_view.c index f2d79c0..249928b 100644 --- a/src/std/buffer_view.c +++ b/src/std/buffer_view.c @@ -1,7 +1,7 @@ #include "../../include/std/buffer_view.h" #include "../../include/std/mem.h" -BufferView create_buffer_view_null() { +BufferView create_buffer_view_null(void) { BufferView buffer_view; buffer_view.data = NULL; buffer_view.size = 0; diff --git a/src/std/hash_map.c b/src/std/hash_map.c index 98ebf40..234f3e3 100644 --- a/src/std/hash_map.c +++ b/src/std/hash_map.c @@ -178,6 +178,7 @@ int hash_map_insert(HashMap *self, BufferView key, void *value) { } bool hash_map_get(HashMap *self, BufferView key, void *value) { +#if 0 usize bucket_size; usize bucket_index; usize hash; @@ -199,6 +200,37 @@ bool hash_map_get(HashMap *self, BufferView key, void *value) { } } + return bool_false; +#endif + void *ref; + if(!hash_map_get_ref(self, key, &ref)) + return 
bool_false; + am_memcpy(value, ref, self->value_type_size); + return bool_true; +} + +bool hash_map_get_ref(HashMap *self, BufferView key, void **value) { + usize bucket_size; + usize bucket_index; + usize hash; + HashMapBucket *bucket; + HashMapBucketNode *bucket_node; + + bucket_size = buffer_get_size(&self->buckets, HashMapBucket); + hash = self->hash_func((const u8*)key.data, key.size); + bucket_index = hash % bucket_size; + + bucket = ((HashMapBucket*)self->buckets.data) + bucket_index; + for(bucket_node = bucket->start; bucket_node; bucket_node = bucket_node_get_next(bucket_node)) { + BufferView bucket_key; + bucket_key = bucket_node_get_key(bucket_node); + if(hash == bucket_node_get_hash(bucket_node) && self->compare_func(&key, &bucket_key) == 0) { + if(value) + *value = bucket_node_get_value(bucket_node); + return bool_true; + } + } + return bool_false; } diff --git a/src/std/log.c b/src/std/log.c index 59e0319..68f1f2e 100644 --- a/src/std/log.c +++ b/src/std/log.c @@ -10,14 +10,14 @@ static amal_mutex mutex; static bool mutex_initialized = bool_false; /* Safe to call multiple times */ -static void mutex_init() { +static void mutex_init(void) { if(!mutex_initialized) { ignore_result_int(amal_mutex_init(&mutex)); mutex_initialized = bool_true; } } -amal_mutex* amal_log_get_mutex() { +amal_mutex* amal_log_get_mutex(void) { mutex_init(); return &mutex; } diff --git a/src/std/mem.c b/src/std/mem.c index 95edcb9..8658781 100644 --- a/src/std/mem.c +++ b/src/std/mem.c @@ -18,7 +18,6 @@ void am_memset(void *dest, int value, usize size) { memset(dest, value, size); } -long am_pagesize() { +long am_pagesize(void) { return sysconf(_SC_PAGESIZE); } - diff --git a/src/std/thread.c b/src/std/thread.c index 87362d2..9b9b764 100644 --- a/src/std/thread.c +++ b/src/std/thread.c @@ -120,11 +120,11 @@ void amal_mutex_deinit(amal_mutex *self) { pthread_mutex_destroy(&self->mutex); } -static long amal_process_get_id() { +static long amal_process_get_id(void) { return 
getpid(); } -static long amal_thread_get_id() { +static long amal_thread_get_id(void) { return syscall(SYS_gettid); } @@ -173,11 +173,11 @@ void amal_mutex_tryunlock(amal_mutex *self) { ignore_result_int(amal_mutex_unlock(self)); } -bool amal_thread_is_main() { +bool amal_thread_is_main(void) { /* TODO: This only works for linux, use equivalent functions on other platforms */ return amal_thread_get_id() == amal_process_get_id(); } -int amal_get_usable_thread_count() { +int amal_get_usable_thread_count(void) { return get_nprocs(); } diff --git a/std/io.amal b/std/io.amal new file mode 100644 index 0000000..9023280 --- /dev/null +++ b/std/io.amal @@ -0,0 +1,5 @@ +extern const print_extern: fn() i32; + +pub const print = fn() i32 { + return print_extern(); +} \ No newline at end of file diff --git a/tests/bytecode.amal b/tests/bytecode.amal index ef255cd..6941bc8 100644 --- a/tests/bytecode.amal +++ b/tests/bytecode.amal @@ -1,8 +1,9 @@ -extern const print_extern: fn; -extern const print_extern_num: fn(num: i32); - -const print = fn { +/* +extern const print_extern: fn() i32; +extern const print_extern_num: fn(num: i32) i32; +const print = fn() i32 { + return 0; } const main = fn { @@ -21,4 +22,10 @@ const print_num = fn(num: i32) i32 { print_extern_num(num); print_extern_num(8080); return num; -} \ No newline at end of file +} +*/ + +const io = @import("../std/io.amal"); +const main = fn { + io.print(); +} diff --git a/tests/main.c b/tests/main.c index b2b53cc..eec3a78 100644 --- a/tests/main.c +++ b/tests/main.c @@ -19,7 +19,7 @@ static int num_tests_run = 0; }\ }while(0) -static CHECK_RESULT int test_hash_map() { +static CHECK_RESULT int test_hash_map(void) { ArenaAllocator arena_allocator; HashMapType(BufferView, int) hash_map; int value; @@ -132,7 +132,7 @@ static CHECK_RESULT int get_thread_count_env_var(int *thread_count) { return 0; } -static int print_extern() { +static int print_extern(void) { printf("hello from amalgam extern func, print_extern!\n"); return 
0; } @@ -228,7 +228,7 @@ static void test_load_error(const char *filepath, const char *expected_error) { free(expected_data.expected_error); } -static void run_all_tests() { +static void run_all_tests(void) { test_load("tests/main.amal"); test_load("tests/utf8bom.amal"); test_load("tests/bytecode.amal"); -- cgit v1.2.3