#include "../../include/bytecode/bytecode.h"
#include "../../include/std/mem.h"
#include "../../include/std/log.h"
#include "../../include/ssa/ssa.h"
#include "../../include/parser.h"
#include "../../include/ast.h"
#include "../../include/compiler.h"
/*
    NOTE(review): the names of the four system headers below were restored from
    what this file uses (assert, fprintf/fputs, memcpy, longjmp) - confirm them
    against the upstream file.
*/
#include <assert.h>
#include <stdio.h> /* TODO: Remove this */
#include <string.h>
#include <setjmp.h>

/*
    Error propagation helpers. Both macros expect a variable named @self with a
    jmp_buf member @env to be in scope; on error they longjmp to it with the
    error code, so they never return to the caller in that case.
*/
#define throw(result) do { throw_debug_msg; longjmp(self->env, (result)); } while(0)
#define throw_if_error(result) \
    do { \
        int return_if_result; \
        return_if_result = (result); \
        if((return_if_result) != 0) \
            throw(return_if_result); \
    } while(0)

/*
    Initializes the bytecode output buffer of @self using @allocator.
    Returns the result of buffer_init.
*/
int bytecode_init(Bytecode *self, ArenaAllocator *allocator) {
    return buffer_init(&self->data, allocator);
}

/*doc(Bytecode)
The layout of the full bytecode is: Header (Intermediates Strings Functions External_Functions Exported_Functions Instructions)*
*/

/*
    Appends the bytecode header (magic number followed by the major, minor and
    patch version) to @program_data.
    Returns 0 on success, otherwise the error reported by buffer_append.
*/
CHECK_RESULT int buffer_append_header(Buffer *program_data) {
    /*doc(Bytecode header)
    # Header layout
    |Type|Field        |Description                                                                 |
    |----|-------------|----------------------------------------------------------------------------|
    |u32 |Magic number |The magic number used to identify an amalgam bytecode file.                 |
    |u8  |Major version|The major version of the bytecode. Updates in this is a breaking change.    |
    |u8  |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
    |u8  |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|

    The versions in the header only changes for every release, not every change.
    */

    const u32 magic_number = AMAL_BYTECODE_MAGIC_NUMBER;
    const u8 major_version = AMAL_BYTECODE_MAJOR_VERSION;
    const u8 minor_version = AMAL_BYTECODE_MINOR_VERSION;
    const u8 patch_version = AMAL_BYTECODE_PATCH_VERSION;

    return_if_error(buffer_append(program_data, &magic_number, 4));
    return_if_error(buffer_append(program_data, &major_version, 1));
    return_if_error(buffer_append(program_data, &minor_version, 1));
    return_if_error(buffer_append(program_data, &patch_version, 1));

    return 0;
}

/*
    Copies @size bytes from @instruction_data into @result.
    Returns @size, so the caller can advance its read pointer by the return value.
*/
static CHECK_RESULT usize ssa_extract_data(u8 *instruction_data, void *result, usize size) {
    am_memcpy(result, instruction_data, size);
    return size;
}

/*
    Appends the intermediates section (see layout below) to the bytecode.
    Throws (longjmp to self->env) if a buffer operation fails.
*/
static void add_intermediates(BytecodeCompilerContext *self) {
    /*doc(Bytecode intermediates)
    # Intermediates layout
    |Type        |Field             |Description                                                                    |
    |------------|------------------|-------------------------------------------------------------------------------|
    |u32         |Intermediates size|The size of the intermediates section, in bytes.                               |
    |Intermediate|Intermediate data |Multiple intermediates, where the total size is defined by @Intermediates size.|

    # Intermediate
    |Type|Field|Description                                         |
    |----|-----|----------------------------------------------------|
    |u8  |Type |The type of the number. 0=integer, 1=float.         |
    |u64 |Value|The type of the value depends on the value of @Type.|
    */

    Ssa *ssa = self->parser->ssa;
    Buffer *instructions = &self->bytecode.data;
    SsaNumber *intermediate = buffer_begin(&ssa->intermediates);
    SsaNumber *intermediates_end = buffer_end(&ssa->intermediates);

    u32 intermediates_size = (sizeof(u8) + sizeof(u64)) * buffer_get_size(&ssa->intermediates, SsaNumber);
    throw_if_error(buffer_expand(instructions, sizeof(u32) + intermediates_size));
    throw_if_error(buffer_append(instructions, &intermediates_size, sizeof(u32)));
    for(; intermediate != intermediates_end; ++intermediate) {
        /*
            Convert to the serialized width first instead of reading one byte
            from the address of the field, which would depend on the byte order
            if the field is wider than u8.
        */
        const u8 number_type = (u8)intermediate->type;
        throw_if_error(buffer_append(instructions, &number_type, sizeof(number_type)));
        /* TODO: Store value using an encoding that will save space when using low numbers */
        throw_if_error(buffer_append(instructions, &intermediate->value.integer, sizeof(u64)));
    }
}

/*
    Appends the strings section (see layout below) to the bytecode.
    Throws (longjmp to self->env) if a buffer operation fails.
*/
static void add_strings(BytecodeCompilerContext *self) {
    /*doc(Bytecode strings)
    # Strings layout
    |Type  |Field            |Description                                                       |
    |------|-----------------|------------------------------------------------------------------|
    |u16   |Number of strings|The number of strings.                                            |
    |u32   |Strings size     |The size of the strings section, in bytes.                        |
    |String|Strings data     |Multiple strings, where the total size is defined by @Strings size|

    # String
    |Type|Field|Description                                                                             |
    |----|-----|----------------------------------------------------------------------------------------|
    |u16 |Size |The size of the string, in bytes. Excluding the null-terminate character.               |
    |u8* |Data |The data of the string, where the size is defined by @Size. Strings are null-terminated.|
    */

    Ssa *ssa = self->parser->ssa;
    Buffer *instructions = &self->bytecode.data;
    BufferView *string = buffer_begin(&ssa->strings);
    BufferView *strings_end = buffer_end(&ssa->strings);
    u32 strings_size = 0;

    /* First pass: compute the size of the section so it can be written before the data */
    for(; string != strings_end; ++string) {
        strings_size += sizeof(u16) + string->size + 1; /* +1 for null-termination of string */
    }
    string = buffer_begin(&ssa->strings);

    throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + strings_size));
    throw_if_error(buffer_append(instructions, &ssa->string_counter, sizeof(u16)));
    throw_if_error(buffer_append(instructions, &strings_size, sizeof(u32)));
    for(; string != strings_end; ++string) {
        const char null_s = '\0';
        /* Serialize the size through a u16 so the written bytes don't depend on the width of the size field */
        const u16 string_size = (u16)string->size;
        throw_if_error(buffer_append(instructions, &string_size, sizeof(string_size)));
        throw_if_error(buffer_append(instructions, string->data, string->size));
        throw_if_error(buffer_append(instructions, &null_s, sizeof(char)));
    }
}

/*
    Appends the internal functions section (only the number of functions) to the bytecode.
    Throws (longjmp to self->env) if the buffer operation fails.
*/
static void add_functions(BytecodeCompilerContext *self) {
    /*doc(Bytecode functions)
    # Internal functions layout
    |Type|Field              |Description                      |
    |----|-------------------|---------------------------------|
    |u16 |Number of functions|The number of internal functions.|
    */

    assert(sizeof(SsaFuncIndex) == sizeof(u16) && "Program decoder needs to be updated since size of func index has changed");
    throw_if_error(buffer_append(&self->bytecode.data, &self->parser->ssa->func_counter, sizeof(u16)));
}

/*
    Appends the external functions section (see layout below) to the bytecode.
    Throws (longjmp to self->env) if a buffer operation fails.
*/
static void add_extern_functions(BytecodeCompilerContext *self) {
    /*doc(Bytecode external functions)
    # External functions layout
    |Type             |Field             |Description                                                                              |
    |-----------------|------------------|-----------------------------------------------------------------------------------------|
    |u16              |num_extern_func   |The number of external functions.                                                        |
    |u32              |extern_funcs_size |The size of the external functions section, in bytes.                                    |
    |External function|External functions|Multiple external functions, where the number of functions is defined by @num_extern_func|

    # External function
    |Type|Field   |Description                                                                                          |
    |----|--------|-----------------------------------------------------------------------------------------------------|
    |u8  |num_args|The number of arguments the functions has.                                                           |
    |u8  |name_len|The length of the external function name, in bytes. Excluding the null-terminate character.          |
    |u8* |name    |The name of the external function, where the size is defined by @name_len. Names are null-terminated.|
    */

    Ssa *ssa = self->parser->ssa;
    Buffer *instructions = &self->bytecode.data;
    SsaExternFunc *extern_func = buffer_begin(&ssa->extern_funcs);
    SsaExternFunc *extern_func_end = buffer_end(&ssa->extern_funcs);
    u32 extern_funcs_size = 0;

    /* First pass: compute the size of the section so it can be written before the data */
    for(; extern_func != extern_func_end; ++extern_func) {
        extern_funcs_size += sizeof(u8) + sizeof(u8) + extern_func->name.size + 1; /* +1 for null-termination of string */
    }
    extern_func = buffer_begin(&ssa->extern_funcs);

    throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + extern_funcs_size));
    throw_if_error(buffer_append(instructions, &ssa->extern_func_counter, sizeof(u16)));
    throw_if_error(buffer_append(instructions, &extern_funcs_size, sizeof(u32)));
    for(; extern_func != extern_func_end; ++extern_func) {
        const char null_s = '\0';
        u8 num_args = buffer_get_size(&extern_func->func_sig->parameters, FunctionParameter);
        /* Serialize the length through a u8 so the written byte doesn't depend on the width of the size field */
        const u8 name_len = (u8)extern_func->name.size;
        throw_if_error(buffer_append(instructions, &num_args, sizeof(num_args)));
        /* TODO: Add namespace to the function name */
        throw_if_error(buffer_append(instructions, &name_len, sizeof(name_len)));
        throw_if_error(buffer_append(instructions, extern_func->name.data, extern_func->name.size));
        throw_if_error(buffer_append(instructions, &null_s, sizeof(char)));
    }

    assert(sizeof(SsaExternFuncIndex) == sizeof(u16) && "Program decoder needs to be updated since size of extern func index has changed");
}

/*
    Appends the exported functions section (see layout below) to the bytecode.
    The instruction offset of every exported function is written as 0.
    Throws (longjmp to self->env) if a buffer operation fails.
*/
static void add_export_functions(BytecodeCompilerContext *self) {
    /*doc(Bytecode exported functions)
    # Exported functions layout
    |Type             |Field             |Description                                                                              |
    |-----------------|------------------|-----------------------------------------------------------------------------------------|
    |u16              |num_export_func   |The number of exported functions.                                                        |
    |u32              |export_funcs_size |The size of the exported functions section, in bytes.                                    |
    |Exported function|Exported functions|Multiple exported functions, where the number of functions is defined by @num_export_func|

    # Exported function
    |Type|Field             |Description                                                                                                               |
    |----|------------------|--------------------------------------------------------------------------------------------------------------------------|
    |u32 |instruction_offset|The offset in the instruction data where the exported function is defined. Is always 0 until the program has been started.|
    |u8  |num_args          |The number of arguments the functions has.                                                                                |
    |u8  |name_len          |The length of the exported function name, in bytes. Excluding the null-terminate character.                               |
    |u8* |name              |The name of the exported function, where the size is defined by @name_len. Names are null-terminated.                     |
    */

    Ssa *ssa = self->parser->ssa;
    Buffer *instructions = &self->bytecode.data;
    SsaExportFunc *export_func = buffer_begin(&ssa->export_funcs);
    SsaExportFunc *export_func_end = buffer_end(&ssa->export_funcs);
    u32 export_funcs_size = 0;

    /* First pass: compute the size of the section so it can be written before the data */
    for(; export_func != export_func_end; ++export_func) {
        export_funcs_size += sizeof(u32) + sizeof(u8) + sizeof(u8) + export_func->name.size + 1; /* +1 for null-termination of string */
    }
    export_func = buffer_begin(&ssa->export_funcs);

    throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + export_funcs_size));
    throw_if_error(buffer_append(instructions, &ssa->export_func_counter, sizeof(u16)));
    throw_if_error(buffer_append(instructions, &export_funcs_size, sizeof(u32)));
    for(; export_func != export_func_end; ++export_func) {
        const char null_s = '\0';
        const u32 instruction_offset = 0;
        u8 num_args = buffer_get_size(&export_func->func_sig->parameters, FunctionParameter);
        /* Serialize the length through a u8 so the written byte doesn't depend on the width of the size field */
        const u8 name_len = (u8)export_func->name.size;
        throw_if_error(buffer_append(instructions, &instruction_offset, sizeof(instruction_offset)));
        throw_if_error(buffer_append(instructions, &num_args, sizeof(num_args)));
        throw_if_error(buffer_append(instructions, &name_len, sizeof(name_len)));
        throw_if_error(buffer_append(instructions, export_func->name.data, export_func->name.size));
        throw_if_error(buffer_append(instructions, &null_s, sizeof(char)));
    }

    assert(sizeof(SsaExportFuncIndex) == sizeof(u16) && "Program decoder needs to be updated since size of export func index has changed");
}

/*
    The add_insN functions below each append one instruction to the bytecode:
    the opcode followed by the operands in parameter order. Each one also
    prints a textual form of the instruction to stderr using @fmt.
    They throw (longjmp to self->env) if the buffer operation fails.
*/

/* Instruction with no operands. @fmt is printed as-is, it takes no format arguments */
static void add_ins1(BytecodeCompilerContext *self, AmalOpcode opcode, const char *fmt) {
    /*
        Convert to the serialized opcode type first instead of copying
        sizeof(AmalOpcodeType) bytes from the address of an AmalOpcode, which
        would depend on the byte order if the two types differ in size.
    */
    const AmalOpcodeType opcode_data = (AmalOpcodeType)opcode;
    throw_if_error(buffer_append(&self->bytecode.data, &opcode_data, sizeof(opcode_data)));
    fputs(fmt, stderr);
    fputc('\n', stderr);
}

/* Instruction with one register operand */
static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, i8 reg, const char *fmt) {
    Buffer *instructions = &self->bytecode.data;
    size_t index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(reg)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &reg, sizeof(reg));
    fprintf(stderr, fmt, reg);
    fputc('\n', stderr);
}

/* Instruction with a destination register and a source register */
static void add_ins3(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_reg, i8 src_reg, const char *fmt) {
    Buffer *instructions = &self->bytecode.data;
    size_t index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(src_reg)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &src_reg, sizeof(src_reg));
    fprintf(stderr, fmt, dst_reg, src_reg);
    fputc('\n', stderr);
}

/* Instruction with one 16-bit operand */
static void add_ins4(BytecodeCompilerContext *self, AmalOpcode opcode, u16 data, const char *fmt) {
    Buffer *instructions = &self->bytecode.data;
    size_t index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(data)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &data, sizeof(data));
    fprintf(stderr, fmt, data);
    fputc('\n', stderr);
}

/* Instruction with a destination register and two more register operands */
static void add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_reg, i8 reg1, i8 reg2, const char *fmt) {
    Buffer *instructions = &self->bytecode.data;
    size_t index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1) + sizeof(reg2)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &reg1, sizeof(reg1));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1), &reg2, sizeof(reg2));
    fprintf(stderr, fmt, dst_reg, reg1, reg2);
    fputc('\n', stderr);
}

/* Instruction with a destination register and one 16-bit operand */
static void add_ins6(BytecodeCompilerContext *self, AmalOpcode opcode, i8 dst_reg, u16 data, const char *fmt) {
    Buffer *instructions = &self->bytecode.data;
    size_t index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(data)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &data, sizeof(data));
    fprintf(stderr, fmt, dst_reg, data);
    fputc('\n', stderr);
}

/* Instruction with a 16-bit index, a number of arguments and a destination register */
static void add_ins7(BytecodeCompilerContext *self, AmalOpcode opcode, u16 idx, i8 num_args, i8 dst_reg, const char *fmt) {
    Buffer *instructions = &self->bytecode.data;
    size_t index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(idx) + sizeof(num_args) + sizeof(dst_reg)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &idx, sizeof(idx));
    instructions->data[index + sizeof(AmalOpcodeType) + sizeof(idx)] = num_args;
    instructions->data[index + sizeof(AmalOpcodeType) + sizeof(idx) + sizeof(num_args)] = dst_reg;
    fprintf(stderr, fmt, idx, num_args, dst_reg);
    fputc('\n', stderr);
}

/*
    Appends the instructions section (see layout below) to the bytecode by
    decoding the ssa instruction stream and emitting one bytecode instruction
    per ssa instruction. Space for the section size is reserved first and
    filled in once all instructions have been emitted.
    Throws (longjmp to self->env) if a buffer operation fails.
*/
static void add_instructions(BytecodeCompilerContext *self) {
    /*doc(Bytecode instructions)
    # Instructions layout
    |Type       |Field            |Description                                                                |
    |-----------|-----------------|---------------------------------------------------------------------------|
    |u32        |Instructions size|The size of the instructions section, in bytes.                            |
    |Instruction|Instructions data|The instructions data. Each instructions begins with an opcode, see #Opcode|
    */

    SsaInsForm1 ssa_ins_form1;
    SsaInsForm2 ssa_ins_form2;
    SsaInsFuncStart ssa_ins_func_start;
    SsaInsFuncCall ssa_ins_func_call;
    SsaInsFuncCallExtern ssa_ins_func_call_extern;
    SsaInsJumpZero ssa_ins_jump_zero;
    SsaInsJump ssa_ins_jump;

    Ssa *ssa = self->parser->ssa;
    u8 *instruction = buffer_begin(&ssa->instructions);
    u8 *instructions_end = buffer_end(&ssa->instructions);
    /* Offset in the bytecode where the size of the instructions section is stored */
    u32 num_instructions_index = self->bytecode.data.size;
    throw_if_error(buffer_append_empty(&self->bytecode.data, sizeof(num_instructions_index)));

    /* TODO: Keep all registers under 256 */

    while(instruction != instructions_end) {
        /* Every ssa instruction starts with a one byte instruction type, followed by its operands */
        SsaInstruction ins = (SsaInstruction)*instruction++;
        switch(ins) {
            case SSA_ASSIGN_INTER: {
                instruction += ssa_extract_data(instruction, &ssa_ins_form1, sizeof(ssa_ins_form1));
                add_ins6(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d");
                break;
            }
            case SSA_ASSIGN_STRING: {
                instruction += ssa_extract_data(instruction, &ssa_ins_form1, sizeof(ssa_ins_form1));
                add_ins6(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d");
                break;
            }
            case SSA_ASSIGN_REG: {
                instruction += ssa_extract_data(instruction, &ssa_ins_form1, sizeof(ssa_ins_form1));
                add_ins3(self, AMAL_OP_MOV, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "mov r%d, r%d");
                break;
            }
            case SSA_ADD: {
                instruction += ssa_extract_data(instruction, &ssa_ins_form2, sizeof(ssa_ins_form2));
                add_ins5(self, AMAL_OP_ADD, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "add r%d, r%d, r%d");
                break;
            }
            case SSA_SUB: {
                instruction += ssa_extract_data(instruction, &ssa_ins_form2, sizeof(ssa_ins_form2));
                add_ins5(self, AMAL_OP_SUB, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "sub r%d, r%d, r%d");
                break;
            }
            case SSA_IMUL: {
                instruction += ssa_extract_data(instruction, &ssa_ins_form2, sizeof(ssa_ins_form2));
                add_ins5(self, AMAL_OP_IMUL, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "imul r%d, r%d, r%d");
                break;
            }
            case SSA_MUL: {
                instruction += ssa_extract_data(instruction, &ssa_ins_form2, sizeof(ssa_ins_form2));
                add_ins5(self, AMAL_OP_MUL, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "mul r%d, r%d, r%d");
                break;
            }
            case SSA_IDIV: {
                instruction += ssa_extract_data(instruction, &ssa_ins_form2, sizeof(ssa_ins_form2));
                add_ins5(self, AMAL_OP_IDIV, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "idiv r%d, r%d, r%d");
                break;
            }
            case SSA_DIV: {
                instruction += ssa_extract_data(instruction, &ssa_ins_form2, sizeof(ssa_ins_form2));
                add_ins5(self, AMAL_OP_DIV, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "div r%d, r%d, r%d");
                break;
            }
            case SSA_EQUALS: {
                instruction += ssa_extract_data(instruction, &ssa_ins_form2, sizeof(ssa_ins_form2));
                add_ins5(self, AMAL_OP_CMP, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "cmp r%d, r%d, r%d");
                break;
            }
            case SSA_FUNC_START: {
                instruction += ssa_extract_data(instruction, &ssa_ins_func_start, sizeof(ssa_ins_func_start));
                add_ins6(self, AMAL_OP_FUNC_START, ssa_ins_func_start.flags, ssa_ins_func_start.num_local_vars_regs, "func_start 0x%02x, %u");
                break;
            }
            case SSA_FUNC_END: {
                add_ins1(self, AMAL_OP_FUNC_END, "func_end");
                break;
            }
            case SSA_PUSH: {
                SsaRegister reg;
                am_memcpy(&reg, instruction, sizeof(SsaRegister));
                instruction += sizeof(SsaRegister);
                add_ins2(self, AMAL_OP_PUSH, reg, "push r%d");
                break;
            }
            case SSA_CALL: {
                /*
                    TODO: Using ssa_func_index only works correctly if the function was defined in the same
                    file as the function call. To make this work with calling functions in other files,
                    ssa_func_index should also have an offset index or something like that.
                    So each file has its own function list with indices and when they need to be combined in the end,
                    the function indices can be increased by their block index (ssa_func_index + block index), where block index
                    is defined as the size of all previous files' number of functions.
                */
                instruction += ssa_extract_data(instruction, &ssa_ins_func_call, sizeof(ssa_ins_func_call));
                add_ins7(self, AMAL_OP_CALL, ssa_ins_func_call.func_decl->ssa_func_index, ssa_ins_func_call.num_args, ssa_ins_func_call.result, "call f%d, %d, r%d");
                break;
            }
            case SSA_CALL_EXTERN: {
                instruction += ssa_extract_data(instruction, &ssa_ins_func_call_extern, sizeof(ssa_ins_func_call_extern));
                add_ins7(self, AMAL_OP_CALLE, ssa_ins_func_call_extern.extern_func_index, ssa_ins_func_call_extern.num_args, ssa_ins_func_call_extern.result, "calle ef%d, %d, r%d");
                break;
            }
            case SSA_JUMP_ZERO: {
                instruction += ssa_extract_data(instruction, &ssa_ins_jump_zero, sizeof(ssa_ins_jump_zero));
                add_ins6(self, AMAL_OP_JZ, ssa_ins_jump_zero.condition_reg, ssa_ins_jump_zero.jump_offset, "jz r%d, %d");
                break;
            }
            case SSA_JUMP: {
                instruction += ssa_extract_data(instruction, &ssa_ins_jump, sizeof(ssa_ins_jump));
                add_ins4(self, AMAL_OP_JMP, ssa_ins_jump.jump_offset, "jmp %d");
                break;
            }
            case SSA_RET: {
                SsaRegister reg;
                am_memcpy(&reg, instruction, sizeof(SsaRegister));
                instruction += sizeof(SsaRegister);
                add_ins2(self, AMAL_OP_RET, reg, "ret r%d");
                break;
            }
        }
    }

    /* Prepend instructions with its size */
    {
        /* -sizeof to remove the size field itself from the size of the instructions */
        const u32 instructions_size = self->bytecode.data.size - num_instructions_index - sizeof(instructions_size);
        am_memcpy(self->bytecode.data.data + num_instructions_index, &instructions_size, sizeof(instructions_size));
    }
}

/*
    Generates the bytecode for the ssa of self->parser by appending, in order,
    the intermediates, strings, functions, external functions, exported
    functions and instructions sections to self->bytecode.
    Throws (longjmp to self->env) on error, so self->env has to be set up
    by the caller before calling this function.
*/
void generate_bytecode_from_ssa(BytecodeCompilerContext *self) {
    add_intermediates(self);
    add_strings(self);
    add_functions(self);
    add_extern_functions(self);
    add_export_functions(self);
    add_instructions(self);
}