From 85c654a102701958d3748e82ecac9c1bc4dbbcba Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 16 Jul 2019 00:27:53 +0200 Subject: Start on real bytecode & doc parsing --- src/bytecode/bytecode.c | 165 ++++++++++++++++++++++++++++++++++++++++++------ src/compiler.c | 9 +++ src/ssa/ssa.c | 12 ++++ 3 files changed, 168 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/bytecode/bytecode.c b/src/bytecode/bytecode.c index 6ceacf0..29d99c0 100644 --- a/src/bytecode/bytecode.c +++ b/src/bytecode/bytecode.c @@ -58,6 +58,31 @@ static CHECK_RESULT usize ssa_extract_jump(u8 *instruction_data, SsaInsJump *res return sizeof(result->jump_offset); } +static void add_header(BytecodeCompilerContext *self) { + /*doc(BytecodeHeader) + # Header layout + |Size|Name |Description | + |----|-------------|----------------------------------------------------------------------------| + |4 |Magic number |The magic number used to identify an amalgam bytecode file. | + |1 |Major version|The major version of the bytecode. Updates in this is a breaking change. | + |1 |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.| + |1 |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.| + The versions in the header only changes for every release, not every change. + */ + + const u32 magic_number = 0xdec05eba; + const u8 major_version = 1; + const u8 minor_version = 0; + const u8 patch_version = 0; + + Buffer *instructions; + instructions = &self->bytecode->instructions; + throw_if_error(buffer_append(instructions, &magic_number, 4)); + throw_if_error(buffer_append(instructions, &major_version, 1)); + throw_if_error(buffer_append(instructions, &minor_version, 1)); + throw_if_error(buffer_append(instructions, &patch_version, 1)); +} + static void add_intermediates(BytecodeCompilerContext *self) { Ssa *ssa; Buffer *instructions; @@ -78,6 +103,107 @@ static void add_intermediates(BytecodeCompilerContext *self) { } } +void add_strings(BytecodeCompilerContext *self) { + Ssa *ssa; + Buffer *instructions; + BufferView *string; + BufferView *strings_end; + + ssa = self->parser->ssa; + instructions = &self->bytecode->instructions; + string = buffer_begin(&ssa->strings); + strings_end = buffer_end(&ssa->strings); + + /* + The 8 here is a arbitrary chosen number since we don't know the actual + size of all strings without counting. The logic is that the average + size of all strings length would be 8. + */ + throw_if_error(buffer_expand(instructions, + sizeof(u16) + (sizeof(u16) + 8) * ssa->strings.size)); + throw_if_error(buffer_append(instructions, &ssa->strings.size, sizeof(u16))); + for(; string != strings_end; ++string) { + throw_if_error(buffer_append(instructions, &string->size, sizeof(u16))); + throw_if_error(buffer_append(instructions, &string->data, string->size)); + } +} + +static void add_ins1(BytecodeCompilerContext *self, AmalOpcode opcode, const char *fmt) { + throw_if_error(buffer_append(&self->bytecode->instructions, &opcode, sizeof(AmalOpcodeType))); + fprintf(stderr, fmt); + fputc('\n', stderr); +} + + +static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, u8 reg, const char *fmt) { + Buffer *instructions; + size_t index; + instructions = &self->bytecode->instructions; + index = instructions->size; + + throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(reg))); + instructions->data[index] = opcode; + memcpy(instructions->data + index + sizeof(AmalOpcodeType), ®, sizeof(reg)); + fprintf(stderr, fmt, reg); + fputc('\n', stderr); +} + +static void add_ins3(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 src_reg, const char *fmt) { + Buffer *instructions; + size_t index; + instructions = &self->bytecode->instructions; + index = instructions->size; + + throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(src_reg))); + instructions->data[index] = opcode; + memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg)); + memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &src_reg, sizeof(src_reg)); + fprintf(stderr, fmt, dst_reg, src_reg); + fputc('\n', stderr); +} + +static void add_ins4(BytecodeCompilerContext *self, AmalOpcode opcode, u16 data, const char *fmt) { + Buffer *instructions; + size_t index; + instructions = &self->bytecode->instructions; + index = instructions->size; + + throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(data))); + instructions->data[index] = opcode; + memcpy(instructions->data + index + sizeof(AmalOpcodeType), &data, sizeof(data)); + fprintf(stderr, fmt, data); + fputc('\n', stderr); +} + +static void add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 reg1, u8 reg2, const char *fmt) { + Buffer *instructions; + size_t index; + instructions = &self->bytecode->instructions; + index = instructions->size; + + throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1) + sizeof(reg2))); + instructions->data[index] = opcode; + memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg)); + memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), ®1, sizeof(reg1)); + memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1), ®2, sizeof(reg2)); + fprintf(stderr, fmt, dst_reg, reg1, reg2); + fputc('\n', stderr); +} + +static void add_ins6(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u16 data, const char *fmt) { + Buffer *instructions; + size_t index; + instructions = &self->bytecode->instructions; + index = instructions->size; + + throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(data))); + instructions->data[index] = opcode; + memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg)); + memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &data, sizeof(data)); + fprintf(stderr, fmt, dst_reg, data); + fputc('\n', stderr); +} + #if 0 #define NUM_MAX_REGS 256 #define NUM_MAX_FUNC_ARGS 32 @@ -251,63 +377,64 @@ static void add_instructions(BytecodeCompilerContext *self) { } } #else - #define ARITH_OP(op) do {\ - instruction += ssa_extract_form2(instruction, &ssa_ins_form2); \ - fprintf(file, "%s r%d, r%d, r%d\n", (op), ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs); \ - } while(0) while(instruction != instructions_end) { SsaInstruction ins = (SsaInstruction)*instruction++; switch(ins) { case SSA_ASSIGN_INTER: { instruction += ssa_extract_form1(instruction, &ssa_ins_form1); - fprintf(file, "mov r%d, i%d\n", ssa_ins_form1.lhs, ssa_ins_form1.rhs); + add_ins3(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d"); break; } case SSA_ASSIGN_STRING: { instruction += ssa_extract_form1(instruction, &ssa_ins_form1); - fprintf(file, "mov r%d, s%d\n", ssa_ins_form1.lhs, ssa_ins_form1.rhs); + add_ins3(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d"); break; } case SSA_ASSIGN_REG: { instruction += ssa_extract_form1(instruction, &ssa_ins_form1); - fprintf(file, "mov r%d, r%d\n", ssa_ins_form1.lhs, ssa_ins_form1.rhs); + add_ins3(self, AMAL_OP_MOV, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "mov r%d, d%d"); break; } case SSA_ADD: { - ARITH_OP("add"); + instruction += ssa_extract_form2(instruction, &ssa_ins_form2); + add_ins5(self, AMAL_OP_ADD, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "add r%d, r%d, r%d"); break; } case SSA_SUB: { - ARITH_OP("sub"); + instruction += ssa_extract_form2(instruction, &ssa_ins_form2); + add_ins5(self, AMAL_OP_SUB, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "sub r%d, r%d, r%d"); break; } case SSA_MUL: { - ARITH_OP("mul"); + instruction += ssa_extract_form2(instruction, &ssa_ins_form2); + add_ins5(self, AMAL_OP_MUL, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "mul r%d, r%d, r%d"); break; } case SSA_DIV: { - ARITH_OP("div"); + instruction += ssa_extract_form2(instruction, &ssa_ins_form2); + add_ins5(self, AMAL_OP_DIV, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "div r%d, r%d, r%d"); break; } case SSA_EQUALS: { - ARITH_OP("eq"); + instruction += ssa_extract_form2(instruction, &ssa_ins_form2); + add_ins5(self, AMAL_OP_CMP, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "cmp r%d, r%d, r%d"); break; } case SSA_FUNC_START: { instruction += ssa_extract_func_start(instruction, &ssa_ins_func_start); - fprintf(file, "FUNC_START %d\n", ssa_ins_func_start.num_args); + add_ins1(self, AMAL_OP_FUNC_START, "func_start"); break; } case SSA_FUNC_END: { - fprintf(file, "FUNC_END\n"); + add_ins1(self, AMAL_OP_FUNC_START, "func_end"); break; } case SSA_PUSH: { SsaRegister reg; am_memcpy(®, instruction, sizeof(SsaRegister)); instruction += sizeof(SsaRegister); - fprintf(file, "push r%d\n", reg); + add_ins2(self, AMAL_OP_PUSH, reg, "push r%d"); break; } case SSA_CALL: { @@ -325,17 +452,17 @@ static void add_instructions(BytecodeCompilerContext *self) { is defined as the size of all previous files' number of functions. */ instruction += ssa_extract_func_call(instruction, &ssa_ins_func_call); - fprintf(file, "call %d\n", ssa_ins_func_call.func_decl->ssa_func_index); + add_ins4(self, AMAL_OP_CALL, ssa_ins_func_call.func_decl->ssa_func_index, "call %d"); break; } case SSA_JUMP_ZERO: { instruction += ssa_extract_jump_zero(instruction, &ssa_ins_jump_zero); - fprintf(file, "jz r%d, %d\n", ssa_ins_jump_zero.condition_reg, ssa_ins_jump_zero.jump_offset); + add_ins6(self, AMAL_OP_JZ, ssa_ins_jump_zero.condition_reg, ssa_ins_jump_zero.jump_offset, "jz r%d, %d"); break; } case SSA_JUMP: { instruction += ssa_extract_jump(instruction, &ssa_ins_jump); - fprintf(file, "jmp %d\n", ssa_ins_jump.jump_offset); + add_ins4(self, AMAL_OP_JMP, ssa_ins_jump.jump_offset, "jmp %d"); break; } default: @@ -348,7 +475,9 @@ static void add_instructions(BytecodeCompilerContext *self) { } void generate_bytecode_from_ssa(BytecodeCompilerContext *self) { + add_header(self); add_intermediates(self); + add_strings(self); /* TODO: Also add strings in ssa, so we can index them */ add_instructions(self); } diff --git a/src/compiler.c b/src/compiler.c index 9f003c8..e7b242b 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -510,6 +510,15 @@ int amal_compiler_internal_load_file(amal_compiler *self, const char *filepath, return_if_error(amal_compiler_select_thread_for_work(self, thread_work_data, &parser_thread_data)); if(main_job) { + /*doc(CompilerFlow) + # Compiler flow + (Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\ + Each step except the last is done using multiple threads in parallel and the output of each step is used + in the next step. The last step is not done in parallel because the last step is combining all bytecode + and writing it to a file, which is an IO bottlenecked operation and it won't benefit from multithreading + and may even lose performance because of it. + */ + return_if_error(amal_compiler_load_file_join_threads(self)); assert(amal_compiler_check_all_threads_done(self)); amal_log_info("Finished parsing all files, resolving AST"); diff --git a/src/ssa/ssa.c b/src/ssa/ssa.c index 91ba185..34e3e3e 100644 --- a/src/ssa/ssa.c +++ b/src/ssa/ssa.c @@ -16,6 +16,9 @@ do { \ throw(return_if_result); \ } while(0) +/* Max length of a string that fits in u16 */ +#define MAX_STRING_LENGTH ((2 << 16) - 1) + static int compare_number(const void *a, const void *b) { const SsaNumber *lhs; const SsaNumber *rhs; @@ -125,6 +128,11 @@ static CHECK_RESULT int ssa_try_add_string(Ssa *self, BufferView str, SsaStringI /* Overflow */ if(self->string_counter + 1 < self->string_counter) return -1; + + if(str.size > MAX_STRING_LENGTH) { + amal_log_error("String \"%.*s\" is longer than %d\n", str.size, str.data, MAX_STRING_LENGTH); + return -2; + } *result_index = self->string_counter; ++self->string_counter; @@ -386,6 +394,10 @@ in any order. */ static CHECK_RESULT SsaRegister funcdecl_generate_ssa(FunctionDecl *self, SsaCompilerContext *context) { /* TODO: Implement */ + /* + Reset reg counter in each function, because each function has a separate register context + that is reset after function end + */ SsaRegister prev_reg_counter; prev_reg_counter = context->ssa->reg_counter; context->ssa->reg_counter = 0; -- cgit v1.2.3