#include "../../include/bytecode/bytecode.h"
#include "../../include/std/mem.h"
#include "../../include/std/log.h"
#include "../../include/ssa/ssa.h"
#include "../../include/parser.h"
#include "../../include/compiler.h"
#include <assert.h>
#include <stdio.h> /* TODO: Remove this */
#include <stdlib.h>
#include <string.h>

/*
    Aborts the current operation by jumping to self->env with @result.
    Can only be used where a variable called |self| that has an |env| member is in scope.
*/
#define throw(result) do { throw_debug_msg; longjmp(self->env, (result)); } while(0)

/* Evaluates @result exactly once and throws it if it's not 0 */
#define throw_if_error(result) \
    do { \
        int return_if_result; \
        return_if_result = (result); \
        if((return_if_result) != 0) \
            throw(return_if_result); \
    } while(0)

/* Initializes the bytecode buffer of @self using @allocator. Returns the result of buffer_init */
int bytecode_init(Bytecode *self, ArenaAllocator *allocator) {
    return buffer_init(&self->data, allocator);
}

/*
    The ssa_extract_* functions below copy the operands of one ssa instruction out of
    @instruction_data (which points at the first byte after the opcode) into @result.
    The operands are read with am_memcpy, field by field, in the order they are listed in
    each function. The return value is the number of bytes that were read, which the caller
    adds to its instruction pointer.
*/

/* Operands: lhs, rhs */
static CHECK_RESULT usize ssa_extract_form1(u8 *instruction_data, SsaInsForm1 *result) {
    am_memcpy(&result->lhs, instruction_data, sizeof(result->lhs));
    am_memcpy(&result->rhs, instruction_data + sizeof(result->lhs), sizeof(result->rhs));
    return sizeof(result->lhs) + sizeof(result->rhs);
}

/* Operands: result, lhs, rhs */
static CHECK_RESULT usize ssa_extract_form2(u8 *instruction_data, SsaInsForm2 *result) {
    am_memcpy(&result->result, instruction_data, sizeof(result->result));
    am_memcpy(&result->lhs, instruction_data + sizeof(result->result), sizeof(result->lhs));
    am_memcpy(&result->rhs, instruction_data + sizeof(result->result) + sizeof(result->lhs), sizeof(result->rhs));
    return sizeof(result->result) + sizeof(result->lhs) + sizeof(result->rhs);
}

/* Operands: func_index, num_registers */
static CHECK_RESULT usize ssa_extract_func_start(u8 *instruction_data, SsaInsFuncStart *result) {
    am_memcpy(&result->func_index, instruction_data, sizeof(result->func_index));
    am_memcpy(&result->num_registers, instruction_data + sizeof(result->func_index), sizeof(result->num_registers));
    return sizeof(result->func_index) + sizeof(result->num_registers);
}

/* Operands: result, func_decl */
static CHECK_RESULT usize ssa_extract_func_call(u8 *instruction_data, SsaInsFuncCall *result) {
    am_memcpy(&result->result, instruction_data, sizeof(result->result));
    am_memcpy(&result->func_decl, instruction_data + sizeof(result->result), sizeof(result->func_decl));
    return sizeof(result->result) + sizeof(result->func_decl);
}

/* Operands: condition_reg, jump_offset */
static CHECK_RESULT usize ssa_extract_jump_zero(u8 *instruction_data, SsaInsJumpZero *result) {
    am_memcpy(&result->condition_reg, instruction_data, sizeof(result->condition_reg));
    am_memcpy(&result->jump_offset, instruction_data + sizeof(result->condition_reg), sizeof(result->jump_offset));
    return sizeof(result->condition_reg) + sizeof(result->jump_offset);
}

/* Operands: jump_offset */
static CHECK_RESULT usize ssa_extract_jump(u8 *instruction_data, SsaInsJump *result) {
    am_memcpy(&result->jump_offset, instruction_data, sizeof(result->jump_offset));
    return sizeof(result->jump_offset);
}

/*
    Appends the intermediates section to the bytecode:
        u32 size of the section data in bytes (not including this field)
        for every intermediate in the ssa:
            u8  type
            u64 value
    Throws if appending to the bytecode buffer fails.
*/
static void add_intermediates(BytecodeCompilerContext *self) {
    Ssa *ssa;
    Buffer *instructions;
    SsaNumber *intermediate;
    SsaNumber *intermediates_end;
    u32 intermediates_size;

    ssa = self->parser->ssa;
    instructions = &self->bytecode.data;
    intermediate = buffer_begin(&ssa->intermediates);
    intermediates_end = buffer_end(&ssa->intermediates);

    intermediates_size = (sizeof(u8) + sizeof(u64)) * buffer_get_size(&ssa->intermediates, SsaNumber);
    throw_if_error(buffer_expand(instructions, sizeof(u32) + intermediates_size));
    throw_if_error(buffer_append(instructions, &intermediates_size, sizeof(u32)));
    for(; intermediate != intermediates_end; ++intermediate) {
        u8 number_type;
        /*
            Convert the type to a byte by value instead of copying the first byte of the field,
            so the written byte doesn't depend on the width of the field or on the byte order.
        */
        number_type = (u8)intermediate->type;
        throw_if_error(buffer_append(instructions, &number_type, sizeof(number_type)));
        /* TODO: Store value using an encoding that will save space when using low numbers */
        throw_if_error(buffer_append(instructions, &intermediate->value.integer, sizeof(u64)));
    }
}

/*
    Appends the strings section to the bytecode:
        u16 number of strings
        u32 size of the string entries in bytes
        for every string in the ssa:
            u16 size of the string
            the bytes of the string (no null terminator is written)
    Throws -1 if there are more than UINT16_MAX strings or if a string is longer than
    UINT16_MAX bytes, and throws if appending to the bytecode buffer fails.
*/
void add_strings(BytecodeCompilerContext *self) {
    Ssa *ssa;
    Buffer *instructions;
    BufferView *string;
    BufferView *strings_end;
    u16 num_strings;
    u32 strings_size;

    ssa = self->parser->ssa;
    instructions = &self->bytecode.data;
    string = buffer_begin(&ssa->strings);
    strings_end = buffer_end(&ssa->strings);

    if(strings_end - string > UINT16_MAX) {
        amal_log_error("Too many strings in the program");
        throw(-1);
    }
    num_strings = (u16)(strings_end - string);

    /*
        With at most UINT16_MAX strings of at most UINT16_MAX bytes each (plus a u16 size per string),
        the total can't be larger than UINT32_MAX, so |strings_size| can't wrap around.
    */
    strings_size = 0;
    for(; string != strings_end; ++string) {
        /* The size of every string is stored as u16, a larger size would be silently truncated */
        if(string->size > UINT16_MAX) {
            amal_log_error("String is too long to be stored in the bytecode");
            throw(-1);
        }
        strings_size += sizeof(u16) + string->size;
    }
    string = buffer_begin(&ssa->strings);

    throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + strings_size));
    throw_if_error(buffer_append(instructions, &num_strings, sizeof(u16)));
    throw_if_error(buffer_append(instructions, &strings_size, sizeof(u32)));
    for(; string != strings_end; ++string) {
        u16 string_size;
        /* Write the size from a real u16 instead of the first two bytes of the (possibly wider) size field */
        string_size = (u16)string->size;
        throw_if_error(buffer_append(instructions, &string_size, sizeof(string_size)));
        throw_if_error(buffer_append(instructions, string->data, string_size));
    }
}

/*
    The add_ins* functions below append one instruction to the bytecode (the opcode followed
    by its operands, without any padding) and print a textual form of the instruction to stderr
    using @fmt. They throw if the bytecode buffer can't be extended.
*/

/* Instruction with no operands. @fmt is printed as is, it's not used as a format string */
static void add_ins1(BytecodeCompilerContext *self, AmalOpcode opcode, const char *fmt) {
    AmalOpcodeType opcode_value;
    /* Same conversion as the assignment to instructions->data[index] in the other add_ins* functions */
    opcode_value = (AmalOpcodeType)opcode;
    throw_if_error(buffer_append(&self->bytecode.data, &opcode_value, sizeof(AmalOpcodeType)));
    /* There are no arguments to format, so don't let printf interpret the text */
    fputs(fmt, stderr);
    fputc('\n', stderr);
}

/* Instruction with one register operand */
static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, u8 reg, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(reg)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &reg, sizeof(reg));
    fprintf(stderr, fmt, reg);
    fputc('\n', stderr);
}

/* Instruction with a destination register and a source register */
static void add_ins3(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 src_reg, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(src_reg)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &src_reg, sizeof(src_reg));
    fprintf(stderr, fmt, dst_reg, src_reg);
    fputc('\n', stderr);
}

/* Instruction with one 16-bit operand */
static void add_ins4(BytecodeCompilerContext *self, AmalOpcode opcode, u16 data, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(data)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &data, sizeof(data));
    fprintf(stderr, fmt, data);
    fputc('\n', stderr);
}

/* Instruction with a destination register and two more register operands */
static void add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 reg1, u8 reg2, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1) + sizeof(reg2)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &reg1, sizeof(reg1));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1), &reg2, sizeof(reg2));
    fprintf(stderr, fmt, dst_reg, reg1, reg2);
    fputc('\n', stderr);
}

/* Instruction with a register operand followed by a 16-bit operand */
static void add_ins6(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u16 data, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(data)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &data, sizeof(data));
    fprintf(stderr, fmt, dst_reg, data);
    fputc('\n', stderr);
}

/* Instruction with a 16-bit operand followed by an 8-bit operand */
static void add_ins7(BytecodeCompilerContext *self, AmalOpcode opcode, u16 idx, u8 arg, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(idx) + sizeof(arg)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &idx, sizeof(idx));
    instructions->data[index + sizeof(AmalOpcodeType) + sizeof(idx)] = arg;
    fprintf(stderr, fmt, idx, arg);
    fputc('\n', stderr);
}

#if 0
#define NUM_MAX_REGS 256
#define NUM_MAX_FUNC_ARGS 32

static const char* lhs_expr_get_c_name(BytecodeCompilerContext *self, LhsExpr *lhs_expr) {
    if(lhs_expr == self->parser->compiler->default_types.i64) {
        return "i64";
    } else if(lhs_expr == self->parser->compiler->default_types.f64) {
        return "f64";
    } else if(lhs_expr == self->parser->compiler->default_types.str) {
        return"const char*";
    } else {
        amal_log_error("Invalid rhs type %p", lhs_expr);
        assert(bool_false && "TODO: Implement");
        return "";
    }
}
#endif

/*
    Opens the file "<code name>.z" for writing, where <code name> is the code name of the tokenizer.
    Returns NULL if the file can't be opened. Throws -1 if there is not enough memory for the filename.
    The caller is responsible for closing the returned file.
*/
static FILE* open_output_file(BytecodeCompilerContext *self) {
    static const char extension[] = ".z";
    size_t code_name_size;
    char *filename;
    FILE *file;

    code_name_size = self->parser->tokenizer.code_name.size;
    filename = malloc(code_name_size + sizeof(extension));
    if(!filename) {
        amal_log_error("Failed to allocate memory for the output filename");
        throw(-1);
    }

    /*
        The code name is a view with an explicit size. Copy exactly that many bytes instead of
        relying on the data being null terminated (which is what strcat would do).
    */
    memcpy(filename, self->parser->tokenizer.code_name.data, code_name_size);
    memcpy(filename + code_name_size, extension, sizeof(extension)); /* Includes the null terminator */

    file = fopen(filename, "wb");
    free(filename);
    return file;
}

/*
    Appends the instructions section to the bytecode:
        u32 size of the instructions in bytes (not including this field)
        the instructions, one for every ssa instruction
    The size is not known until all instructions have been added, so space is reserved for it
    first and the value is written at the end.

    If COMPILE_TO_C is defined then C code is written to the "<code name>.z" file instead of
    adding instructions to the bytecode.
    NOTE(review): The COMPILE_TO_C code looks stale. It uses NUM_MAX_REGS, NUM_MAX_FUNC_ARGS and
    lhs_expr_get_c_name which are inside an #if 0 block above, it reads |num_args| which
    ssa_extract_func_start doesn't set and it never writes the size of the instructions.

    Throws if the bytecode buffer can't be extended.
*/
static void add_instructions(BytecodeCompilerContext *self) {
    Ssa *ssa;
    u8 *instruction;
    u8 *instructions_end;
    SsaInsForm1 ssa_ins_form1;
    SsaInsForm2 ssa_ins_form2;
    SsaInsFuncStart ssa_ins_func_start;
    SsaInsFuncCall ssa_ins_func_call;
    SsaInsJumpZero ssa_ins_jump_zero;
    SsaInsJump ssa_ins_jump;
    FILE *file;
    u32 num_instructions_index;
    #ifdef COMPILE_TO_C
    LhsExpr *reg_types[NUM_MAX_REGS]; /* TODO: Remove this. Encode this data in the register itself */
    SsaRegister func_arg_stack[NUM_MAX_FUNC_ARGS]; /* TODO: Remove this? */
    int func_arg_index;
    #endif

    /* Reserve space for the size of the instructions */
    num_instructions_index = self->bytecode.data.size;
    throw_if_error(buffer_append_empty(&self->bytecode.data, sizeof(num_instructions_index)));

    ssa = self->parser->ssa;
    instruction = buffer_begin(&ssa->instructions);
    instructions_end = buffer_end(&ssa->instructions);

    /*#warning "dont forget to remove this" */
    file = open_output_file(self);

    #ifdef COMPILE_TO_C
    if(!file) {
        amal_log_error("Failed to open the output file");
        throw(-1);
    }

    #ifdef DEBUG
    am_memset(reg_types, 0, sizeof(reg_types));
    #endif
    func_arg_index = 0;

    fputs("typedef i64 signed long long;\n", file);
    fputs("typedef f64 double;\n", file);

    #define ARITH_OP(op) do {\
        const char *rhs_type_name; \
        instruction += ssa_extract_form2(instruction, &ssa_ins_form2); \
        assert(ssa_ins_form2.result < NUM_MAX_REGS); \
        assert(ssa_ins_form2.lhs < NUM_MAX_REGS); \
        rhs_type_name = lhs_expr_get_c_name(self, reg_types[ssa_ins_form2.lhs]); \
        fprintf(file, "%s r%d = r%d %s r%d;\n", rhs_type_name, ssa_ins_form2.result, ssa_ins_form2.lhs, (op), ssa_ins_form2.rhs); \
        reg_types[ssa_ins_form2.result] = reg_types[ssa_ins_form2.lhs]; \
    } while(0)

    while(instruction != instructions_end) {
        switch((SsaInstruction)*instruction++) {
            case SSA_ASSIGN_INTER: {
                SsaNumber number;
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                number = ssa_get_intermediate(ssa, ssa_ins_form1.rhs);
                assert(ssa_ins_form1.lhs < NUM_MAX_REGS);
                if(number.type == SSA_NUMBER_TYPE_INTEGER) {
                    fprintf(file, "i64 r%d = %zu;\n", ssa_ins_form1.lhs, number.value.integer);
                    reg_types[ssa_ins_form1.lhs] = self->parser->compiler->default_types.i64;
                } else if(number.type == SSA_NUMBER_TYPE_FLOAT) {
                    fprintf(file, "f64 r%d = %f;\n", ssa_ins_form1.lhs, number.value.floating);
                    reg_types[ssa_ins_form1.lhs] = self->parser->compiler->default_types.f64;
                } else {
                    assert(bool_false && "TODO: Implement");
                }
                break;
            }
            case SSA_ASSIGN_STRING: {
                BufferView str;
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                str = ssa_get_string(ssa, ssa_ins_form1.rhs);
                fprintf(file, "const char* r%d = \"%.*s\";\n", ssa_ins_form1.lhs, (int)str.size, str.data);
                assert(ssa_ins_form1.lhs < NUM_MAX_REGS);
                reg_types[ssa_ins_form1.lhs] = self->parser->compiler->default_types.str;
                break;
            }
            case SSA_ASSIGN_REG: {
                const char *rhs_type_name;
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                assert(ssa_ins_form1.rhs < NUM_MAX_REGS);
                rhs_type_name = lhs_expr_get_c_name(self, reg_types[ssa_ins_form1.rhs]);
                fprintf(file, "%s r%d = r%d;\n", rhs_type_name, ssa_ins_form1.lhs, ssa_ins_form1.rhs);
                reg_types[ssa_ins_form1.lhs] = reg_types[ssa_ins_form1.rhs];
                break;
            }
            case SSA_ADD: {
                ARITH_OP("+");
                break;
            }
            case SSA_SUB: {
                ARITH_OP("-");
                break;
            }
            case SSA_MUL: {
                ARITH_OP("*");
                break;
            }
            case SSA_DIV: {
                ARITH_OP("/");
                break;
            }
            case SSA_EQUALS: {
                ARITH_OP("==");
                break;
            }
            case SSA_FUNC_START: {
                int i;
                instruction += ssa_extract_func_start(instruction, &ssa_ins_func_start);
                fprintf(file, "void f%zu(", ssa_ins_func_start.func_index);
                fputs(") {\n", file);
                for(i = 0; i < ssa_ins_func_start.num_args; ++i) {
                    if(i > 0)
                        fputs(", ", file);
                    fprintf(file, "p%d", i);
                }
                break;
            }
            case SSA_FUNC_END:
                fputs("}\n", file);
                break;
            case SSA_PUSH: {
                SsaRegister reg;
                am_memcpy(&reg, instruction, sizeof(SsaRegister));
                instruction += sizeof(SsaRegister);
                assert(func_arg_index < NUM_MAX_FUNC_ARGS);
                func_arg_stack[func_arg_index++] = reg;
                break;
            }
            case SSA_CALL: {
                int i;
                instruction += ssa_extract_func_call(instruction, &ssa_ins_func_call);
                fprintf(file, "r%d = f%p(", ssa_ins_func_call.result, ssa_ins_func_call.func_decl);
                for(i = 0; i < func_arg_index; ++i) {
                    if(i > 0)
                        fputs(", ", file);
                    fprintf(file, "r%d", func_arg_stack[i]);
                }
                func_arg_index = 0;
                fputs(");\n", file);
                break;
            }
            case SSA_JUMP_ZERO: {
                assert(bool_false && "Not implemented!");
                instruction += ssa_extract_jump_zero(instruction, &ssa_ins_jump_zero);
                break;
            }
            case SSA_JUMP: {
                assert(bool_false && "Not implemented!");
                instruction += ssa_extract_jump(instruction, &ssa_ins_jump);
                break;
            }
        }
    }

    fclose(file);
    #else
    /*
        Nothing is written to the file when generating bytecode. Close it before adding the
        instructions, otherwise the file would be leaked if one of the add_ins* functions throws.
        Failing to open the file is not an error here since the file isn't used.
    */
    if(file)
        fclose(file);

    /* TODO: Keep all registers under 256 */
    /* Note: there is no default case, an ssa instruction that is not listed here is skipped without reading any operands */
    while(instruction != instructions_end) {
        SsaInstruction ins = (SsaInstruction)*instruction++;
        switch(ins) {
            case SSA_ASSIGN_INTER: {
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                add_ins6(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d");
                break;
            }
            case SSA_ASSIGN_STRING: {
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                add_ins6(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d");
                break;
            }
            case SSA_ASSIGN_REG: {
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                add_ins3(self, AMAL_OP_MOV, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "mov r%d, d%d");
                break;
            }
            case SSA_ADD: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_ADD, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "add r%d, r%d, r%d");
                break;
            }
            case SSA_SUB: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_SUB, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "sub r%d, r%d, r%d");
                break;
            }
            case SSA_IMUL: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_IMUL, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "imul r%d, r%d, r%d");
                break;
            }
            case SSA_MUL: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_MUL, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "mul r%d, r%d, r%d");
                break;
            }
            case SSA_IDIV: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_IDIV, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "idiv r%d, r%d, r%d");
                break;
            }
            case SSA_DIV: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_DIV, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "div r%d, r%d, r%d");
                break;
            }
            case SSA_EQUALS: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_CMP, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "cmp r%d, r%d, r%d");
                break;
            }
            case SSA_FUNC_START: {
                instruction += ssa_extract_func_start(instruction, &ssa_ins_func_start);
                add_ins4(self, AMAL_OP_FUNC_START, ssa_ins_func_start.num_registers, "func_start %u");
                break;
            }
            case SSA_FUNC_END: {
                add_ins1(self, AMAL_OP_FUNC_END, "func_end");
                break;
            }
            case SSA_PUSH: {
                SsaRegister reg;
                am_memcpy(&reg, instruction, sizeof(SsaRegister));
                instruction += sizeof(SsaRegister);
                add_ins2(self, AMAL_OP_PUSH, reg, "push r%d");
                break;
            }
            case SSA_CALL: {
                /*
                    TODO: Pass return register to function. The register should be a pointer that
                    has the size of the function return values so the return values can fit in it.
                */
                /*
                    TODO: Using ssa_func_index only works correctly if the function was defined in the same
                    file as the function call. To make this work with calling functions in other files,
                    ssa_func_index should also have an offset index or something like that.
                    So each file has its own function list with indices and when they need to be combined in the end,
                    the function indices can be increased by their block index (ssa_func_index + block index), where block index
                    is defined as the size of all previous files' number of functions.
                */
                instruction += ssa_extract_func_call(instruction, &ssa_ins_func_call);
                /* TODO: Replace 0 with the number of arguments */
                add_ins7(self, AMAL_OP_CALL, ssa_ins_func_call.func_decl->ssa_func_index, 0, "call %d, %d");
                break;
            }
            case SSA_JUMP_ZERO: {
                instruction += ssa_extract_jump_zero(instruction, &ssa_ins_jump_zero);
                add_ins6(self, AMAL_OP_JZ, ssa_ins_jump_zero.condition_reg, ssa_ins_jump_zero.jump_offset, "jz r%d, %d");
                break;
            }
            case SSA_JUMP: {
                instruction += ssa_extract_jump(instruction, &ssa_ins_jump);
                add_ins4(self, AMAL_OP_JMP, ssa_ins_jump.jump_offset, "jmp %d");
                break;
            }
        }
    }

    /* Prepend instructions with its size */
    {
        u32 instructions_size;
        /* Remove the count itself from the size of the instructions size */
        instructions_size = self->bytecode.data.size - num_instructions_index - sizeof(instructions_size);
        am_memcpy(&self->bytecode.data.data[num_instructions_index], &instructions_size, sizeof(instructions_size));
    }
    #endif /* COMPILE_TO_C */
}

/*
    Generates the bytecode for the ssa of the parser in @self. The sections are appended to
    self->bytecode in this order: intermediates, strings, instructions.
    Errors are reported by throwing (a longjmp to self->env).
*/
void generate_bytecode_from_ssa(BytecodeCompilerContext *self) {
    add_intermediates(self);
    add_strings(self);
    /* TODO: Also add strings in ssa, so we can index them */
    add_instructions(self);
}