#include "../../include/bytecode/bytecode.h"
#include "../../include/std/mem.h"
#include "../../include/std/log.h"
#include "../../include/ssa/ssa.h"
#include "../../include/parser.h"
#include "../../include/compiler.h"
#include <assert.h>
#include <stdio.h> /* TODO: Remove this */
#include <stdlib.h>
#include <string.h>

/*
    Error handling in this file is done with longjmp: throw() jumps back to
    self->env with a non-zero result. Both macros therefore require a variable
    named |self| (a BytecodeCompilerContext*) to be in scope.
*/
#define throw(result) do { throw_debug_msg; longjmp(self->env, (result)); } while(0)
#define throw_if_error(result) \
    do { \
        int return_if_result; \
        return_if_result = (result); \
        if((return_if_result) != 0) \
            throw(return_if_result); \
    } while(0)

/* Initializes the bytecode output buffer. Returns the result of buffer_init */
int bytecode_init(Bytecode *self, ScopedAllocator *allocator) {
    return buffer_init(&self->data, allocator);
}

/*
    The ssa_extract_* functions decode the operands of one ssa instruction.
    |instruction_data| points at the first operand byte (just after the ssa opcode).
    The fields are copied with am_memcpy since the operand bytes are packed
    back-to-back in the instruction stream.
    Each function returns the number of operand bytes consumed.
*/

/* Operands: lhs, rhs */
static CHECK_RESULT usize ssa_extract_form1(u8 *instruction_data, SsaInsForm1 *result) {
    am_memcpy(&result->lhs, instruction_data, sizeof(result->lhs));
    am_memcpy(&result->rhs, instruction_data + sizeof(result->lhs), sizeof(result->rhs));
    return sizeof(result->lhs) + sizeof(result->rhs);
}

/* Operands: result, lhs, rhs */
static CHECK_RESULT usize ssa_extract_form2(u8 *instruction_data, SsaInsForm2 *result) {
    am_memcpy(&result->result, instruction_data, sizeof(result->result));
    am_memcpy(&result->lhs, instruction_data + sizeof(result->result), sizeof(result->lhs));
    am_memcpy(&result->rhs, instruction_data + sizeof(result->result) + sizeof(result->lhs), sizeof(result->rhs));
    return sizeof(result->result) + sizeof(result->lhs) + sizeof(result->rhs);
}

/* Operands: func_index, num_args */
static CHECK_RESULT usize ssa_extract_func_start(u8 *instruction_data, SsaInsFuncStart *result) {
    am_memcpy(&result->func_index, instruction_data, sizeof(result->func_index));
    am_memcpy(&result->num_args, instruction_data + sizeof(result->func_index), sizeof(result->num_args));
    return sizeof(result->func_index) + sizeof(result->num_args);
}

/* Operands: result, func_decl */
static CHECK_RESULT usize ssa_extract_func_call(u8 *instruction_data, SsaInsFuncCall *result) {
    am_memcpy(&result->result, instruction_data, sizeof(result->result));
    am_memcpy(&result->func_decl, instruction_data + sizeof(result->result), sizeof(result->func_decl));
    return sizeof(result->result) + sizeof(result->func_decl);
}

/* Operands: condition_reg, jump_offset */
static CHECK_RESULT usize ssa_extract_jump_zero(u8 *instruction_data, SsaInsJumpZero *result) {
    am_memcpy(&result->condition_reg, instruction_data, sizeof(result->condition_reg));
    am_memcpy(&result->jump_offset, instruction_data + sizeof(result->condition_reg), sizeof(result->jump_offset));
    return sizeof(result->condition_reg) + sizeof(result->jump_offset);
}

/* Operands: jump_offset */
static CHECK_RESULT usize ssa_extract_jump(u8 *instruction_data, SsaInsJump *result) {
    am_memcpy(&result->jump_offset, instruction_data, sizeof(result->jump_offset));
    return sizeof(result->jump_offset);
}

/* Appends the bytecode header to the output buffer. Throws if an append fails */
static void add_header(BytecodeCompilerContext *self) {
    /*doc(Bytecode header)
    # Header layout
    |Size|Name         |Description                                                                 |
    |----|-------------|----------------------------------------------------------------------------|
    |4   |Magic number |The magic number used to identify an amalgam bytecode file.                 |
    |1   |Major version|The major version of the bytecode. Updates in this is a breaking change.    |
    |1   |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
    |1   |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|

    The versions in the header only changes for every release, not every change.
    */

    const u32 magic_number = 0xdec05eba;
    const u8 major_version = 1;
    const u8 minor_version = 0;
    const u8 patch_version = 0;

    Buffer *instructions;
    instructions = &self->bytecode.data;

    /* The magic number is written in the byte order of the host */
    throw_if_error(buffer_append(instructions, &magic_number, 4));
    throw_if_error(buffer_append(instructions, &major_version, 1));
    throw_if_error(buffer_append(instructions, &minor_version, 1));
    throw_if_error(buffer_append(instructions, &patch_version, 1));
}

/*
    Appends the intermediates (numbers) section:
    the first sizeof(u16) bytes of ssa->intermediates.size, followed by one
    (u8 type, u64 value) pair per intermediate.
    Throws if the output buffer can't be grown.
*/
static void add_intermediates(BytecodeCompilerContext *self) {
    Ssa *ssa;
    Buffer *instructions;
    SsaNumber *intermediate;
    SsaNumber *intermediates_end;

    ssa = self->parser->ssa;
    instructions = &self->bytecode.data;
    intermediate = buffer_begin(&ssa->intermediates);
    intermediates_end = buffer_end(&ssa->intermediates);

    /*
        NOTE(review): ssa->intermediates.size is used both as the element count for
        the expand below and as the value written to the output. Confirm that
        Buffer.size is the unit the bytecode reader expects here (count vs bytes).
    */
    throw_if_error(buffer_expand(instructions, sizeof(u16) + (sizeof(u8) + sizeof(u64)) * ssa->intermediates.size));
    throw_if_error(buffer_append(instructions, &ssa->intermediates.size, sizeof(u16)));
    for(; intermediate != intermediates_end; ++intermediate) {
        throw_if_error(buffer_append(instructions, &intermediate->type, sizeof(u8)));
        /* The value is copied as raw 64 bits through the integer member, whatever the number type is */
        throw_if_error(buffer_append(instructions, &intermediate->value.integer, sizeof(u64)));
    }
}

/*
    Appends the strings section:
    a u32 with the size in bytes of everything that follows, then for every
    string the first sizeof(u16) bytes of its size followed by the string bytes.
    Throws if the output buffer can't be grown.
*/
void add_strings(BytecodeCompilerContext *self) {
    Ssa *ssa;
    Buffer *instructions;
    BufferView *string;
    BufferView *strings_end;
    u32 strings_size;

    ssa = self->parser->ssa;
    instructions = &self->bytecode.data;
    string = buffer_begin(&ssa->strings);
    strings_end = buffer_end(&ssa->strings);
    strings_size = 0;

    /* First pass: calculate the total size of the section so it can be written up front */
    for(; string != strings_end; ++string) {
        strings_size += sizeof(u16) + string->size;
    }
    string = buffer_begin(&ssa->strings);

    throw_if_error(buffer_expand(instructions, sizeof(u32) + strings_size));
    throw_if_error(buffer_append(instructions, &strings_size, sizeof(u32)));
    for(; string != strings_end; ++string) {
        throw_if_error(buffer_append(instructions, &string->size, sizeof(u16)));
        /* Append the characters the view points to, not the bytes of the pointer itself */
        throw_if_error(buffer_append(instructions, string->data, string->size));
    }
}

/*
    The add_ins* functions append one encoded instruction to the bytecode and
    print a human readable form of it to stderr. |fmt| is a printf format that
    must match the operands of the instruction form; it is only used for the
    stderr output. All of them throw if the output buffer can't be grown.
    Operands are copied with memcpy, so multi-byte operands are written in the
    byte order of the host.
*/

/* Layout: opcode */
static void add_ins1(BytecodeCompilerContext *self, AmalOpcode opcode, const char *fmt) {
    AmalOpcodeType opcode_value;
    /* Convert first, so the bytes written don't depend on the size and byte order of the enum */
    opcode_value = (AmalOpcodeType)opcode;
    throw_if_error(buffer_append(&self->bytecode.data, &opcode_value, sizeof(AmalOpcodeType)));
    /* This form has no operands, so |fmt| is printed as plain text instead of being used as a format */
    fputs(fmt, stderr);
    fputc('\n', stderr);
}

/* Layout: opcode, reg */
static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, u8 reg, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(reg)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &reg, sizeof(reg));
    fprintf(stderr, fmt, reg);
    fputc('\n', stderr);
}

/* Layout: opcode, dst_reg, src_reg */
static void add_ins3(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 src_reg, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(src_reg)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &src_reg, sizeof(src_reg));
    fprintf(stderr, fmt, dst_reg, src_reg);
    fputc('\n', stderr);
}

/* Layout: opcode, data (u16) */
static void add_ins4(BytecodeCompilerContext *self, AmalOpcode opcode, u16 data, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(data)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &data, sizeof(data));
    fprintf(stderr, fmt, data);
    fputc('\n', stderr);
}

/* Layout: opcode, dst_reg, reg1, reg2 */
static void add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 reg1, u8 reg2, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1) + sizeof(reg2)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &reg1, sizeof(reg1));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1), &reg2, sizeof(reg2));
    fprintf(stderr, fmt, dst_reg, reg1, reg2);
    fputc('\n', stderr);
}

/* Layout: opcode, dst_reg, data (u16) */
static void add_ins6(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u16 data, const char *fmt) {
    Buffer *instructions;
    size_t index;

    instructions = &self->bytecode.data;
    index = instructions->size;

    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(data)));
    instructions->data[index] = opcode;
    memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
    memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &data, sizeof(data));
    fprintf(stderr, fmt, dst_reg, data);
    fputc('\n', stderr);
}

/*
    Disabled helpers for the COMPILE_TO_C code path in add_instructions.
    Note that the COMPILE_TO_C path uses NUM_MAX_REGS, NUM_MAX_FUNC_ARGS and
    lhs_expr_get_c_name, so it can't be built while this block is disabled.
*/
#if 0
#define NUM_MAX_REGS 256
#define NUM_MAX_FUNC_ARGS 32

static const char* lhs_expr_get_c_name(BytecodeCompilerContext *self, LhsExpr *lhs_expr) {
    if(lhs_expr == self->parser->compiler->default_types.i64) {
        return "i64";
    } else if(lhs_expr == self->parser->compiler->default_types.f64) {
        return "f64";
    } else if(lhs_expr == self->parser->compiler->default_types.str) {
        return"const char*";
    } else {
        amal_log_error("Invalid rhs type %p", lhs_expr);
        assert(bool_false && "TODO: Implement");
        return "";
    }
}
#endif

/*
    Creates (or truncates) the file "<code_name>.z" and opens it for binary writing.
    Returns NULL if the filename can't be allocated or if the file can't be opened.
    The name is built from code_name.data and code_name.size, without relying
    on code_name.data being null-terminated.
*/
static FILE* open_code_output_file(BytecodeCompilerContext *self) {
    FILE *file;
    char *filename;
    usize name_size;

    name_size = self->parser->tokenizer.code_name.size;
    /* +3: two bytes for ".z" and one for the null terminator */
    filename = malloc(name_size + 3);
    if(!filename)
        return NULL;

    memcpy(filename, self->parser->tokenizer.code_name.data, name_size);
    memcpy(filename + name_size, ".z", 3);
    file = fopen(filename, "wb");
    free(filename);
    return file;
}

/*
    Translates every ssa instruction of the parser into output.
    Without COMPILE_TO_C: the instructions are appended to the bytecode as
    a u32 size followed by the encoded instructions.
    With COMPILE_TO_C: C code is written to the "<code_name>.z" file instead.
    Throws if the output buffer can't be grown.
    NOTE: The "<code_name>.z" file is created in both modes. If a throw happens
    while it's open then the FILE is not closed, since longjmp skips the fclose.
*/
static void add_instructions(BytecodeCompilerContext *self) {
    Ssa *ssa;
    u8 *instruction;
    u8 *instructions_end;
    SsaInsForm1 ssa_ins_form1;
    SsaInsForm2 ssa_ins_form2;
    SsaInsFuncStart ssa_ins_func_start;
    SsaInsFuncCall ssa_ins_func_call;
    SsaInsJumpZero ssa_ins_jump_zero;
    SsaInsJump ssa_ins_jump;
    FILE *file;
    u32 num_instructions_index;
    #ifdef COMPILE_TO_C
    LhsExpr *reg_types[NUM_MAX_REGS]; /* TODO: Remove this. Encode this data in the register itself */
    SsaRegister func_arg_stack[NUM_MAX_FUNC_ARGS]; /* TODO: Remove this? */
    int func_arg_index;
    #endif

    /* Reserve space for the size of the instructions. It's filled in after all instructions have been added */
    num_instructions_index = self->bytecode.data.size;
    throw_if_error(buffer_append_empty(&self->bytecode.data, sizeof(num_instructions_index)));

    ssa = self->parser->ssa;
    instruction = buffer_begin(&ssa->instructions);
    instructions_end = buffer_end(&ssa->instructions);
    /*#warning "dont forget to remove this" */
    file = open_code_output_file(self);

    #ifdef COMPILE_TO_C
    /* In this mode the file is the output, so there is nothing to do without it. Any non-zero value signals failure */
    if(!file)
        throw(-1);

    #ifdef DEBUG
    am_memset(reg_types, 0, sizeof(reg_types));
    #endif
    func_arg_index = 0;

    fputs("typedef signed long long i64;\n", file);
    fputs("typedef double f64;\n", file);

    /* Emits "<type> r<result> = r<lhs> <op> r<rhs>;". The result register gets the type of the lhs register */
    #define ARITH_OP(op) do {\
        const char *rhs_type_name; \
        instruction += ssa_extract_form2(instruction, &ssa_ins_form2); \
        assert(ssa_ins_form2.result < NUM_MAX_REGS); \
        assert(ssa_ins_form2.lhs < NUM_MAX_REGS); \
        rhs_type_name = lhs_expr_get_c_name(self, reg_types[ssa_ins_form2.lhs]); \
        fprintf(file, "%s r%d = r%d %s r%d;\n", rhs_type_name, ssa_ins_form2.result, ssa_ins_form2.lhs, (op), ssa_ins_form2.rhs); \
        reg_types[ssa_ins_form2.result] = reg_types[ssa_ins_form2.lhs]; \
    } while(0)

    while(instruction != instructions_end) {
        switch((SsaInstruction)*instruction++) {
            case SSA_ASSIGN_INTER: {
                SsaNumber number;
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                number = ssa_get_intermediate(ssa, ssa_ins_form1.rhs);
                assert(ssa_ins_form1.lhs < NUM_MAX_REGS);
                if(number.type == SSA_NUMBER_TYPE_INTEGER) {
                    /* NOTE(review): %zu is used for number.value.integer; confirm it matches the type of that member */
                    fprintf(file, "i64 r%d = %zu;\n", ssa_ins_form1.lhs, number.value.integer);
                    reg_types[ssa_ins_form1.lhs] = self->parser->compiler->default_types.i64;
                } else if(number.type == SSA_NUMBER_TYPE_FLOAT) {
                    fprintf(file, "f64 r%d = %f;\n", ssa_ins_form1.lhs, number.value.floating);
                    reg_types[ssa_ins_form1.lhs] = self->parser->compiler->default_types.f64;
                } else {
                    assert(bool_false && "TODO: Implement");
                }
                break;
            }
            case SSA_ASSIGN_STRING: {
                BufferView str;
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                str = ssa_get_string(ssa, ssa_ins_form1.rhs);
                fprintf(file, "const char* r%d = \"%.*s\";\n", ssa_ins_form1.lhs, (int)str.size, str.data);
                assert(ssa_ins_form1.lhs < NUM_MAX_REGS);
                reg_types[ssa_ins_form1.lhs] = self->parser->compiler->default_types.str;
                break;
            }
            case SSA_ASSIGN_REG: {
                const char *rhs_type_name;
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                assert(ssa_ins_form1.rhs < NUM_MAX_REGS);
                rhs_type_name = lhs_expr_get_c_name(self, reg_types[ssa_ins_form1.rhs]);
                fprintf(file, "%s r%d = r%d;\n", rhs_type_name, ssa_ins_form1.lhs, ssa_ins_form1.rhs);
                reg_types[ssa_ins_form1.lhs] = reg_types[ssa_ins_form1.rhs];
                break;
            }
            case SSA_ADD: {
                ARITH_OP("+");
                break;
            }
            case SSA_SUB: {
                ARITH_OP("-");
                break;
            }
            case SSA_MUL: {
                ARITH_OP("*");
                break;
            }
            case SSA_DIV: {
                ARITH_OP("/");
                break;
            }
            case SSA_EQUALS: {
                ARITH_OP("==");
                break;
            }
            case SSA_FUNC_START: {
                int i;
                instruction += ssa_extract_func_start(instruction, &ssa_ins_func_start);
                fprintf(file, "void f%zu(", ssa_ins_func_start.func_index);
                /* The parameter list has to be written before the parenthesis is closed */
                for(i = 0; i < ssa_ins_func_start.num_args; ++i) {
                    if(i > 0)
                        fputs(", ", file);
                    fprintf(file, "p%d", i);
                }
                fputs(") {\n", file);
                break;
            }
            case SSA_FUNC_END:
                fputs("}\n", file);
                break;
            case SSA_PUSH: {
                SsaRegister reg;
                am_memcpy(&reg, instruction, sizeof(SsaRegister));
                instruction += sizeof(SsaRegister);
                assert(func_arg_index < NUM_MAX_FUNC_ARGS);
                /* Arguments are collected here and written out by the next SSA_CALL */
                func_arg_stack[func_arg_index++] = reg;
                break;
            }
            case SSA_CALL: {
                int i;
                instruction += ssa_extract_func_call(instruction, &ssa_ins_func_call);
                /* NOTE(review): The function is named by pointer (f%p) here but by index (f%zu) in SSA_FUNC_START */
                fprintf(file, "r%d = f%p(", ssa_ins_func_call.result, ssa_ins_func_call.func_decl);
                for(i = 0; i < func_arg_index; ++i) {
                    if(i > 0)
                        fputs(", ", file);
                    fprintf(file, "r%d", func_arg_stack[i]);
                }
                func_arg_index = 0;
                fputs(");\n", file);
                break;
            }
            case SSA_JUMP_ZERO: {
                assert(bool_false && "Not implemented!");
                instruction += ssa_extract_jump_zero(instruction, &ssa_ins_jump_zero);
                break;
            }
            case SSA_JUMP: {
                assert(bool_false && "Not implemented!");
                instruction += ssa_extract_jump(instruction, &ssa_ins_jump);
                break;
            }
        }
    }
    #else
    while(instruction != instructions_end) {
        SsaInstruction ins = (SsaInstruction)*instruction++;
        switch(ins) {
            case SSA_ASSIGN_INTER: {
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                add_ins3(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d");
                break;
            }
            case SSA_ASSIGN_STRING: {
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                add_ins3(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d");
                break;
            }
            case SSA_ASSIGN_REG: {
                instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
                add_ins3(self, AMAL_OP_MOV, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "mov r%d, d%d");
                break;
            }
            case SSA_ADD: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_ADD, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "add r%d, r%d, r%d");
                break;
            }
            case SSA_SUB: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_SUB, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "sub r%d, r%d, r%d");
                break;
            }
            case SSA_MUL: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_MUL, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "mul r%d, r%d, r%d");
                break;
            }
            case SSA_DIV: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_DIV, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "div r%d, r%d, r%d");
                break;
            }
            case SSA_EQUALS: {
                instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
                add_ins5(self, AMAL_OP_CMP, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "cmp r%d, r%d, r%d");
                break;
            }
            case SSA_FUNC_START: {
                /* The operands are decoded only to skip past them; they are not written to the bytecode */
                instruction += ssa_extract_func_start(instruction, &ssa_ins_func_start);
                add_ins1(self, AMAL_OP_FUNC_START, "func_start");
                break;
            }
            case SSA_FUNC_END: {
                /*
                    NOTE(review): This emits AMAL_OP_FUNC_START for the end of a function,
                    so start and end can't be told apart in the bytecode. Confirm if this
                    is a placeholder or if a dedicated opcode should be used here.
                */
                add_ins1(self, AMAL_OP_FUNC_START, "func_end");
                break;
            }
            case SSA_PUSH: {
                SsaRegister reg;
                am_memcpy(&reg, instruction, sizeof(SsaRegister));
                instruction += sizeof(SsaRegister);
                add_ins2(self, AMAL_OP_PUSH, reg, "push r%d");
                break;
            }
            case SSA_CALL: {
                /*
                    TODO: Add args, using number of bytes to pop after function call.
                */

                /*
                    TODO: Pass return register to function. The register should be a pointer that
                    has the size of the function return values so the return values can fit in it.
                */

                /*
                    TODO: Using ssa_func_index only works correctly if the function was defined in the same
                    file as the function call. To make this work with calling functions in other files,
                    ssa_func_index should also have an offset index or something like that.
                    So each file has its own function list with indices and when they need to be combined in the end,
                    the function indices can be increased by their block index (ssa_func_index + block index), where block index
                    is defined as the size of all previous files' number of functions.
                */
                instruction += ssa_extract_func_call(instruction, &ssa_ins_func_call);
                add_ins4(self, AMAL_OP_CALL, ssa_ins_func_call.func_decl->ssa_func_index, "call %d");
                break;
            }
            case SSA_JUMP_ZERO: {
                instruction += ssa_extract_jump_zero(instruction, &ssa_ins_jump_zero);
                add_ins6(self, AMAL_OP_JZ, ssa_ins_jump_zero.condition_reg, ssa_ins_jump_zero.jump_offset, "jz r%d, %d");
                break;
            }
            case SSA_JUMP: {
                instruction += ssa_extract_jump(instruction, &ssa_ins_jump);
                add_ins4(self, AMAL_OP_JMP, ssa_ins_jump.jump_offset, "jmp %d");
                break;
            }
            default:
                amal_log_error("Instruction not yet implemented: %d", ins);
                assert(bool_false && "Instruction not yet implemented");
        }
    }

    /* Prepend instructions with its size */
    {
        /*
            The size is measured from the start of the reserved size field, so it
            includes the bytes of the size field itself.
            NOTE(review): Confirm that the bytecode reader expects that.
        */
        u32 instructions_size;
        instructions_size = self->bytecode.data.size - num_instructions_index;
        am_memcpy(&self->bytecode.data.data[num_instructions_index], &instructions_size, sizeof(instructions_size));
    }
    #endif /* COMPILE_TO_C */

    /* Calling fclose with NULL is undefined behavior, and opening the file may have failed */
    if(file)
        fclose(file);
}

/*
    Generates the bytecode for the ssa of self->parser into self->bytecode.
    The sections are written in this order: header, intermediates, strings, instructions.
    On failure this doesn't return; it jumps to self->env with a non-zero result
    (see throw), so the caller has to have set up self->env before calling this.
*/
void generate_bytecode_from_ssa(BytecodeCompilerContext *self) {
    add_header(self);
    add_intermediates(self);
    add_strings(self);
    /* TODO: Also add strings in ssa, so we can index them */
    add_instructions(self);
}