#include "../include/program.h"
#include "../include/std/mem.h"
#include "../include/std/alloc.h"
#include "../include/std/log.h"
#include "../include/std/buffer_view.h"
#include <stdio.h>
#include <errno.h>
#include <assert.h>

/* TODO: If system is big-endian, then do endian conversion for all reads */

/* This matches SsaNumberType */
typedef enum {
    NUMBER_TYPE_INTEGER,
    NUMBER_TYPE_FLOAT
} NumberType;

/* Raw 64-bit payload of an intermediate, viewed either as an integer or as a float */
typedef union {
    i64 integer;
    f64 floating;
} NumberUnion;

/* A decoded intermediate: a type tag and its 64-bit value */
typedef struct {
    NumberType type;
    NumberUnion value;
} Number;

/*
    Appends the bytecode header (magic number and the three version bytes) to self->data.
    Returns 0 on success, otherwise the error returned by buffer_append.
*/
static CHECK_RESULT int amal_program_append_header(amal_program *self) {
    /*doc(Bytecode header)
    # Header layout
    |Size|Name         |Description                                                                 |
    |----|-------------|----------------------------------------------------------------------------|
    |4   |Magic number |The magic number used to identify an amalgam bytecode file.                 |
    |1   |Major version|The major version of the bytecode. Updates in this is a breaking change.    |
    |1   |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
    |1   |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|

    The versions in the header only changes for every release, not every change.
    */

    const u32 magic_number = AMAL_PROGRAM_MAGIC_NUMBER;
    const u8 major_version = AMAL_PROGRAM_MAJOR_VERSION;
    const u8 minor_version = AMAL_PROGRAM_MINOR_VERSION;
    const u8 patch_version = AMAL_PROGRAM_PATCH_VERSION;

    return_if_error(buffer_append(&self->data, &magic_number, 4));
    return_if_error(buffer_append(&self->data, &major_version, 1));
    return_if_error(buffer_append(&self->data, &minor_version, 1));
    return_if_error(buffer_append(&self->data, &patch_version, 1));

    return 0;
}

/*
    Initializes every field of @self, allocates the initial 4096 byte stack, initializes the
    assembler and writes the bytecode header into the program data.
    Returns 0 on success. On failure everything acquired so far is released through
    amal_program_deinit and -1 is returned.
*/
int amal_program_init(amal_program *self) {
    ignore_result_int(buffer_init(&self->data, NULL));
    self->string_indices = NULL;
    self->intermediates_start = NULL;
    self->strings_start = NULL;
    self->read_index = 0;
    self->num_strings = 0;
    self->num_intermediates = 0;
    am_memset(self->reg, 0, sizeof(self->reg));
    self->stack = NULL;
    self->stack_size = 4096;
    /* Zeroed before anything can fail so that the cleanup path always hands asm_deinit a defined value */
    am_memset(&self->asm, 0, sizeof(self->asm));
    cleanup_if_error(am_malloc(self->stack_size, (void**)&self->stack));
    cleanup_if_error(asm_init(&self->asm));
    self->stack_index = 0;

    cleanup_if_error(amal_program_append_header(self));
    return 0;

    cleanup:
    amal_program_deinit(self);
    return -1;
}

/*
    Releases the program data, the string index table, the stack and the assembler.
    The freed pointers are reset to NULL.
*/
void amal_program_deinit(amal_program *self) {
    buffer_deinit(&self->data);
    am_free(self->string_indices);
    self->string_indices = NULL;
    am_free(self->stack);
    self->stack = NULL;
    asm_deinit(&self->asm);
}

/* Appends the raw bytes of @bytecode to the program data. Returns the result of buffer_append */
int amal_program_append_bytecode(amal_program *self, Bytecode *bytecode) {
    return buffer_append(&self->data, bytecode->data.data, bytecode->data.size);
}

/* Number of bytes between the read cursor and the end of the program data */
static usize bytes_left_to_read(amal_program *self) {
    assert(self->read_index <= self->data.size);
    return self->data.size - self->read_index;
}

/*
    Reads and validates the header at the read cursor and advances the cursor past it.
    Returns AMAL_PROGRAM_OK, or AMAL_PROGRAM_INVALID_HEADER if the data is too short,
    AMAL_PROGRAM_INVALID_MAGIC_NUMBER if the magic number doesn't match, or
    AMAL_PROGRAM_INCOMPATIBLE if the major version is newer than this build.
*/
static CHECK_RESULT int amal_program_read_header(amal_program *self) {
    u32 magic_number;
    u8 major_version;
    u8 minor_version;
    u8 patch_version;

    if(bytes_left_to_read(self) < sizeof(u32) + sizeof(u8) * 3)
        return AMAL_PROGRAM_INVALID_HEADER;

    am_memcpy(&magic_number, &self->data.data[self->read_index], sizeof(magic_number));
    self->read_index += sizeof(u32);
    am_memcpy(&major_version, &self->data.data[self->read_index], sizeof(major_version));
    self->read_index += sizeof(u8);
    am_memcpy(&minor_version, &self->data.data[self->read_index], sizeof(minor_version));
    self->read_index += sizeof(u8);
    am_memcpy(&patch_version, &self->data.data[self->read_index], sizeof(patch_version));
    self->read_index += sizeof(u8);

    if(magic_number != AMAL_PROGRAM_MAGIC_NUMBER)
        return AMAL_PROGRAM_INVALID_MAGIC_NUMBER;

    /*
        A program is only incompatible if the major version is newer than the version that is used to run it.
        TODO: Implement backwards compatible reads, starting from when the program bytecode breaks backwards compatibility
    */
    if(major_version > AMAL_PROGRAM_MAJOR_VERSION)
        return AMAL_PROGRAM_INCOMPATIBLE;

    return AMAL_PROGRAM_OK;
}

/*
    Reads the intermediates section: a u32 byte size followed by that many bytes of entries,
    where every entry is a u8 type tag followed by a 64-bit value.
    Records where the entries start and how many there are, then skips past the section.
*/
static CHECK_RESULT int amal_program_read_intermediates(amal_program *self) {
    u32 intermediates_size;
    /*u32 read_end;*/

    if(bytes_left_to_read(self) < sizeof(intermediates_size)) {
        amal_log_error("Not enough space in program to intermediates size");
        return AMAL_PROGRAM_INVALID_INTERMEDIATES;
    }

    am_memcpy(&intermediates_size, &self->data.data[self->read_index], sizeof(intermediates_size));
    self->read_index += sizeof(intermediates_size);

    if(bytes_left_to_read(self) < intermediates_size) {
        amal_log_error("Not enough space in program to read all intermediates");
        return AMAL_PROGRAM_INVALID_INTERMEDIATES_SIZE;
    }

    self->intermediates_start = &self->data.data[self->read_index];
    /*
        Always assigned, also for an empty section, so that a count left over from a
        previously read section can't be used to index into this one.
    */
    self->num_intermediates = intermediates_size / (sizeof(u8) + sizeof(u64));
    /*
    read_end = self->read_index + intermediates_size;
    while(self->read_index < read_end) {
        NumberType type;
        NumberUnion value;
        am_memcpy(&type, &self->data.data[self->read_index], sizeof(u8));
        am_memcpy(&value, &self->data.data[self->read_index + sizeof(u8)], sizeof(u64));
        self->read_index += sizeof(u8) + sizeof(u64);
    }
    */
    self->read_index += intermediates_size;

    return AMAL_PROGRAM_OK;
}

/*
    Reads the strings section: a u16 string count, a u32 byte size and then the strings,
    each one stored as a u16 length followed by that many bytes.
    Builds self->string_indices, where every entry is the offset from self->strings_start
    to the length prefix of a string.
    Returns AMAL_PROGRAM_OK on success. Truncated data, a string that crosses the end of the
    section or more strings than the declared count are all rejected.
*/
static CHECK_RESULT int amal_program_read_strings(amal_program *self) {
    u16 num_strings;
    u16 num_strings_read;
    u32 strings_size;
    u32 read_start;
    u32 read_end;
    u32 *string_index_ptr;

    if(bytes_left_to_read(self) < sizeof(num_strings))
        return AMAL_PROGRAM_INVALID_STRINGS;

    am_memcpy(&num_strings, &self->data.data[self->read_index], sizeof(num_strings));
    self->read_index += sizeof(num_strings);

    /*
        amal_program_run reads one strings section per loop iteration, so release the
        table of the previous section instead of leaking it. The count is only published
        again once the new table has been filled in.
    */
    am_free(self->string_indices);
    self->string_indices = NULL;
    self->num_strings = 0;

    if(am_malloc(sizeof(u32) * num_strings, (void**)&self->string_indices) != 0)
        return AMAL_PROGRAM_STRING_ALLOC_FAILURE;
    string_index_ptr = self->string_indices;

    if(bytes_left_to_read(self) < sizeof(strings_size))
        return AMAL_PROGRAM_INVALID_STRINGS;

    am_memcpy(&strings_size, &self->data.data[self->read_index], sizeof(strings_size));
    self->read_index += sizeof(strings_size);

    if(bytes_left_to_read(self) < strings_size)
        return AMAL_PROGRAM_INVALID_STRINGS_SIZE;

    read_start = self->read_index;
    read_end = read_start + strings_size;
    self->strings_start = &self->data.data[self->read_index];
    num_strings_read = 0;
    while(self->read_index < read_end) {
        u16 string_size;

        /* The table only has room for @num_strings entries, don't write past it */
        if(num_strings_read >= num_strings)
            return AMAL_PROGRAM_INVALID_STRINGS;

        /* Bounds are checked against the end of this section, not the end of the whole program */
        if(read_end - self->read_index < sizeof(string_size))
            return AMAL_PROGRAM_INVALID_STRINGS;

        *string_index_ptr = self->read_index - read_start;
        ++string_index_ptr;
        ++num_strings_read;
        am_memcpy(&string_size, &self->data.data[self->read_index], sizeof(string_size));
        self->read_index += sizeof(string_size);

        if(read_end - self->read_index < string_size)
            return AMAL_PROGRAM_INVALID_STRINGS;

        self->read_index += string_size;
    }
    assert(self->read_index == read_end);

    /* Only expose the entries that were written, the rest of the table is uninitialized */
    self->num_strings = num_strings_read;
    return AMAL_PROGRAM_OK;
}

/*
    Copies the intermediate at @index of the most recently read intermediates section into @result.
    Returns 0 on success or AMAL_PROGRAM_INSTRUCTION_INVALID_INTERMEDIATE_INDEX if @index is out of range.
*/
static CHECK_RESULT int amal_program_get_intermediate_by_index(amal_program *self, u16 index, Number *result) {
    u8 type_tag;
    usize entry_offset;

    if(index >= self->num_intermediates)
        return AMAL_PROGRAM_INSTRUCTION_INVALID_INTERMEDIATE_INDEX;

    entry_offset = (sizeof(u8) + sizeof(u64)) * (usize)index;
    /*
        The tag is a single byte in the bytecode while NumberType is an enum. Go through a
        u8 so that every byte of result->type gets a defined value.
    */
    am_memcpy(&type_tag, &self->intermediates_start[entry_offset], sizeof(type_tag));
    result->type = (NumberType)type_tag;
    am_memcpy(&result->value, &self->intermediates_start[entry_offset + sizeof(u8)], sizeof(u64));
    return 0;
}

/*
    Stores in @result a pointer to the string at @index of the most recently read strings section.
    The pointer refers to the u16 length prefix of the string (that is the offset recorded by
    amal_program_read_strings), not to the first character.
    Returns 0 on success or AMAL_PROGRAM_INSTRUCTION_INVALID_DATA_INDEX if @index is out of range.
*/
static CHECK_RESULT int amal_program_get_data_by_index(amal_program *self, u16 index, char **result) {
    if(index >= self->num_strings) {
        /* The casts make the arguments match the %ld conversions */
        amal_log_error("Data index %ld is out of range (%ld)", (long)index, (long)self->num_strings);
        return AMAL_PROGRAM_INSTRUCTION_INVALID_DATA_INDEX;
    }

    *result = self->strings_start + self->string_indices[index];
    return 0;
}

/*
    Makes sure there is room for one more element on self->stack, doubling the allocation if needed.
    self->stack_size is a size in bytes (it is what is passed to am_malloc/am_realloc) while
    self->stack_index counts elements, so the capacity is derived from the element size.
    self->stack_size is only updated after the reallocation succeeded.
    Returns 0 on success, AMAL_PROGRAM_INSTRUCTION_STACK_OVERFLOW if the stack would reach 4MB
    or AMAL_PROGRAM_INSTRUCTION_STACK_OOM if the reallocation failed.
*/
static CHECK_RESULT int ensure_stack_capacity_for_push(amal_program *self) {
    const usize capacity = self->stack_size / sizeof(self->stack[0]);
    if(self->stack_index >= capacity) {
        const usize new_stack_size = self->stack_size * 2;
        /* 4MB */
        if(new_stack_size >= (1<<22))
            return AMAL_PROGRAM_INSTRUCTION_STACK_OVERFLOW;

        if(am_realloc(self->stack, new_stack_size, (void**)&self->stack) != 0)
            return AMAL_PROGRAM_INSTRUCTION_STACK_OOM;

        self->stack_size = new_stack_size;
    }
    return 0;
}

/*
    Returns non-zero if the magnitude of @value is at most INT32_MAX.
    Written as a range check since negating the most negative i64 is undefined behavior.
*/
static int magnitude_fits_in_i32(i64 value) {
    return value >= -(i64)INT32_MAX && value <= (i64)INT32_MAX;
}

#ifdef DEBUG
/* Only reached from the debug version of get_register_at_offset, when a register index is too large */
static int assert_reg_outside_stack(void) {
    assert(bool_false && "Register outside stack!");
    return 0;
}
#endif

/*
    Reads one instructions section (a u32 byte size followed by the instructions) and emits
    machine code for it through self->asm. Several opcodes also update self->reg and self->stack.

    NOTE(review): operand bytes are read without checking that they lie inside the section, so
    an instruction truncated by the end of the section reads past it. Opcodes without a case
    below are skipped one byte at a time.
*/
static CHECK_RESULT int amal_program_read_instructions(amal_program *self) {
    u32 instructions_size;
    u32 read_start;
    u32 read_end;
    bool inside_func;
    u16 func_num_registers;

    func_num_registers = 0;
    inside_func = bool_false;

    if(bytes_left_to_read(self) < sizeof(instructions_size))
        return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE;

    am_memcpy(&instructions_size, &self->data.data[self->read_index], sizeof(instructions_size));
    self->read_index += sizeof(instructions_size);

    if(bytes_left_to_read(self) < instructions_size)
        return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE;

    /*
        TODO: self->reg should be of type Number and each arithmetic operation should operate on the type of the register.

        TODO: Currently almost all operations are performed on memory. This should be optimized
        to take advantage of registers.

        TODO: Operations with memory registers could access outside the stack. Should this be checked?
    */

    /*
        Byte distance below RBP of the stack slot that belongs to the register whose index is
        stored @offset bytes after the read cursor. The index byte is read as u8, like every
        other register read in this function, so that indices of 128 and above can't turn
        negative if the program data is stored as plain char.
    */
    #ifdef DEBUG
    #define get_register_at_offset(offset) \
        ((u8)self->data.data[self->read_index + (offset)] < func_num_registers ? (u8)self->data.data[self->read_index + (offset)] * (int)sizeof(usize) + (int)sizeof(usize) : assert_reg_outside_stack())
    #else
    #define get_register_at_offset(offset) ((u8)self->data.data[self->read_index + (offset)] * (int)sizeof(usize) + (int)sizeof(usize))
    #endif

    read_start = self->read_index;
    read_end = read_start + instructions_size;
    while(self->read_index < read_end) {
        AmalOpcode opcode;
        opcode = self->data.data[self->read_index];
        self->read_index += sizeof(AmalOpcodeType);
        switch(opcode) {
            case AMAL_OP_NOP: {
                return_if_error(asm_nop(&self->asm));
                break;
            }
            case AMAL_OP_SETZ: {
                AsmPtr dst;
                asm_ptr_init_disp(&dst, RBP, -(i32)get_register_at_offset(0));
                return_if_error(asm_mov_mi(&self->asm, &dst, 0));
                self->reg[(u8)self->data.data[self->read_index]] = 0;
                self->read_index += 1;
                break;
            }
            case AMAL_OP_MOV: {
                AsmPtr ptr;
                asm_ptr_init_disp(&ptr, RBP, -(i32)get_register_at_offset(1));
                return_if_error(asm_mov_rm(&self->asm, RAX, &ptr));

                asm_ptr_init_disp(&ptr, RBP, -(i32)get_register_at_offset(0));
                return_if_error(asm_mov_mr(&self->asm, &ptr, RAX));

                self->reg[(u8)self->data.data[self->read_index]] = self->reg[(u8)self->data.data[self->read_index + 1]];
                self->read_index += 2;
                break;
            }
            case AMAL_OP_MOVI: {
                u8 dst_reg;
                u16 intermediate_index;
                Number number;
                AsmPtr dst;

                /* The operand byte is the index of the destination register, not a value to look up in self->reg */
                dst_reg = (u8)self->data.data[self->read_index];
                am_memcpy(&intermediate_index, &self->data.data[self->read_index + sizeof(u8)], sizeof(intermediate_index));

                return_if_error(amal_program_get_intermediate_by_index(self, intermediate_index, &number));
                self->reg[dst_reg] = number.value.integer;

                asm_ptr_init_disp(&dst, RBP, -(i32)get_register_at_offset(0));
                /* TODO: if @number is a float then use float instructions */
                if(magnitude_fits_in_i32(number.value.integer)) {
                    return_if_error(asm_mov_mi(&self->asm, &dst, number.value.integer));
                } else {
                    /* Too large for the immediate form used above, go through RAX */
                    return_if_error(asm_mov_ri(&self->asm, RAX, number.value.integer));
                    return_if_error(asm_mov_mr(&self->asm, &dst, RAX));
                }

                self->read_index += 3;
                break;
            }
            case AMAL_OP_MOVD: {
                u8 dst_reg;
                u16 data_index;
                char *data_ptr;
                AsmPtr dst;

                /* The operand byte is the index of the destination register, not a value to look up in self->reg */
                dst_reg = (u8)self->data.data[self->read_index];
                am_memcpy(&data_index, &self->data.data[self->read_index + sizeof(u8)], sizeof(data_index));

                return_if_error(amal_program_get_data_by_index(self, data_index, &data_ptr));
                self->reg[dst_reg] = (uintptr_t)data_ptr;

                asm_ptr_init_disp(&dst, RBP, -(i32)get_register_at_offset(0));
                return_if_error(asm_mov_ri(&self->asm, RAX, (uintptr_t)data_ptr));
                return_if_error(asm_mov_mr(&self->asm, &dst, RAX));

                self->read_index += 3;
                break;
            }
            case AMAL_OP_ADD: {
                AsmPtr dst;
                AsmPtr reg1;
                AsmPtr reg2;

                asm_ptr_init_disp(&dst, RBP, -(i32)get_register_at_offset(0));
                asm_ptr_init_disp(&reg1, RBP, -(i32)get_register_at_offset(1));
                asm_ptr_init_disp(&reg2, RBP, -(i32)get_register_at_offset(2));

                return_if_error(asm_mov_rm(&self->asm, RAX, &reg1));
                return_if_error(asm_mov_rm(&self->asm, RCX, &reg2));
                return_if_error(asm_add_rr(&self->asm, RAX, RCX));
                return_if_error(asm_mov_mr(&self->asm, &dst, RAX));

                self->read_index += 3;
                break;
            }
            case AMAL_OP_SUB: {
                AsmPtr dst;
                AsmPtr reg1;
                AsmPtr reg2;

                asm_ptr_init_disp(&dst, RBP, -(i32)get_register_at_offset(0));
                asm_ptr_init_disp(&reg1, RBP, -(i32)get_register_at_offset(1));
                asm_ptr_init_disp(&reg2, RBP, -(i32)get_register_at_offset(2));

                return_if_error(asm_mov_rm(&self->asm, RAX, &reg1));
                return_if_error(asm_mov_rm(&self->asm, RCX, &reg2));
                return_if_error(asm_sub_rr(&self->asm, RAX, RCX));
                return_if_error(asm_mov_mr(&self->asm, &dst, RAX));

                self->read_index += 3;
                break;
            }
            case AMAL_OP_IMUL: {
                AsmPtr dst;
                AsmPtr reg1;
                AsmPtr reg2;

                asm_ptr_init_disp(&dst, RBP, -(i32)get_register_at_offset(0));
                asm_ptr_init_disp(&reg1, RBP, -(i32)get_register_at_offset(1));
                asm_ptr_init_disp(&reg2, RBP, -(i32)get_register_at_offset(2));

                return_if_error(asm_mov_rm(&self->asm, RAX, &reg1));
                return_if_error(asm_mov_rm(&self->asm, RCX, &reg2));
                return_if_error(asm_imul_rr(&self->asm, RAX, RCX));
                return_if_error(asm_mov_mr(&self->asm, &dst, RAX));

                self->read_index += 3;
                break;
            }
            case AMAL_OP_MUL: {
                #if 0
                AsmPtr dst;
                AsmPtr reg1;
                AsmPtr reg2;

                asm_ptr_init_disp(&dst, RBP, -(i32)get_register_at_offset(0));
                asm_ptr_init_disp(&reg1, RBP, -(i32)get_register_at_offset(1));
                asm_ptr_init_disp(&reg2, RBP, -(i32)get_register_at_offset(2));

                return_if_error(asm_mov_rm(&self->asm, RAX, &reg1));
                return_if_error(asm_mov_rm(&self->asm, RCX, &reg2));
                return_if_error(asm_mul_rr(&self->asm, RAX, RCX));
                return_if_error(asm_mov_mr(&self->asm, &dst, RAX));
                #endif

                self->read_index += 3;
                break;
            }
            case AMAL_OP_IDIV: {
                /* Not implemented, the operands are skipped */
                self->read_index += 3;
                break;
            }
            case AMAL_OP_DIV: {
                /* Not implemented, the operands are skipped */
                self->read_index += 3;
                break;
            }
            case AMAL_OP_PUSH: {
                #if 0
                AsmPtr reg;
                asm_ptr_init_disp(&reg, RBP, -(i32)get_register_at_offset(0));

                return_if_error(ensure_stack_capacity_for_push(self));
                self->stack[self->stack_index] = self->reg[(u8)self->data.data[self->read_index]];
                ++self->stack_index;
                return_if_error(asm_pushm(&self->asm, &reg));
                #endif
                self->read_index += 1;
                break;
            }
            case AMAL_OP_PUSHI: {
                u16 intermediate_index;
                Number number;

                am_memcpy(&intermediate_index, &self->data.data[self->read_index], sizeof(intermediate_index));
                return_if_error(amal_program_get_intermediate_by_index(self, intermediate_index, &number));
                return_if_error(ensure_stack_capacity_for_push(self));

                self->stack[self->stack_index] = number.value.integer;
                ++self->stack_index;
                self->read_index += 2;
                break;
            }
            case AMAL_OP_PUSHD: {
                u16 data_index;
                char *data_ptr;

                am_memcpy(&data_index, &self->data.data[self->read_index], sizeof(data_index));
                return_if_error(amal_program_get_data_by_index(self, data_index, &data_ptr));
                return_if_error(ensure_stack_capacity_for_push(self));

                self->stack[self->stack_index] = (uintptr_t)data_ptr;
                ++self->stack_index;
                self->read_index += 2;
                break;
            }
            case AMAL_OP_CALL:
                /*assert(bool_false && "TODO: Implement CALL");*/
                self->read_index += 3;
                break;
            case AMAL_OP_CALLR:
                /*assert(bool_false && "TODO: Implement CALLR");*/
                self->read_index += 2;
                break;
            case AMAL_OP_CMP: {
                self->reg[(u8)self->data.data[self->read_index]] =
                    self->reg[(u8)self->data.data[self->read_index + 1]] == self->reg[(u8)self->data.data[self->read_index + 2]];
                self->read_index += 3;
                break;
            }
            case AMAL_OP_JZ: {
                #if 0
                u8 reg;
                i16 jump_offset;
                u32 jump_target;

                reg = (u8)self->data.data[self->read_index];
                am_memcpy(&jump_offset, &self->data.data[self->read_index + 1], sizeof(jump_offset));
                jump_target = (isize)self->read_index + jump_offset;
                if(jump_target < read_start || jump_target >= read_end)
                    return AMAL_PROGRAM_INSTRUCTION_ILLEGAL_JUMP_TARGET;
                #endif
                self->read_index += 3;
                break;
            }
            case AMAL_OP_JMP: {
                #if 0
                i16 jump_offset;
                u32 jump_target;

                am_memcpy(&jump_offset, &self->data.data[self->read_index], sizeof(jump_offset));
                jump_target = (isize)self->read_index + jump_offset;
                if(jump_target < read_start || jump_target >= read_end)
                    return AMAL_PROGRAM_INSTRUCTION_ILLEGAL_JUMP_TARGET;
                #endif
                self->read_index += 2;
                break;
            }
            case AMAL_OP_RET:
                /* return_if_error(asm_ret(&self->asm, 0)); */
                assert(bool_false && "TODO: Implement RET. RET needs to restore the stack before returning");
                break;
            case AMAL_OP_FUNC_START: {
                assert(!inside_func);
                inside_func = bool_true;
                am_memcpy(&func_num_registers, &self->data.data[self->read_index], sizeof(func_num_registers));
                /*
                    TODO: Validate stack size, or maybe remove all validation? do we really need validation?
                    If we need security, we could fork the process instead.
                */

                /*
                    Some registers need to be preserved before entering a function scope and these registers are different on different platforms.
                    32-bit: EBX, ESI, EDI, EBP
                    64-bit Windows: RBX, RSI, RDI, RBP, R12-R15, XMM6-XMM15
                    64-bit Linux,BSD,Mac: RBX, RBP, R12-R15
                */
                return_if_error(asm_pushr(&self->asm, RBX));
                return_if_error(asm_pushr(&self->asm, RBP));
                return_if_error(asm_mov_rr(&self->asm, RBP, RSP));
                /* One usize sized stack slot per register of the function */
                return_if_error(asm_sub_rm64_imm(&self->asm, RSP, func_num_registers * sizeof(usize)));
                self->read_index += 2;
                break;
            }
            case AMAL_OP_FUNC_END: {
                assert(inside_func);
                inside_func = bool_false;
                /*assert(bool_false && "TODO: Implement FUNC_END");*/
                /* TODO: Validate FUNC_END is called for every FUNC_START, otherwise stack will be corrupted */
                /* TODO: Use mov_rr(RSP, RBP) instead? why doesn't gcc do this? */
                return_if_error(asm_mov_rr(&self->asm, RSP, RBP));
                return_if_error(asm_popr(&self->asm, RBP));
                return_if_error(asm_popr(&self->asm, RBX));
                return_if_error(asm_ret(&self->asm, 0));
                break;
            }
        }
    }

    return AMAL_PROGRAM_OK;
}

/*
    Reads the header and then, until all program data is consumed, one intermediates section,
    one strings section and one instructions section at a time. Finally hands the result to
    asm_execute and returns its result. The first error encountered is returned as is.
*/
int amal_program_run(amal_program *self) {
    return_if_error(amal_program_read_header(self));
    while(bytes_left_to_read(self) > 0) {
        return_if_error(amal_program_read_intermediates(self));
        return_if_error(amal_program_read_strings(self));
        return_if_error(amal_program_read_instructions(self));
    }
    return asm_execute(&self->asm);
}

/*
    Writes the program data to @filepath, replacing any existing file.
    Returns 0 on success. On failure a message is printed with perror, the file is closed and
    the negated errno is returned (-1 if errno was not set), so a failure is never reported as 0.
*/
int amal_program_save(amal_program *self, const char *filepath) {
    int err;
    FILE *file;

    file = fopen(filepath, "wb");
    if(!file) {
        err = errno;
        perror(filepath);
        return err != 0 ? -err : -1;
    }

    if(fwrite(self->data.data, 1, self->data.size, file) != self->data.size) {
        /* errno is saved first, perror and fclose are allowed to change it */
        err = errno;
        perror(filepath);
        fclose(file);
        return err != 0 ? -err : -1;
    }

    /* Buffered data is flushed by fclose, so a write error can still show up here */
    if(fclose(file) != 0) {
        err = errno;
        perror(filepath);
        return err != 0 ? -err : -1;
    }
    return 0;
}