#include "asm.h"
#include "../../include/std/mem.h"
#include "../../include/std/log.h"
/*
    NOTE(review): The names of the system headers were missing from this file.
    They have been restored from the symbols used below (errno, assert,
    printf/NULL, mmap/mprotect/munmap). Confirm against the build.
*/
#include <errno.h>
#include <assert.h>
#include <stdio.h>
#include <sys/mman.h>

/* REX documentation: https://wiki.osdev.org/X86-64_Instruction_Encoding#Encoding */
#define REX_W 0x48 /* 0 = 32-bit operand size, 1 = 64-bit operand size - For most operations... */

/*
    Returns the REX prefix (with REX.W set) for a register-to-register
    instruction where @dst is encoded in ModRM.rm and @src in ModRM.reg.
*/
static u8 rex_rr(Reg64 dst, Reg64 src) {
    u8 rex = REX_W;
    rex |= ((dst & REG64_EXTENDED_REG_BIT) >> 3); /* REX.B */
    rex |= ((src & REG64_EXTENDED_REG_BIT) >> 1); /* REX.R */
    assert(REG64_EXTENDED_REG_BIT == (1<<3));
    return rex;
}

/*
    Returns the REX prefix (with REX.W set) for an instruction with a memory operand @mem
    and a register operand @reg (encoded in ModRM.reg).
    REX.B extends the base register, REX.X extends the index register (only when an index
    is used, that is when index is not -1) and REX.R extends @reg.
    For registers below r8 the result is always REX_W.
*/
static u8 rex_rm(AsmPtr *mem, Reg64 reg) {
    u8 rex = REX_W;
    assert(REG64_EXTENDED_REG_BIT == (1<<3));
    rex |= ((mem->base & REG64_EXTENDED_REG_BIT) >> 3); /* REX.B */
    if((int)mem->index != -1)
        rex |= ((mem->index & REG64_EXTENDED_REG_BIT) >> 2); /* REX.X */
    rex |= ((reg & REG64_EXTENDED_REG_BIT) >> 1); /* REX.R */
    return rex;
}

/* Returns the magnitude of @value. @value must not be the most negative i32 (its magnitude is not representable) */
static i32 abs_i32(i32 value) {
    return value >= 0 ? value : -value;
}

#ifdef DEBUG
#include <stdarg.h>
#include <string.h>

/* Scratch buffer used to build operand strings for the instruction that is currently being printed */
static u8 asm_debug_str_buffer[256];
static usize asm_debug_str_buffer_index = 0;
/* Offset (in bytes from the start of the code buffer) of the instruction that is currently being emitted */
static usize ins_start_offset = 0;

/*
    Appends @str to the debug string buffer and keeps the buffer null terminated.
    Text that doesn't fit in the buffer is silently truncated.
*/
static void asm_debug_str_append(const char *str) {
    /* One byte is always reserved for the null terminator */
    const usize capacity = sizeof(asm_debug_str_buffer) - 1;
    usize len = strlen(str);
    if(len > capacity - asm_debug_str_buffer_index)
        len = capacity - asm_debug_str_buffer_index;
    am_memcpy(asm_debug_str_buffer + asm_debug_str_buffer_index, str, len);
    asm_debug_str_buffer_index += len;
    asm_debug_str_buffer[asm_debug_str_buffer_index] = '\0';
}

/* Appends @num to the debug string buffer, formatted as hex with a 0x prefix */
static void asm_debug_str_append_num(int num) {
    char num_str[32];
    sprintf(num_str, "0x%x", num);
    asm_debug_str_append(num_str);
}

/* Marks the start of a new instruction and resets the debug string buffer */
static void ins_start(Asm *self) {
    ins_start_offset = (u8*)self->code_it - (u8*)self->code;
    asm_debug_str_buffer_index = 0;
}

/*
    Prints (to stderr) the bytes emitted since the last call to @ins_start,
    followed by the instruction text described by the printf-style format @fmt.
*/
static void ins_end(Asm *self, const char *fmt, ...) {
    usize ins_end_offset;
    usize printed_width;
    usize i;
    va_list args;
    va_start(args, fmt);

    ins_end_offset = (u8*)self->code_it - (u8*)self->code;
    for(i = ins_start_offset; i < ins_end_offset; ++i) {
        fprintf(stderr, "%02x ", ((u8*)self->code)[i]);
    }
    /* Same padding for all instructions, no matter how long they are. Each byte was printed as 3 characters */
    printed_width = (ins_end_offset - ins_start_offset) * 3;
    for(i = printed_width; i < 35; ++i) {
        putc(' ', stderr);
    }
    vfprintf(stderr, fmt, args);
    putc('\n', stderr);
    va_end(args);
}

/* Returns the name of the 32-bit register @reg */
static const char* reg32_to_str(Reg32 reg) {
    switch(reg) {
        case EAX: return "eax";
        case ECX: return "ecx";
        case EDX: return "edx";
        case EBX: return "ebx";
        case ESP: return "esp";
        case EBP: return "ebp";
        case ESI: return "esi";
        case EDI: return "edi";
    }
    assert(bool_false);
    return NULL;
}

/* Returns the name of the 64-bit register @reg */
static const char* reg64_to_str(Reg64 reg) {
    switch(reg) {
        case RAX: return "rax";
        case RCX: return "rcx";
        case RDX: return "rdx";
        case RBX: return "rbx";
        case RSP: return "rsp";
        case RBP: return "rbp";
        case RSI: return "rsi";
        case RDI: return "rdi";
        case R8:  return "r8";
        case R9:  return "r9";
        case R10: return "r10";
        case R11: return "r11";
        case R12: return "r12";
        case R13: return "r13";
        case R14: return "r14";
        case R15: return "r15";
    }
    assert(bool_false);
    return NULL;
}

/*
    Formats the memory operand @self in intel syntax, for example
    "QWORD PTR [rax + rcx * 0x2 + 0x10]".
    The returned string lives in the debug string buffer and is only valid until the next call to @ins_start.
    NOTE(review): scale is printed as a multiplier (0 is shown as 1), matching the values
    accepted by asm_rm (0, 2, 4 or 8). Confirm against the definition of AsmPtr.
*/
static const char* asm_ptr_to_string(AsmPtr *self) {
    const char *buf = (const char*)(asm_debug_str_buffer + asm_debug_str_buffer_index);
    asm_debug_str_append("QWORD PTR [");
    asm_debug_str_append(reg64_to_str(self->base));
    if((int)self->index != -1) {
        asm_debug_str_append(" + ");
        asm_debug_str_append(reg64_to_str(self->index));
        asm_debug_str_append(" * ");
        asm_debug_str_append_num(self->scale == 0 ? 1 : (int)self->scale);
    }
    if(self->disp != 0) {
        if(self->disp < 0)
            asm_debug_str_append(" - ");
        else
            asm_debug_str_append(" + ");
        asm_debug_str_append_num(abs_i32(self->disp));
    }
    asm_debug_str_append("]");
    return buf;
}
#else
/* The debug printing functions do nothing in non-debug builds */
static void ins_start(Asm *self) {
    (void)self;
}

static void ins_end(Asm *self, const char *fmt, ...) {
    (void)self;
    (void)fmt;
}

static const char* reg32_to_str(Reg32 reg) {
    (void)reg;
    return NULL;
}

static const char* reg64_to_str(Reg64 reg) {
    (void)reg;
    return NULL;
}

static const char* asm_ptr_to_string(AsmPtr *self) {
    (void)self;
    return NULL;
}
#endif

/* Initializes @self as [base] */
void asm_ptr_init(AsmPtr *self, Reg64 base) {
    self->base = base;
    self->index = -1; /* -1 = no index register */
    self->disp = 0;
    self->scale = 0;
}

/* Initializes @self as [base + index] */
void asm_ptr_init_index(AsmPtr *self, Reg64 base, Reg64 index) {
    self->base = base;
    self->index = index;
    self->disp = 0;
    self->scale = 0;
}

/* Initializes @self as [base + disp] */
void asm_ptr_init_disp(AsmPtr *self, Reg64 base, i32 disp) {
    self->base = base;
    self->index = -1; /* -1 = no index register */
    self->disp = disp;
    self->scale = 0;
}

/* Initializes @self as [base + index + disp] */
void asm_ptr_init_index_disp(AsmPtr *self, Reg64 base, Reg64 index, i32 disp) {
    self->base = base;
    self->index = index;
    self->disp = disp;
    self->scale = 0;
}

/*
    Allocates one page of writable memory for the code buffer.
    Returns 0 on success and -errno on failure. On failure @self is left in the same
    state as after @asm_deinit, so calling @asm_deinit on it is safe.
*/
int asm_init(Asm *self) {
    self->allocated_size = am_pagesize();
    amal_log_debug("asm: page size: %u", self->allocated_size);
    self->code = mmap(NULL, self->allocated_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if(self->code == MAP_FAILED) {
        const int err = errno;
        self->code = NULL;
        self->code_it = NULL;
        self->allocated_size = 0;
        return -err;
    }
    self->code_it = self->code;
    return 0;
}

/* Releases the code buffer. Safe to call more than once */
void asm_deinit(Asm *self) {
    if(self->code)
        munmap(self->code, self->allocated_size);
    self->code = NULL;
    self->code_it = NULL;
    self->allocated_size = 0;
}

/* Returns the number of bytes of code that have been emitted so far */
usize asm_get_size(Asm *self) {
    return self->code_it - (u8*)self->code;
}

#if 0
static void asm_print_code_hex(Asm *self) {
    u8 *ptr;
    int off;
    ptr = self->code;
    off = 0;
    while(ptr != self->code_it) {
        printf("%02x", *ptr);
        ++ptr;
        ++off;
        if(off == 8) {
            putc('\n', stdout);
            off = 0;
        } else {
            putc(' ', stdout);
        }
    }
    if(off != 0)
        putc('\n', stdout);
}
#endif

/* Used to convert a data pointer to a function pointer without a cast */
typedef union {
    u8 *data;
    int (*func)(void);
} RawFuncCallPtr;

/*
    Makes the code buffer executable (and no longer writable) and calls the code
    that starts @offset bytes into the buffer.
    Returns 0 on success and -errno if the memory protection can't be changed.
    The value returned by the called code is ignored.
*/
int asm_execute(Asm *self, u32 offset) {
    RawFuncCallPtr raw_func_ptr;
    if(mprotect(self->code, self->allocated_size, PROT_READ | PROT_EXEC) != 0)
        return -errno;

    /*asm_print_code_hex(self);*/

    /* TODO: Verify if this is valid on all platforms. According to ISO C standard it isn't? */
    raw_func_ptr.data = (u8*)self->code + offset;
    raw_func_ptr.func();
    return 0;
}

/*
    TODO: See how this can be optimized.
    Makes sure that at least @size more bytes can be written to the code buffer.
    The buffer is grown by as many whole pages as needed, by allocating a new mapping
    and copying the existing code into it.
    Returns 0 on success and -errno on failure, in which case the buffer is unchanged.
*/
int asm_ensure_capacity(Asm *self, usize size) {
    usize current_offset = (u8*)self->code_it - (u8*)self->code;
    if(current_offset + size > self->allocated_size) {
        const usize page_size = am_pagesize();
        const usize missing = current_offset + size - self->allocated_size;
        /* Round the missing size up to whole pages; this is at least one page */
        const usize new_size = self->allocated_size + ((missing + page_size - 1) / page_size) * page_size;
        void *new_mem = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if(new_mem == MAP_FAILED)
            return -errno;

        am_memcpy(new_mem, self->code, self->allocated_size);
        munmap(self->code, self->allocated_size);
        self->code = new_mem;
        self->allocated_size = new_size;
        self->code_it = (u8*)self->code + current_offset;
    }
    return 0;
}

/* Only used inside assert, so it only exists when assert is active */
#ifndef NDEBUG
static isize asm_get_capacity_left(Asm *self) {
    return (isize)self->allocated_size - (isize)((u8*)self->code_it - (u8*)self->code);
}
#endif

void asm_nop(Asm *self) {
    ins_start(self);
    *self->code_it++ = 0x90;
    ins_end(self, "nop");
}

/*
    Converts the scale multiplier of a memory operand (0 = no scaling, 2, 4 or 8)
    to the scale field (the two highest bits) of a SIB byte.
*/
static u8 sib_scale_bits(int scale) {
    switch(scale) {
        case 0: return 0x00;
        case 2: return 0x40;
        case 4: return 0x80;
        case 8: return 0xC0;
    }
    #ifdef DEBUG
    amal_log_error("Invalid scale %d, expected 0, 2, 4, or 8", scale);
    #endif
    assert(bool_false);
    return 0x00;
}

/*
    TODO: Implement 1 and 2 byte displacement?
    Emits the ModRM byte, the SIB byte (if needed) and the displacement (if needed)
    for the memory operand @mem, with @reg in the reg field of the ModRM byte.
    There has to be at least 6 bytes left in the asm buffer before calling this function.
*/
static void asm_rm(Asm *self, AsmPtr *mem, Reg64 reg) {
    u8 rm_byte;
    u8 disp_bytes;
    const u8 reg_bits = (u8)((reg & REG64_REG_BITS) << 3);
    assert(asm_get_capacity_left(self) >= 6);
    if((int)mem->index != -1) {
        /* SIB */
        u8 sib_byte;
        if(mem->disp == 0) {
            rm_byte = 0x04;
            disp_bytes = 0;
        } else if(abs_i32(mem->disp) <= INT8_MAX) {
            rm_byte = 0x44;
            disp_bytes = 1;
        } else {
            rm_byte = 0x84;
            disp_bytes = 4;
        }

        /* Scale is not valid when index is RSP */
        assert(mem->index != RSP || mem->scale == 0);
        /* With base bits 101 (RBP/R13) and no displacement, the cpu expects a 4 byte displacement and no base */
        assert(mem->base != RBP && "TODO: Implement RBP base for sib byte. RBP is special and requires different logic");
        assert(mem->base != R13 && "TODO: Implement R13 base for sib byte. R13 is special and requires different logic");
        sib_byte = sib_scale_bits(mem->scale) + 8*(mem->index & REG64_REG_BITS) + (mem->base & REG64_REG_BITS);
        *self->code_it++ = reg_bits | rm_byte;
        *self->code_it++ = sib_byte;
    } else {
        u8 base = (mem->base & REG64_REG_BITS);
        assert(mem->scale == 0); /* Scale isn't valid without index reg */
        if(mem->disp == 0) {
            if(base == RBP) {
                rm_byte = 0x45;
                disp_bytes = 1; /* RBP requires use of disp byte, even if it's not used */
            } else {
                rm_byte = base;
                disp_bytes = 0;
            }
        } else if(abs_i32(mem->disp) <= INT8_MAX) {
            rm_byte = 0x40 + base;
            disp_bytes = 1;
        } else {
            rm_byte = 0x80 + base;
            disp_bytes = 4;
        }
        *self->code_it++ = reg_bits | rm_byte;
        /* RSP requires SIB byte */
        if(base == RSP)
            *self->code_it++ = 0x24;
    }
    /* Only the lowest @disp_bytes bytes of the displacement are written (x86 is little endian) */
    am_memcpy(self->code_it, &mem->disp, disp_bytes);
    self->code_it += disp_bytes;
}

/*
    Emits a ModRM byte in register-direct mode, with @dst in the rm field and @src in the reg field.
    There has to be at least 1 byte left in the asm buffer before calling this function.
*/
static void asm_rr(Asm *self, Reg64 dst, Reg64 src) {
    assert(asm_get_capacity_left(self) >= 1);
    *self->code_it++ = 0xC0 + (dst & REG64_REG_BITS) + 8*(src & REG64_REG_BITS);
}

/* TODO: Implement 1 and 2 byte immediate? */
void asm_mov_mi(Asm *self, AsmPtr *dst, i32 immediate) {
    ins_start(self);
    *self->code_it++ = rex_rm(dst, 0);
    *self->code_it++ = 0xC7;
    asm_rm(self, dst, 0);
    am_memcpy(self->code_it, &immediate, sizeof(immediate));
    self->code_it += sizeof(immediate);
    ins_end(self, "mov %s, 0x%x", asm_ptr_to_string(dst), immediate);
}

void asm_mov_mr(Asm *self, AsmPtr *dst, Reg64 src) {
    ins_start(self);
    *self->code_it++ = rex_rm(dst, src);
    *self->code_it++ = 0x89;
    asm_rm(self, dst, src);
    ins_end(self, "mov %s, %s", asm_ptr_to_string(dst), reg64_to_str(src));
}

void asm_mov_rm(Asm *self, Reg64 dst, AsmPtr *src) {
    ins_start(self);
    *self->code_it++ = rex_rm(src, dst);
    *self->code_it++ = 0x8B;
    asm_rm(self, src, dst);
    ins_end(self, "mov %s, %s", reg64_to_str(dst), asm_ptr_to_string(src));
}

/* Note: This shows as instruction movabs in intel assembly format */
void asm_mov_ri(Asm *self, Reg64 dst, i64 immediate) {
    ins_start(self);
    *self->code_it++ = rex_rr(dst, 0);
    /* Only the low 3 bits of the register are part of the opcode, the 4th bit is in the REX prefix */
    *self->code_it++ = 0xB8 + (dst & REG64_REG_BITS);
    am_memcpy(self->code_it, &immediate, sizeof(immediate));
    self->code_it += sizeof(immediate);
    /* long is 64-bit on the x86-64 unix platforms that this file (which uses mmap) can run on */
    ins_end(self, "mov %s, 0x%lx", reg64_to_str(dst), (unsigned long)immediate);
}

void asm_mov_rr(Asm *self, Reg64 dst, Reg64 src) {
    ins_start(self);
    *self->code_it++ = rex_rr(dst, src);
    *self->code_it++ = 0x89;
    asm_rr(self, dst, src);
    ins_end(self, "mov %s, %s", reg64_to_str(dst), reg64_to_str(src));
}

void asm_add_rr(Asm *self, Reg64 dst, Reg64 src) {
    ins_start(self);
    *self->code_it++ = rex_rr(dst, src);
    *self->code_it++ = 0x01;
    asm_rr(self, dst, src);
    ins_end(self, "add %s, %s", reg64_to_str(dst), reg64_to_str(src));
}

void asm_sub_rr(Asm *self, Reg64 dst, Reg64 src) {
    ins_start(self);
    *self->code_it++ = rex_rr(dst, src);
    *self->code_it++ = 0x29;
    asm_rr(self, dst, src);
    ins_end(self, "sub %s, %s", reg64_to_str(dst), reg64_to_str(src));
}

void asm_imul_rr(Asm *self, Reg64 dst, Reg64 src) {
    ins_start(self);
    *self->code_it++ = rex_rr(dst, src);
    *self->code_it++ = 0x0F;
    *self->code_it++ = 0xAF;
    asm_rr(self, dst, src);
    ins_end(self, "imul %s, %s", reg64_to_str(dst), reg64_to_str(src));
}

void asm_cqo(Asm *self) {
    ins_start(self);
    *self->code_it++ = REX_W;
    *self->code_it++ = 0x99;
    ins_end(self, "cqo");
}

void asm_idiv_rr(Asm *self, Reg64 src) {
    ins_start(self);
    *self->code_it++ = rex_rr(src, 0);
    *self->code_it++ = 0xF7;
    asm_rr(self, src, 0x7); /* 0x7 is the opcode extension for idiv */
    ins_end(self, "idiv %s", reg64_to_str(src));
}

void asm_pushr(Asm *self, Reg64 reg) {
    ins_start(self);
    /* r8-r15 need a REX prefix with the B bit set. The operand size is 64-bit by default so REX.W is not needed */
    if(reg & REG64_EXTENDED_REG_BIT)
        *self->code_it++ = 0x41;
    *self->code_it++ = 0x50 + (reg & REG64_REG_BITS);
    ins_end(self, "push %s", reg64_to_str(reg));
}

void asm_popr(Asm *self, Reg64 reg) {
    ins_start(self);
    /* r8-r15 need a REX prefix with the B bit set. The operand size is 64-bit by default so REX.W is not needed */
    if(reg & REG64_EXTENDED_REG_BIT)
        *self->code_it++ = 0x41;
    *self->code_it++ = 0x58 + (reg & REG64_REG_BITS);
    ins_end(self, "pop %s", reg64_to_str(reg));
}

void asm_callr(Asm *self, Reg64 reg) {
    ins_start(self);
    *self->code_it++ = rex_rr(reg, 0);
    *self->code_it++ = 0xFF;
    asm_rr(self, reg, 0x2); /* 0x2 is the opcode extension for call */
    ins_end(self, "call %s", reg64_to_str(reg));
}

/*
    Note: This is sometimes called with @relative 0 (will print call -5), in which case it's most likely a dummy call until the relative position
    is later changed with @asm_overwrite_call_rel32.
    TODO: Update the ins_end debug print to take that into account somehow
*/
void asm_call_rel32(Asm *self, i32 relative) {
    ins_start(self);
    relative -= 5; /* In x86, the relative position starts from the next instruction */
    *self->code_it++ = 0xE8;
    am_memcpy(self->code_it, &relative, sizeof(relative));
    self->code_it += sizeof(relative);
    ins_end(self, "call 0x%x", relative);
}

/* Replaces the target of the call instruction that starts at byte offset @asm_index in the code buffer */
void asm_overwrite_call_rel32(Asm *self, u32 asm_index, i32 new_relative) {
    assert(*((u8*)self->code + asm_index) == 0xE8);
    new_relative -= 5; /* In x86, the relative position starts from the next instruction */
    am_memcpy((u8*)self->code + asm_index + 1, &new_relative, sizeof(new_relative));
}

void asm_cmp_rm(Asm *self, Reg64 reg1, AsmPtr *reg2) {
    ins_start(self);
    *self->code_it++ = rex_rm(reg2, reg1);
    *self->code_it++ = 0x3B;
    asm_rm(self, reg2, reg1);
    ins_end(self, "cmp %s, %s", reg64_to_str(reg1), asm_ptr_to_string(reg2));
}

void asm_sete_m(Asm *self, AsmPtr *dst) {
    assert(dst->base != RSP && dst->base != RBP && dst->base != RSI && dst->base != RDI);
    ins_start(self);
    *self->code_it++ = 0x0F;
    *self->code_it++ = 0x94;
    asm_rm(self, dst, 0x0); /* the @src bits are not used */
    ins_end(self, "sete %s", asm_ptr_to_string(dst));
}

void asm_sete_r(Asm *self, Reg64 dst) {
    assert(dst != RSP && dst != RBP && dst != RSI && dst != RDI);
    ins_start(self);
    *self->code_it++ = 0x0F;
    *self->code_it++ = 0x94;
    asm_rr(self, dst, 0x0); /* the @src bits are not used */
    ins_end(self, "sete %s", reg64_to_str(dst));
}

/*
    Note: This is sometimes called with @relative INT32_MAX-(2 or 6) (will print jz 0x7ffffff9), in which case it's most likely a dummy
    jump until the relative position is later changed with @asm_overwrite_jcc_rel32.
    TODO: Update the ins_end debug print to take that into account somehow
*/
void asm_jz(Asm *self, i32 relative) {
    /*
        Note: We dont use the 16-bit relative variant, as it will clear the upper two bytes of the EIP registers, resulting
        in a maximum instruction pointer size of 16 bits
    */
    ins_start(self);
    if(abs_i32(relative - 2) <= INT8_MAX) {
        relative -= 2;
        *self->code_it++ = 0x74;
        *self->code_it++ = (i8)relative;
    } else {
        relative -= 6;
        *self->code_it++ = 0x0F;
        *self->code_it++ = 0x84;
        am_memcpy(self->code_it, &relative, sizeof(relative));
        self->code_it += sizeof(relative);
    }
    ins_end(self, "jz 0x%x", relative);
}

/* Replaces the target of the rel32 jz instruction that starts at byte offset @asm_index in the code buffer */
void asm_overwrite_jcc_rel32(Asm *self, u32 asm_index, i32 new_relative) {
    /* +2 because rel32 variant of the jump instruction opcode is 2 bytes */
    assert(*((u8*)self->code + asm_index) == 0x0F);
    assert(*((u8*)self->code + asm_index + 1) == 0x84);
    new_relative -= 6; /* In x86, the relative position starts from the next instruction */
    am_memcpy((u8*)self->code + asm_index + 2, &new_relative, sizeof(new_relative));
}

/*
    Note: This is sometimes called with @relative INT32_MAX-(2 or 5) (will print jmp 0x7ffffffa), in which case it's most likely a dummy
    jump until the relative position is later changed with @asm_overwrite_jmp_rel32.
    TODO: Update the ins_end debug print to take that into account somehow
*/
void asm_jmp(Asm *self, i32 relative) {
    /*
        Note: We dont use the 16-bit relative variant, as it will clear the upper two bytes of the EIP registers, resulting
        in a maximum instruction pointer size of 16 bits
    */
    ins_start(self);
    if(abs_i32(relative - 2) <= INT8_MAX) {
        relative -= 2;
        *self->code_it++ = 0xEB;
        *self->code_it++ = (i8)relative;
    } else {
        relative -= 5;
        *self->code_it++ = 0xE9;
        am_memcpy(self->code_it, &relative, sizeof(relative));
        self->code_it += sizeof(relative);
    }
    ins_end(self, "jmp 0x%x", relative);
}

/* Replaces the target of the rel32 jmp instruction that starts at byte offset @asm_index in the code buffer */
void asm_overwrite_jmp_rel32(Asm *self, u32 asm_index, i32 new_relative) {
    /* +1 to skip instruction opcode */
    assert(*((u8*)self->code + asm_index) == 0xE9);
    new_relative -= 5; /* In x86, the relative position starts from the next instruction */
    am_memcpy((u8*)self->code + asm_index + 1, &new_relative, sizeof(new_relative));
}

/* TODO: Remove these !*/

/*
    /r
    Defines asm_<mnemonic>_rm32 and asm_<mnemonic>_rm64, which emit a register-to-register instruction
    with @dst in the reg field and @src in the rm field of the ModRM byte.
    The 64-bit variant emits a REX prefix to select 64-bit operand size.
*/
#define DEFINE_INS_RM(mnemonic, opcode) \
void asm_##mnemonic##_rm32(Asm *self, Reg32 dst, Reg32 src) { \
    ins_start(self); \
    *self->code_it++ = opcode; \
    asm_rr(self, (Reg64)src, (Reg64)dst); \
    ins_end(self, #mnemonic" %s, %s", reg32_to_str(dst), reg32_to_str(src)); \
} \
\
void asm_##mnemonic##_rm64(Asm *self, Reg64 dst, Reg64 src) { \
    ins_start(self); \
    *self->code_it++ = rex_rr(src, dst); \
    *self->code_it++ = opcode; \
    asm_rr(self, src, dst); \
    ins_end(self, #mnemonic" %s, %s", reg64_to_str(dst), reg64_to_str(src)); \
}

DEFINE_INS_RM(mov, 0x8B)
DEFINE_INS_RM(add, 0x03)
DEFINE_INS_RM(sub, 0x2B)
DEFINE_INS_RM(and, 0x23)
DEFINE_INS_RM(or, 0x0B)
DEFINE_INS_RM(xor, 0x33)
DEFINE_INS_RM(cmp, 0x3B)

/*
    /number
    The number is called the extension, a number from 0 to 7;
    It's a number used to extend the opcode type, since the instruction only uses
    one register the other register can be encoded for that.
    The immediate is emitted as 1 byte (sign extended by the cpu) when its magnitude fits in 7 bits, otherwise as 4 bytes.
*/
#define DEFINE_INS_EXT_IMM(mnemonic, extension) \
void asm_##mnemonic##_rmb_imm(Asm *self, Reg32 reg, i32 immediate) { \
    if(abs_i32(immediate) <= INT8_MAX) { \
        *self->code_it++ = 0x83; \
        asm_rr(self, (Reg64)reg, (Reg64)(extension)); \
        *self->code_it++ = (u8)immediate; \
    } else { \
        *self->code_it++ = 0x81; \
        asm_rr(self, (Reg64)reg, (Reg64)(extension)); \
        am_memcpy(self->code_it, &immediate, sizeof(immediate)); \
        self->code_it += sizeof(immediate); \
    } \
} \
\
void asm_##mnemonic##_rm32_imm(Asm *self, Reg32 reg, i32 immediate) { \
    ins_start(self); \
    asm_##mnemonic##_rmb_imm(self, (Reg32)reg, immediate); \
    ins_end(self, #mnemonic" %s, 0x%x", reg32_to_str(reg), immediate); \
} \
\
void asm_##mnemonic##_rm64_imm(Asm *self, Reg64 reg, i32 immediate) { \
    ins_start(self); \
    *self->code_it++ = rex_rr(reg, 0); \
    asm_##mnemonic##_rmb_imm(self, (Reg32)reg, immediate); \
    ins_end(self, #mnemonic" %s, 0x%x", reg64_to_str(reg), immediate); \
}

DEFINE_INS_EXT_IMM(add, 0)
DEFINE_INS_EXT_IMM(or,  1)
DEFINE_INS_EXT_IMM(adc, 2)
DEFINE_INS_EXT_IMM(sbb, 3)
DEFINE_INS_EXT_IMM(and, 4)
DEFINE_INS_EXT_IMM(sub, 5)
DEFINE_INS_EXT_IMM(xor, 6)
DEFINE_INS_EXT_IMM(cmp, 7)

/*
    /number
    The number is called the extension, a number from 0 to 7;
    It's a number used to extend the opcode type, since the instruction only uses
    one register the other register can be encoded for that.
    Opcode 0xD1 shifts by 1 and has no immediate, opcode 0xC1 is followed by a 1 byte shift count.
*/
#define DEFINE_INS_SHIFT_IMM8(mnemonic, extension) \
void asm_##mnemonic##_rmb_imm(Asm *self, Reg32 reg, i8 immediate) { \
    if(immediate == 1) { \
        *self->code_it++ = 0xD1; \
        asm_rr(self, (Reg64)reg, (Reg64)(extension)); \
    } else { \
        *self->code_it++ = 0xC1; \
        asm_rr(self, (Reg64)reg, (Reg64)(extension)); \
        *self->code_it++ = immediate; \
    } \
} \
\
void asm_##mnemonic##_rm32_imm(Asm *self, Reg32 reg, i8 immediate) { \
    ins_start(self); \
    asm_##mnemonic##_rmb_imm(self, (Reg32)reg, immediate); \
    ins_end(self, #mnemonic" %s, 0x%x", reg32_to_str(reg), immediate); \
} \
\
void asm_##mnemonic##_rm64_imm(Asm *self, Reg64 reg, i8 immediate) { \
    ins_start(self); \
    *self->code_it++ = rex_rr(reg, 0); \
    asm_##mnemonic##_rmb_imm(self, (Reg32)reg, immediate); \
    ins_end(self, #mnemonic" %s, 0x%x", reg64_to_str(reg), immediate); \
}

DEFINE_INS_SHIFT_IMM8(rol, 0)
DEFINE_INS_SHIFT_IMM8(ror, 1)
DEFINE_INS_SHIFT_IMM8(rcl, 2)
DEFINE_INS_SHIFT_IMM8(rcr, 3)
DEFINE_INS_SHIFT_IMM8(shl, 4)
DEFINE_INS_SHIFT_IMM8(shr, 5)
/*DEFINE_INS_SHIFT_IMM8(shl, 6)*/
DEFINE_INS_SHIFT_IMM8(sar, 7)

/* Emits ret. If @bytes is not 0 then that many bytes are also popped from the stack when returning */
void asm_ret(Asm *self, u16 bytes) {
    ins_start(self);
    if(bytes == 0) {
        *self->code_it++ = 0xC3;
        ins_end(self, "ret");
    } else {
        *self->code_it++ = 0xC2;
        am_memcpy(self->code_it, &bytes, sizeof(bytes));
        self->code_it += sizeof(bytes);
        ins_end(self, "ret 0x%x", bytes);
    }
}