From ea97370f973374f863e4296c2bb872be8b5235a3 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Mon, 12 Aug 2019 09:48:55 +0200 Subject: Before interpreter. Cleanup build script. Begin writing code analyzer tool to find common mistakes --- README.md | 7 +- build.sh | 98 +++++------ executor/x86_64/asm.c | 385 +++++++++++++++++++++++++++++++++++++++++ executor/x86_64/asm.h | 136 +++++++++++++++ include/asm/x86_64.h | 136 --------------- include/ast.h | 24 ++- include/compiler.h | 2 +- include/nullable.h | 2 +- include/program.h | 2 +- include/ssa/ssa.h | 4 +- include/std/hash_map.h | 2 + src/asm/x86_64.c | 385 ----------------------------------------- src/ast.c | 18 +- src/compiler.c | 35 +++- src/parser.c | 38 ++-- src/ssa/ssa.c | 4 +- src/std/hash_map.c | 11 +- tests/errors/no_main_func.amal | 1 + tests/main.c | 16 +- tools/highlevel_c.py | 81 +++++++++ 20 files changed, 766 insertions(+), 621 deletions(-) create mode 100644 executor/x86_64/asm.c create mode 100644 executor/x86_64/asm.h delete mode 100644 include/asm/x86_64.h delete mode 100644 src/asm/x86_64.c create mode 100644 tests/errors/no_main_func.amal create mode 100755 tools/highlevel_c.py diff --git a/README.md b/README.md index f4f228c..fc7d054 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,12 @@ Every stage of the compiler is multithreaded and data copy is kept to a minimal, is done without storing tokens in a list. Almost all allocation is done using an arena allocator that is only cleaned up once (when the compiler is finished), and the data is allocated sequentially. # TODO -Build with -nostdlib and replace use of libc with syscalls (on linux). +Build with -nostdlib and replace use of libc with syscalls (on linux).\ Don't parse files unless the variable they are assigned to (with @import) is used. This is useful when only using small parts of a library. -Align machine code to word boundary for the start of functions. No need to pad with NOP, as functions return before the padding. +This could be done checking if an AST expression is referenced before evaluating it. There would then need to be a compile option +that compiles everything even if not referenced, since another user of the program/library may use the functions that are not used in your program +and they might have compile-issues.\ +Align machine code to word boundary for the start of functions. No need to pad with NOP, as functions return before the padding.\ Use const to cleanup ANSI C style variable declarations, since const allows you to declare and assign variables on the same line. # Documents Documents are located under doc. The file doc/Documentation.md is generated from source files by running doc/doc_extract.py diff --git a/build.sh b/build.sh index cac8275..26edf31 100755 --- a/build.sh +++ b/build.sh @@ -4,48 +4,45 @@ set -e this_script_path=$(readlink -f "$0") this_script_dir=$(dirname "$this_script_path") -source_files=$(readlink -f $(find "$this_script_dir/src" -name "*.c")) +cd "$this_script_dir" +source_files=$(find "src" -name "*.c") + +cpu_arch="$ARCH" +if [ -z "$ARCH" ]; then + cpu_arch=$(uname -m) + echo "Cpu architecture detected: $cpu_arch." 'You can override the architecture with the environment variable $ARCH' +fi + +if [ "$cpu_arch" = "x86_64" ]; then + source_files="$source_files $(find "executor/x86_64" -name "*.c")" +else + echo "WARNING: There is no machine code implementation for your cpu architecture: $cpu_arch. An interpreter will be used instead" +fi if [ -z "$CC" ]; then CC=cc fi -CFLAGS="-Wall -Wextra -Werror -Wno-format-security -Wnull-dereference -std=c89 -D_GNU_SOURCE " +CFLAGS="-Wall -Wextra -Werror -Wno-format-security -Wno-error=attributes -Wno-attributes -Wnull-dereference -std=c89 -D_GNU_SOURCE" LIBS="-pthread " -if [ ! -z "$SANITIZE_ADDRESS" ]; then - CFLAGS+="-fsanitize=address " -elif [ ! -z "$SANITIZE_THREAD" ]; then - CFLAGS+="-fsanitize=thread " +if [ -n "$SANITIZE_ADDRESS" ]; then + CFLAGS="$CFLAGS -fsanitize=address " +elif [ -n "$SANITIZE_THREAD" ]; then + CFLAGS="$CFLAGS -fsanitize=thread " fi -if [ ! -z "$PEDANTIC" ]; then - CFLAGS+="-DAMAL_PEDANTIC -pedantic " +if [ -n "$PEDANTIC" ]; then + CFLAGS="$CFLAGS -DAMAL_PEDANTIC -pedantic " fi -build_test() { - CFLAGS+="-g -O0 -DDEBUG" - - BUILD_ARGS="$source_files $CFLAGS $LIBS -shared -fpic -o "$this_script_dir/libamalgam.so"" - set -x - time $CC $BUILD_ARGS - if [ ! -z "$SCAN_BUILD" ]; then - scan-build $CC $BUILD_ARGS - fi - set +x - - if [ -z "$NO_TEST" ]; then - source_files_tests=$(readlink -f $(find "$this_script_dir/tests" -name "*.c")) - set -x - time $CC $source_files_tests $CFLAGS $LIBS -o test "$this_script_dir/libamalgam.so" - fi - +build_compile_commands() { set +x compile_commands=$( first=0 echo "[" - for source_file in $source_files $source_files_tests; do - if [ $first == 1 ]; then + for source_file in $@; do + if [ $first = 1 ]; then echo " ," fi first=1 @@ -57,38 +54,41 @@ build_test() { echo " }" done echo "]") - echo "$compile_commands" > "$this_script_dir/compile_commands.json" + echo "$compile_commands" > "compile_commands.json" } -build_release() { - CFLAGS+="-O2 -DNDEBUG -s" +build_test() { + CFLAGS="$CFLAGS -g -O0 -DDEBUG" - BUILD_ARGS="$source_files $CFLAGS $LIBS -shared -fpic -o "$this_script_dir/libamalgam.so"" + BUILD_ARGS="$source_files $CFLAGS $LIBS -shared -fpic -o libamalgam.so" set -x time $CC $BUILD_ARGS - if [ ! -z "$SCAN_BUILD" ]; then + if [ -n "$SCAN_BUILD" ]; then scan-build $CC $BUILD_ARGS fi set +x + source_files_test=$(find "tests" -name "*.c") + if [ -z "$NO_TEST" ]; then + set -x + time $CC $source_files_test $CFLAGS $LIBS -o test "./libamalgam.so" + fi + + build_compile_commands $source_files $source_files_test +} + +build_release() { + CFLAGS="$CFLAGS -O2 -DNDEBUG -s" + + BUILD_ARGS="$source_files $CFLAGS $LIBS -shared -fpic -o libamalgam.so" + set -x + time $CC $BUILD_ARGS + if [ -n "$SCAN_BUILD" ]; then + scan-build $CC $BUILD_ARGS + fi set +x - compile_commands=$( - first=0 - echo "[" - for source_file in $source_files; do - if [ $first == 1 ]; then - echo " ," - fi - first=1 - o_file="${source_file}.o" - echo " {" - echo " \"file\": \"$source_file\"," - echo " \"directory\": \"$this_script_dir\"," - echo " \"command\": \"$CC -o $o_file $CFLAGS $LIBS -c $source_file\"" - echo " }" - done - echo "]") - echo "$compile_commands" > "$this_script_dir/compile_commands.json" + + build_compile_commands $source_files } case "$1" in diff --git a/executor/x86_64/asm.c b/executor/x86_64/asm.c new file mode 100644 index 0000000..8e07ee3 --- /dev/null +++ b/executor/x86_64/asm.c @@ -0,0 +1,385 @@ +#include "asm.h" + +#include "../../include/std/mem.h" +#include "../../include/std/log.h" +#include +#include +#include + +#include + +#define REX_W 0x48 + +void asm_ptr_init(AsmPtr *self, Reg64 base) { + self->base = base; + self->index = -1; + self->disp = 0; + self->scale = 0; +} + +void asm_ptr_init_index(AsmPtr *self, Reg64 base, Reg64 index) { + self->base = base; + self->index = index; + self->disp = 0; + self->scale = 0; +} + +void asm_ptr_init_disp(AsmPtr *self, Reg64 base, i32 disp) { + self->base = base; + self->index = -1; + self->disp = disp; + self->scale = 0; +} + +void asm_ptr_init_index_disp(AsmPtr *self, Reg64 base, Reg64 index, i32 disp) { + self->base = base; + self->index = index; + self->disp = disp; + self->scale = 0; +} + +int asm_init(Asm *self) { + self->size = am_pagesize(); + amal_log_debug("asm: page size: %u", self->size); + self->code = mmap(NULL, self->size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if(self->code == MAP_FAILED) + return -errno; + self->code_it = self->code; + return 0; +} + +void asm_deinit(Asm *self) { + if(self->code) + munmap(self->code, self->size); + self->code = NULL; + self->code_it = NULL; + self->size = 0; +} + +static void asm_print_code_hex(Asm *self) { + u8 *ptr; + int off; + ptr = self->code; + off = 0; + while(ptr != self->code_it) { + printf("%02x", *ptr); + ++ptr; + ++off; + if(off == 8) { + putc('\n', stdout); + off = 0; + } else { + putc(' ', stdout); + } + } + if(off != 0) + putc('\n', stdout); +} + +int asm_execute(Asm *self) { + void (*func)(); + if(mprotect(self->code, self->size, PROT_READ | PROT_EXEC) != 0) + return -errno; + + asm_print_code_hex(self); + + /* TODO: Verify if this is valid on all platforms. According to ISO C standard it isn't? */ + *(void**)(&func) = self->code; + func(); + return 0; +} + +/* TODO: See how this can be optimized */ +static CHECK_RESULT int asm_ensure_capacity(Asm *self, usize size) { + usize current_offset; + current_offset = (u8*)self->code_it - (u8*)self->code; + if(current_offset + size > self->size) { + void *new_mem; + usize new_size; + new_size = self->size + am_pagesize(); + new_mem = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if(self->code == MAP_FAILED) + return -errno; + + am_memcpy(new_mem, self->code, self->size); + self->code = new_mem; + self->size = new_size; + self->code_it = (u8*)self->code + current_offset; + } + return 0; +} + +#ifdef DEBUG +static isize asm_get_capacity_left(Asm *self) { + return (isize)self->size - (isize)((u8*)self->code_it - (u8*)self->code); +} +#endif + +int asm_nop(Asm *self) { + return_if_error(asm_ensure_capacity(self, 1)); + *self->code_it++ = 0x90; + return 0; +} + +static i32 abs_i32(i32 value) { + return value >= 0 ? value : -value; +} + +/* +TODO: Implement 1 and 2 byte displacement? +There has to be at least 6 bytes left in the asm buffer before calling this function. +*/ +static void asm_rm(Asm *self, AsmPtr *mem, Reg64 reg) { + u8 rm_byte; + u8 disp_bytes; + assert(asm_get_capacity_left(self) >= 6); + if((int)mem->index != -1) { + u8 sib_offset; + if(mem->disp == 0) { + rm_byte = 0x04; + disp_bytes = 0; + } else if(abs_i32(mem->disp) <= INT8_MAX) { + rm_byte = 0x44; + disp_bytes = 1; + } else { + rm_byte = 0x84; + disp_bytes = 4; + } + + #ifdef DEBUG + if(mem->scale != 0 && mem->scale != 2 && mem->scale != 4 && mem->scale != 8) { + amal_log_error("Invalid scale %d, expected 0, 2, 4, or 8", mem->scale); + assert(bool_false); + } + #endif + assert(mem->base != RBP && "TODO: Implement RBP base for sib byte. RBP is special and requires different logic"); + sib_offset = (mem->scale << 5) + 8*mem->index + mem->base; + + *self->code_it++ = rm_byte; + *self->code_it++ = sib_offset; + } else { + if(mem->disp == 0) { + if(mem->base == RBP) { + rm_byte = 0x45; + disp_bytes = 1; + } else { + rm_byte = mem->base; + disp_bytes = 0; + } + } else if(abs_i32(mem->disp) <= INT8_MAX) { + rm_byte = 0x40 + mem->base; + disp_bytes = 1; + } else { + rm_byte = 0x80 + mem->base; + disp_bytes = 4; + } + *self->code_it++ = (reg << 3) | rm_byte; + } + + am_memcpy(self->code_it, &mem->disp, disp_bytes); + self->code_it += disp_bytes; +} + +/* There has to be at least 1 byte left in the asm buffer before calling this function. */ +static void asm_rr(Asm *self, Reg64 dst, Reg64 src) { + assert(asm_get_capacity_left(self) >= 1); + *self->code_it++ = 0xC0 + dst + 8*src; +} + +/* TODO: Implement 1 and 2 byte immediate? */ +int asm_mov_mi(Asm *self, AsmPtr *dst, i32 immediate) { + /* 12 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ + return_if_error(asm_ensure_capacity(self, 12)); + *self->code_it++ = REX_W; + *self->code_it++ = 0xC7; + asm_rm(self, dst, 0); + am_memcpy(self->code_it, &immediate, sizeof(immediate)); + self->code_it += sizeof(immediate); + return 0; +} + +int asm_mov_mr(Asm *self, AsmPtr *dst, Reg64 src) { + /* 8 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ + return_if_error(asm_ensure_capacity(self, 8)); + *self->code_it++ = REX_W; + *self->code_it++ = 0x89; + asm_rm(self, dst, src); + return 0; +} + +int asm_mov_rm(Asm *self, Reg64 dst, AsmPtr *src) { + /* 8 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ + return_if_error(asm_ensure_capacity(self, 8)); + *self->code_it++ = REX_W; + *self->code_it++ = 0x8B; + asm_rm(self, src, dst); + return 0; +} + +int asm_mov_ri(Asm *self, Reg64 dst, i64 immediate) { + /* 10 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ + return_if_error(asm_ensure_capacity(self, 10)); + *self->code_it++ = REX_W; + *self->code_it++ = 0xB8 + dst; + am_memcpy(self->code_it, &immediate, sizeof(immediate)); + self->code_it += sizeof(immediate); + return 0; +} + +int asm_mov_rr(Asm *self, Reg64 dst, Reg64 src) { + /* 3 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ + return_if_error(asm_ensure_capacity(self, 3)); + *self->code_it++ = REX_W; + *self->code_it++ = 0x89; + asm_rr(self, dst, src); + return 0; +} + +int asm_add_rr(Asm *self, Reg64 dst, Reg64 src) { + /* 3 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ + return_if_error(asm_ensure_capacity(self, 3)); + *self->code_it++ = REX_W; + *self->code_it++ = 0x01; + asm_rr(self, dst, src); + return 0; +} + +int asm_sub_rr(Asm *self, Reg64 dst, Reg64 src) { + /* 3 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ + return_if_error(asm_ensure_capacity(self, 3)); + *self->code_it++ = REX_W; + *self->code_it++ = 0x29; + asm_rr(self, dst, src); + return 0; +} + +int asm_imul_rr(Asm *self, Reg64 dst, Reg64 src) { + /* 3 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ + return_if_error(asm_ensure_capacity(self, 4)); + *self->code_it++ = REX_W; + *self->code_it++ = 0x0F; + *self->code_it++ = 0xAF; + asm_rr(self, dst, src); + return 0; +} + +int asm_pushr(Asm *self, Reg64 reg) { + return_if_error(asm_ensure_capacity(self, 1)); + *self->code_it++ = 0x50 + reg; + return 0; +} + +int asm_popr(Asm *self, Reg64 reg) { + return_if_error(asm_ensure_capacity(self, 1)); + *self->code_it++ = 0x58 + reg; + return 0; +} + +/* /r */ +#define DEFINE_INS_RM(mnemonic, opcode) \ +int asm_##mnemonic##_rm32(Asm *self, Reg32 dst, Reg32 src) { \ + return_if_error(asm_ensure_capacity(self, 2)); \ + *self->code_it++ = opcode; \ + *self->code_it++ = 0xC0 + 8*dst + src; \ + return 0; \ +} \ + \ +int asm_##mnemonic##_rm64(Asm *self, Reg64 dst, Reg64 src) { \ + return_if_error(asm_ensure_capacity(self, 1)); \ + *self->code_it++ = REX_W; \ + return asm_##mnemonic##_rm32(self, (Reg32)dst, (Reg32)src); \ +} + +DEFINE_INS_RM(mov, 0x8B) +DEFINE_INS_RM(add, 0x03) +DEFINE_INS_RM(sub, 0x2B) +DEFINE_INS_RM(and, 0x23) +DEFINE_INS_RM(or, 0x0B) +DEFINE_INS_RM(xor, 0x33) +DEFINE_INS_RM(cmp, 0x3B) + +/* + /number + The number is called the extension, a number from 0 to 7; + It's a number used to extend the opcode type, since the instruction only uses + one register the other register can be encoded for that. +*/ +#define DEFINE_INS_EXT_IMM(mnemonic, extension) \ +int asm_##mnemonic##_rm32_imm(Asm *self, Reg32 reg, i32 immediate) { \ + if(abs_i32(immediate) <= INT8_MAX) { \ + return_if_error(asm_ensure_capacity(self, 3)); \ + *self->code_it++ = 0x83; \ + *self->code_it++ = 0xC0 + 8*extension + reg; \ + *self->code_it++ = (u8)immediate; \ + } else { \ + return_if_error(asm_ensure_capacity(self, 6)); \ + *self->code_it++ = 0x81; \ + *self->code_it++ = 0xC0 + 8*extension + reg; \ + am_memcpy(self->code_it, &immediate, sizeof(immediate)); \ + self->code_it += sizeof(immediate); \ + } \ + return 0; \ +} \ + \ +int asm_##mnemonic##_rm64_imm(Asm *self, Reg64 reg, i32 immediate) { \ + return_if_error(asm_ensure_capacity(self, 1)); \ + *self->code_it++ = REX_W; \ + return asm_##mnemonic##_rm32_imm(self, (Reg32)reg, immediate); \ +} + +DEFINE_INS_EXT_IMM(add, 0) +DEFINE_INS_EXT_IMM(or, 1) +DEFINE_INS_EXT_IMM(adc, 2) +DEFINE_INS_EXT_IMM(sbb, 3) +DEFINE_INS_EXT_IMM(and, 4) +DEFINE_INS_EXT_IMM(sub, 5) +DEFINE_INS_EXT_IMM(xor, 6) +DEFINE_INS_EXT_IMM(cmp, 7) + +/* + /number + The number is called the extension, a number from 0 to 7; + It's a number used to extend the opcode type, since the instruction only uses + one register the other register can be encoded for that. +*/ +#define DEFINE_INS_SHIFT_IMM8(mnemonic, extension) \ +int asm_##mnemonic##_rm32_imm(Asm *self, Reg32 reg, i8 immediate) { \ + if(immediate == 1) { \ + return_if_error(asm_ensure_capacity(self, 2)); \ + *self->code_it++ = 0xC1; \ + *self->code_it++ = 0xC0 + 8*reg + extension; \ + } else { \ + return_if_error(asm_ensure_capacity(self, 3)); \ + *self->code_it++ = 0xD1; \ + *self->code_it++ = 0xC0 + 8*reg + extension; \ + *self->code_it++ = immediate; \ + } \ + return 0; \ +} \ + \ +int asm_##mnemonic##_rm64_imm(Asm *self, Reg64 reg, i8 immediate) { \ + return_if_error(asm_ensure_capacity(self, 1)); \ + *self->code_it++ = REX_W; \ + return asm_##mnemonic##_rm32_imm(self, (Reg32)reg, immediate); \ +} + +DEFINE_INS_SHIFT_IMM8(rol, 0) +DEFINE_INS_SHIFT_IMM8(ror, 1) +DEFINE_INS_SHIFT_IMM8(rcl, 2) +DEFINE_INS_SHIFT_IMM8(rcr, 3) +DEFINE_INS_SHIFT_IMM8(shl, 4) +DEFINE_INS_SHIFT_IMM8(shr, 5) +/*DEFINE_INS_SHIFT_IMM8(shl, 6)*/ +DEFINE_INS_SHIFT_IMM8(sar, 7) + +int asm_ret(Asm *self, u16 bytes) { + if(bytes == 0) { + return_if_error(asm_ensure_capacity(self, 1)); + *self->code_it++ = 0xC3; + } else { + return_if_error(asm_ensure_capacity(self, 3)); + *self->code_it++ = 0xC2; + am_memcpy(self->code_it, &bytes, sizeof(bytes)); + } + return 0; +} diff --git a/executor/x86_64/asm.h b/executor/x86_64/asm.h new file mode 100644 index 0000000..6fad26a --- /dev/null +++ b/executor/x86_64/asm.h @@ -0,0 +1,136 @@ +#ifndef AMAL_EXECUTOR_X86_64_ASM_H +#define AMAL_EXECUTOR_X86_64_ASM_H + +#include "../../include/std/misc.h" +#include "../../include/std/types.h" + +typedef struct { + void *code; + u8 *code_it; + usize size; +} Asm; + +typedef enum { + EAX, + ECX, + EDX, + EBX, + ESP, + EBP, + ESI, + EDI +} Reg32; + +typedef enum { + RAX, + RCX, + RDX, + RBX, + RSP, + RBP, + RSI, + RDI +} Reg64; + +typedef struct { + Reg64 base; + Reg64 index; + i32 disp; + u8 scale; +} AsmPtr; + +void asm_ptr_init(AsmPtr *self, Reg64 base); +void asm_ptr_init_index(AsmPtr *self, Reg64 base, Reg64 index); +void asm_ptr_init_disp(AsmPtr *self, Reg64 base, i32 disp); +void asm_ptr_init_index_disp(AsmPtr *self, Reg64 base, Reg64 index, i32 disp); + +CHECK_RESULT int asm_init(Asm *self); +void asm_deinit(Asm *self); + +CHECK_RESULT int asm_execute(Asm *self); + +CHECK_RESULT int asm_nop(Asm *self); + + + + + + + + + +CHECK_RESULT int asm_mov_mi(Asm *self, AsmPtr *dst, i32 immediate); +CHECK_RESULT int asm_mov_mr(Asm *self, AsmPtr *dst, Reg64 src); +CHECK_RESULT int asm_mov_rm(Asm *self, Reg64 dst, AsmPtr *src); +CHECK_RESULT int asm_mov_ri(Asm *self, Reg64 dst, i64 immediate); +CHECK_RESULT int asm_mov_rr(Asm *self, Reg64 dst, Reg64 src); + +CHECK_RESULT int asm_add_rr(Asm *self, Reg64 dst, Reg64 src); +CHECK_RESULT int asm_sub_rr(Asm *self, Reg64 dst, Reg64 src); +CHECK_RESULT int asm_imul_rr(Asm *self, Reg64 dst, Reg64 src); + +CHECK_RESULT int asm_pushr(Asm *self, Reg64 reg); +CHECK_RESULT int asm_popr(Asm *self, Reg64 reg); + + + + + + + + + + + + + + +CHECK_RESULT int asm_mov_rm32(Asm *self, Reg32 dst, Reg32 src); +CHECK_RESULT int asm_add_rm32(Asm *self, Reg32 dst, Reg32 src); +CHECK_RESULT int asm_sub_rm32(Asm *self, Reg32 dst, Reg32 src); +CHECK_RESULT int asm_and_rm32(Asm *self, Reg32 dst, Reg32 src); +CHECK_RESULT int asm_or_rm32(Asm *self, Reg32 dst, Reg32 src); +CHECK_RESULT int asm_xor_rm32(Asm *self, Reg32 dst, Reg32 src); +CHECK_RESULT int asm_cmp_rm32(Asm *self, Reg32 dst, Reg32 src); +CHECK_RESULT int asm_add_rm32_imm(Asm *self, Reg32 reg, i32 immediate); +CHECK_RESULT int asm_or_rm32_imm(Asm *self, Reg32 reg, i32 immediate); +CHECK_RESULT int asm_adc_rm32_imm(Asm *self, Reg32 reg, i32 immediate); +CHECK_RESULT int asm_sbb_rm32_imm(Asm *self, Reg32 reg, i32 immediate); +CHECK_RESULT int asm_and_rm32_imm(Asm *self, Reg32 reg, i32 immediate); +CHECK_RESULT int asm_sub_rm32_imm(Asm *self, Reg32 reg, i32 immediate); +CHECK_RESULT int asm_xor_rm32_imm(Asm *self, Reg32 reg, i32 immediate); +CHECK_RESULT int asm_cmp_rm32_imm(Asm *self, Reg32 reg, i32 immediate); +CHECK_RESULT int asm_rol_rm32_imm(Asm *self, Reg32 reg, i8 immediate); +CHECK_RESULT int asm_ror_rm32_imm(Asm *self, Reg32 reg, i8 immediate); +CHECK_RESULT int asm_rcl_rm32_imm(Asm *self, Reg32 reg, i8 immediate); +CHECK_RESULT int asm_rcr_rm32_imm(Asm *self, Reg32 reg, i8 immediate); +CHECK_RESULT int asm_shl_rm32_imm(Asm *self, Reg32 reg, i8 immediate); +CHECK_RESULT int asm_shr_rm32_imm(Asm *self, Reg32 reg, i8 immediate); +CHECK_RESULT int asm_sar_rm32_imm(Asm *self, Reg32 reg, i8 immediate); + +CHECK_RESULT int asm_mov_rm64(Asm *self, Reg64 dst, Reg64 src); +CHECK_RESULT int asm_add_rm64(Asm *self, Reg64 dst, Reg64 src); +CHECK_RESULT int asm_sub_rm64(Asm *self, Reg64 dst, Reg64 src); +CHECK_RESULT int asm_and_rm64(Asm *self, Reg64 dst, Reg64 src); +CHECK_RESULT int asm_or_rm64(Asm *self, Reg64 dst, Reg64 src); +CHECK_RESULT int asm_xor_rm64(Asm *self, Reg64 dst, Reg64 src); +CHECK_RESULT int asm_cmp_rm64(Asm *self, Reg64 dst, Reg64 src); +CHECK_RESULT int asm_add_rm64_imm(Asm *self, Reg64 reg, i32 immediate); +CHECK_RESULT int asm_or_rm64_imm(Asm *self, Reg64 reg, i32 immediate); +CHECK_RESULT int asm_adc_rm64_imm(Asm *self, Reg64 reg, i32 immediate); +CHECK_RESULT int asm_sbb_rm64_imm(Asm *self, Reg64 reg, i32 immediate); +CHECK_RESULT int asm_and_rm64_imm(Asm *self, Reg64 reg, i32 immediate); +CHECK_RESULT int asm_sub_rm64_imm(Asm *self, Reg64 reg, i32 immediate); +CHECK_RESULT int asm_xor_rm64_imm(Asm *self, Reg64 reg, i32 immediate); +CHECK_RESULT int asm_cmp_rm64_imm(Asm *self, Reg64 reg, i32 immediate); +CHECK_RESULT int asm_rol_rm64_imm(Asm *self, Reg64 reg, i8 immediate); +CHECK_RESULT int asm_ror_rm64_imm(Asm *self, Reg64 reg, i8 immediate); +CHECK_RESULT int asm_rcl_rm64_imm(Asm *self, Reg64 reg, i8 immediate); +CHECK_RESULT int asm_rcr_rm64_imm(Asm *self, Reg64 reg, i8 immediate); +CHECK_RESULT int asm_shl_rm64_imm(Asm *self, Reg64 reg, i8 immediate); +CHECK_RESULT int asm_shr_rm64_imm(Asm *self, Reg64 reg, i8 immediate); +CHECK_RESULT int asm_sar_rm64_imm(Asm *self, Reg64 reg, i8 immediate); + +CHECK_RESULT int asm_ret(Asm *self, u16 bytes); + +#endif diff --git a/include/asm/x86_64.h b/include/asm/x86_64.h deleted file mode 100644 index 92de96b..0000000 --- a/include/asm/x86_64.h +++ /dev/null @@ -1,136 +0,0 @@ -#ifndef AMAL_ASM_X86_64_H -#define AMAL_ASM_X86_64_H - -#include "../std/misc.h" -#include "../std/types.h" - -typedef struct { - void *code; - u8 *code_it; - usize size; -} Asm; - -typedef enum { - EAX, - ECX, - EDX, - EBX, - ESP, - EBP, - ESI, - EDI -} Reg32; - -typedef enum { - RAX, - RCX, - RDX, - RBX, - RSP, - RBP, - RSI, - RDI -} Reg64; - -typedef struct { - Reg64 base; - Reg64 index; - i32 disp; - u8 scale; -} AsmPtr; - -void asm_ptr_init(AsmPtr *self, Reg64 base); -void asm_ptr_init_index(AsmPtr *self, Reg64 base, Reg64 index); -void asm_ptr_init_disp(AsmPtr *self, Reg64 base, i32 disp); -void asm_ptr_init_index_disp(AsmPtr *self, Reg64 base, Reg64 index, i32 disp); - -CHECK_RESULT int asm_init(Asm *self); -void asm_deinit(Asm *self); - -CHECK_RESULT int asm_execute(Asm *self); - -CHECK_RESULT int asm_nop(Asm *self); - - - - - - - - - -CHECK_RESULT int asm_mov_mi(Asm *self, AsmPtr *dst, i32 immediate); -CHECK_RESULT int asm_mov_mr(Asm *self, AsmPtr *dst, Reg64 src); -CHECK_RESULT int asm_mov_rm(Asm *self, Reg64 dst, AsmPtr *src); -CHECK_RESULT int asm_mov_ri(Asm *self, Reg64 dst, i64 immediate); -CHECK_RESULT int asm_mov_rr(Asm *self, Reg64 dst, Reg64 src); - -CHECK_RESULT int asm_add_rr(Asm *self, Reg64 dst, Reg64 src); -CHECK_RESULT int asm_sub_rr(Asm *self, Reg64 dst, Reg64 src); -CHECK_RESULT int asm_imul_rr(Asm *self, Reg64 dst, Reg64 src); - -CHECK_RESULT int asm_pushr(Asm *self, Reg64 reg); -CHECK_RESULT int asm_popr(Asm *self, Reg64 reg); - - - - - - - - - - - - - - -CHECK_RESULT int asm_mov_rm32(Asm *self, Reg32 dst, Reg32 src); -CHECK_RESULT int asm_add_rm32(Asm *self, Reg32 dst, Reg32 src); -CHECK_RESULT int asm_sub_rm32(Asm *self, Reg32 dst, Reg32 src); -CHECK_RESULT int asm_and_rm32(Asm *self, Reg32 dst, Reg32 src); -CHECK_RESULT int asm_or_rm32(Asm *self, Reg32 dst, Reg32 src); -CHECK_RESULT int asm_xor_rm32(Asm *self, Reg32 dst, Reg32 src); -CHECK_RESULT int asm_cmp_rm32(Asm *self, Reg32 dst, Reg32 src); -CHECK_RESULT int asm_add_rm32_imm(Asm *self, Reg32 reg, i32 immediate); -CHECK_RESULT int asm_or_rm32_imm(Asm *self, Reg32 reg, i32 immediate); -CHECK_RESULT int asm_adc_rm32_imm(Asm *self, Reg32 reg, i32 immediate); -CHECK_RESULT int asm_sbb_rm32_imm(Asm *self, Reg32 reg, i32 immediate); -CHECK_RESULT int asm_and_rm32_imm(Asm *self, Reg32 reg, i32 immediate); -CHECK_RESULT int asm_sub_rm32_imm(Asm *self, Reg32 reg, i32 immediate); -CHECK_RESULT int asm_xor_rm32_imm(Asm *self, Reg32 reg, i32 immediate); -CHECK_RESULT int asm_cmp_rm32_imm(Asm *self, Reg32 reg, i32 immediate); -CHECK_RESULT int asm_rol_rm32_imm(Asm *self, Reg32 reg, i8 immediate); -CHECK_RESULT int asm_ror_rm32_imm(Asm *self, Reg32 reg, i8 immediate); -CHECK_RESULT int asm_rcl_rm32_imm(Asm *self, Reg32 reg, i8 immediate); -CHECK_RESULT int asm_rcr_rm32_imm(Asm *self, Reg32 reg, i8 immediate); -CHECK_RESULT int asm_shl_rm32_imm(Asm *self, Reg32 reg, i8 immediate); -CHECK_RESULT int asm_shr_rm32_imm(Asm *self, Reg32 reg, i8 immediate); -CHECK_RESULT int asm_sar_rm32_imm(Asm *self, Reg32 reg, i8 immediate); - -CHECK_RESULT int asm_mov_rm64(Asm *self, Reg64 dst, Reg64 src); -CHECK_RESULT int asm_add_rm64(Asm *self, Reg64 dst, Reg64 src); -CHECK_RESULT int asm_sub_rm64(Asm *self, Reg64 dst, Reg64 src); -CHECK_RESULT int asm_and_rm64(Asm *self, Reg64 dst, Reg64 src); -CHECK_RESULT int asm_or_rm64(Asm *self, Reg64 dst, Reg64 src); -CHECK_RESULT int asm_xor_rm64(Asm *self, Reg64 dst, Reg64 src); -CHECK_RESULT int asm_cmp_rm64(Asm *self, Reg64 dst, Reg64 src); -CHECK_RESULT int asm_add_rm64_imm(Asm *self, Reg64 reg, i32 immediate); -CHECK_RESULT int asm_or_rm64_imm(Asm *self, Reg64 reg, i32 immediate); -CHECK_RESULT int asm_adc_rm64_imm(Asm *self, Reg64 reg, i32 immediate); -CHECK_RESULT int asm_sbb_rm64_imm(Asm *self, Reg64 reg, i32 immediate); -CHECK_RESULT int asm_and_rm64_imm(Asm *self, Reg64 reg, i32 immediate); -CHECK_RESULT int asm_sub_rm64_imm(Asm *self, Reg64 reg, i32 immediate); -CHECK_RESULT int asm_xor_rm64_imm(Asm *self, Reg64 reg, i32 immediate); -CHECK_RESULT int asm_cmp_rm64_imm(Asm *self, Reg64 reg, i32 immediate); -CHECK_RESULT int asm_rol_rm64_imm(Asm *self, Reg64 reg, i8 immediate); -CHECK_RESULT int asm_ror_rm64_imm(Asm *self, Reg64 reg, i8 immediate); -CHECK_RESULT int asm_rcl_rm64_imm(Asm *self, Reg64 reg, i8 immediate); -CHECK_RESULT int asm_rcr_rm64_imm(Asm *self, Reg64 reg, i8 immediate); -CHECK_RESULT int asm_shl_rm64_imm(Asm *self, Reg64 reg, i8 immediate); -CHECK_RESULT int asm_shr_rm64_imm(Asm *self, Reg64 reg, i8 immediate); -CHECK_RESULT int asm_sar_rm64_imm(Asm *self, Reg64 reg, i8 immediate); - -CHECK_RESULT int asm_ret(Asm *self, u16 bytes); - -#endif diff --git a/include/ast.h b/include/ast.h index 9f01b1b..d89d099 100644 --- a/include/ast.h +++ b/include/ast.h @@ -90,7 +90,7 @@ struct Ast { struct Scope { Buffer/**/ ast_objects; - HashMap/*(key=BufferView, value=Ast*)*/ named_objects; + HashMapType(BufferView, Ast*) named_objects; /* Value is always an Ast* with type LhsExpr */ Scope *parent; /* Is null unless the scope is a file scope, in which case this is the parser that owns the scope */ Parser *parser; @@ -144,6 +144,14 @@ typedef struct { } value; } VariableType; +typedef enum { + DECL_FLAG_NONE = 0, + DECL_FLAG_EXTERN = 1 << 0, + DECL_FLAG_EXPORT = 1 << 1, + DECL_FLAG_PUB = 1 << 2, + DECL_FLAG_CONST = 1 << 3 +} DeclFlag; + /* Note: When resolving AST, multiple threads can end up resolving the same expressions at the same time. This is intentional. Instead of using mutex for every expression and locking/unlocking everytime @@ -153,14 +161,17 @@ typedef struct { leading to @ast_resolve running again for the same expression. */ struct LhsExpr { - bool is_extern; - bool is_pub; - bool is_const; + u8 decl_flags; BufferView var_name; VariableType type; - Ast *rhs_expr; + nullable Ast *rhs_expr; }; +#define LHS_EXPR_IS_EXTERN(expr) ((expr)->decl_flags & DECL_FLAG_EXTERN) +#define LHS_EXPR_IS_EXPORT(expr) ((expr)->decl_flags & DECL_FLAG_EXPORT) +#define LHS_EXPR_IS_PUB(expr) ((expr)->decl_flags & DECL_FLAG_PUB) +#define LHS_EXPR_IS_CONST(expr) ((expr)->decl_flags & DECL_FLAG_CONST) + struct AssignmentExpr { Ast *lhs_expr; Ast *rhs_expr; @@ -228,8 +239,9 @@ void function_signature_init(FunctionSignature *self); CHECK_RESULT int funcdecl_init(FunctionDecl *self, FunctionSignature *signature, Scope *parent, ArenaAllocator *allocator); CHECK_RESULT int funccall_init(FunctionCall *self, BufferView name, ArenaAllocator *allocator); CHECK_RESULT int structdecl_init(StructDecl *self, Scope *parent, ArenaAllocator *allocator); +LhsExpr* structdecl_get_field_by_name(StructDecl *self, BufferView field_name); void structfield_init(StructField *self, BufferView name, BufferView type_name); -void lhsexpr_init(LhsExpr *self, bool is_extern, bool is_pub, bool is_const, BufferView var_name); +void lhsexpr_init(LhsExpr *self, DeclFlag decl_flag, BufferView var_name); void assignmentexpr_init(AssignmentExpr *self, Ast *lhs_expr, Ast *rhs_expr); void import_init(Import *self, BufferView path); CHECK_RESULT int string_init(String *self, BufferView str); diff --git a/include/compiler.h b/include/compiler.h index a0ac17a..83dde63 100644 --- a/include/compiler.h +++ b/include/compiler.h @@ -49,7 +49,7 @@ struct amal_compiler { Scope root_scope; Buffer/**/ parsers; Buffer/**/ queued_files; - HashMap/**/ file_scopes; + HashMapType(BufferView, FileScopeReference*) file_scopes; ParserThreadData *threads; int usable_thread_count; bool started; diff --git a/include/nullable.h b/include/nullable.h index f84100b..f38fb37 100644 --- a/include/nullable.h +++ b/include/nullable.h @@ -4,6 +4,6 @@ struct __nullable_type_dummy{ int _; }; /* Used by static analysis tool to find null-pointer dereference errors */ -#define nullable +#define nullable __attribute__((annotate("nullable"))) #endif diff --git a/include/program.h b/include/program.h index 3fc69fa..a0ed4ed 100644 --- a/include/program.h +++ b/include/program.h @@ -3,7 +3,7 @@ #include "std/buffer.h" #include "bytecode/bytecode.h" -#include "asm/x86_64.h" +#include "../executor/x86_64/asm.h" #define AMAL_PROGRAM_OK 0 #define AMAL_PROGRAM_INVALID_HEADER -1 diff --git a/include/ssa/ssa.h b/include/ssa/ssa.h index 1d4c612..016acc8 100644 --- a/include/ssa/ssa.h +++ b/include/ssa/ssa.h @@ -48,9 +48,9 @@ typedef u16 SsaFuncIndex; typedef struct { Buffer/*instruction data*/ instructions; - HashMap/**/ intermediates_map; + HashMapType(SsaNumber, SsaIntermediateIndex) intermediates_map; Buffer/*SsaNumber*/ intermediates; - HashMap/**/ strings_map; + HashMapType(BufferView, SsaStringIndex) strings_map; Buffer/*BufferView*/ strings; SsaIntermediateIndex intermediate_counter; SsaStringIndex string_counter; diff --git a/include/std/hash_map.h b/include/std/hash_map.h index b9b90c6..020748b 100644 --- a/include/std/hash_map.h +++ b/include/std/hash_map.h @@ -21,6 +21,8 @@ struct HashMap { HashMapHash hash_func; }; +#define HashMapType(key_type, value_type) __attribute__((annotate(#key_type", "#value_type))) HashMap + CHECK_RESULT int hash_map_init(HashMap *self, ArenaAllocator *allocator, usize value_type_size, HashMapCompare compare_func, HashMapHash hash_func); /* Not thread-safe. diff --git a/src/asm/x86_64.c b/src/asm/x86_64.c deleted file mode 100644 index e246fbc..0000000 --- a/src/asm/x86_64.c +++ /dev/null @@ -1,385 +0,0 @@ -#include "../../include/asm/x86_64.h" - -#include "../../include/std/mem.h" -#include "../../include/std/log.h" -#include -#include -#include - -#include - -#define REX_W 0x48 - -void asm_ptr_init(AsmPtr *self, Reg64 base) { - self->base = base; - self->index = -1; - self->disp = 0; - self->scale = 0; -} - -void asm_ptr_init_index(AsmPtr *self, Reg64 base, Reg64 index) { - self->base = base; - self->index = index; - self->disp = 0; - self->scale = 0; -} - -void asm_ptr_init_disp(AsmPtr *self, Reg64 base, i32 disp) { - self->base = base; - self->index = -1; - self->disp = disp; - self->scale = 0; -} - -void asm_ptr_init_index_disp(AsmPtr *self, Reg64 base, Reg64 index, i32 disp) { - self->base = base; - self->index = index; - self->disp = disp; - self->scale = 0; -} - -int asm_init(Asm *self) { - self->size = am_pagesize(); - amal_log_debug("asm: page size: %u", self->size); - self->code = mmap(NULL, self->size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if(self->code == MAP_FAILED) - return -errno; - self->code_it = self->code; - return 0; -} - -void asm_deinit(Asm *self) { - if(self->code) - munmap(self->code, self->size); - self->code = NULL; - self->code_it = NULL; - self->size = 0; -} - -static void asm_print_code_hex(Asm *self) { - u8 *ptr; - int off; - ptr = self->code; - off = 0; - while(ptr != self->code_it) { - printf("%02x", *ptr); - ++ptr; - ++off; - if(off == 8) { - putc('\n', stdout); - off = 0; - } else { - putc(' ', stdout); - } - } - if(off != 0) - putc('\n', stdout); -} - -int asm_execute(Asm *self) { - void (*func)(); - if(mprotect(self->code, self->size, PROT_READ | PROT_EXEC) != 0) - return -errno; - - asm_print_code_hex(self); - - /* TODO: Verify if this is valid on all platforms. According to ISO C standard it isn't? */ - *(void**)(&func) = self->code; - func(); - return 0; -} - -/* TODO: See how this can be optimized */ -static CHECK_RESULT int asm_ensure_capacity(Asm *self, usize size) { - usize current_offset; - current_offset = (u8*)self->code_it - (u8*)self->code; - if(current_offset + size > self->size) { - void *new_mem; - usize new_size; - new_size = self->size + am_pagesize(); - new_mem = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if(self->code == MAP_FAILED) - return -errno; - - am_memcpy(new_mem, self->code, self->size); - self->code = new_mem; - self->size = new_size; - self->code_it = (u8*)self->code + current_offset; - } - return 0; -} - -#ifdef DEBUG -static isize asm_get_capacity_left(Asm *self) { - return (isize)self->size - (isize)((u8*)self->code_it - (u8*)self->code); -} -#endif - -int asm_nop(Asm *self) { - return_if_error(asm_ensure_capacity(self, 1)); - *self->code_it++ = 0x90; - return 0; -} - -static i32 abs_i32(i32 value) { - return value >= 0 ? value : -value; -} - -/* -TODO: Implement 1 and 2 byte displacement? -There has to be at least 6 bytes left in the asm buffer before calling this function. -*/ -static void asm_rm(Asm *self, AsmPtr *mem, Reg64 reg) { - u8 rm_byte; - u8 disp_bytes; - assert(asm_get_capacity_left(self) >= 6); - if((int)mem->index != -1) { - u8 sib_offset; - if(mem->disp == 0) { - rm_byte = 0x04; - disp_bytes = 0; - } else if(abs_i32(mem->disp) <= INT8_MAX) { - rm_byte = 0x44; - disp_bytes = 1; - } else { - rm_byte = 0x84; - disp_bytes = 4; - } - - #ifdef DEBUG - if(mem->scale != 0 && mem->scale != 2 && mem->scale != 4 && mem->scale != 8) { - amal_log_error("Invalid scale %d, expected 0, 2, 4, or 8", mem->scale); - assert(bool_false); - } - #endif - assert(mem->base != RBP && "TODO: Implement RBP base for sib byte. RBP is special and requires different logic"); - sib_offset = (mem->scale << 5) + 8*mem->index + mem->base; - - *self->code_it++ = rm_byte; - *self->code_it++ = sib_offset; - } else { - if(mem->disp == 0) { - if(mem->base == RBP) { - rm_byte = 0x45; - disp_bytes = 1; - } else { - rm_byte = mem->base; - disp_bytes = 0; - } - } else if(abs_i32(mem->disp) <= INT8_MAX) { - rm_byte = 0x40 + mem->base; - disp_bytes = 1; - } else { - rm_byte = 0x80 + mem->base; - disp_bytes = 4; - } - *self->code_it++ = (reg << 3) | rm_byte; - } - - am_memcpy(self->code_it, &mem->disp, disp_bytes); - self->code_it += disp_bytes; -} - -/* There has to be at least 1 byte left in the asm buffer before calling this function. */ -static void asm_rr(Asm *self, Reg64 dst, Reg64 src) { - assert(asm_get_capacity_left(self) >= 1); - *self->code_it++ = 0xC0 + dst + 8*src; -} - -/* TODO: Implement 1 and 2 byte immediate? */ -int asm_mov_mi(Asm *self, AsmPtr *dst, i32 immediate) { - /* 12 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ - return_if_error(asm_ensure_capacity(self, 12)); - *self->code_it++ = REX_W; - *self->code_it++ = 0xC7; - asm_rm(self, dst, 0); - am_memcpy(self->code_it, &immediate, sizeof(immediate)); - self->code_it += sizeof(immediate); - return 0; -} - -int asm_mov_mr(Asm *self, AsmPtr *dst, Reg64 src) { - /* 8 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ - return_if_error(asm_ensure_capacity(self, 8)); - *self->code_it++ = REX_W; - *self->code_it++ = 0x89; - asm_rm(self, dst, src); - return 0; -} - -int asm_mov_rm(Asm *self, Reg64 dst, AsmPtr *src) { - /* 8 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ - return_if_error(asm_ensure_capacity(self, 8)); - *self->code_it++ = REX_W; - *self->code_it++ = 0x8B; - asm_rm(self, src, dst); - return 0; -} - -int asm_mov_ri(Asm *self, Reg64 dst, i64 immediate) { - /* 10 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ - return_if_error(asm_ensure_capacity(self, 10)); - *self->code_it++ = REX_W; - *self->code_it++ = 0xB8 + dst; - am_memcpy(self->code_it, &immediate, sizeof(immediate)); - self->code_it += sizeof(immediate); - return 0; -} - -int asm_mov_rr(Asm *self, Reg64 dst, Reg64 src) { - /* 3 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ - return_if_error(asm_ensure_capacity(self, 3)); - *self->code_it++ = REX_W; - *self->code_it++ = 0x89; - asm_rr(self, dst, src); - return 0; -} - -int asm_add_rr(Asm *self, Reg64 dst, Reg64 src) { - /* 3 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ - return_if_error(asm_ensure_capacity(self, 3)); - *self->code_it++ = REX_W; - *self->code_it++ = 0x01; - asm_rr(self, dst, src); - return 0; -} - -int asm_sub_rr(Asm *self, Reg64 dst, Reg64 src) { - /* 3 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ - return_if_error(asm_ensure_capacity(self, 3)); - *self->code_it++ = REX_W; - *self->code_it++ = 0x29; - asm_rr(self, dst, src); - return 0; -} - -int asm_imul_rr(Asm *self, Reg64 dst, Reg64 src) { - /* 3 bytes is the maximum size of the instruction. We don't how how large it will be so we prepare for the largest size */ - return_if_error(asm_ensure_capacity(self, 4)); - *self->code_it++ = REX_W; - *self->code_it++ = 0x0F; - *self->code_it++ = 0xAF; - asm_rr(self, dst, src); - return 0; -} - -int asm_pushr(Asm *self, Reg64 reg) { - return_if_error(asm_ensure_capacity(self, 1)); - *self->code_it++ = 0x50 + reg; - return 0; -} - -int asm_popr(Asm *self, Reg64 reg) { - return_if_error(asm_ensure_capacity(self, 1)); - *self->code_it++ = 0x58 + reg; - return 0; -} - -/* /r */ -#define DEFINE_INS_RM(mnemonic, opcode) \ -int asm_##mnemonic##_rm32(Asm *self, Reg32 dst, Reg32 src) { \ - return_if_error(asm_ensure_capacity(self, 2)); \ - *self->code_it++ = opcode; \ - *self->code_it++ = 0xC0 + 8*dst + src; \ - return 0; \ -} \ - \ -int asm_##mnemonic##_rm64(Asm *self, Reg64 dst, Reg64 src) { \ - return_if_error(asm_ensure_capacity(self, 1)); \ - *self->code_it++ = REX_W; \ - return asm_##mnemonic##_rm32(self, (Reg32)dst, (Reg32)src); \ -} - -DEFINE_INS_RM(mov, 0x8B) -DEFINE_INS_RM(add, 0x03) -DEFINE_INS_RM(sub, 0x2B) -DEFINE_INS_RM(and, 0x23) -DEFINE_INS_RM(or, 0x0B) -DEFINE_INS_RM(xor, 0x33) -DEFINE_INS_RM(cmp, 0x3B) - -/* - /number - The number is called the extension, a number from 0 to 7; - It's a number used to extend the opcode type, since the instruction only uses - one register the other register can be encoded for that. -*/ -#define DEFINE_INS_EXT_IMM(mnemonic, extension) \ -int asm_##mnemonic##_rm32_imm(Asm *self, Reg32 reg, i32 immediate) { \ - if(abs_i32(immediate) <= INT8_MAX) { \ - return_if_error(asm_ensure_capacity(self, 3)); \ - *self->code_it++ = 0x83; \ - *self->code_it++ = 0xC0 + 8*extension + reg; \ - *self->code_it++ = (u8)immediate; \ - } else { \ - return_if_error(asm_ensure_capacity(self, 6)); \ - *self->code_it++ = 0x81; \ - *self->code_it++ = 0xC0 + 8*extension + reg; \ - am_memcpy(self->code_it, &immediate, sizeof(immediate)); \ - self->code_it += sizeof(immediate); \ - } \ - return 0; \ -} \ - \ -int asm_##mnemonic##_rm64_imm(Asm *self, Reg64 reg, i32 immediate) { \ - return_if_error(asm_ensure_capacity(self, 1)); \ - *self->code_it++ = REX_W; \ - return asm_##mnemonic##_rm32_imm(self, (Reg32)reg, immediate); \ -} - -DEFINE_INS_EXT_IMM(add, 0) -DEFINE_INS_EXT_IMM(or, 1) -DEFINE_INS_EXT_IMM(adc, 2) -DEFINE_INS_EXT_IMM(sbb, 3) -DEFINE_INS_EXT_IMM(and, 4) -DEFINE_INS_EXT_IMM(sub, 5) -DEFINE_INS_EXT_IMM(xor, 6) -DEFINE_INS_EXT_IMM(cmp, 7) - -/* - /number - The number is called the extension, a number from 0 to 7; - It's a number used to extend the opcode type, since the instruction only uses - one register the other register can be encoded for that. -*/ -#define DEFINE_INS_SHIFT_IMM8(mnemonic, extension) \ -int asm_##mnemonic##_rm32_imm(Asm *self, Reg32 reg, i8 immediate) { \ - if(immediate == 1) { \ - return_if_error(asm_ensure_capacity(self, 2)); \ - *self->code_it++ = 0xC1; \ - *self->code_it++ = 0xC0 + 8*reg + extension; \ - } else { \ - return_if_error(asm_ensure_capacity(self, 3)); \ - *self->code_it++ = 0xD1; \ - *self->code_it++ = 0xC0 + 8*reg + extension; \ - *self->code_it++ = immediate; \ - } \ - return 0; \ -} \ - \ -int asm_##mnemonic##_rm64_imm(Asm *self, Reg64 reg, i8 immediate) { \ - return_if_error(asm_ensure_capacity(self, 1)); \ - *self->code_it++ = REX_W; \ - return asm_##mnemonic##_rm32_imm(self, (Reg32)reg, immediate); \ -} - -DEFINE_INS_SHIFT_IMM8(rol, 0) -DEFINE_INS_SHIFT_IMM8(ror, 1) -DEFINE_INS_SHIFT_IMM8(rcl, 2) -DEFINE_INS_SHIFT_IMM8(rcr, 3) -DEFINE_INS_SHIFT_IMM8(shl, 4) -DEFINE_INS_SHIFT_IMM8(shr, 5) -/*DEFINE_INS_SHIFT_IMM8(shl, 6)*/ -DEFINE_INS_SHIFT_IMM8(sar, 7) - -int asm_ret(Asm *self, u16 bytes) { - if(bytes == 0) { - return_if_error(asm_ensure_capacity(self, 1)); - *self->code_it++ = 0xC3; - } else { - return_if_error(asm_ensure_capacity(self, 3)); - *self->code_it++ = 0xC2; - am_memcpy(self->code_it, &bytes, sizeof(bytes)); - } - return 0; -} diff --git a/src/ast.c b/src/ast.c index e28b072..0aa19d4 100644 --- a/src/ast.c +++ b/src/ast.c @@ -95,15 +95,21 @@ int structdecl_init(StructDecl *self, Scope *parent, ArenaAllocator *allocator) return scope_init(&self->body, parent, allocator); } +LhsExpr* structdecl_get_field_by_name(StructDecl *self, BufferView field_name) { + Ast* result; + if(!hash_map_get(&self->body.named_objects, field_name, &result)) + return NULL; + return result->value.lhs_expr; +} + void structfield_init(StructField *self, BufferView name, BufferView type_name) { self->name = name; variable_init(&self->type, type_name); } -void lhsexpr_init(LhsExpr *self, bool is_extern, bool is_pub, bool is_const, BufferView var_name) { - self->is_extern = is_extern; - self->is_pub = is_pub; - self->is_const = is_const; +void lhsexpr_init(LhsExpr *self, DeclFlag decl_flag, BufferView var_name) { + assert(!((decl_flag & DECL_FLAG_EXTERN) && (decl_flag & DECL_FLAG_EXPORT)) && "Expression cant be both extern and export"); + self->decl_flags = decl_flag; self->type.type = VARIABLE_TYPE_NONE; self->type.value.variable = NULL; self->var_name = var_name; @@ -423,7 +429,7 @@ static void assignmentexpr_resolve(Ast *ast, AstCompilerContext *context) { /* This also covers extern variables, since extern variables are always const */ /* TODO: var.field type expressions should also be checked */ - if(lhs_source && lhs_source->is_const) { + if(lhs_source && LHS_EXPR_IS_CONST(lhs_source)) { Parser *parser; parser = scope_get_parser(context->scope); parser_print_error(parser, ast_get_code_reference(self->lhs_expr).data, "Can't assign to a const value"); @@ -587,7 +593,7 @@ static void binop_resolve_dot_access(Ast *ast, AstCompilerContext *context) { throw(AST_ERR); } - if(!self->rhs->resolve_data.type->is_pub) { + if(!LHS_EXPR_IS_PUB(self->rhs->resolve_data.type)) { parser_print_error(caller_parser, caller_code_ref.data, "Can't access non-public field \"%.*s\"", caller_code_ref.size, caller_code_ref.data); /* TODO: use tokenizer_print_note, once it has been added */ /* TODO: Print type */ diff --git a/src/compiler.c b/src/compiler.c index 8c3266c..39cbb00 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -33,7 +33,7 @@ static CHECK_RESULT int create_default_type(amal_compiler *compiler, const char return_if_error(structdecl_init(struct_decl, &compiler->root_scope, &compiler->allocator)); return_if_error(arena_allocator_alloc(&compiler->allocator, sizeof(LhsExpr), (void**)lhs_expr)); - lhsexpr_init(*lhs_expr, bool_true, bool_true, bool_true, create_buffer_view(name, strnlen(name, PATH_MAX))); + lhsexpr_init(*lhs_expr, DECL_FLAG_EXTERN | DECL_FLAG_PUB | DECL_FLAG_CONST, create_buffer_view(name, strnlen(name, PATH_MAX))); return_if_error(ast_create(&compiler->allocator, struct_decl, AST_STRUCT_DECL, &(*lhs_expr)->rhs_expr)); return_if_error(ast_create(&compiler->allocator, *lhs_expr, AST_LHS, &expr)); expr->resolve_data.type = *lhs_expr; @@ -540,7 +540,6 @@ int amal_compiler_load_file(amal_compiler_options *options, amal_program *progra int amal_compiler_internal_load_file(amal_compiler *self, const char *filepath, FileScopeReference **file_scope) { int result; - BufferView filepath_view; ParserThreadData *parser_thread_data; ThreadWorkData thread_work_data; bool main_job; @@ -548,9 +547,8 @@ int amal_compiler_internal_load_file(amal_compiler *self, const char *filepath, return_if_error(try_create_file_scope(self, filepath, file_scope, &new_entry)); assert(file_scope && *file_scope && (*file_scope)->canonical_path.data); - filepath_view = create_buffer_view((*file_scope)->canonical_path.data, (*file_scope)->canonical_path.size); if(!new_entry) { - amal_log_info("amal_compiler_load_file: file already parsed: %.*s", filepath_view.size, filepath_view.data); + amal_log_info("amal_compiler_load_file: file already parsed: %.*s", (*file_scope)->canonical_path.size, (*file_scope)->canonical_path.data); return 0; } @@ -575,11 +573,40 @@ int amal_compiler_internal_load_file(amal_compiler *self, const char *filepath, and writing it to a file, which is an IO bottlenecked operation and it won't benefit from multithreading and may even lose performance because of it. */ + const BufferView main_func_name = { "main", 4 }; + LhsExpr *main_func_expr; return_if_error(amal_compiler_load_file_join_threads(self)); assert(amal_compiler_check_all_threads_done(self)); amal_log_info("Finished parsing all files, resolving AST"); + main_func_expr = structdecl_get_field_by_name(&(*file_scope)->parser->struct_decl, main_func_name); + if(!main_func_expr) { + amal_log_error("main function missing from start file \"%.*s\"", (*file_scope)->canonical_path.size, (*file_scope)->canonical_path.data); + return AMAL_COMPILER_ERR; + } + + if(!main_func_expr->rhs_expr || main_func_expr->rhs_expr->type != AST_FUNCTION_DECL) { + amal_log_error("main exists in start file \"%.*s\" but it's not an non-extern function", (*file_scope)->canonical_path.size, (*file_scope)->canonical_path.data); + return AMAL_COMPILER_ERR; + } + + if(!LHS_EXPR_IS_CONST(main_func_expr)) { + amal_log_error("main function in start file \"%.*s\" has to be const", (*file_scope)->canonical_path.size, (*file_scope)->canonical_path.data); + return AMAL_COMPILER_ERR; + } + + if(LHS_EXPR_IS_EXTERN(main_func_expr)) { + amal_log_error("main function in start file \"%.*s\" can't be declared as extern", (*file_scope)->canonical_path.size, (*file_scope)->canonical_path.data); + return AMAL_COMPILER_ERR; + } + + /* + The main function is the start file needs to be exported, so it's accessible in the program execution + to find the entry (main) function. + */ + main_func_expr->decl_flags |= DECL_FLAG_EXPORT; + return_if_error(amal_compiler_dispatch_generic(self, THREAD_WORK_RESOLVE_AST)); assert(amal_compiler_check_all_threads_done(self)); amal_log_info("Finished resolving AST, generating SSA"); diff --git a/src/parser.c b/src/parser.c index fdf34ce..e36790f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -61,7 +61,7 @@ int parser_init(Parser *self, amal_compiler *compiler, ArenaAllocator *allocator self->error_context = ERROR_CONTEXT_NONE; return_if_error(structdecl_init(&self->struct_decl, &compiler->root_scope, allocator)); self->struct_decl.body.parser = self; - lhsexpr_init(&self->file_decl, bool_true, bool_true, bool_true, create_buffer_view_null()); + lhsexpr_init(&self->file_decl, DECL_FLAG_EXTERN | DECL_FLAG_PUB | DECL_FLAG_CONST, create_buffer_view_null()); return_if_error(ast_create(self->allocator, &self->struct_decl, AST_STRUCT_DECL, &self->file_decl.rhs_expr)); self->current_scope = &self->struct_decl.body; self->has_func_parent = bool_false; @@ -192,29 +192,39 @@ LHS_DECLARATION = 'extern'? 'pub'? 'const'|'var' TOK_IDENTIFIER VAR_TYPE_DEF? */ static CHECK_RESULT LhsExpr* parser_parse_declaration_lhs(Parser *self) { LhsExpr *result; + DeclFlag decl_flag; bool is_extern; bool is_pub; bool is_const; BufferView var_name; + decl_flag = DECL_FLAG_NONE; throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_EXTERN, &is_extern)); - if(is_extern && self->has_func_parent) { - self->error = tokenizer_create_error(&self->tokenizer, - tokenizer_get_code_reference_index(&self->tokenizer, self->tokenizer.value.identifier.data), - "Only declarations in global structs can be extern"); - throw(PARSER_UNEXPECTED_TOKEN); + if(is_extern) { + decl_flag |= DECL_FLAG_EXTERN; + if(self->has_func_parent) { + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_code_reference_index(&self->tokenizer, self->tokenizer.value.identifier.data), + "Only declarations in global structs can be extern"); + throw(PARSER_UNEXPECTED_TOKEN); + } } throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_PUB, &is_pub)); - if(is_pub && self->has_func_parent) { - self->error = tokenizer_create_error(&self->tokenizer, - tokenizer_get_code_reference_index(&self->tokenizer, self->tokenizer.value.identifier.data), - "Only declarations in global structs can be public"); - throw(PARSER_UNEXPECTED_TOKEN); + if(is_pub) { + decl_flag |= DECL_FLAG_PUB; + if(self->has_func_parent) { + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_code_reference_index(&self->tokenizer, self->tokenizer.value.identifier.data), + "Only declarations in global structs can be public"); + throw(PARSER_UNEXPECTED_TOKEN); + } } throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CONST, &is_const)); - if(!is_const) { + if(is_const) { + decl_flag |= DECL_FLAG_CONST; + } else { bool isVar; if(is_extern) { @@ -232,7 +242,7 @@ static CHECK_RESULT LhsExpr* parser_parse_declaration_lhs(Parser *self) { throw_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER)); var_name = self->tokenizer.value.identifier; throw_if_error(arena_allocator_alloc(self->allocator, sizeof(LhsExpr), (void**)&result)); - lhsexpr_init(result, is_extern, is_pub, is_const, var_name); + lhsexpr_init(result, decl_flag, var_name); parser_parse_var_type_def(self, &result->type); return result; @@ -642,7 +652,7 @@ Ast* parser_parse_body(Parser *self) { bool match; throw_if_error(ast_create(self->allocator, lhs_expr, AST_LHS, &result)); - if(lhs_expr->is_extern) { + if(LHS_EXPR_IS_EXTERN(lhs_expr)) { throw_if_error(tokenizer_accept(&self->tokenizer, TOK_SEMICOLON)); if (lhs_expr->type.type == VARIABLE_TYPE_NONE) { self->error = tokenizer_create_error(&self->tokenizer, self->tokenizer.prev_index, "A variable can't be declared without a type or assignment"); diff --git a/src/ssa/ssa.c b/src/ssa/ssa.c index 707ccd0..f3679aa 100644 --- a/src/ssa/ssa.c +++ b/src/ssa/ssa.c @@ -339,7 +339,7 @@ static CHECK_RESULT SsaRegister lhsexpr_generate_ssa(Ast *self, SsaCompilerConte assert(self->type == AST_LHS); lhs_expr = self->value.lhs_expr; - if(lhs_expr->is_extern) + if(LHS_EXPR_IS_EXTERN(lhs_expr)) return lhsexpr_extern_generate_ssa(lhs_expr, context); if(lhs_expr->rhs_expr) { @@ -435,7 +435,7 @@ static CHECK_RESULT SsaRegister funccall_generate_ssa(Ast *self, SsaCompilerCont assert((self->resolve_data.type->rhs_expr && self->resolve_data.type->rhs_expr->type == AST_FUNCTION_DECL) || self->resolve_data.type->type.type == VARIABLE_TYPE_SIGNATURE); - if(self->resolve_data.type->is_extern) { + if(LHS_EXPR_IS_EXTERN(self->resolve_data.type)) { amal_log_error("TODO: Implement extern function call (extern function %.*s was called)", func_call->func.name.size, func_call->func.name.data); reg = 0; assert(bool_false && "TODO: Implement extern function call!"); diff --git a/src/std/hash_map.c b/src/std/hash_map.c index bcb43eb..c2e42c1 100644 --- a/src/std/hash_map.c +++ b/src/std/hash_map.c @@ -205,13 +205,12 @@ bool hash_map_get(HashMap *self, BufferView key, void *value) { int hash_map_compare_string(const void *a, const void *b) { const BufferView *lhs; const BufferView *rhs; - int mem_diff; lhs = a; rhs = b; - mem_diff = am_memcmp(lhs->data, rhs->data, MIN(lhs->size, rhs->size)); - if(mem_diff == 0) - return (int)lhs->size - (int)rhs->size; - else - return mem_diff; + + if(lhs->size != rhs->size) + return -1; + + return am_memcmp(lhs->data, rhs->data, MIN(lhs->size, rhs->size)); } diff --git a/tests/errors/no_main_func.amal b/tests/errors/no_main_func.amal new file mode 100644 index 0000000..c008d9e --- /dev/null +++ b/tests/errors/no_main_func.amal @@ -0,0 +1 @@ +const not_main = fn { } \ No newline at end of file diff --git a/tests/main.c b/tests/main.c index 5b8bad0..63f72d5 100644 --- a/tests/main.c +++ b/tests/main.c @@ -21,7 +21,7 @@ static int num_tests_run = 0; static CHECK_RESULT int test_hash_map() { ArenaAllocator arena_allocator; - HashMap hash_map; + HashMapType(BufferView, int) hash_map; int value; bool has_key; unsigned char i; @@ -175,8 +175,10 @@ static void test_load_error(const char *filepath, const char *expected_error) { options.error_callback = error_callback_assert; expected_data.filepath = get_full_path(filepath); - expected_data.expected_error = join_str(expected_data.filepath, expected_error, ':'); - expected_data.got_expected_error = bool_false; + if(expected_error) { + expected_data.expected_error = join_str(expected_data.filepath, expected_error, ':'); + expected_data.got_expected_error = bool_false; + } options.error_callback_userdata = &expected_data; if(amal_program_init(&program) != 0) { @@ -184,12 +186,12 @@ static void test_load_error(const char *filepath, const char *expected_error) { FAIL_TEST(expected_data.filepath); } if(amal_compiler_load_file(&options, &program, filepath) == AMAL_COMPILER_OK) { - fprintf(stderr, "Expected to fail loading file\n"); + fprintf(stderr, "Successfully loaded file when it was expected to fail\n"); FAIL_TEST(expected_data.filepath); } amal_program_deinit(&program); - if(!expected_data.got_expected_error) { + if(expected_error && !expected_data.got_expected_error) { fprintf(stderr, "Didn't get expected error message:\n%s\n", expected_error); FAIL_TEST(expected_data.filepath); } @@ -197,7 +199,8 @@ static void test_load_error(const char *filepath, const char *expected_error) { fprintf(stderr, "Test failed as expected: %s\n", expected_data.filepath); ++num_successful_tests; free(expected_data.filepath); - free(expected_data.expected_error); + if(expected_error) + free(expected_data.expected_error); } static void run_all_tests() { @@ -247,6 +250,7 @@ static void run_all_tests() { "1:15: error: A variable can't be declared without a type or assignment\n" " extern const a;\n" " ^\n"); + test_load_error("tests/errors/no_main_func.amal", NULL); } /* TODO: Restrict variables in global scope to const */ diff --git a/tools/highlevel_c.py b/tools/highlevel_c.py new file mode 100755 index 0000000..2cff3b3 --- /dev/null +++ b/tools/highlevel_c.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python2 + +import os +import sys +import json +import clang.cindex + +def find_child_of_kind(node, kind): + if node.kind == kind: + return node + for child in node.get_children(): + found_type = find_child_of_kind(child, kind) + if found_type: + return found_type + +def get_types_from_annotation(annotation): + return [t.replace(" ", "") for t in annotation.split(",")] + +def parse_call_expr(call_expr): + if call_expr.spelling == "hash_map_get" or call_expr.spelling == "hash_map_insert": + args = list(call_expr.get_arguments()) + + # First arg + self_obj = next(args[0].get_children(), None) + self_obj_def = self_obj.get_definition() + annotation = next(self_obj_def.get_children(), None) + if not annotation or annotation.kind != clang.cindex.CursorKind.ANNOTATE_ATTR: + print("WARNING: Hash map at %s is not annotated" % args[0].location) + return + + (hash_map_key_type, hash_map_value_type) = get_types_from_annotation(annotation.spelling) + hash_map_value_type = hash_map_value_type + "*" + + # Second arg + #key_arg_type = args[1].type.spelling.replace(" ", "") + #if key_arg_type != hash_map_key_type: + # print("Error: Incorrect usage of %s key argument found at %s." % (call_expr.spelling, args[1].location)) + # print(" Argument variable is of type %s, but the HashMap expects a value of type %s" % (key_arg_type, hash_map_key_type)) + + # Third arg + value_obj = next(args[2].get_children(), None) + value_arg_type = value_obj.type.spelling.replace(" ", "") + + if value_arg_type != hash_map_value_type: + print("ERROR: Incorrect usage of %s value argument found at %s." % (call_expr.spelling, args[2].location)) + print(" Argument variable is of type %s, but the HashMap expects a value of type %s" % (value_arg_type, hash_map_value_type)) + #print("def: %s, loc: %s" % (self_obj.get_definition(), self_obj.get_definition().location)) + +def parse(node): + if node.kind == clang.cindex.CursorKind.CALL_EXPR: + parse_call_expr(node) + for child in node.get_children(): + parse(child) + +def compile_commands_get_files(compile_commands_filepath): + files = [] + with open(compile_commands_filepath) as json_file: + data = json.load(json_file) + for obj in data: + filepath = os.path.join(obj["directory"], obj["file"]) + files.append(filepath) + return files + +def main(): + script_path = os.path.realpath(sys.argv[0]) + script_dir = os.path.dirname(script_path) + + compile_commands_file = os.path.join(script_dir, "..", "compile_commands.json") + if not os.path.isfile(compile_commands_file): + print("compile_commands.json file is missing! You need to compile amalgam before running this script") + exit(1) + + idx = clang.cindex.Index.create() + for filepath in compile_commands_get_files(compile_commands_file): + print("Parsing file: %s" % filepath) + tu = idx.parse(filepath, args=['-std=c89'], options=0) + parse(tu.cursor) + +if __name__ == "__main__": + main() + -- cgit v1.2.3