aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2019-07-16 00:27:53 +0200
committerdec05eba <dec05eba@protonmail.com>2020-07-25 14:36:46 +0200
commit85c654a102701958d3748e82ecac9c1bc4dbbcba (patch)
tree61a804a3af5ca43e7608f4c5dc6ea1e292fc8a35 /src
parentabd74f22fd0c58b30f951da9cec1d1799e9b5072 (diff)
Start on real bytecode & doc parsing
Diffstat (limited to 'src')
-rw-r--r--src/bytecode/bytecode.c165
-rw-r--r--src/compiler.c9
-rw-r--r--src/ssa/ssa.c12
3 files changed, 168 insertions, 18 deletions
diff --git a/src/bytecode/bytecode.c b/src/bytecode/bytecode.c
index 6ceacf0..29d99c0 100644
--- a/src/bytecode/bytecode.c
+++ b/src/bytecode/bytecode.c
@@ -58,6 +58,31 @@ static CHECK_RESULT usize ssa_extract_jump(u8 *instruction_data, SsaInsJump *res
return sizeof(result->jump_offset);
}
+static void add_header(BytecodeCompilerContext *self) {
+ /*doc(BytecodeHeader)
+ # Header layout
+ |Size|Name |Description |
+ |----|-------------|----------------------------------------------------------------------------|
+ |4 |Magic number |The magic number used to identify an amalgam bytecode file. |
+ |1 |Major version|The major version of the bytecode. Updates in this is a breaking change. |
+ |1 |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
+ |1 |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|
+ The versions in the header only changes for every release, not every change.
+ */
+
+ const u32 magic_number = 0xdec05eba;
+ const u8 major_version = 1;
+ const u8 minor_version = 0;
+ const u8 patch_version = 0;
+
+ Buffer *instructions;
+ instructions = &self->bytecode->instructions;
+ throw_if_error(buffer_append(instructions, &magic_number, 4));
+ throw_if_error(buffer_append(instructions, &major_version, 1));
+ throw_if_error(buffer_append(instructions, &minor_version, 1));
+ throw_if_error(buffer_append(instructions, &patch_version, 1));
+}
+
static void add_intermediates(BytecodeCompilerContext *self) {
Ssa *ssa;
Buffer *instructions;
@@ -78,6 +103,107 @@ static void add_intermediates(BytecodeCompilerContext *self) {
}
}
+void add_strings(BytecodeCompilerContext *self) {
+ Ssa *ssa;
+ Buffer *instructions;
+ BufferView *string;
+ BufferView *strings_end;
+
+ ssa = self->parser->ssa;
+ instructions = &self->bytecode->instructions;
+ string = buffer_begin(&ssa->strings);
+ strings_end = buffer_end(&ssa->strings);
+
+ /*
+ The 8 here is a arbitrary chosen number since we don't know the actual
+ size of all strings without counting. The logic is that the average
+ size of all strings length would be 8.
+ */
+ throw_if_error(buffer_expand(instructions,
+ sizeof(u16) + (sizeof(u16) + 8) * ssa->strings.size));
+ throw_if_error(buffer_append(instructions, &ssa->strings.size, sizeof(u16)));
+ for(; string != strings_end; ++string) {
+ throw_if_error(buffer_append(instructions, &string->size, sizeof(u16)));
+ throw_if_error(buffer_append(instructions, &string->data, string->size));
+ }
+}
+
+static void add_ins1(BytecodeCompilerContext *self, AmalOpcode opcode, const char *fmt) {
+ throw_if_error(buffer_append(&self->bytecode->instructions, &opcode, sizeof(AmalOpcodeType)));
+ fprintf(stderr, fmt);
+ fputc('\n', stderr);
+}
+
+
+static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, u8 reg, const char *fmt) {
+ Buffer *instructions;
+ size_t index;
+ instructions = &self->bytecode->instructions;
+ index = instructions->size;
+
+ throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(reg)));
+ instructions->data[index] = opcode;
+ memcpy(instructions->data + index + sizeof(AmalOpcodeType), &reg, sizeof(reg));
+ fprintf(stderr, fmt, reg);
+ fputc('\n', stderr);
+}
+
+static void add_ins3(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 src_reg, const char *fmt) {
+ Buffer *instructions;
+ size_t index;
+ instructions = &self->bytecode->instructions;
+ index = instructions->size;
+
+ throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(src_reg)));
+ instructions->data[index] = opcode;
+ memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
+ memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &src_reg, sizeof(src_reg));
+ fprintf(stderr, fmt, dst_reg, src_reg);
+ fputc('\n', stderr);
+}
+
+static void add_ins4(BytecodeCompilerContext *self, AmalOpcode opcode, u16 data, const char *fmt) {
+ Buffer *instructions;
+ size_t index;
+ instructions = &self->bytecode->instructions;
+ index = instructions->size;
+
+ throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(data)));
+ instructions->data[index] = opcode;
+ memcpy(instructions->data + index + sizeof(AmalOpcodeType), &data, sizeof(data));
+ fprintf(stderr, fmt, data);
+ fputc('\n', stderr);
+}
+
+static void add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 reg1, u8 reg2, const char *fmt) {
+ Buffer *instructions;
+ size_t index;
+ instructions = &self->bytecode->instructions;
+ index = instructions->size;
+
+ throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1) + sizeof(reg2)));
+ instructions->data[index] = opcode;
+ memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
+ memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &reg1, sizeof(reg1));
+ memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1), &reg2, sizeof(reg2));
+ fprintf(stderr, fmt, dst_reg, reg1, reg2);
+ fputc('\n', stderr);
+}
+
+static void add_ins6(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u16 data, const char *fmt) {
+ Buffer *instructions;
+ size_t index;
+ instructions = &self->bytecode->instructions;
+ index = instructions->size;
+
+ throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(data)));
+ instructions->data[index] = opcode;
+ memcpy(instructions->data + index + sizeof(AmalOpcodeType), &dst_reg, sizeof(dst_reg));
+ memcpy(instructions->data + index + sizeof(AmalOpcodeType) + sizeof(dst_reg), &data, sizeof(data));
+ fprintf(stderr, fmt, dst_reg, data);
+ fputc('\n', stderr);
+}
+
#if 0
#define NUM_MAX_REGS 256
#define NUM_MAX_FUNC_ARGS 32
@@ -251,63 +377,64 @@ static void add_instructions(BytecodeCompilerContext *self) {
}
}
#else
- #define ARITH_OP(op) do {\
- instruction += ssa_extract_form2(instruction, &ssa_ins_form2); \
- fprintf(file, "%s r%d, r%d, r%d\n", (op), ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs); \
- } while(0)
while(instruction != instructions_end) {
SsaInstruction ins = (SsaInstruction)*instruction++;
switch(ins) {
case SSA_ASSIGN_INTER: {
instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
- fprintf(file, "mov r%d, i%d\n", ssa_ins_form1.lhs, ssa_ins_form1.rhs);
+ add_ins3(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d");
break;
}
case SSA_ASSIGN_STRING: {
instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
- fprintf(file, "mov r%d, s%d\n", ssa_ins_form1.lhs, ssa_ins_form1.rhs);
+ add_ins3(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d");
break;
}
case SSA_ASSIGN_REG: {
instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
- fprintf(file, "mov r%d, r%d\n", ssa_ins_form1.lhs, ssa_ins_form1.rhs);
+ add_ins3(self, AMAL_OP_MOV, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "mov r%d, d%d");
break;
}
case SSA_ADD: {
- ARITH_OP("add");
+ instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
+ add_ins5(self, AMAL_OP_ADD, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "add r%d, r%d, r%d");
break;
}
case SSA_SUB: {
- ARITH_OP("sub");
+ instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
+ add_ins5(self, AMAL_OP_SUB, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "sub r%d, r%d, r%d");
break;
}
case SSA_MUL: {
- ARITH_OP("mul");
+ instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
+ add_ins5(self, AMAL_OP_MUL, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "mul r%d, r%d, r%d");
break;
}
case SSA_DIV: {
- ARITH_OP("div");
+ instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
+ add_ins5(self, AMAL_OP_DIV, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "div r%d, r%d, r%d");
break;
}
case SSA_EQUALS: {
- ARITH_OP("eq");
+ instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
+ add_ins5(self, AMAL_OP_CMP, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "cmp r%d, r%d, r%d");
break;
}
case SSA_FUNC_START: {
instruction += ssa_extract_func_start(instruction, &ssa_ins_func_start);
- fprintf(file, "FUNC_START %d\n", ssa_ins_func_start.num_args);
+ add_ins1(self, AMAL_OP_FUNC_START, "func_start");
break;
}
case SSA_FUNC_END: {
- fprintf(file, "FUNC_END\n");
+ add_ins1(self, AMAL_OP_FUNC_START, "func_end");
break;
}
case SSA_PUSH: {
SsaRegister reg;
am_memcpy(&reg, instruction, sizeof(SsaRegister));
instruction += sizeof(SsaRegister);
- fprintf(file, "push r%d\n", reg);
+ add_ins2(self, AMAL_OP_PUSH, reg, "push r%d");
break;
}
case SSA_CALL: {
@@ -325,17 +452,17 @@ static void add_instructions(BytecodeCompilerContext *self) {
is defined as the size of all previous files' number of functions.
*/
instruction += ssa_extract_func_call(instruction, &ssa_ins_func_call);
- fprintf(file, "call %d\n", ssa_ins_func_call.func_decl->ssa_func_index);
+ add_ins4(self, AMAL_OP_CALL, ssa_ins_func_call.func_decl->ssa_func_index, "call %d");
break;
}
case SSA_JUMP_ZERO: {
instruction += ssa_extract_jump_zero(instruction, &ssa_ins_jump_zero);
- fprintf(file, "jz r%d, %d\n", ssa_ins_jump_zero.condition_reg, ssa_ins_jump_zero.jump_offset);
+ add_ins6(self, AMAL_OP_JZ, ssa_ins_jump_zero.condition_reg, ssa_ins_jump_zero.jump_offset, "jz r%d, %d");
break;
}
case SSA_JUMP: {
instruction += ssa_extract_jump(instruction, &ssa_ins_jump);
- fprintf(file, "jmp %d\n", ssa_ins_jump.jump_offset);
+ add_ins4(self, AMAL_OP_JMP, ssa_ins_jump.jump_offset, "jmp %d");
break;
}
default:
@@ -348,7 +475,9 @@ static void add_instructions(BytecodeCompilerContext *self) {
}
void generate_bytecode_from_ssa(BytecodeCompilerContext *self) {
+ add_header(self);
add_intermediates(self);
+ add_strings(self);
/* TODO: Also add strings in ssa, so we can index them */
add_instructions(self);
}
diff --git a/src/compiler.c b/src/compiler.c
index 9f003c8..e7b242b 100644
--- a/src/compiler.c
+++ b/src/compiler.c
@@ -510,6 +510,15 @@ int amal_compiler_internal_load_file(amal_compiler *self, const char *filepath,
return_if_error(amal_compiler_select_thread_for_work(self, thread_work_data, &parser_thread_data));
if(main_job) {
+ /*doc(CompilerFlow)
+ # Compiler flow
+ (Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\
+ Each step except the last is done using multiple threads in parallel and the output of each step is used
+ in the next step. The last step is not done in parallel because the last step is combining all bytecode
+ and writing it to a file, which is an IO bottlenecked operation and it won't benefit from multithreading
+ and may even lose performance because of it.
+ */
+
return_if_error(amal_compiler_load_file_join_threads(self));
assert(amal_compiler_check_all_threads_done(self));
amal_log_info("Finished parsing all files, resolving AST");
diff --git a/src/ssa/ssa.c b/src/ssa/ssa.c
index 91ba185..34e3e3e 100644
--- a/src/ssa/ssa.c
+++ b/src/ssa/ssa.c
@@ -16,6 +16,9 @@ do { \
throw(return_if_result); \
} while(0)
+/* Max length of a string that fits in u16 */
+#define MAX_STRING_LENGTH ((2 << 16) - 1)
+
static int compare_number(const void *a, const void *b) {
const SsaNumber *lhs;
const SsaNumber *rhs;
@@ -125,6 +128,11 @@ static CHECK_RESULT int ssa_try_add_string(Ssa *self, BufferView str, SsaStringI
/* Overflow */
if(self->string_counter + 1 < self->string_counter)
return -1;
+
+ if(str.size > MAX_STRING_LENGTH) {
+ amal_log_error("String \"%.*s\" is longer than %d\n", str.size, str.data, MAX_STRING_LENGTH);
+ return -2;
+ }
*result_index = self->string_counter;
++self->string_counter;
@@ -386,6 +394,10 @@ in any order.
*/
static CHECK_RESULT SsaRegister funcdecl_generate_ssa(FunctionDecl *self, SsaCompilerContext *context) {
/* TODO: Implement */
+ /*
+ Reset reg counter in each function, because each function has a separate register context
+ that is reset after function end
+ */
SsaRegister prev_reg_counter;
prev_reg_counter = context->ssa->reg_counter;
context->ssa->reg_counter = 0;