aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2019-07-18 03:10:03 +0200
committerdec05eba <dec05eba@protonmail.com>2020-07-25 14:36:46 +0200
commitec1a48e7b86fcd00127dd5a88d56c42083af1d78 (patch)
tree7e6423f20fa795bbba8cbc5ead8bc8a3f7289d2e
parent84e65c63e7482590d535e86f7660a00ae8a0cecb (diff)
Setup structure for program execute
-rw-r--r--doc/Documentation.md5
-rw-r--r--include/ast.h7
-rw-r--r--include/bytecode/bytecode.h5
-rw-r--r--include/program.h22
-rw-r--r--src/bytecode/bytecode.c51
-rw-r--r--src/compiler.c1
-rw-r--r--src/program.c257
-rw-r--r--tests/main.c14
8 files changed, 315 insertions, 47 deletions
diff --git a/doc/Documentation.md b/doc/Documentation.md
index 34d1b41..5564b05 100644
--- a/doc/Documentation.md
+++ b/doc/Documentation.md
@@ -11,7 +11,10 @@ Instructions can be in 6 different formats:
4.3 Opcode + index\
4.4 Opcode + offset
5. 4 bytes: Opcode + register + register + register
-6. 4 bytes: Opcode + register + offset
+6. 4 bytes:\
+6.1 Opcode + register + offset\
+6.2 Opcode + register + intermediate\
+6.3 Opcode + register + data
# Compiler flow
(Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\
diff --git a/include/ast.h b/include/ast.h
index 2925f6c..f056db7 100644
--- a/include/ast.h
+++ b/include/ast.h
@@ -142,9 +142,12 @@ typedef struct {
} VariableType;
/*
- Note: When resolving AST, more than one thread can end up resolving the same expressions at the same time.
+ Note: When resolving AST, multiple threads can end up resolving the same expressions at the same time.
This is intentional. Instead of using mutex for every expression and locking/unlocking everytime
which uses more memory and affects performance, we assume such race conditions are rare and let them happen.
+ TODO: Investigate possible deadlock when multiple threads resolve the same expression and that
+ expression has a recursive dependency and the threads execute @ast_resolve at the same speed,
+ leading to @ast_resolve running again for the same expression.
*/
struct LhsExpr {
bool is_extern;
@@ -210,7 +213,7 @@ typedef struct {
Parser *parser; /* Borrowed. This is the parser that belongs to the thread */
/*
Borrowed. This is the current scope. Note that this scope can belong to another parser (and thread),
- as such, @parser and scope_get_parser(@scope) parser may not be the same
+ as such, @parser and scope_get_parser(@scope) parser may not be the same.
*/
Scope *scope;
} AstCompilerContext;
diff --git a/include/bytecode/bytecode.h b/include/bytecode/bytecode.h
index 739aa79..b571383 100644
--- a/include/bytecode/bytecode.h
+++ b/include/bytecode/bytecode.h
@@ -21,7 +21,10 @@
4.3 Opcode + index\
4.4 Opcode + offset
5. 4 bytes: Opcode + register + register + register
- 6. 4 bytes: Opcode + register + offset
+ 6. 4 bytes:\
+ 6.1 Opcode + register + offset\
+ 6.2 Opcode + register + intermediate\
+ 6.3 Opcode + register + data
*/
typedef enum {
AMAL_OP_NOP, /* No operation. This can be used for patching */
diff --git a/include/program.h b/include/program.h
index cbd9432..f251fbc 100644
--- a/include/program.h
+++ b/include/program.h
@@ -4,11 +4,31 @@
#include "std/buffer.h"
#include "bytecode/bytecode.h"
+#define AMAL_PROGRAM_OK 0
+#define AMAL_PROGRAM_INVALID_HEADER -1
+#define AMAL_PROGRAM_INVALID_MAGIC_NUMBER -2
+#define AMAL_PROGRAM_INCOMPATIBLE -3
+#define AMAL_PROGRAM_INVALID_INTERMEDIATES -4
+#define AMAL_PROGRAM_INVALID_INTERMEDIATES_SIZE -5
+#define AMAL_PROGRAM_INVALID_STRINGS -6
+#define AMAL_PROGRAM_INVALID_STRINGS_SIZE -7
+#define AMAL_PROGRAM_STRING_ALLOC_FAILURE -8
+#define AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE -9
+
+#define AMAL_PROGRAM_MAGIC_NUMBER (u32)0xdec05eba
+#define AMAL_PROGRAM_MAJOR_VERSION 1
+#define AMAL_PROGRAM_MINOR_VERSION 0
+#define AMAL_PROGRAM_PATCH_VERSION 0
+
typedef struct {
Buffer/*<...>*/ data;
+ Buffer/*<u32>*/ string_indices;
+ char *intermediates_start;
+ char *strings_start;
+ usize read_index;
} amal_program;
-void amal_program_init(amal_program *self);
+CHECK_RESULT int amal_program_init(amal_program *self);
void amal_program_deinit(amal_program *self);
CHECK_RESULT int amal_program_append_bytecode(amal_program *self, Bytecode *bytecode);
diff --git a/src/bytecode/bytecode.c b/src/bytecode/bytecode.c
index 7c4fe08..9bf5f24 100644
--- a/src/bytecode/bytecode.c
+++ b/src/bytecode/bytecode.c
@@ -58,47 +58,24 @@ static CHECK_RESULT usize ssa_extract_jump(u8 *instruction_data, SsaInsJump *res
return sizeof(result->jump_offset);
}
-static void add_header(BytecodeCompilerContext *self) {
- /*doc(Bytecode header)
- # Header layout
- |Size|Name |Description |
- |----|-------------|----------------------------------------------------------------------------|
- |4 |Magic number |The magic number used to identify an amalgam bytecode file. |
- |1 |Major version|The major version of the bytecode. Updates in this is a breaking change. |
- |1 |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
- |1 |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|
- The versions in the header only changes for every release, not every change.
- */
-
- const u32 magic_number = 0xdec05eba;
- const u8 major_version = 1;
- const u8 minor_version = 0;
- const u8 patch_version = 0;
-
- Buffer *instructions;
- instructions = &self->bytecode.data;
- throw_if_error(buffer_append(instructions, &magic_number, 4));
- throw_if_error(buffer_append(instructions, &major_version, 1));
- throw_if_error(buffer_append(instructions, &minor_version, 1));
- throw_if_error(buffer_append(instructions, &patch_version, 1));
-}
-
static void add_intermediates(BytecodeCompilerContext *self) {
Ssa *ssa;
Buffer *instructions;
SsaNumber *intermediate;
SsaNumber *intermediates_end;
+ u32 intemediates_size;
ssa = self->parser->ssa;
instructions = &self->bytecode.data;
intermediate = buffer_begin(&ssa->intermediates);
intermediates_end = buffer_end(&ssa->intermediates);
- throw_if_error(buffer_expand(instructions,
- sizeof(u16) + (sizeof(u8) + sizeof(u64)) * ssa->intermediates.size));
- throw_if_error(buffer_append(instructions, &ssa->intermediates.size, sizeof(u16)));
+ intemediates_size = (sizeof(u8) + sizeof(u64)) * buffer_get_size(&ssa->intermediates, SsaNumber);
+ throw_if_error(buffer_expand(instructions, sizeof(u32) + intemediates_size));
+ throw_if_error(buffer_append(instructions, &intemediates_size, sizeof(u32)));
for(; intermediate != intermediates_end; ++intermediate) {
throw_if_error(buffer_append(instructions, &intermediate->type, sizeof(u8)));
+ /* TODO: Store value using an encoding that will save space when using low numbers */
throw_if_error(buffer_append(instructions, &intermediate->value.integer, sizeof(u64)));
}
}
@@ -108,12 +85,18 @@ void add_strings(BytecodeCompilerContext *self) {
Buffer *instructions;
BufferView *string;
BufferView *strings_end;
+ u16 num_strings;
u32 strings_size;
ssa = self->parser->ssa;
instructions = &self->bytecode.data;
string = buffer_begin(&ssa->strings);
strings_end = buffer_end(&ssa->strings);
+ if(strings_end - string > UINT16_MAX) {
+ amal_log_error("Too many strings in the program");
+ throw(-1);
+ }
+ num_strings = strings_end - string;
strings_size = 0;
for(; string != strings_end; ++string) {
@@ -121,7 +104,8 @@ void add_strings(BytecodeCompilerContext *self) {
}
string = buffer_begin(&ssa->strings);
- throw_if_error(buffer_expand(instructions, sizeof(u32) + strings_size));
+ throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + strings_size));
+ throw_if_error(buffer_append(instructions, &num_strings, sizeof(u16)));
throw_if_error(buffer_append(instructions, &strings_size, sizeof(u32)));
for(; string != strings_end; ++string) {
throw_if_error(buffer_append(instructions, &string->size, sizeof(u16)));
@@ -383,17 +367,19 @@ static void add_instructions(BytecodeCompilerContext *self) {
}
#else
+ /* TODO: Keep all registers under 256 */
+
while(instruction != instructions_end) {
SsaInstruction ins = (SsaInstruction)*instruction++;
switch(ins) {
case SSA_ASSIGN_INTER: {
instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
- add_ins3(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d");
+ add_ins6(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d");
break;
}
case SSA_ASSIGN_STRING: {
instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
- add_ins3(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d");
+ add_ins6(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d");
break;
}
case SSA_ASSIGN_REG: {
@@ -479,7 +465,7 @@ static void add_instructions(BytecodeCompilerContext *self) {
/* Prepend instructions with its size */
{
u32 instructions_size;
- instructions_size = self->bytecode.data.size - num_instructions_index;
+ instructions_size = self->bytecode.data.size - num_instructions_index - sizeof(instructions_size); /* Remove the count itself from the size of the instructions size */
am_memcpy(&self->bytecode.data.data[num_instructions_index], &instructions_size, sizeof(instructions_size));
}
@@ -488,7 +474,6 @@ static void add_instructions(BytecodeCompilerContext *self) {
}
void generate_bytecode_from_ssa(BytecodeCompilerContext *self) {
- add_header(self);
add_intermediates(self);
add_strings(self);
/* TODO: Also add strings in ssa, so we can index them */
diff --git a/src/compiler.c b/src/compiler.c
index 1af9537..05fae78 100644
--- a/src/compiler.c
+++ b/src/compiler.c
@@ -446,6 +446,7 @@ static CHECK_RESULT int amal_compiler_dispatch_generic(amal_compiler *self, Thre
}
static CHECK_RESULT int amal_compiler_generate_program(amal_compiler *self) {
+ /* TODO: Copying the bytecode to the program can be done using multiple threads */
Parser **parser;
Parser **parser_end;
parser = buffer_begin(&self->parsers);
diff --git a/src/program.c b/src/program.c
index aa39a4c..167f4c4 100644
--- a/src/program.c
+++ b/src/program.c
@@ -1,23 +1,272 @@
#include "../include/program.h"
+#include "../include/std/mem.h"
+#include "../include/std/alloc.h"
+#include "../include/std/log.h"
#include <stdio.h>
#include <errno.h>
+#include <assert.h>
-void amal_program_init(amal_program *self) {
+/* TODO: If system is big-endian, then do endian conversion for all reads */
+
+/* This matches SsaNumberType */
+typedef enum {
+ NUMBER_TYPE_INTEGER,
+ NUMBER_TYPE_FLOAT
+} NumberType;
+
+typedef union {
+ i64 integer;
+ f64 floating;
+} NumberUnion;
+
+int amal_program_init(amal_program *self) {
ignore_result_int(buffer_init(&self->data, NULL));
+ ignore_result_int(buffer_init(&self->string_indices, NULL));
+ self->intermediates_start = NULL;
+ self->strings_start = NULL;
+ self->read_index = 0;
+
+ /*doc(Bytecode header)
+ # Header layout
+ |Size|Name |Description |
+ |----|-------------|----------------------------------------------------------------------------|
+ |4 |Magic number |The magic number used to identify an amalgam bytecode file. |
+ |1 |Major version|The major version of the bytecode. Updates in this is a breaking change. |
+ |1 |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
+ |1 |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|
+ The versions in the header only changes for every release, not every change.
+ */
+
+ const u32 magic_number = AMAL_PROGRAM_MAGIC_NUMBER;
+ const u8 major_version = AMAL_PROGRAM_MAJOR_VERSION;
+ const u8 minor_version = AMAL_PROGRAM_MINOR_VERSION;
+ const u8 patch_version = AMAL_PROGRAM_PATCH_VERSION;
+
+ return_if_error(buffer_append(&self->data, &magic_number, 4));
+ return_if_error(buffer_append(&self->data, &major_version, 1));
+ return_if_error(buffer_append(&self->data, &minor_version, 1));
+ return_if_error(buffer_append(&self->data, &patch_version, 1));
+
+ return 0;
}
void amal_program_deinit(amal_program *self) {
buffer_deinit(&self->data);
+ buffer_deinit(&self->string_indices);
}
int amal_program_append_bytecode(amal_program *self, Bytecode *bytecode) {
return buffer_append(&self->data, bytecode->data.data, bytecode->data.size);
}
+static usize bytes_left_to_read(amal_program *self) {
+ assert(self->read_index <= self->data.size);
+ return self->data.size - self->read_index;
+}
+
+static CHECK_RESULT int amal_program_read_header(amal_program *self) {
+ u32 magic_number;
+ u8 major_version;
+ u8 minor_version;
+ u8 patch_version;
+
+ if(bytes_left_to_read(self) < sizeof(u32) + sizeof(u8) * 3)
+ return AMAL_PROGRAM_INVALID_HEADER;
+
+ am_memcpy(&magic_number, &self->data.data[self->read_index], sizeof(magic_number));
+ self->read_index += sizeof(u32);
+ am_memcpy(&major_version, &self->data.data[self->read_index], sizeof(major_version));
+ self->read_index += sizeof(u8);
+ am_memcpy(&minor_version, &self->data.data[self->read_index], sizeof(minor_version));
+ self->read_index += sizeof(u8);
+ am_memcpy(&patch_version, &self->data.data[self->read_index], sizeof(patch_version));
+ self->read_index += sizeof(u8);
+
+ if(magic_number != AMAL_PROGRAM_MAGIC_NUMBER)
+ return AMAL_PROGRAM_INVALID_MAGIC_NUMBER;
+
+ /*
+ A program is only incompatible if the major version is newer than the version that is used to run it.
+ TODO: Implement backwards compatible reads, starting from when the program bytecode breaks backwards compatibility
+ */
+ if(major_version > AMAL_PROGRAM_MAJOR_VERSION)
+ return AMAL_PROGRAM_INCOMPATIBLE;
+
+ return AMAL_PROGRAM_OK;
+}
+
+static CHECK_RESULT int amal_program_read_intermediates(amal_program *self) {
+ u32 intermediates_size;
+ /*u32 read_end;*/
+
+ if(bytes_left_to_read(self) < sizeof(intermediates_size)) {
+ amal_log_error("Not enough space in program to intermediates size");
+ return AMAL_PROGRAM_INVALID_INTERMEDIATES;
+ }
+
+ am_memcpy(&intermediates_size, &self->data.data[self->read_index], sizeof(intermediates_size));
+ self->read_index += sizeof(intermediates_size);
+
+ if(bytes_left_to_read(self) < intermediates_size) {
+ amal_log_error("Not enough space in program to read all intermediates");
+ return AMAL_PROGRAM_INVALID_INTERMEDIATES_SIZE;
+ }
+
+ self->intermediates_start = &self->data.data[self->read_index];
+ /*
+ read_end = self->read_index + intermediates_size;
+ while(self->read_index < read_end) {
+ NumberType type;
+ NumberUnion value;
+ am_memcpy(&type, &self->data.data[self->read_index], sizeof(u8));
+ am_memcpy(&value, &self->data.data[self->read_index + sizeof(u8)], sizeof(u64));
+ self->read_index += sizeof(u8) + sizeof(u64);
+ }
+ */
+ self->read_index += intermediates_size;
+
+ return AMAL_PROGRAM_OK;
+}
+
+static CHECK_RESULT int amal_program_read_strings(amal_program *self) {
+ u16 num_strings;
+ u32 strings_size;
+ u32 read_start;
+ u32 read_end;
+ u32 *string_index_ptr;
+
+ if(bytes_left_to_read(self) < sizeof(num_strings))
+ return AMAL_PROGRAM_INVALID_STRINGS;
+
+ am_memcpy(&num_strings, &self->data.data[self->read_index], sizeof(num_strings));
+ self->read_index += sizeof(num_strings);
+
+ if(buffer_append_empty(&self->string_indices, num_strings) != 0)
+ return AMAL_PROGRAM_STRING_ALLOC_FAILURE;
+ string_index_ptr = (u32*)self->string_indices.data;
+
+ if(bytes_left_to_read(self) < sizeof(strings_size))
+ return AMAL_PROGRAM_INVALID_STRINGS;
+
+ am_memcpy(&strings_size, &self->data.data[self->read_index], sizeof(strings_size));
+ self->read_index += sizeof(strings_size);
+
+ if(bytes_left_to_read(self) < strings_size)
+ return AMAL_PROGRAM_INVALID_STRINGS_SIZE;
+
+ read_start = self->read_index;
+ read_end = read_start + strings_size;
+ self->strings_start = &self->data.data[self->read_index];
+ while(self->read_index < read_end) {
+ u16 string_size;
+
+ if(bytes_left_to_read(self) < sizeof(string_size))
+ return AMAL_PROGRAM_INVALID_STRINGS;
+
+ *string_index_ptr = self->read_index - read_start;
+ ++string_index_ptr;
+ am_memcpy(&string_size, &self->data.data[self->read_index], sizeof(string_size));
+ self->read_index += sizeof(string_size);
+
+ if(bytes_left_to_read(self) < string_size)
+ return AMAL_PROGRAM_INVALID_STRINGS;
+
+ self->read_index += string_size;
+ }
+
+ assert(self->read_index == read_end);
+ return AMAL_PROGRAM_OK;
+}
+
+static CHECK_RESULT int amal_program_read_instructions(amal_program *self) {
+ u32 instructions_size;
+ u32 read_end;
+
+ if(bytes_left_to_read(self) < sizeof(instructions_size))
+ return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE;
+
+ am_memcpy(&instructions_size, &self->data.data[self->read_index], sizeof(instructions_size));
+ self->read_index += sizeof(instructions_size);
+
+ if(bytes_left_to_read(self) < instructions_size)
+ return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE;
+
+ read_end = self->read_index + instructions_size;
+ while(self->read_index < read_end) {
+ AmalOpcode opcode;
+ opcode = self->data.data[self->read_index];
+ self->read_index += sizeof(AmalOpcodeType);
+ switch(opcode) {
+ case AMAL_OP_NOP:
+ break;
+ case AMAL_OP_SETZ:
+ self->read_index += 1;
+ break;
+ case AMAL_OP_MOV:
+ self->read_index += 2;
+ break;
+ case AMAL_OP_MOVI:
+ self->read_index += 3;
+ break;
+ case AMAL_OP_MOVD:
+ self->read_index += 3;
+ break;
+ case AMAL_OP_ADD:
+ self->read_index += 3;
+ break;
+ case AMAL_OP_SUB:
+ self->read_index += 3;
+ break;
+ case AMAL_OP_MUL:
+ self->read_index += 3;
+ break;
+ case AMAL_OP_DIV:
+ self->read_index += 3;
+ break;
+ case AMAL_OP_PUSH:
+ self->read_index += 1;
+ break;
+ case AMAL_OP_PUSHI:
+ self->read_index += 2;
+ break;
+ case AMAL_OP_PUSHD:
+ self->read_index += 2;
+ break;
+ case AMAL_OP_CALL:
+ self->read_index += 2;
+ break;
+ case AMAL_OP_CALLR:
+ self->read_index += 1;
+ break;
+ case AMAL_OP_CMP:
+ self->read_index += 3;
+ break;
+ case AMAL_OP_JZ:
+ self->read_index += 3;
+ break;
+ case AMAL_OP_JMP:
+ self->read_index += 2;
+ break;
+ case AMAL_OP_RET:
+ break;
+ case AMAL_OP_FUNC_START:
+ break;
+ case AMAL_OP_FUNC_END:
+ break;
+ }
+ }
+
+ return AMAL_PROGRAM_OK;
+}
+
int amal_program_run(amal_program *self) {
- /* TODO: Implement */
- (void)self;
- return 0;
+ return_if_error(amal_program_read_header(self));
+ while(bytes_left_to_read(self) > 0) {
+ return_if_error(amal_program_read_intermediates(self));
+ return_if_error(amal_program_read_strings(self));
+ return_if_error(amal_program_read_instructions(self));
+ }
+ return AMAL_PROGRAM_OK;
}
int amal_program_save(amal_program *self, const char *filepath) {
diff --git a/tests/main.c b/tests/main.c
index 9b1e40a..1394860 100644
--- a/tests/main.c
+++ b/tests/main.c
@@ -142,7 +142,10 @@ static void test_load(const char *filepath) {
++num_tests_run;
full_path = get_full_path(filepath);
- amal_program_init(&program);
+ if(amal_program_init(&program) != 0) {
+ fprintf(stderr, "Failed to initialize amal program\n");
+ FAIL_TEST(full_path);
+ }
result = amal_compiler_load_file(&options, &program, filepath);
if(result != AMAL_COMPILER_OK) {
fprintf(stderr, "Failed to load file %s, result: %d\n", full_path, result);
@@ -165,7 +168,6 @@ static void test_load_error(const char *filepath, const char *expected_error) {
amal_compiler_options options;
amal_program program;
ErrorExpectedData expected_data;
- int result;
amal_compiler_options_init(&options);
options.num_threads = num_threads;
@@ -177,9 +179,11 @@ static void test_load_error(const char *filepath, const char *expected_error) {
expected_data.got_expected_error = bool_false;
options.error_callback_userdata = &expected_data;
- amal_program_init(&program);
- result = amal_compiler_load_file(&options, &program, filepath);
- if(result == AMAL_COMPILER_OK) {
+ if(amal_program_init(&program) != 0) {
+ fprintf(stderr, "Failed to initialize amal program\n");
+ FAIL_TEST(expected_data.filepath);
+ }
+ if(amal_compiler_load_file(&options, &program, filepath) == AMAL_COMPILER_OK) {
fprintf(stderr, "Expected to fail loading file\n");
FAIL_TEST(expected_data.filepath);
}