Setup structure for program execute

author: dec05eba <dec05eba@protonmail.com> 2019-07-18 03:10:03 +0200
committer: dec05eba <dec05eba@protonmail.com> 2020-07-25 14:36:46 +0200
commit: ec1a48e7b86fcd00127dd5a88d56c42083af1d78 (patch)
tree: 7e6423f20fa795bbba8cbc5ead8bc8a3f7289d2e
parent: 84e65c63e7482590d535e86f7660a00ae8a0cecb (diff)
8 files changed, 315 insertions, 47 deletions
diff --git a/doc/Documentation.md b/doc/Documentation.md
index 34d1b41..5564b05 100644
--- a/doc/Documentation.md
+++ b/doc/Documentation.md
@@ -11,7 +11,10 @@ Instructions can be in 6 different formats:
 4.3 Opcode + index\
 4.4 Opcode + offset
 5. 4 bytes: Opcode + register + register + register
-6. 4 bytes: Opcode + register + offset
+6. 4 bytes:\
+6.1 Opcode + register + offset\
+6.2 Opcode + register + intermediate\
+6.3 Opcode + register + data
 
 # Compiler flow
 (Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\
diff --git a/include/ast.h b/include/ast.h
index 2925f6c..f056db7 100644
--- a/include/ast.h
+++ b/include/ast.h
@@ -142,9 +142,12 @@ typedef struct {
 } VariableType;
 
 /*
-    Note: When resolving AST, more than one thread can end up resolving the same expressions at the same time.
+    Note: When resolving AST, multiple threads can end up resolving the same expressions at the same time.
     This is intentional. Instead of using mutex for every expression and locking/unlocking everytime
     which uses more memory and affects performance, we assume such race conditions are rare and let them happen.
+    TODO: Investigate possible deadlock when multiple threads resolve the same expression and that
+    expression has a recursive dependency and the threads execute @ast_resolve at the same speed,
+    leading to @ast_resolve running again for the same expression.
 */
 struct LhsExpr {
     bool is_extern;
@@ -210,7 +213,7 @@ typedef struct {
     Parser *parser; /* Borrowed. This is the parser that belongs to the thread */
     /*
         Borrowed. This is the current scope. Note that this scope can belong to another parser (and thread),
-        as such, @parser and scope_get_parser(@scope) parser may not be the same
+        as such, @parser and scope_get_parser(@scope) parser may not be the same.
     */
     Scope *scope;
 } AstCompilerContext;
diff --git a/include/bytecode/bytecode.h b/include/bytecode/bytecode.h
index 739aa79..b571383 100644
--- a/include/bytecode/bytecode.h
+++ b/include/bytecode/bytecode.h
@@ -21,7 +21,10 @@
     4.3 Opcode + index\
     4.4 Opcode + offset
     5. 4 bytes: Opcode + register + register + register
-    6. 4 bytes: Opcode + register + offset
+    6. 4 bytes:\
+    6.1 Opcode + register + offset\
+    6.2 Opcode + register + intermediate\
+    6.3 Opcode + register + data
 */
 typedef enum {
     AMAL_OP_NOP,         /* No operation. This can be used for patching */
diff --git a/include/program.h b/include/program.h
index cbd9432..f251fbc 100644
--- a/include/program.h
+++ b/include/program.h
@@ -4,11 +4,31 @@
 #include "std/buffer.h"
 #include "bytecode/bytecode.h"
 
+#define AMAL_PROGRAM_OK 0
+#define AMAL_PROGRAM_INVALID_HEADER -1
+#define AMAL_PROGRAM_INVALID_MAGIC_NUMBER -2
+#define AMAL_PROGRAM_INCOMPATIBLE -3
+#define AMAL_PROGRAM_INVALID_INTERMEDIATES -4
+#define AMAL_PROGRAM_INVALID_INTERMEDIATES_SIZE -5
+#define AMAL_PROGRAM_INVALID_STRINGS -6
+#define AMAL_PROGRAM_INVALID_STRINGS_SIZE -7
+#define AMAL_PROGRAM_STRING_ALLOC_FAILURE -8
+#define AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE -9
+
+#define AMAL_PROGRAM_MAGIC_NUMBER (u32)0xdec05eba
+#define AMAL_PROGRAM_MAJOR_VERSION 1
+#define AMAL_PROGRAM_MINOR_VERSION 0
+#define AMAL_PROGRAM_PATCH_VERSION 0
+
 typedef struct {
     Buffer/*<...>*/ data;
+    Buffer/*<u32>*/ string_indices;
+    char *intermediates_start;
+    char *strings_start;
+    usize read_index;
 } amal_program;
 
-void amal_program_init(amal_program *self);
+CHECK_RESULT int amal_program_init(amal_program *self);
 void amal_program_deinit(amal_program *self);
 
 CHECK_RESULT int amal_program_append_bytecode(amal_program *self, Bytecode *bytecode);
diff --git a/src/bytecode/bytecode.c b/src/bytecode/bytecode.c
index 7c4fe08..9bf5f24 100644
--- a/src/bytecode/bytecode.c
+++ b/src/bytecode/bytecode.c
@@ -58,47 +58,24 @@ static CHECK_RESULT usize ssa_extract_jump(u8 *instruction_data, SsaInsJump *res
     return sizeof(result->jump_offset);
 }
 
-static void add_header(BytecodeCompilerContext *self) {
-    /*doc(Bytecode header)
-        # Header layout
-        |Size|Name         |Description                                                                 |
-        |----|-------------|----------------------------------------------------------------------------|
-        |4   |Magic number |The magic number used to identify an amalgam bytecode file.                 |
-        |1   |Major version|The major version of the bytecode. Updates in this is a breaking change.    |
-        |1   |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
-        |1   |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|
-        The versions in the header only changes for every release, not every change.
-    */
-
-    const u32 magic_number = 0xdec05eba;
-    const u8 major_version = 1;
-    const u8 minor_version = 0;
-    const u8 patch_version = 0;
-
-    Buffer *instructions;
-    instructions = &self->bytecode.data;
-    throw_if_error(buffer_append(instructions, &magic_number, 4));
-    throw_if_error(buffer_append(instructions, &major_version, 1));
-    throw_if_error(buffer_append(instructions, &minor_version, 1));
-    throw_if_error(buffer_append(instructions, &patch_version, 1));
-}
-
 static void add_intermediates(BytecodeCompilerContext *self) {
     Ssa *ssa;
     Buffer *instructions;
     SsaNumber *intermediate;
     SsaNumber *intermediates_end;
+    u32 intemediates_size;
 
     ssa = self->parser->ssa;
     instructions = &self->bytecode.data;
     intermediate = buffer_begin(&ssa->intermediates);
     intermediates_end = buffer_end(&ssa->intermediates);
 
-    throw_if_error(buffer_expand(instructions,
-                                 sizeof(u16) + (sizeof(u8) + sizeof(u64)) * ssa->intermediates.size));
-    throw_if_error(buffer_append(instructions, &ssa->intermediates.size, sizeof(u16)));
+    intemediates_size = (sizeof(u8) + sizeof(u64)) * buffer_get_size(&ssa->intermediates, SsaNumber);
+    throw_if_error(buffer_expand(instructions, sizeof(u32) + intemediates_size));
+    throw_if_error(buffer_append(instructions, &intemediates_size, sizeof(u32)));
     for(; intermediate != intermediates_end; ++intermediate) {
         throw_if_error(buffer_append(instructions, &intermediate->type, sizeof(u8)));
+        /* TODO: Store value using an encoding that will save space when using low numbers */
         throw_if_error(buffer_append(instructions, &intermediate->value.integer, sizeof(u64)));
     }
 }
@@ -108,12 +85,18 @@ void add_strings(BytecodeCompilerContext *self) {
     Buffer *instructions;
     BufferView *string;
     BufferView *strings_end;
+    u16 num_strings;
     u32 strings_size;
 
     ssa = self->parser->ssa;
     instructions = &self->bytecode.data;
     string = buffer_begin(&ssa->strings);
     strings_end = buffer_end(&ssa->strings);
+    if(strings_end - string > UINT16_MAX) {
+        amal_log_error("Too many strings in the program");
+        throw(-1);
+    }
+    num_strings = strings_end - string;
     strings_size = 0;
 
     for(; string != strings_end; ++string) {
@@ -121,7 +104,8 @@ void add_strings(BytecodeCompilerContext *self) {
     }
     string = buffer_begin(&ssa->strings);
 
-    throw_if_error(buffer_expand(instructions, sizeof(u32) + strings_size));
+    throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + strings_size));
+    throw_if_error(buffer_append(instructions, &num_strings, sizeof(u16)));
     throw_if_error(buffer_append(instructions, &strings_size, sizeof(u32)));
     for(; string != strings_end; ++string) {
         throw_if_error(buffer_append(instructions, &string->size, sizeof(u16)));
@@ -383,17 +367,19 @@ static void add_instructions(BytecodeCompilerContext *self) {
     }
     #else
 
+    /* TODO: Keep all registers under 256 */
+
     while(instruction != instructions_end) {
         SsaInstruction ins = (SsaInstruction)*instruction++;
         switch(ins) {
             case SSA_ASSIGN_INTER: {
                 instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
-                add_ins3(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d");
+                add_ins6(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d");
                 break;
             }
             case SSA_ASSIGN_STRING: {
                 instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
-                add_ins3(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d");
+                add_ins6(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d");
                 break;
             }
             case SSA_ASSIGN_REG: {
@@ -479,7 +465,7 @@ static void add_instructions(BytecodeCompilerContext *self) {
     /* Prepend instructions with its size */
     {
         u32 instructions_size;
-        instructions_size = self->bytecode.data.size - num_instructions_index;
+        instructions_size = self->bytecode.data.size - num_instructions_index - sizeof(instructions_size); /* Remove the count itself from the size of the instructions size */
         am_memcpy(&self->bytecode.data.data[num_instructions_index], &instructions_size, sizeof(instructions_size));
     }
 
@@ -488,7 +474,6 @@ static void add_instructions(BytecodeCompilerContext *self) {
 }
 
 void generate_bytecode_from_ssa(BytecodeCompilerContext *self) {
-    add_header(self);
     add_intermediates(self);
     add_strings(self);
     /* TODO: Also add strings in ssa, so we can index them */
diff --git a/src/compiler.c b/src/compiler.c
index 1af9537..05fae78 100644
--- a/src/compiler.c
+++ b/src/compiler.c
@@ -446,6 +446,7 @@ static CHECK_RESULT int amal_compiler_dispatch_generic(amal_compiler *self, Thre
 }
 
 static CHECK_RESULT int amal_compiler_generate_program(amal_compiler *self) {
+    /* TODO: Copying the bytecode to the program can be done using multiple threads */
     Parser **parser;
     Parser **parser_end;
     parser = buffer_begin(&self->parsers);
diff --git a/src/program.c b/src/program.c
index aa39a4c..167f4c4 100644
--- a/src/program.c
+++ b/src/program.c
@@ -1,23 +1,272 @@
 #include "../include/program.h"
+#include "../include/std/mem.h"
+#include "../include/std/alloc.h"
+#include "../include/std/log.h"
 #include <stdio.h>
 #include <errno.h>
+#include <assert.h>
 
-void amal_program_init(amal_program *self) {
+/* TODO: If system is big-endian, then do endian conversion for all reads */
+
+/* This matches SsaNumberType */
+typedef enum {
+    NUMBER_TYPE_INTEGER,
+    NUMBER_TYPE_FLOAT
+} NumberType;
+
+typedef union {
+    i64 integer;
+    f64 floating;
+} NumberUnion;
+
+int amal_program_init(amal_program *self) {
     ignore_result_int(buffer_init(&self->data, NULL));
+    ignore_result_int(buffer_init(&self->string_indices, NULL));
+    self->intermediates_start = NULL;
+    self->strings_start = NULL;
+    self->read_index = 0;
+
+    /*doc(Bytecode header)
+        # Header layout
+        |Size|Name         |Description                                                                 |
+        |----|-------------|----------------------------------------------------------------------------|
+        |4   |Magic number |The magic number used to identify an amalgam bytecode file.                 |
+        |1   |Major version|The major version of the bytecode. Updates in this is a breaking change.    |
+        |1   |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
+        |1   |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|
+        The versions in the header only changes for every release, not every change.
+    */
+
+    const u32 magic_number = AMAL_PROGRAM_MAGIC_NUMBER;
+    const u8 major_version = AMAL_PROGRAM_MAJOR_VERSION;
+    const u8 minor_version = AMAL_PROGRAM_MINOR_VERSION;
+    const u8 patch_version = AMAL_PROGRAM_PATCH_VERSION;
+
+    return_if_error(buffer_append(&self->data, &magic_number, 4));
+    return_if_error(buffer_append(&self->data, &major_version, 1));
+    return_if_error(buffer_append(&self->data, &minor_version, 1));
+    return_if_error(buffer_append(&self->data, &patch_version, 1));
+
+    return 0;
 }
 
 void amal_program_deinit(amal_program *self) {
     buffer_deinit(&self->data);
+    buffer_deinit(&self->string_indices);
 }
 
 int amal_program_append_bytecode(amal_program *self, Bytecode *bytecode) {
     return buffer_append(&self->data, bytecode->data.data, bytecode->data.size);
 }
 
+static usize bytes_left_to_read(amal_program *self) {
+    assert(self->read_index <= self->data.size);
+    return self->data.size - self->read_index;
+}
+
+static CHECK_RESULT int amal_program_read_header(amal_program *self) {
+    u32 magic_number;
+    u8 major_version;
+    u8 minor_version;
+    u8 patch_version;
+
+    if(bytes_left_to_read(self) < sizeof(u32) + sizeof(u8) * 3)
+        return AMAL_PROGRAM_INVALID_HEADER;
+
+    am_memcpy(&magic_number, &self->data.data[self->read_index], sizeof(magic_number));
+    self->read_index += sizeof(u32);
+    am_memcpy(&major_version, &self->data.data[self->read_index], sizeof(major_version));
+    self->read_index += sizeof(u8);
+    am_memcpy(&minor_version, &self->data.data[self->read_index], sizeof(minor_version));
+    self->read_index += sizeof(u8);
+    am_memcpy(&patch_version, &self->data.data[self->read_index], sizeof(patch_version));
+    self->read_index += sizeof(u8);
+
+    if(magic_number != AMAL_PROGRAM_MAGIC_NUMBER)
+        return AMAL_PROGRAM_INVALID_MAGIC_NUMBER;
+
+    /*
+        A program is only incompatible if the major version is newer than the version that is used to run it.
+        TODO: Implement backwards compatible reads, starting from when the program bytecode breaks backwards compatibility
+    */
+    if(major_version > AMAL_PROGRAM_MAJOR_VERSION)
+        return AMAL_PROGRAM_INCOMPATIBLE;
+
+    return AMAL_PROGRAM_OK;
+}
+
+static CHECK_RESULT int amal_program_read_intermediates(amal_program *self) {
+    u32 intermediates_size;
+    /*u32 read_end;*/
+
+    if(bytes_left_to_read(self) < sizeof(intermediates_size)) {
+        amal_log_error("Not enough space in program to intermediates size");
+        return AMAL_PROGRAM_INVALID_INTERMEDIATES;
+    }
+
+    am_memcpy(&intermediates_size, &self->data.data[self->read_index], sizeof(intermediates_size));
+    self->read_index += sizeof(intermediates_size);
+
+    if(bytes_left_to_read(self) < intermediates_size) {
+        amal_log_error("Not enough space in program to read all intermediates");
+        return AMAL_PROGRAM_INVALID_INTERMEDIATES_SIZE;
+    }
+
+    self->intermediates_start = &self->data.data[self->read_index];
+    /*
+    read_end = self->read_index + intermediates_size;
+    while(self->read_index < read_end) {
+        NumberType type;
+        NumberUnion value;
+        am_memcpy(&type, &self->data.data[self->read_index], sizeof(u8));
+        am_memcpy(&value, &self->data.data[self->read_index + sizeof(u8)], sizeof(u64));
+        self->read_index += sizeof(u8) + sizeof(u64);
+    }
+    */
+    self->read_index += intermediates_size;
+
+    return AMAL_PROGRAM_OK;
+}
+
+static CHECK_RESULT int amal_program_read_strings(amal_program *self) {
+    u16 num_strings;
+    u32 strings_size;
+    u32 read_start;
+    u32 read_end;
+    u32 *string_index_ptr;
+
+    if(bytes_left_to_read(self) < sizeof(num_strings))
+        return AMAL_PROGRAM_INVALID_STRINGS;
+
+    am_memcpy(&num_strings, &self->data.data[self->read_index], sizeof(num_strings));
+    self->read_index += sizeof(num_strings);
+
+    if(buffer_append_empty(&self->string_indices, num_strings) != 0)
+        return AMAL_PROGRAM_STRING_ALLOC_FAILURE;
+    string_index_ptr = (u32*)self->string_indices.data;
+
+    if(bytes_left_to_read(self) < sizeof(strings_size))
+        return AMAL_PROGRAM_INVALID_STRINGS;
+
+    am_memcpy(&strings_size, &self->data.data[self->read_index], sizeof(strings_size));
+    self->read_index += sizeof(strings_size);
+
+    if(bytes_left_to_read(self) < strings_size)
+        return AMAL_PROGRAM_INVALID_STRINGS_SIZE;
+
+    read_start = self->read_index;
+    read_end = read_start + strings_size;
+    self->strings_start = &self->data.data[self->read_index];
+    while(self->read_index < read_end) {
+        u16 string_size;
+
+        if(bytes_left_to_read(self) < sizeof(string_size))
+            return AMAL_PROGRAM_INVALID_STRINGS;
+
+        *string_index_ptr = self->read_index - read_start;
+        ++string_index_ptr;
+        am_memcpy(&string_size, &self->data.data[self->read_index], sizeof(string_size));
+        self->read_index += sizeof(string_size);
+
+        if(bytes_left_to_read(self) < string_size)
+            return AMAL_PROGRAM_INVALID_STRINGS;
+
+        self->read_index += string_size;
+    }
+
+    assert(self->read_index == read_end);
+    return AMAL_PROGRAM_OK;
+}
+
+static CHECK_RESULT int amal_program_read_instructions(amal_program *self) {
+    u32 instructions_size;
+    u32 read_end;
+
+    if(bytes_left_to_read(self) < sizeof(instructions_size))
+        return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE;
+
+    am_memcpy(&instructions_size, &self->data.data[self->read_index], sizeof(instructions_size));
+    self->read_index += sizeof(instructions_size);
+
+    if(bytes_left_to_read(self) < instructions_size)
+        return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE;
+
+    read_end = self->read_index + instructions_size;
+    while(self->read_index < read_end) {
+        AmalOpcode opcode;
+        opcode = self->data.data[self->read_index];
+        self->read_index += sizeof(AmalOpcodeType);
+        switch(opcode) {
+            case AMAL_OP_NOP:
+                break;
+            case AMAL_OP_SETZ:
+                self->read_index += 1;
+                break;
+            case AMAL_OP_MOV:
+                self->read_index += 2;
+                break;
+            case AMAL_OP_MOVI:
+                self->read_index += 3;
+                break;
+            case AMAL_OP_MOVD:
+                self->read_index += 3;
+                break;
+            case AMAL_OP_ADD:
+                self->read_index += 3;
+                break;
+            case AMAL_OP_SUB:
+                self->read_index += 3;
+                break;
+            case AMAL_OP_MUL:
+                self->read_index += 3;
+                break;
+            case AMAL_OP_DIV:
+                self->read_index += 3;
+                break;
+            case AMAL_OP_PUSH:
+                self->read_index += 1;
+                break;
+            case AMAL_OP_PUSHI:
+                self->read_index += 2;
+                break;
+            case AMAL_OP_PUSHD:
+                self->read_index += 2;
+                break;
+            case AMAL_OP_CALL:
+                self->read_index += 2;
+                break;
+            case AMAL_OP_CALLR:
+                self->read_index += 1;
+                break;
+            case AMAL_OP_CMP:
+                self->read_index += 3;
+                break;
+            case AMAL_OP_JZ:
+                self->read_index += 3;
+                break;
+            case AMAL_OP_JMP:
+                self->read_index += 2;
+                break;
+            case AMAL_OP_RET:
+                break;
+            case AMAL_OP_FUNC_START:
+                break;
+            case AMAL_OP_FUNC_END:
+                break;
+        }
+    }
+
+    return AMAL_PROGRAM_OK;
+}
+
 int amal_program_run(amal_program *self) {
-    /* TODO: Implement */
-    (void)self;
-    return 0;
+    return_if_error(amal_program_read_header(self));
+    while(bytes_left_to_read(self) > 0) {
+        return_if_error(amal_program_read_intermediates(self));
+        return_if_error(amal_program_read_strings(self));
+        return_if_error(amal_program_read_instructions(self));
+    }
+    return AMAL_PROGRAM_OK;
 }
 
 int amal_program_save(amal_program *self, const char *filepath) {
diff --git a/tests/main.c b/tests/main.c
index 9b1e40a..1394860 100644
--- a/tests/main.c
+++ b/tests/main.c
@@ -142,7 +142,10 @@ static void test_load(const char *filepath) {
     ++num_tests_run;
     full_path = get_full_path(filepath);
 
-    amal_program_init(&program);
+    if(amal_program_init(&program) != 0) {
+        fprintf(stderr, "Failed to initialize amal program\n");
+        FAIL_TEST(full_path);
+    }
     result = amal_compiler_load_file(&options, &program, filepath);
     if(result != AMAL_COMPILER_OK) {
         fprintf(stderr, "Failed to load file %s, result: %d\n", full_path, result);
@@ -165,7 +168,6 @@ static void test_load_error(const char *filepath, const char *expected_error) {
     amal_compiler_options options;
     amal_program program;
     ErrorExpectedData expected_data;
-    int result;
 
     amal_compiler_options_init(&options);
     options.num_threads = num_threads;
@@ -177,9 +179,11 @@ static void test_load_error(const char *filepath, const char *expected_error) {
     expected_data.got_expected_error = bool_false;
     options.error_callback_userdata = &expected_data;
 
-    amal_program_init(&program);
-    result = amal_compiler_load_file(&options, &program, filepath);
-    if(result == AMAL_COMPILER_OK) {
+    if(amal_program_init(&program) != 0) {
+        fprintf(stderr, "Failed to initialize amal program\n");
+        FAIL_TEST(expected_data.filepath);
+    }
+    if(amal_compiler_load_file(&options, &program, filepath) == AMAL_COMPILER_OK) {
         fprintf(stderr, "Expected to fail loading file\n");
         FAIL_TEST(expected_data.filepath);
     }
author	dec05eba <dec05eba@protonmail.com>	2019-07-18 03:10:03 +0200
committer	dec05eba <dec05eba@protonmail.com>	2020-07-25 14:36:46 +0200
commit	ec1a48e7b86fcd00127dd5a88d56c42083af1d78 (patch)
tree	7e6423f20fa795bbba8cbc5ead8bc8a3f7289d2e
parent	84e65c63e7482590d535e86f7660a00ae8a0cecb (diff)