From ec1a48e7b86fcd00127dd5a88d56c42083af1d78 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Thu, 18 Jul 2019 03:10:03 +0200 Subject: Setup structure for program execute --- doc/Documentation.md | 5 +- include/ast.h | 7 +- include/bytecode/bytecode.h | 5 +- include/program.h | 22 +++- src/bytecode/bytecode.c | 51 ++++----- src/compiler.c | 1 + src/program.c | 257 +++++++++++++++++++++++++++++++++++++++++++- tests/main.c | 14 ++- 8 files changed, 315 insertions(+), 47 deletions(-) diff --git a/doc/Documentation.md b/doc/Documentation.md index 34d1b41..5564b05 100644 --- a/doc/Documentation.md +++ b/doc/Documentation.md @@ -11,7 +11,10 @@ Instructions can be in 6 different formats: 4.3 Opcode + index\ 4.4 Opcode + offset 5. 4 bytes: Opcode + register + register + register -6. 4 bytes: Opcode + register + offset +6. 4 bytes:\ +6.1 Opcode + register + offset\ +6.2 Opcode + register + intermediate\ +6.3 Opcode + register + data # Compiler flow (Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\ diff --git a/include/ast.h b/include/ast.h index 2925f6c..f056db7 100644 --- a/include/ast.h +++ b/include/ast.h @@ -142,9 +142,12 @@ typedef struct { } VariableType; /* - Note: When resolving AST, more than one thread can end up resolving the same expressions at the same time. + Note: When resolving AST, multiple threads can end up resolving the same expressions at the same time. This is intentional. Instead of using mutex for every expression and locking/unlocking everytime which uses more memory and affects performance, we assume such race conditions are rare and let them happen. + TODO: Investigate possible deadlock when multiple threads resolve the same expression and that + expression has a recursive dependency and the threads execute @ast_resolve at the same speed, + leading to @ast_resolve running again for the same expression. */ struct LhsExpr { bool is_extern; @@ -210,7 +213,7 @@ typedef struct { Parser *parser; /* Borrowed. This is the parser that belongs to the thread */ /* Borrowed. This is the current scope. Note that this scope can belong to another parser (and thread), - as such, @parser and scope_get_parser(@scope) parser may not be the same + as such, @parser and scope_get_parser(@scope) parser may not be the same. */ Scope *scope; } AstCompilerContext; diff --git a/include/bytecode/bytecode.h b/include/bytecode/bytecode.h index 739aa79..b571383 100644 --- a/include/bytecode/bytecode.h +++ b/include/bytecode/bytecode.h @@ -21,7 +21,10 @@ 4.3 Opcode + index\ 4.4 Opcode + offset 5. 4 bytes: Opcode + register + register + register - 6. 4 bytes: Opcode + register + offset + 6. 4 bytes:\ + 6.1 Opcode + register + offset\ + 6.2 Opcode + register + intermediate\ + 6.3 Opcode + register + data */ typedef enum { AMAL_OP_NOP, /* No operation. This can be used for patching */ diff --git a/include/program.h b/include/program.h index cbd9432..f251fbc 100644 --- a/include/program.h +++ b/include/program.h @@ -4,11 +4,31 @@ #include "std/buffer.h" #include "bytecode/bytecode.h" +#define AMAL_PROGRAM_OK 0 +#define AMAL_PROGRAM_INVALID_HEADER -1 +#define AMAL_PROGRAM_INVALID_MAGIC_NUMBER -2 +#define AMAL_PROGRAM_INCOMPATIBLE -3 +#define AMAL_PROGRAM_INVALID_INTERMEDIATES -4 +#define AMAL_PROGRAM_INVALID_INTERMEDIATES_SIZE -5 +#define AMAL_PROGRAM_INVALID_STRINGS -6 +#define AMAL_PROGRAM_INVALID_STRINGS_SIZE -7 +#define AMAL_PROGRAM_STRING_ALLOC_FAILURE -8 +#define AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE -9 + +#define AMAL_PROGRAM_MAGIC_NUMBER (u32)0xdec05eba +#define AMAL_PROGRAM_MAJOR_VERSION 1 +#define AMAL_PROGRAM_MINOR_VERSION 0 +#define AMAL_PROGRAM_PATCH_VERSION 0 + typedef struct { Buffer/*<...>*/ data; + Buffer/**/ string_indices; + char *intermediates_start; + char *strings_start; + usize read_index; } amal_program; -void amal_program_init(amal_program *self); +CHECK_RESULT int amal_program_init(amal_program *self); void amal_program_deinit(amal_program *self); CHECK_RESULT int amal_program_append_bytecode(amal_program *self, Bytecode *bytecode); diff --git a/src/bytecode/bytecode.c b/src/bytecode/bytecode.c index 7c4fe08..9bf5f24 100644 --- a/src/bytecode/bytecode.c +++ b/src/bytecode/bytecode.c @@ -58,47 +58,24 @@ static CHECK_RESULT usize ssa_extract_jump(u8 *instruction_data, SsaInsJump *res return sizeof(result->jump_offset); } -static void add_header(BytecodeCompilerContext *self) { - /*doc(Bytecode header) - # Header layout - |Size|Name |Description | - |----|-------------|----------------------------------------------------------------------------| - |4 |Magic number |The magic number used to identify an amalgam bytecode file. | - |1 |Major version|The major version of the bytecode. Updates in this is a breaking change. | - |1 |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.| - |1 |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.| - The versions in the header only changes for every release, not every change. - */ - - const u32 magic_number = 0xdec05eba; - const u8 major_version = 1; - const u8 minor_version = 0; - const u8 patch_version = 0; - - Buffer *instructions; - instructions = &self->bytecode.data; - throw_if_error(buffer_append(instructions, &magic_number, 4)); - throw_if_error(buffer_append(instructions, &major_version, 1)); - throw_if_error(buffer_append(instructions, &minor_version, 1)); - throw_if_error(buffer_append(instructions, &patch_version, 1)); -} - static void add_intermediates(BytecodeCompilerContext *self) { Ssa *ssa; Buffer *instructions; SsaNumber *intermediate; SsaNumber *intermediates_end; + u32 intemediates_size; ssa = self->parser->ssa; instructions = &self->bytecode.data; intermediate = buffer_begin(&ssa->intermediates); intermediates_end = buffer_end(&ssa->intermediates); - throw_if_error(buffer_expand(instructions, - sizeof(u16) + (sizeof(u8) + sizeof(u64)) * ssa->intermediates.size)); - throw_if_error(buffer_append(instructions, &ssa->intermediates.size, sizeof(u16))); + intemediates_size = (sizeof(u8) + sizeof(u64)) * buffer_get_size(&ssa->intermediates, SsaNumber); + throw_if_error(buffer_expand(instructions, sizeof(u32) + intemediates_size)); + throw_if_error(buffer_append(instructions, &intemediates_size, sizeof(u32))); for(; intermediate != intermediates_end; ++intermediate) { throw_if_error(buffer_append(instructions, &intermediate->type, sizeof(u8))); + /* TODO: Store value using an encoding that will save space when using low numbers */ throw_if_error(buffer_append(instructions, &intermediate->value.integer, sizeof(u64))); } } @@ -108,12 +85,18 @@ void add_strings(BytecodeCompilerContext *self) { Buffer *instructions; BufferView *string; BufferView *strings_end; + u16 num_strings; u32 strings_size; ssa = self->parser->ssa; instructions = &self->bytecode.data; string = buffer_begin(&ssa->strings); strings_end = buffer_end(&ssa->strings); + if(strings_end - string > UINT16_MAX) { + amal_log_error("Too many strings in the program"); + throw(-1); + } + num_strings = strings_end - string; strings_size = 0; for(; string != strings_end; ++string) { @@ -121,7 +104,8 @@ void add_strings(BytecodeCompilerContext *self) { } string = buffer_begin(&ssa->strings); - throw_if_error(buffer_expand(instructions, sizeof(u32) + strings_size)); + throw_if_error(buffer_expand(instructions, sizeof(u16) + sizeof(u32) + strings_size)); + throw_if_error(buffer_append(instructions, &num_strings, sizeof(u16))); throw_if_error(buffer_append(instructions, &strings_size, sizeof(u32))); for(; string != strings_end; ++string) { throw_if_error(buffer_append(instructions, &string->size, sizeof(u16))); @@ -383,17 +367,19 @@ static void add_instructions(BytecodeCompilerContext *self) { } #else + /* TODO: Keep all registers under 256 */ + while(instruction != instructions_end) { SsaInstruction ins = (SsaInstruction)*instruction++; switch(ins) { case SSA_ASSIGN_INTER: { instruction += ssa_extract_form1(instruction, &ssa_ins_form1); - add_ins3(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d"); + add_ins6(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d"); break; } case SSA_ASSIGN_STRING: { instruction += ssa_extract_form1(instruction, &ssa_ins_form1); - add_ins3(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d"); + add_ins6(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d"); break; } case SSA_ASSIGN_REG: { @@ -479,7 +465,7 @@ static void add_instructions(BytecodeCompilerContext *self) { /* Prepend instructions with its size */ { u32 instructions_size; - instructions_size = self->bytecode.data.size - num_instructions_index; + instructions_size = self->bytecode.data.size - num_instructions_index - sizeof(instructions_size); /* Remove the count itself from the size of the instructions size */ am_memcpy(&self->bytecode.data.data[num_instructions_index], &instructions_size, sizeof(instructions_size)); } @@ -488,7 +474,6 @@ static void add_instructions(BytecodeCompilerContext *self) { } void generate_bytecode_from_ssa(BytecodeCompilerContext *self) { - add_header(self); add_intermediates(self); add_strings(self); /* TODO: Also add strings in ssa, so we can index them */ diff --git a/src/compiler.c b/src/compiler.c index 1af9537..05fae78 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -446,6 +446,7 @@ static CHECK_RESULT int amal_compiler_dispatch_generic(amal_compiler *self, Thre } static CHECK_RESULT int amal_compiler_generate_program(amal_compiler *self) { + /* TODO: Copying the bytecode to the program can be done using multiple threads */ Parser **parser; Parser **parser_end; parser = buffer_begin(&self->parsers); diff --git a/src/program.c b/src/program.c index aa39a4c..167f4c4 100644 --- a/src/program.c +++ b/src/program.c @@ -1,23 +1,272 @@ #include "../include/program.h" +#include "../include/std/mem.h" +#include "../include/std/alloc.h" +#include "../include/std/log.h" #include #include +#include -void amal_program_init(amal_program *self) { +/* TODO: If system is big-endian, then do endian conversion for all reads */ + +/* This matches SsaNumberType */ +typedef enum { + NUMBER_TYPE_INTEGER, + NUMBER_TYPE_FLOAT +} NumberType; + +typedef union { + i64 integer; + f64 floating; +} NumberUnion; + +int amal_program_init(amal_program *self) { ignore_result_int(buffer_init(&self->data, NULL)); + ignore_result_int(buffer_init(&self->string_indices, NULL)); + self->intermediates_start = NULL; + self->strings_start = NULL; + self->read_index = 0; + + /*doc(Bytecode header) + # Header layout + |Size|Name |Description | + |----|-------------|----------------------------------------------------------------------------| + |4 |Magic number |The magic number used to identify an amalgam bytecode file. | + |1 |Major version|The major version of the bytecode. Updates in this is a breaking change. | + |1 |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.| + |1 |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.| + The versions in the header only changes for every release, not every change. + */ + + const u32 magic_number = AMAL_PROGRAM_MAGIC_NUMBER; + const u8 major_version = AMAL_PROGRAM_MAJOR_VERSION; + const u8 minor_version = AMAL_PROGRAM_MINOR_VERSION; + const u8 patch_version = AMAL_PROGRAM_PATCH_VERSION; + + return_if_error(buffer_append(&self->data, &magic_number, 4)); + return_if_error(buffer_append(&self->data, &major_version, 1)); + return_if_error(buffer_append(&self->data, &minor_version, 1)); + return_if_error(buffer_append(&self->data, &patch_version, 1)); + + return 0; } void amal_program_deinit(amal_program *self) { buffer_deinit(&self->data); + buffer_deinit(&self->string_indices); } int amal_program_append_bytecode(amal_program *self, Bytecode *bytecode) { return buffer_append(&self->data, bytecode->data.data, bytecode->data.size); } +static usize bytes_left_to_read(amal_program *self) { + assert(self->read_index <= self->data.size); + return self->data.size - self->read_index; +} + +static CHECK_RESULT int amal_program_read_header(amal_program *self) { + u32 magic_number; + u8 major_version; + u8 minor_version; + u8 patch_version; + + if(bytes_left_to_read(self) < sizeof(u32) + sizeof(u8) * 3) + return AMAL_PROGRAM_INVALID_HEADER; + + am_memcpy(&magic_number, &self->data.data[self->read_index], sizeof(magic_number)); + self->read_index += sizeof(u32); + am_memcpy(&major_version, &self->data.data[self->read_index], sizeof(major_version)); + self->read_index += sizeof(u8); + am_memcpy(&minor_version, &self->data.data[self->read_index], sizeof(minor_version)); + self->read_index += sizeof(u8); + am_memcpy(&patch_version, &self->data.data[self->read_index], sizeof(patch_version)); + self->read_index += sizeof(u8); + + if(magic_number != AMAL_PROGRAM_MAGIC_NUMBER) + return AMAL_PROGRAM_INVALID_MAGIC_NUMBER; + + /* + A program is only incompatible if the major version is newer than the version that is used to run it. + TODO: Implement backwards compatible reads, starting from when the program bytecode breaks backwards compatibility + */ + if(major_version > AMAL_PROGRAM_MAJOR_VERSION) + return AMAL_PROGRAM_INCOMPATIBLE; + + return AMAL_PROGRAM_OK; +} + +static CHECK_RESULT int amal_program_read_intermediates(amal_program *self) { + u32 intermediates_size; + /*u32 read_end;*/ + + if(bytes_left_to_read(self) < sizeof(intermediates_size)) { + amal_log_error("Not enough space in program to intermediates size"); + return AMAL_PROGRAM_INVALID_INTERMEDIATES; + } + + am_memcpy(&intermediates_size, &self->data.data[self->read_index], sizeof(intermediates_size)); + self->read_index += sizeof(intermediates_size); + + if(bytes_left_to_read(self) < intermediates_size) { + amal_log_error("Not enough space in program to read all intermediates"); + return AMAL_PROGRAM_INVALID_INTERMEDIATES_SIZE; + } + + self->intermediates_start = &self->data.data[self->read_index]; + /* + read_end = self->read_index + intermediates_size; + while(self->read_index < read_end) { + NumberType type; + NumberUnion value; + am_memcpy(&type, &self->data.data[self->read_index], sizeof(u8)); + am_memcpy(&value, &self->data.data[self->read_index + sizeof(u8)], sizeof(u64)); + self->read_index += sizeof(u8) + sizeof(u64); + } + */ + self->read_index += intermediates_size; + + return AMAL_PROGRAM_OK; +} + +static CHECK_RESULT int amal_program_read_strings(amal_program *self) { + u16 num_strings; + u32 strings_size; + u32 read_start; + u32 read_end; + u32 *string_index_ptr; + + if(bytes_left_to_read(self) < sizeof(num_strings)) + return AMAL_PROGRAM_INVALID_STRINGS; + + am_memcpy(&num_strings, &self->data.data[self->read_index], sizeof(num_strings)); + self->read_index += sizeof(num_strings); + + if(buffer_append_empty(&self->string_indices, num_strings) != 0) + return AMAL_PROGRAM_STRING_ALLOC_FAILURE; + string_index_ptr = (u32*)self->string_indices.data; + + if(bytes_left_to_read(self) < sizeof(strings_size)) + return AMAL_PROGRAM_INVALID_STRINGS; + + am_memcpy(&strings_size, &self->data.data[self->read_index], sizeof(strings_size)); + self->read_index += sizeof(strings_size); + + if(bytes_left_to_read(self) < strings_size) + return AMAL_PROGRAM_INVALID_STRINGS_SIZE; + + read_start = self->read_index; + read_end = read_start + strings_size; + self->strings_start = &self->data.data[self->read_index]; + while(self->read_index < read_end) { + u16 string_size; + + if(bytes_left_to_read(self) < sizeof(string_size)) + return AMAL_PROGRAM_INVALID_STRINGS; + + *string_index_ptr = self->read_index - read_start; + ++string_index_ptr; + am_memcpy(&string_size, &self->data.data[self->read_index], sizeof(string_size)); + self->read_index += sizeof(string_size); + + if(bytes_left_to_read(self) < string_size) + return AMAL_PROGRAM_INVALID_STRINGS; + + self->read_index += string_size; + } + + assert(self->read_index == read_end); + return AMAL_PROGRAM_OK; +} + +static CHECK_RESULT int amal_program_read_instructions(amal_program *self) { + u32 instructions_size; + u32 read_end; + + if(bytes_left_to_read(self) < sizeof(instructions_size)) + return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE; + + am_memcpy(&instructions_size, &self->data.data[self->read_index], sizeof(instructions_size)); + self->read_index += sizeof(instructions_size); + + if(bytes_left_to_read(self) < instructions_size) + return AMAL_PROGRAM_INVALID_INSTRUCTIONS_SIZE; + + read_end = self->read_index + instructions_size; + while(self->read_index < read_end) { + AmalOpcode opcode; + opcode = self->data.data[self->read_index]; + self->read_index += sizeof(AmalOpcodeType); + switch(opcode) { + case AMAL_OP_NOP: + break; + case AMAL_OP_SETZ: + self->read_index += 1; + break; + case AMAL_OP_MOV: + self->read_index += 2; + break; + case AMAL_OP_MOVI: + self->read_index += 3; + break; + case AMAL_OP_MOVD: + self->read_index += 3; + break; + case AMAL_OP_ADD: + self->read_index += 3; + break; + case AMAL_OP_SUB: + self->read_index += 3; + break; + case AMAL_OP_MUL: + self->read_index += 3; + break; + case AMAL_OP_DIV: + self->read_index += 3; + break; + case AMAL_OP_PUSH: + self->read_index += 1; + break; + case AMAL_OP_PUSHI: + self->read_index += 2; + break; + case AMAL_OP_PUSHD: + self->read_index += 2; + break; + case AMAL_OP_CALL: + self->read_index += 2; + break; + case AMAL_OP_CALLR: + self->read_index += 1; + break; + case AMAL_OP_CMP: + self->read_index += 3; + break; + case AMAL_OP_JZ: + self->read_index += 3; + break; + case AMAL_OP_JMP: + self->read_index += 2; + break; + case AMAL_OP_RET: + break; + case AMAL_OP_FUNC_START: + break; + case AMAL_OP_FUNC_END: + break; + } + } + + return AMAL_PROGRAM_OK; +} + int amal_program_run(amal_program *self) { - /* TODO: Implement */ - (void)self; - return 0; + return_if_error(amal_program_read_header(self)); + while(bytes_left_to_read(self) > 0) { + return_if_error(amal_program_read_intermediates(self)); + return_if_error(amal_program_read_strings(self)); + return_if_error(amal_program_read_instructions(self)); + } + return AMAL_PROGRAM_OK; } int amal_program_save(amal_program *self, const char *filepath) { diff --git a/tests/main.c b/tests/main.c index 9b1e40a..1394860 100644 --- a/tests/main.c +++ b/tests/main.c @@ -142,7 +142,10 @@ static void test_load(const char *filepath) { ++num_tests_run; full_path = get_full_path(filepath); - amal_program_init(&program); + if(amal_program_init(&program) != 0) { + fprintf(stderr, "Failed to initialize amal program\n"); + FAIL_TEST(full_path); + } result = amal_compiler_load_file(&options, &program, filepath); if(result != AMAL_COMPILER_OK) { fprintf(stderr, "Failed to load file %s, result: %d\n", full_path, result); @@ -165,7 +168,6 @@ static void test_load_error(const char *filepath, const char *expected_error) { amal_compiler_options options; amal_program program; ErrorExpectedData expected_data; - int result; amal_compiler_options_init(&options); options.num_threads = num_threads; @@ -177,9 +179,11 @@ static void test_load_error(const char *filepath, const char *expected_error) { expected_data.got_expected_error = bool_false; options.error_callback_userdata = &expected_data; - amal_program_init(&program); - result = amal_compiler_load_file(&options, &program, filepath); - if(result == AMAL_COMPILER_OK) { + if(amal_program_init(&program) != 0) { + fprintf(stderr, "Failed to initialize amal program\n"); + FAIL_TEST(expected_data.filepath); + } + if(amal_compiler_load_file(&options, &program, filepath) == AMAL_COMPILER_OK) { fprintf(stderr, "Expected to fail loading file\n"); FAIL_TEST(expected_data.filepath); } -- cgit v1.2.3