aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2019-07-16 00:27:53 +0200
committerdec05eba <dec05eba@protonmail.com>2020-07-25 14:36:46 +0200
commit85c654a102701958d3748e82ecac9c1bc4dbbcba (patch)
tree61a804a3af5ca43e7608f4c5dc6ea1e292fc8a35
parentabd74f22fd0c58b30f951da9cec1d1799e9b5072 (diff)
Start on real bytecode & doc parsing
-rw-r--r--README.md5
-rw-r--r--amalgam.cflags1
-rw-r--r--amalgam.config2
-rw-r--r--amalgam.creator1
-rw-r--r--amalgam.cxxflags1
-rw-r--r--amalgam.files54
-rw-r--r--amalgam.includes4
-rwxr-xr-xbuild.sh2
-rw-r--r--doc/BytecodeHeader.md8
-rw-r--r--doc/CompilerFlow.md6
-rw-r--r--doc/Opcode.md14
-rwxr-xr-xdoc/doc_extract.py76
-rw-r--r--include/bytecode/bytecode.h42
-rw-r--r--src/bytecode/bytecode.c165
-rw-r--r--src/compiler.c9
-rw-r--r--src/ssa/ssa.c12
-rw-r--r--tests/main.c3
17 files changed, 318 insertions, 87 deletions
diff --git a/README.md b/README.md
index d89a2d5..68a6dbd 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,10 @@
+# Amalgam
Amalgam is written in C89 so that it works on as many devices and with as many different compilers as possible,
which allows you to compile Amalgam with a compiler that generates smaller (static) binaries than gcc.
Amalgam is not meant to be a replacement for any other language, but rather a new, unique language for programming
the GPU without writing an external GPU program (GLSL/HLSL).
+# TODO
+Build with -nostdlib and replace use of libc with syscalls (on linux)
+# Documents
+Documents are located under doc. Some documents are generated using python3 scripts in the same directory. There is no need to run these unless you are changing the documentation in the source code. \ No newline at end of file
diff --git a/amalgam.cflags b/amalgam.cflags
deleted file mode 100644
index 188be76..0000000
--- a/amalgam.cflags
+++ /dev/null
@@ -1 +0,0 @@
--std=c89
diff --git a/amalgam.config b/amalgam.config
deleted file mode 100644
index e0284f4..0000000
--- a/amalgam.config
+++ /dev/null
@@ -1,2 +0,0 @@
-// Add predefined macros for your project here. For example:
-// #define THE_ANSWER 42
diff --git a/amalgam.creator b/amalgam.creator
deleted file mode 100644
index e94cbbd..0000000
--- a/amalgam.creator
+++ /dev/null
@@ -1 +0,0 @@
-[General]
diff --git a/amalgam.cxxflags b/amalgam.cxxflags
deleted file mode 100644
index df98e0d..0000000
--- a/amalgam.cxxflags
+++ /dev/null
@@ -1 +0,0 @@
--std=c++14
diff --git a/amalgam.files b/amalgam.files
deleted file mode 100644
index 1850f5c..0000000
--- a/amalgam.files
+++ /dev/null
@@ -1,54 +0,0 @@
-build.sh
-include/ast.h
-include/binop_type.h
-include/bytecode/bytecode.h
-include/compiler.h
-include/compiler_options.h
-include/defs.h
-include/parser.h
-include/ssa/ssa.h
-include/std/alloc.h
-include/std/buffer.h
-include/std/buffer_view.h
-include/std/defs.h
-include/std/file.h
-include/std/hash.h
-include/std/hash_map.h
-include/std/log.h
-include/std/mem.h
-include/std/misc.h
-include/std/scoped_allocator.h
-include/std/thread.h
-include/std/types.h
-include/tokenizer.h
-src/ast.c
-src/bytecode/bytecode.c
-src/compiler.c
-src/parser.c
-src/ssa/ssa.c
-src/std/alloc.c
-src/std/buffer.c
-src/std/buffer_view.c
-src/std/file.c
-src/std/hash.c
-src/std/hash_map.c
-src/std/log.c
-src/std/mem.c
-src/std/scoped_allocator.c
-src/std/thread.c
-src/tokenizer.c
-tests/b.amal
-tests/b.amal.z
-tests/bytecode.amal
-tests/bytecode.amal.z
-tests/errors/closure_no_lhs.amal
-tests/errors/const_assign.amal
-tests/errors/duplicate_declaration.amal
-tests/errors/pub_in_closure.amal
-tests/io.amal
-tests/io.amal.z
-tests/main.amal
-tests/main.amal.z
-tests/main.c
-tests/sub/a.amal
-tests/sub/a.amal.z
diff --git a/amalgam.includes b/amalgam.includes
deleted file mode 100644
index 5822a55..0000000
--- a/amalgam.includes
+++ /dev/null
@@ -1,4 +0,0 @@
-include
-include/bytecode
-include/ssa
-include/std
diff --git a/build.sh b/build.sh
index 6911ee0..c8b7ab7 100755
--- a/build.sh
+++ b/build.sh
@@ -59,3 +59,5 @@ for source_file in $source_files $source_files_tests; do
done
echo "]")
echo "$compile_commands" > "$this_script_dir/compile_commands.json"
+
+echo "Finished building"
diff --git a/doc/BytecodeHeader.md b/doc/BytecodeHeader.md
new file mode 100644
index 0000000..684a5ef
--- /dev/null
+++ b/doc/BytecodeHeader.md
@@ -0,0 +1,8 @@
+# Header layout
+|Size|Name |Description |
+|----|-------------|----------------------------------------------------------------------------|
+|4 |Magic number |The magic number used to identify an amalgam bytecode file. |
+|1 |Major version|The major version of the bytecode. Updates in this is a breaking change. |
+|1 |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
+|1 |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|
+The versions in the header only changes for every release, not every change. \ No newline at end of file
diff --git a/doc/CompilerFlow.md b/doc/CompilerFlow.md
new file mode 100644
index 0000000..e42b94d
--- /dev/null
+++ b/doc/CompilerFlow.md
@@ -0,0 +1,6 @@
+# Compiler flow
+(Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\
+Each step except the last is done using multiple threads in parallel and the output of each step is used
+in the next step. The last step is not done in parallel because the last step is combining all bytecode
+and writing it to a file, which is an IO bottlenecked operation and it won't benefit from multithreading
+and may even lose performance because of it. \ No newline at end of file
diff --git a/doc/Opcode.md b/doc/Opcode.md
new file mode 100644
index 0000000..37fa4e2
--- /dev/null
+++ b/doc/Opcode.md
@@ -0,0 +1,14 @@
+# Opcode
+Variable length opcodes. Sizes range from 1 to 4 bytes.
+# Instruction formats
+Instructions can be in 6 different formats:
+1. 1 byte: Opcode
+2. 2 bytes: Opcode + register
+3. 3 bytes: Opcode + register + register
+4. 3 bytes:\
+4.1 Opcode + intermediate\
+4.2 Opcode + data\
+4.3 Opcode + index\
+4.4 Opcode + offset
+5. 4 bytes: Opcode + register + register + register
+6. 4 bytes: Opcode + register + offset \ No newline at end of file
diff --git a/doc/doc_extract.py b/doc/doc_extract.py
new file mode 100755
index 0000000..66ec7e5
--- /dev/null
+++ b/doc/doc_extract.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+
+def get_source_files_recursive(path):
+ source_files = []
+ for dirpath, _, filenames in os.walk(path):
+ for filename in filenames:
+ ext = os.path.splitext(filename)[1]
+ if ext == ".c" or ext == ".h":
+ full_path = os.path.join(dirpath, filename)
+ source_files.append(full_path)
+ return source_files
+
+def lstrip_lines(string):
+ result = []
+ for line in string.splitlines():
+ result.append(line.lstrip())
+
+ if len(result) > 0:
+ if len(result[0].strip()) == 0:
+ del result[0]
+ if len(result[-1].strip()) == 0:
+ del result[-1]
+ return "\n".join(result)
+
+def extract_docs(filepath):
+ file_content = ""
+ with open(filepath, "r") as file:
+ file_content = file.read()
+
+ docs = []
+ search_index = 0
+ while True:
+ index = file_content.find("/*doc(", search_index)
+ if index == -1:
+ break
+ index += 6
+
+ doc_name_end = len(file_content)
+ for i, c in enumerate(file_content[index:]):
+ if c == ')':
+ doc_name_end = index + i
+ break
+ doc_name = file_content[index:doc_name_end]
+
+ doc_name_end += 1
+ end = file_content.find("*/", doc_name_end)
+ if end == -1:
+ break
+ doc = file_content[doc_name_end:end]
+
+ search_index = end + 2
+ docs.append((doc_name, lstrip_lines(doc)))
+ return docs
+
+def main():
+ script_path = os.path.realpath(sys.argv[0])
+ script_dir = os.path.dirname(script_path)
+ amalgam_base = os.path.dirname(script_dir)
+ amalgam_includes = os.path.join(amalgam_base, "include")
+ amalgam_sources = os.path.join(amalgam_base, "src")
+
+ source_files = get_source_files_recursive(amalgam_includes)
+ source_files += get_source_files_recursive(amalgam_sources)
+ for filepath in source_files:
+ docs = extract_docs(filepath)
+ for doc in docs:
+ doc_path = os.path.join(script_dir, doc[0] + ".md")
+ with open(doc_path, "w") as file:
+ file.write(doc[1])
+ print("Wrote doc for %s into file %s" % (doc[0], doc_path))
+
+if __name__ == "__main__":
+ main()
diff --git a/include/bytecode/bytecode.h b/include/bytecode/bytecode.h
index e5a70e9..600c9f2 100644
--- a/include/bytecode/bytecode.h
+++ b/include/bytecode/bytecode.h
@@ -8,12 +8,46 @@
#include <setjmp.h>
+/*doc(Opcode)
+ # Opcode
+ Variable length opcodes. Sizes range from 1 to 4 bytes.
+ # Instruction formats
+ Instructions can be in 6 different formats:
+ 1. 1 byte: Opcode
+ 2. 2 bytes: Opcode + register
+ 3. 3 bytes: Opcode + register + register
+ 4. 3 bytes:\
+ 4.1 Opcode + intermediate\
+ 4.2 Opcode + data\
+ 4.3 Opcode + index\
+ 4.4 Opcode + offset
+ 5. 4 bytes: Opcode + register + register + register
+ 6. 4 bytes: Opcode + register + offset
+*/
typedef enum {
- NOP /* To allow hot-patching */
-
-} BytecodeInstruction;
+ AMAL_OP_NOP, /* No operation. This can be used for patching */
+ AMAL_OP_SETZ, /* setz reg - Set register value to 0 */
+ AMAL_OP_MOV, /* mov dst, src - move src register to dst register */
+ AMAL_OP_MOVI, /* movi dst, src - move src intermediate to dst register */
+ AMAL_OP_MOVD, /* movd dst, src - move src data to dst register */
+ AMAL_OP_ADD, /* add dst, reg1, reg2 */
+ AMAL_OP_SUB, /* sub dst, reg1, reg2 */
+ AMAL_OP_MUL, /* mul dst, reg1, reg2 */
+ AMAL_OP_DIV, /* div dst, reg1, reg2 */
+ AMAL_OP_PUSH, /* push reg - Push register onto stack */
+ AMAL_OP_PUSHI, /* pushi int - Push intermediate onto stack */
+ AMAL_OP_PUSHD, /* pushd data - Push data onto stack */
+ AMAL_OP_CALL, /* call fi - Call a function using function index (fi). fi is u16 */
+ AMAL_OP_CALLR, /* callr reg - Call a function using a register. Used for function pointers */
+ AMAL_OP_CMP, /* cmp dst, reg1, reg2 - Set dst to 1 if reg1 equals reg2, otherwise set it to 0 */
+ AMAL_OP_JZ, /* jz reg, offset - jump to offset if reg is zero. offset is i16 */
+ AMAL_OP_JMP, /* jmp offset - unconditional jump to offset. offset is i16 */
+ AMAL_OP_RET, /* ret */
+ AMAL_OP_FUNC_START, /* func_start */
+ AMAL_OP_FUNC_END /* func_end */
+} AmalOpcode;
-typedef u8 BytecodeInstructionType;
+typedef u8 AmalOpcodeType;
typedef struct {
Buffer/*<instruction data>*/ instructions;
diff --git a/src/bytecode/bytecode.c b/src/bytecode/bytecode.c
index 6ceacf0..29d99c0 100644
--- a/src/bytecode/bytecode.c
+++ b/src/bytecode/bytecode.c
@@ -58,6 +58,31 @@ static CHECK_RESULT usize ssa_extract_jump(u8 *instruction_data, SsaInsJump *res
return sizeof(result->jump_offset);
}
+/*
+    Appends the bytecode header to the instruction buffer of self->bytecode:
+    the magic number followed by the major, minor and patch versions.
+    A failed append is reported through throw_if_error.
+    NOTE(review): each value is appended from its in-memory representation, so
+    the byte order of the magic number presumably follows the host - confirm
+    that this is what the file format intends.
+*/
+static void add_header(BytecodeCompilerContext *self) {
+    /*doc(BytecodeHeader)
+    # Header layout
+    |Size|Name |Description |
+    |----|-------------|----------------------------------------------------------------------------|
+    |4 |Magic number |The magic number used to identify an amalgam bytecode file. |
+    |1 |Major version|The major version of the bytecode. Updates in this is a breaking change. |
+    |1 |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
+    |1 |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|
+    The versions in the header only changes for every release, not every change.
+    */
+
+    const u32 magic_number = 0xdec05eba;
+    const u8 major_version = 1;
+    const u8 minor_version = 0;
+    const u8 patch_version = 0;
+    Buffer *header_out = &self->bytecode->instructions;
+
+    throw_if_error(buffer_append(header_out, &magic_number, sizeof(magic_number)));
+    throw_if_error(buffer_append(header_out, &major_version, sizeof(major_version)));
+    throw_if_error(buffer_append(header_out, &minor_version, sizeof(minor_version)));
+    throw_if_error(buffer_append(header_out, &patch_version, sizeof(patch_version)));
+}
+
static void add_intermediates(BytecodeCompilerContext *self) {
Ssa *ssa;
Buffer *instructions;
@@ -78,6 +103,107 @@ static void add_intermediates(BytecodeCompilerContext *self) {
}
}
+/*
+    Appends the strings of self->parser->ssa to the instruction buffer of
+    self->bytecode: a u16 string count, followed by each string as a u16 length
+    and then the string bytes.
+    A failed buffer operation is reported through throw_if_error.
+*/
+void add_strings(BytecodeCompilerContext *self) {
+    Ssa *ssa;
+    Buffer *instructions;
+    BufferView *string;
+    BufferView *strings_end;
+    u16 num_strings;
+    u16 string_size;
+
+    ssa = self->parser->ssa;
+    instructions = &self->bytecode->instructions;
+    string = buffer_begin(&ssa->strings);
+    strings_end = buffer_end(&ssa->strings);
+    /*
+        Count the strings from the element range instead of reading the buffer's
+        size field, and narrow to u16 by value so that the bytes written do not
+        depend on the width or byte layout of that field.
+    */
+    num_strings = (u16)(strings_end - string);
+
+    /*
+        The 8 here is an arbitrarily chosen number, since we don't know the total
+        size of all strings without counting. It assumes that the average string
+        length is 8.
+    */
+    throw_if_error(buffer_expand(instructions,
+        sizeof(u16) + (sizeof(u16) + 8) * num_strings));
+    throw_if_error(buffer_append(instructions, &num_strings, sizeof(num_strings)));
+    for(; string != strings_end; ++string) {
+        string_size = (u16)string->size;
+        throw_if_error(buffer_append(instructions, &string_size, sizeof(string_size)));
+        /* Append the characters themselves, not the bytes of the data pointer */
+        throw_if_error(buffer_append(instructions, string->data, string->size));
+    }
+}
+
+/*
+    Appends a 1 byte instruction (opcode only) to the instruction buffer of
+    self->bytecode and writes fmt, followed by a newline, to stderr.
+    fmt is written verbatim; it is not interpreted as a format string.
+    A failed append is reported through throw_if_error.
+*/
+static void add_ins1(BytecodeCompilerContext *self, AmalOpcode opcode, const char *fmt) {
+    /*
+        Narrow the opcode by value. Appending the first byte of the enum object
+        would only yield the opcode value on little-endian hosts.
+    */
+    const AmalOpcodeType opcode_byte = (AmalOpcodeType)opcode;
+    throw_if_error(buffer_append(&self->bytecode->instructions, &opcode_byte, sizeof(opcode_byte)));
+    /* There are no arguments to format, so fmt is not passed as a printf format */
+    fputs(fmt, stderr);
+    fputc('\n', stderr);
+}
+
+
+/*
+    Appends a 2 byte instruction (opcode + register) to the instruction buffer
+    of self->bytecode and prints it to stderr using fmt, which receives reg as
+    its only argument. A newline is printed after it.
+*/
+static void add_ins2(BytecodeCompilerContext *self, AmalOpcode opcode, u8 reg, const char *fmt) {
+    Buffer *instructions = &self->bytecode->instructions;
+    /* Where the new instruction starts: the end of the buffer before it grows */
+    const size_t start = instructions->size;
+
+    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(reg)));
+    instructions->data[start] = opcode;
+    memcpy(&instructions->data[start + sizeof(AmalOpcodeType)], &reg, sizeof(reg));
+    fprintf(stderr, fmt, reg);
+    fputc('\n', stderr);
+}
+
+/*
+    Appends a 3 byte instruction (opcode + two 1 byte operands) to the
+    instruction buffer of self->bytecode and prints it to stderr using fmt,
+    which receives dst_reg and src_reg in that order. A newline is printed
+    after it.
+*/
+static void add_ins3(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 src_reg, const char *fmt) {
+    Buffer *instructions = &self->bytecode->instructions;
+    /* Where the new instruction starts: the end of the buffer before it grows */
+    const size_t start = instructions->size;
+    const size_t dst_offset = start + sizeof(AmalOpcodeType);
+    const size_t src_offset = dst_offset + sizeof(dst_reg);
+
+    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(src_reg)));
+    instructions->data[start] = opcode;
+    memcpy(&instructions->data[dst_offset], &dst_reg, sizeof(dst_reg));
+    memcpy(&instructions->data[src_offset], &src_reg, sizeof(src_reg));
+    fprintf(stderr, fmt, dst_reg, src_reg);
+    fputc('\n', stderr);
+}
+
+/*
+    Appends a 3 byte instruction (opcode + u16 operand) to the instruction
+    buffer of self->bytecode and prints it to stderr using fmt, which receives
+    data as its only argument. A newline is printed after it.
+    The u16 is copied with memcpy, i.e. in its in-memory representation.
+*/
+static void add_ins4(BytecodeCompilerContext *self, AmalOpcode opcode, u16 data, const char *fmt) {
+    Buffer *instructions = &self->bytecode->instructions;
+    /* Where the new instruction starts: the end of the buffer before it grows */
+    const size_t start = instructions->size;
+
+    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(data)));
+    instructions->data[start] = opcode;
+    memcpy(&instructions->data[start + sizeof(AmalOpcodeType)], &data, sizeof(data));
+    fprintf(stderr, fmt, data);
+    fputc('\n', stderr);
+}
+
+/*
+    Appends a 4 byte instruction (opcode + three registers) to the instruction
+    buffer of self->bytecode and prints it to stderr using fmt, which receives
+    dst_reg, reg1 and reg2 in that order. A newline is printed after it.
+*/
+static void add_ins5(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u8 reg1, u8 reg2, const char *fmt) {
+    Buffer *instructions = &self->bytecode->instructions;
+    /* Where the new instruction starts: the end of the buffer before it grows */
+    const size_t start = instructions->size;
+    const size_t dst_offset = start + sizeof(AmalOpcodeType);
+    const size_t reg1_offset = dst_offset + sizeof(dst_reg);
+    const size_t reg2_offset = reg1_offset + sizeof(reg1);
+
+    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(reg1) + sizeof(reg2)));
+    instructions->data[start] = opcode;
+    memcpy(&instructions->data[dst_offset], &dst_reg, sizeof(dst_reg));
+    memcpy(&instructions->data[reg1_offset], &reg1, sizeof(reg1));
+    memcpy(&instructions->data[reg2_offset], &reg2, sizeof(reg2));
+    fprintf(stderr, fmt, dst_reg, reg1, reg2);
+    fputc('\n', stderr);
+}
+
+/*
+    Appends a 4 byte instruction (opcode + register + u16 operand) to the
+    instruction buffer of self->bytecode and prints it to stderr using fmt,
+    which receives dst_reg and data in that order. A newline is printed after it.
+    The u16 is copied with memcpy, i.e. in its in-memory representation.
+*/
+static void add_ins6(BytecodeCompilerContext *self, AmalOpcode opcode, u8 dst_reg, u16 data, const char *fmt) {
+    Buffer *instructions = &self->bytecode->instructions;
+    /* Where the new instruction starts: the end of the buffer before it grows */
+    const size_t start = instructions->size;
+    const size_t reg_offset = start + sizeof(AmalOpcodeType);
+    const size_t data_offset = reg_offset + sizeof(dst_reg);
+
+    throw_if_error(buffer_append_empty(instructions, sizeof(AmalOpcodeType) + sizeof(dst_reg) + sizeof(data)));
+    instructions->data[start] = opcode;
+    memcpy(&instructions->data[reg_offset], &dst_reg, sizeof(dst_reg));
+    memcpy(&instructions->data[data_offset], &data, sizeof(data));
+    fprintf(stderr, fmt, dst_reg, data);
+    fputc('\n', stderr);
+}
+
#if 0
#define NUM_MAX_REGS 256
#define NUM_MAX_FUNC_ARGS 32
@@ -251,63 +377,64 @@ static void add_instructions(BytecodeCompilerContext *self) {
}
}
#else
- #define ARITH_OP(op) do {\
- instruction += ssa_extract_form2(instruction, &ssa_ins_form2); \
- fprintf(file, "%s r%d, r%d, r%d\n", (op), ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs); \
- } while(0)
while(instruction != instructions_end) {
SsaInstruction ins = (SsaInstruction)*instruction++;
switch(ins) {
case SSA_ASSIGN_INTER: {
instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
- fprintf(file, "mov r%d, i%d\n", ssa_ins_form1.lhs, ssa_ins_form1.rhs);
+ add_ins3(self, AMAL_OP_MOVI, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movi r%d, i%d");
break;
}
case SSA_ASSIGN_STRING: {
instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
- fprintf(file, "mov r%d, s%d\n", ssa_ins_form1.lhs, ssa_ins_form1.rhs);
+ add_ins3(self, AMAL_OP_MOVD, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "movd r%d, s%d");
break;
}
case SSA_ASSIGN_REG: {
instruction += ssa_extract_form1(instruction, &ssa_ins_form1);
- fprintf(file, "mov r%d, r%d\n", ssa_ins_form1.lhs, ssa_ins_form1.rhs);
+ /* Register to register move, so both operands are printed with the r prefix */
+ add_ins3(self, AMAL_OP_MOV, ssa_ins_form1.lhs, ssa_ins_form1.rhs, "mov r%d, r%d");
break;
}
case SSA_ADD: {
- ARITH_OP("add");
+ instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
+ add_ins5(self, AMAL_OP_ADD, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "add r%d, r%d, r%d");
break;
}
case SSA_SUB: {
- ARITH_OP("sub");
+ instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
+ add_ins5(self, AMAL_OP_SUB, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "sub r%d, r%d, r%d");
break;
}
case SSA_MUL: {
- ARITH_OP("mul");
+ instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
+ add_ins5(self, AMAL_OP_MUL, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "mul r%d, r%d, r%d");
break;
}
case SSA_DIV: {
- ARITH_OP("div");
+ instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
+ add_ins5(self, AMAL_OP_DIV, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "div r%d, r%d, r%d");
break;
}
case SSA_EQUALS: {
- ARITH_OP("eq");
+ instruction += ssa_extract_form2(instruction, &ssa_ins_form2);
+ add_ins5(self, AMAL_OP_CMP, ssa_ins_form2.result, ssa_ins_form2.lhs, ssa_ins_form2.rhs, "cmp r%d, r%d, r%d");
break;
}
case SSA_FUNC_START: {
instruction += ssa_extract_func_start(instruction, &ssa_ins_func_start);
- fprintf(file, "FUNC_START %d\n", ssa_ins_func_start.num_args);
+ add_ins1(self, AMAL_OP_FUNC_START, "func_start");
break;
}
case SSA_FUNC_END: {
- fprintf(file, "FUNC_END\n");
+ /* The end of a function is marked with its own opcode, not with AMAL_OP_FUNC_START */
+ add_ins1(self, AMAL_OP_FUNC_END, "func_end");
break;
}
case SSA_PUSH: {
SsaRegister reg;
am_memcpy(&reg, instruction, sizeof(SsaRegister));
instruction += sizeof(SsaRegister);
- fprintf(file, "push r%d\n", reg);
+ add_ins2(self, AMAL_OP_PUSH, reg, "push r%d");
break;
}
case SSA_CALL: {
@@ -325,17 +452,17 @@ static void add_instructions(BytecodeCompilerContext *self) {
is defined as the size of all previous files' number of functions.
*/
instruction += ssa_extract_func_call(instruction, &ssa_ins_func_call);
- fprintf(file, "call %d\n", ssa_ins_func_call.func_decl->ssa_func_index);
+ add_ins4(self, AMAL_OP_CALL, ssa_ins_func_call.func_decl->ssa_func_index, "call %d");
break;
}
case SSA_JUMP_ZERO: {
instruction += ssa_extract_jump_zero(instruction, &ssa_ins_jump_zero);
- fprintf(file, "jz r%d, %d\n", ssa_ins_jump_zero.condition_reg, ssa_ins_jump_zero.jump_offset);
+ add_ins6(self, AMAL_OP_JZ, ssa_ins_jump_zero.condition_reg, ssa_ins_jump_zero.jump_offset, "jz r%d, %d");
break;
}
case SSA_JUMP: {
instruction += ssa_extract_jump(instruction, &ssa_ins_jump);
- fprintf(file, "jmp %d\n", ssa_ins_jump.jump_offset);
+ add_ins4(self, AMAL_OP_JMP, ssa_ins_jump.jump_offset, "jmp %d");
break;
}
default:
@@ -348,7 +475,9 @@ static void add_instructions(BytecodeCompilerContext *self) {
}
+/*
+    Generates the bytecode for self by running, in this order, add_header,
+    add_intermediates, add_strings and add_instructions.
+*/
void generate_bytecode_from_ssa(BytecodeCompilerContext *self) {
+ add_header(self);
add_intermediates(self);
+ add_strings(self);
/* TODO: Also add strings in ssa, so we can index them */
add_instructions(self);
}
diff --git a/src/compiler.c b/src/compiler.c
index 9f003c8..e7b242b 100644
--- a/src/compiler.c
+++ b/src/compiler.c
@@ -510,6 +510,15 @@ int amal_compiler_internal_load_file(amal_compiler *self, const char *filepath,
return_if_error(amal_compiler_select_thread_for_work(self, thread_work_data, &parser_thread_data));
if(main_job) {
+ /*doc(CompilerFlow)
+ # Compiler flow
+ (Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\
+ Each step except the last is done using multiple threads in parallel and the output of each step is used
+ in the next step. The last step is not done in parallel because the last step is combining all bytecode
+ and writing it to a file, which is an IO bottlenecked operation and it won't benefit from multithreading
+ and may even lose performance because of it.
+ */
+
return_if_error(amal_compiler_load_file_join_threads(self));
assert(amal_compiler_check_all_threads_done(self));
amal_log_info("Finished parsing all files, resolving AST");
diff --git a/src/ssa/ssa.c b/src/ssa/ssa.c
index 91ba185..34e3e3e 100644
--- a/src/ssa/ssa.c
+++ b/src/ssa/ssa.c
@@ -16,6 +16,9 @@ do { \
throw(return_if_result); \
} while(0)
+/* Max length of a string that fits in u16: 2^16 - 1 = 65535 */
+#define MAX_STRING_LENGTH ((1 << 16) - 1)
+
static int compare_number(const void *a, const void *b) {
const SsaNumber *lhs;
const SsaNumber *rhs;
@@ -125,6 +128,11 @@ static CHECK_RESULT int ssa_try_add_string(Ssa *self, BufferView str, SsaStringI
/* Overflow */
if(self->string_counter + 1 < self->string_counter)
return -1;
+
+ if(str.size > MAX_STRING_LENGTH) {
+ amal_log_error("String \"%.*s\" is longer than %d\n", str.size, str.data, MAX_STRING_LENGTH);
+ return -2;
+ }
*result_index = self->string_counter;
++self->string_counter;
@@ -386,6 +394,10 @@ in any order.
*/
static CHECK_RESULT SsaRegister funcdecl_generate_ssa(FunctionDecl *self, SsaCompilerContext *context) {
/* TODO: Implement */
+ /*
+ Reset reg counter in each function, because each function has a separate register context
+ that is reset after function end
+ */
SsaRegister prev_reg_counter;
prev_reg_counter = context->ssa->reg_counter;
context->ssa->reg_counter = 0;
diff --git a/tests/main.c b/tests/main.c
index 520d8b4..94f2d32 100644
--- a/tests/main.c
+++ b/tests/main.c
@@ -233,7 +233,6 @@ static void test_load_error(const char *filepath, const char *expected_error) {
free(expected_data.expected_error);
}
-
/* TODO: Restrict variables in global scope to const */
int main(int argc, char **argv) {
return_if_error(test_hash_map());
@@ -269,4 +268,4 @@ int main(int argc, char **argv) {
exit(1);
}
return 0;
-}
+} \ No newline at end of file