From 85c654a102701958d3748e82ecac9c1bc4dbbcba Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 16 Jul 2019 00:27:53 +0200 Subject: Start on real bytecode & doc parsing --- doc/BytecodeHeader.md | 8 ++++++ doc/CompilerFlow.md | 6 ++++ doc/Opcode.md | 14 ++++++++++ doc/doc_extract.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+) create mode 100644 doc/BytecodeHeader.md create mode 100644 doc/CompilerFlow.md create mode 100644 doc/Opcode.md create mode 100755 doc/doc_extract.py (limited to 'doc') diff --git a/doc/BytecodeHeader.md b/doc/BytecodeHeader.md new file mode 100644 index 0000000..684a5ef --- /dev/null +++ b/doc/BytecodeHeader.md @@ -0,0 +1,8 @@ +# Header layout +|Size|Name |Description | +|----|-------------|----------------------------------------------------------------------------| +|4 |Magic number |The magic number used to identify an amalgam bytecode file. | +|1 |Major version|The major version of the bytecode. Updates to this are breaking changes. | +|1 |Minor version|The minor version of the bytecode. Updates to this are backwards compatible.| +|1 |Patch version|The patch version of the bytecode. Updates to this are only minor bug fixes.| +The versions in the header only change with each release, not with every change. \ No newline at end of file diff --git a/doc/CompilerFlow.md b/doc/CompilerFlow.md new file mode 100644 index 0000000..e42b94d --- /dev/null +++ b/doc/CompilerFlow.md @@ -0,0 +1,6 @@ +# Compiler flow +(Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\ +Each step except the last is done using multiple threads in parallel and the output of each step is used +in the next step. The last step is not done in parallel because the last step is combining all bytecode +and writing it to a file, which is an IO-bottlenecked operation that won't benefit from multithreading +and may even lose performance because of it. 
#!/usr/bin/env python3
"""Extract ``/*doc(Name) ... */`` comments from C sources into Markdown files.

Every ``.c`` and ``.h`` file under the ``include`` and ``src`` directories
next to this script's directory is scanned. For each comment of the form
``/*doc(Name) text */`` the text is written to ``Name.md`` in the script's
own directory.
"""

import sys
import os

# File extensions that are scanned for doc comments.
_SOURCE_EXTENSIONS = (".c", ".h")
# Token that opens a doc comment; the doc name follows, terminated by ")".
_DOC_MARKER = "/*doc("
# Token that closes a doc comment.
_DOC_END = "*/"


def get_source_files_recursive(path):
    """Return the paths of all ``.c``/``.h`` files below *path*.

    The directory tree is walked in sorted order so the result (and therefore
    which file wins when two files define the same doc name) does not depend
    on the order in which the filesystem happens to list entries.
    """
    source_files = []
    for dirpath, dirnames, filenames in os.walk(path):
        # Sorting in place makes os.walk descend into subdirectories in a
        # deterministic order.
        dirnames.sort()
        for filename in sorted(filenames):
            if os.path.splitext(filename)[1] in _SOURCE_EXTENSIONS:
                source_files.append(os.path.join(dirpath, filename))
    return source_files


def lstrip_lines(string):
    """Strip leading whitespace from every line of *string*.

    A blank first line and a blank last line are dropped, so a comment body
    that starts right after ``)`` and ends on the line of ``*/`` does not
    produce empty lines at the edges. The lines are re-joined with ``"\\n"``.
    """
    result = [line.lstrip() for line in string.splitlines()]

    # Each edge is checked separately: after dropping a blank first line the
    # list may be empty, in which case there is no last line to inspect.
    if result and not result[0].strip():
        del result[0]
    if result and not result[-1].strip():
        del result[-1]
    return "\n".join(result)


def extract_docs(filepath):
    """Return a list of ``(doc_name, doc_text)`` tuples found in *filepath*.

    ``doc_name`` is the text between ``/*doc(`` and the next ``)`` with
    surrounding whitespace removed; ``doc_text`` is everything from there up
    to the next ``*/``, passed through :func:`lstrip_lines`. Scanning stops at
    the first doc comment that has no closing ``)`` or no closing ``*/``.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale;
    # undecodable bytes are replaced instead of aborting the whole run.
    with open(filepath, "r", encoding="utf-8", errors="replace") as file:
        file_content = file.read()

    docs = []
    search_index = 0
    while True:
        marker = file_content.find(_DOC_MARKER, search_index)
        if marker == -1:
            break
        name_start = marker + len(_DOC_MARKER)

        name_end = file_content.find(")", name_start)
        if name_end == -1:
            break

        body_start = name_end + 1
        body_end = file_content.find(_DOC_END, body_start)
        if body_end == -1:
            break

        doc_name = file_content[name_start:name_end].strip()
        docs.append((doc_name, lstrip_lines(file_content[body_start:body_end])))
        # Continue searching after the end of this comment.
        search_index = body_end + len(_DOC_END)
    return docs


def main():
    """Write one Markdown file per doc comment found in the project sources."""
    script_path = os.path.realpath(sys.argv[0])
    script_dir = os.path.dirname(script_path)
    # The project root is the parent of the directory containing this script.
    amalgam_base = os.path.dirname(script_dir)
    amalgam_includes = os.path.join(amalgam_base, "include")
    amalgam_sources = os.path.join(amalgam_base, "src")

    source_files = get_source_files_recursive(amalgam_includes)
    source_files += get_source_files_recursive(amalgam_sources)
    for filepath in source_files:
        for doc_name, doc_text in extract_docs(filepath):
            doc_path = os.path.join(script_dir, doc_name + ".md")
            with open(doc_path, "w", encoding="utf-8") as file:
                file.write(doc_text)
            print("Wrote doc for %s into file %s" % (doc_name, doc_path))


if __name__ == "__main__":
    main()