From 85c654a102701958d3748e82ecac9c1bc4dbbcba Mon Sep 17 00:00:00 2001
From: dec05eba <dec05eba@protonmail.com>
Date: Tue, 16 Jul 2019 00:27:53 +0200
Subject: Start on real bytecode & doc parsing

---
 doc/BytecodeHeader.md |  8 ++++++
 doc/CompilerFlow.md   |  6 ++++
 doc/Opcode.md         | 14 ++++++++++
 doc/doc_extract.py    | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 104 insertions(+)
 create mode 100644 doc/BytecodeHeader.md
 create mode 100644 doc/CompilerFlow.md
 create mode 100644 doc/Opcode.md
 create mode 100755 doc/doc_extract.py

(limited to 'doc')

diff --git a/doc/BytecodeHeader.md b/doc/BytecodeHeader.md
new file mode 100644
index 0000000..684a5ef
--- /dev/null
+++ b/doc/BytecodeHeader.md
@@ -0,0 +1,8 @@
+# Header layout
+|Size|Name         |Description                                                                 |
+|----|-------------|----------------------------------------------------------------------------|
+|4   |Magic number |The magic number used to identify an amalgam bytecode file.                 |
+|1   |Major version|The major version of the bytecode. Updates to this are breaking changes.    |
+|1   |Minor version|The minor version of the bytecode. Updates to this are backwards compatible.|
+|1   |Patch version|The patch version of the bytecode. Updates to this are only minor bug fixes.|
+The versions in the header only change with every release, not with every change.
\ No newline at end of file
diff --git a/doc/CompilerFlow.md b/doc/CompilerFlow.md
new file mode 100644
index 0000000..e42b94d
--- /dev/null
+++ b/doc/CompilerFlow.md
@@ -0,0 +1,6 @@
+# Compiler flow
+(Tokenize&parse -> Resolve AST -> Generate SSA -> Generate bytecode) -> Generate program\
+Each step except the last is done using multiple threads in parallel, and the output of each step is used
+in the next step. The last step is not done in parallel because it combines all the bytecode
+and writes it to a file, which is an IO-bottlenecked operation that won't benefit from multithreading
+and may even lose performance because of it.
\ No newline at end of file
diff --git a/doc/Opcode.md b/doc/Opcode.md
new file mode 100644
index 0000000..37fa4e2
--- /dev/null
+++ b/doc/Opcode.md
@@ -0,0 +1,14 @@
+# Opcode
+Variable length opcodes. Sizes range from 1 to 4 bytes.
+# Instruction formats
+Instructions can be in 6 different formats:
+1. 1 byte: Opcode
+2. 2 bytes: Opcode + register
+3. 3 bytes: Opcode + register + register
+4. 3 bytes:\
+4.1 Opcode + intermediate\
+4.2 Opcode + data\
+4.3 Opcode + index\
+4.4 Opcode + offset
+5. 4 bytes: Opcode + register + register + register
+6. 4 bytes: Opcode + register + offset
\ No newline at end of file
diff --git a/doc/doc_extract.py b/doc/doc_extract.py
new file mode 100755
index 0000000..66ec7e5
--- /dev/null
+++ b/doc/doc_extract.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+
+def get_source_files_recursive(path):
+    """Return the joined path of every ".c" or ".h" file found below path."""
+    wanted_exts = (".c", ".h")
+    return [
+        os.path.join(root, name)
+        for root, _dirs, names in os.walk(path)
+        for name in names
+        if os.path.splitext(name)[1] in wanted_exts
+    ]
+
+def lstrip_lines(string):
+    """Strip leading whitespace from every line of string and rejoin the lines.
+
+    One blank first line and one blank last line are dropped if present.
+    """
+    result = [line.lstrip() for line in string.splitlines()]
+    if result and not result[0]:
+        del result[0]
+    if result and not result[-1]:  # re-check: the list may be empty by now
+        del result[-1]
+    return "\n".join(result)
+
+def extract_docs(filepath):
+    """Return a (name, text) tuple for each /*doc(name) text */ comment.
+
+    The file is read as UTF-8 and each text is cleaned with lstrip_lines.
+    """
+    marker = "/*doc("
+    with open(filepath, "r", encoding="utf-8") as file:
+        file_content = file.read()
+
+    docs = []
+    search_index = 0
+    while True:
+        index = file_content.find(marker, search_index)
+        if index == -1:
+            break
+        index += len(marker)
+        end = file_content.find("*/", index)
+        if end == -1:
+            break  # unterminated comment: nothing more to extract
+        search_index = end + 2
+
+        # Only accept a ")" inside this comment, so a malformed marker is
+        # skipped instead of swallowing the code after it into the doc name.
+        doc_name_end = file_content.find(")", index, end)
+        if doc_name_end == -1:
+            continue
+        doc = file_content[doc_name_end + 1:end]
+        docs.append((file_content[index:doc_name_end], lstrip_lines(doc)))
+    return docs
+
+def main():
+    """Write every doc comment found under include/ and src/ to <name>.md.
+
+    The output files are placed next to this script.
+    """
+    script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+    amalgam_base = os.path.dirname(script_dir)
+    source_files = []
+    for subdir in ("include", "src"):
+        source_files += get_source_files_recursive(os.path.join(amalgam_base, subdir))
+    for filepath in source_files:
+        for doc_name, doc_text in extract_docs(filepath):
+            doc_path = os.path.join(script_dir, doc_name + ".md")
+            with open(doc_path, "w") as out_file:
+                out_file.write(doc_text)
+            print("Wrote doc for %s into file %s" % (doc_name, doc_path))
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
-- 
cgit v1.2.3-70-g09d2