Use struct for bytecode header instead of pointer arithmetic

author: dec05eba <dec05eba@protonmail.com> 2019-09-14 01:45:31 +0200
committer: dec05eba <dec05eba@protonmail.com> 2020-07-25 14:36:46 +0200
commit: 35200031e88c65da6a0bde563f20d95c1dd4f464 (patch)
tree: b1159960ca7ba78a42f6ef203f99d2b1a1c26641
parent: 7d663615b2a44715e7447a40cae467d7d4e38b9c (diff)
7 files changed, 130 insertions, 35 deletions
diff --git a/doc/Documentation.md b/doc/Documentation.md
index ca579a2..88647a7 100644
--- a/doc/Documentation.md
+++ b/doc/Documentation.md
@@ -1,5 +1,5 @@
-# Opcode
-Variable length opcodes. Sizes range from 1 to 5 bytes.
+# Instructions
+Variable length instructions. Instruction size ranges from 1 to 5 bytes.
 ## Instruction formats
 Instructions can be in 7 different formats:
 1. 1 byte: Opcode(u8)
@@ -15,7 +15,8 @@ Instructions can be in 7 different formats:
 6.1 Opcode(u8) + register(i8) + label(i16)\
 6.2 Opcode(u8) + register(i8) + intermediate(u16)\
 6.3 Opcode(u8) + register(i8) + data(u16)\
-6.4 Opcode(u8) + flags(u8) + num_local_var_reg(u16)
+6.4 Opcode(u8) + flags(u8) + num_local_var_reg(u16)\
+6.5 Opcode(u8) + stack_offset(i24)
 7. 5 bytes: Opcode(u8) + index(u8) + index(u16) + num_args(u8)
 ## Registers
 Registers have a range of 128. Local variables start from register 0 and increment while parameters start from -1
@@ -44,6 +45,7 @@ The value of the magic number is @AMAL_BYTECODE_SECTION_MAGIC_NUMBER
 |u8  |Major version|The major version of the bytecode. Updates in this is a breaking change.    |
 |u8  |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
 |u8  |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|
+|u8  |Endian       |Endianness of the program. 0 = little endian, 1 = big endian.               |
 
 The versions in the header only changes for every release, not every change.
 
@@ -102,11 +104,16 @@ The versions in the header only changes for every release, not every change.
 |External function[]|External functions|Multiple external functions, where the number of functions is defined by @num_extern_func|
 
 ## External function
-|Type|Field     |Description                                                                                          |
-|----|----------|-----------------------------------------------------------------------------------------------------|
-|u8  |num_params|The number of parameters the functions has.                                                          |
-|u8  |name_len  |The length of the external function name, in bytes. Excluding the null-terminate character.          |
-|u8[]|name      |The name of the external function, where the size is defined by @name_len. Names are null-terminated.|
+|Type|Field                    |Description                                                                                          |
+|----|-------------------------|-----------------------------------------------------------------------------------------------------|
+|u8  |num_params               |The number of parameters.                                                                            |
+|u32 |params_num_pointers      |The number of pointers in the parameters.                                                            |
+|u32 |params_fixed_size        |The size of all non-pointer type parameters, in bytes.                                               |
+|u8  |num_return_types         |The number of return values.                                                                         |
+|u32 |return_types_num_pointers|The number of pointers in the return types.                                                          |
+|u32 |return_types_fixed_size  |The size of all non-pointer type return types, in bytes.                                             |
+|u8  |name_len                 |The length of the external function name in bytes, excluding the null terminator.                    |
+|u8[]|name                     |The name of the external function, where the size is defined by @name_len. Names are null-terminated.|
 
 # Bytecode exported functions
 ## Exported functions layout
diff --git a/include/bytecode/bytecode.h b/include/bytecode/bytecode.h
index a93fe4f..83d62b9 100644
--- a/include/bytecode/bytecode.h
+++ b/include/bytecode/bytecode.h
@@ -83,6 +83,17 @@ typedef u8 AmalOpcodeType;
 /* TODO: Make sure this pragma pack works on all platforms */
 #pragma pack(push, 1)
 typedef struct {
+    u32 magic_number; /* AMAL_BYTECODE_MAGIC_NUMBER; buffer_append_header byte-swaps it on big endian hosts */
+    u8 major_version; /* Breaking changes; amal_program_read_header rejects a major version newer than its own */
+    u8 minor_version; /* Backwards compatible changes */
+    u8 patch_version; /* Minor bug fixes only */
+    u8 endian; /* 0 = little endian, 1 = big endian */
+} BytecodeHeader; /* Packed bytecode header, appended by buffer_append_header and read back by amal_program_read_header */
+#pragma pack(pop)
+
+/* TODO: Make sure this pragma pack works on all platforms */
+#pragma pack(push, 1)
+typedef struct {
     u32 func_offset;
     u8 num_params;
     u32 params_num_pointers;
diff --git a/include/std/misc.h b/include/std/misc.h
index a9bd5b6..03d7972 100644
--- a/include/std/misc.h
+++ b/include/std/misc.h
@@ -1,6 +1,40 @@
 #ifndef AMALGAM_MISC_H
 #define AMALGAM_MISC_H
 
+#include "types.h"
+#include <sys/types.h>
+
+/*
+    Byte order detection. Defines one of AMAL_LITTLE_ENDIAN or AMAL_BIG_ENDIAN,
+    or fails the build when the byte order of the target can't be determined.
+    __BYTE_ORDER is checked first (presumably the reason <sys/types.h> is included), then the compiler predefined __BYTE_ORDER__.
+*/
+#if defined(__BYTE_ORDER)
+    #if __BYTE_ORDER == __LITTLE_ENDIAN
+        #define AMAL_LITTLE_ENDIAN
+    #elif __BYTE_ORDER == __BIG_ENDIAN
+        #define AMAL_BIG_ENDIAN
+    #endif
+#elif defined(__BYTE_ORDER__)
+    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        #define AMAL_LITTLE_ENDIAN
+    #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        #define AMAL_BIG_ENDIAN
+    #endif
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64))
+    /* MSVC defines neither macro above; these MSVC target architectures are all little endian */
+    #define AMAL_LITTLE_ENDIAN
+#endif
+
+#if !defined(AMAL_LITTLE_ENDIAN) && !defined(AMAL_BIG_ENDIAN)
+#error Unsupported endian, neither little or big endian
+#endif
+
+/* Return @value with the order of its bytes reversed */
+u16 byteswap16(u16 value);
+u32 byteswap32(u32 value);
+u64 byteswap64(u64 value);
+
 #ifndef AMAL_PEDANTIC
 #include "log.h"
 #endif
diff --git a/src/bytecode/bytecode.c b/src/bytecode/bytecode.c
index c968743..0a2b157 100644
--- a/src/bytecode/bytecode.c
+++ b/src/bytecode/bytecode.c
@@ -42,20 +42,24 @@ CHECK_RESULT int buffer_append_header(Buffer *program_data) {
         |u8  |Major version|The major version of the bytecode. Updates in this is a breaking change.    |
         |u8  |Minor version|The minor version of the bytecode. Updates in this are backwards compatible.|
         |u8  |Patch version|The patch version of the bytecode. Updates in this are only minor bug fixes.|
+        |u8  |Endian       |Endianness of the program. 0 = little endian, 1 = big endian.               |
 
         The versions in the header only changes for every release, not every change.
     */
 
-    const u32 magic_number = AMAL_BYTECODE_MAGIC_NUMBER;
-    const u8 major_version = AMAL_BYTECODE_MAJOR_VERSION;
-    const u8 minor_version = AMAL_BYTECODE_MINOR_VERSION;
-    const u8 patch_version = AMAL_BYTECODE_PATCH_VERSION;
-
-    return_if_error(buffer_append(program_data, &magic_number, 4));
-    return_if_error(buffer_append(program_data, &major_version, 1));
-    return_if_error(buffer_append(program_data, &minor_version, 1));
-    return_if_error(buffer_append(program_data, &patch_version, 1));
-
+    BytecodeHeader header;
+    header.magic_number = AMAL_BYTECODE_MAGIC_NUMBER;
+    header.major_version = AMAL_BYTECODE_MAJOR_VERSION;
+    header.minor_version = AMAL_BYTECODE_MINOR_VERSION;
+    header.patch_version = AMAL_BYTECODE_PATCH_VERSION;
+#if defined(AMAL_LITTLE_ENDIAN)
+    header.endian = 0;
+#elif defined(AMAL_BIG_ENDIAN)
+    header.magic_number = byteswap32(header.magic_number);
+    header.endian = 1;
+#endif
+
+    return_if_error(buffer_append(program_data, &header, sizeof(header)));
     return 0;
 }
 
diff --git a/src/compiler.c b/src/compiler.c
index 6e6bc4b..c10ccc9 100644
--- a/src/compiler.c
+++ b/src/compiler.c
@@ -432,8 +432,7 @@ static CHECK_RESULT int validate_main_func(FileScopeReference *main_file_scope,
 }
 
 static void amal_compiler_parsers_set_bytecode_offsets(amal_compiler *self) {
-    /* magic number + major version + minor version + patch version */
-    u32 offset = sizeof(u32) + sizeof(u8) + sizeof(u8) + sizeof(u8);
+    u32 offset = sizeof(BytecodeHeader);
     Parser **parser = buffer_begin(&self->parsers);
     Parser **parser_end = buffer_end(&self->parsers);
     for(; parser != parser_end; ++parser) {
diff --git a/src/program.c b/src/program.c
index 5c07c5d..1a53b53 100644
--- a/src/program.c
+++ b/src/program.c
@@ -186,31 +186,26 @@ static usize bytes_left_to_read(amal_program *self) {
 }
 
 static CHECK_RESULT int amal_program_read_header(amal_program *self) {
-    u32 magic_number;
-    u8 major_version;
-    u8 minor_version;
-    u8 patch_version;
+    BytecodeHeader header;
 
-    if(bytes_left_to_read(self) < sizeof(u32) + sizeof(u8) * 3)
+    if(bytes_left_to_read(self) < sizeof(header))
         return AMAL_PROGRAM_INVALID_HEADER;
 
-    am_memcpy(&magic_number, &self->data.data[self->read_index], sizeof(magic_number));
-    self->read_index += sizeof(u32);
-    am_memcpy(&major_version, &self->data.data[self->read_index], sizeof(major_version));
-    self->read_index += sizeof(u8);
-    am_memcpy(&minor_version, &self->data.data[self->read_index], sizeof(minor_version));
-    self->read_index += sizeof(u8);
-    am_memcpy(&patch_version, &self->data.data[self->read_index], sizeof(patch_version));
-    self->read_index += sizeof(u8);
+    am_memcpy(&header, self->data.data + self->read_index, sizeof(header));
+    self->read_index += sizeof(header);
+#ifdef AMAL_BIG_ENDIAN
+    header.magic_number = byteswap32(header.magic_number);
+    #error TODO: Support big endian for program decoding
+#endif
 
-    if(magic_number != AMAL_BYTECODE_MAGIC_NUMBER)
+    if(header.magic_number != AMAL_BYTECODE_MAGIC_NUMBER)
         return AMAL_PROGRAM_INVALID_MAGIC_NUMBER;
 
     /*
         A program is only incompatible if the major version is newer than the version that is used to run it.
         TODO: Implement backwards compatible reads, starting from when the program bytecode breaks backwards compatibility
     */
-    if(major_version > AMAL_BYTECODE_MAJOR_VERSION)
+    if(header.major_version > AMAL_BYTECODE_MAJOR_VERSION)
         return AMAL_PROGRAM_INCOMPATIBLE;
 
     return AMAL_PROGRAM_OK;
diff --git a/src/std/misc.c b/src/std/misc.c
new file mode 100644
index 0000000..f53797d
--- /dev/null
+++ b/src/std/misc.c
@@ -0,0 +1,53 @@
+#include "../../include/std/misc.h"
+
+/*
+    Byte order reversal helpers. A compiler intrinsic is used where one is known to exist,
+    otherwise a portable shift and mask implementation is used.
+*/
+#if defined(_MSC_VER)
+/* The _byteswap_* intrinsics are declared in <stdlib.h> */
+#include <stdlib.h>
+
+u16 byteswap16(u16 value) {
+    return _byteswap_ushort(value);
+}
+
+u32 byteswap32(u32 value) {
+    return _byteswap_ulong(value);
+}
+
+u64 byteswap64(u64 value) {
+    return _byteswap_uint64(value);
+}
+#elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))
+/* __builtin_bswap16 is only available on all targets since gcc 4.8; older gcc uses the portable fallback below */
+u16 byteswap16(u16 value) {
+    return __builtin_bswap16(value);
+}
+
+u32 byteswap32(u32 value) {
+    return __builtin_bswap32(value);
+}
+
+u64 byteswap64(u64 value) {
+    return __builtin_bswap64(value);
+}
+#else
+u16 byteswap16(u16 value) {
+    return (u16)(((value & 0x00FF) << 8) | ((value & 0xFF00) >> 8));
+}
+
+u32 byteswap32(u32 value) {
+    return ((value & 0x000000FF) << 24)
+         | ((value & 0x0000FF00) << 8)
+         | ((value & 0x00FF0000) >> 8)
+         | ((value & 0xFF000000) >> 24);
+}
+
+u64 byteswap64(u64 value) {
+    /* Swap each 32-bit half and exchange them; this avoids 64-bit integer constants, which need a suffix to be portable */
+    u64 low = byteswap32((u32)(value & 0xFFFFFFFF));
+    u64 high = byteswap32((u32)(value >> 32));
+    return (low << 32) | high;
+}
+#endif
author	dec05eba <dec05eba@protonmail.com>	2019-09-14 01:45:31 +0200
committer	dec05eba <dec05eba@protonmail.com>	2020-07-25 14:36:46 +0200
commit	35200031e88c65da6a0bde563f20d95c1dd4f464 (patch)
tree	b1159960ca7ba78a42f6ef203f99d2b1a1c26641
parent	7d663615b2a44715e7447a40cae467d7d4e38b9c (diff)