aboutsummaryrefslogtreecommitdiff
path: root/include/bytecode/bytecode.h
blob: a70bb6f8be4356e888bc9093f9a74ce06d3951be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#ifndef AMALGAM_BYTECODE_H
#define AMALGAM_BYTECODE_H

#include "../std/defs.h"
#include "../std/misc.h"
#include "../std/buffer.h"
#include "../defs.h"

#include <setjmp.h>

/*doc(Instructions)
    Variable length instructions. Instruction size ranges from 1 to 4 bytes.

    # Instruction formats
    Instructions can be in 6 different formats (some with several variants):
    1. 1 byte: Opcode(u8)
    2. 2 bytes: Opcode(u8) + register(AmalReg)
    3. 3 bytes: Opcode(u8) + register(AmalReg) + register(AmalReg)
    4. 3 bytes:\
    4.1 Opcode(u8) + intermediate(u16)\
    4.2 Opcode(u8) + data(u16)\
    4.3 Opcode(u8) + label(i16)\
    4.4 Opcode(u8) + register(AmalReg) + num_args(u8)
    5. 4 bytes: Opcode(u8) + register(AmalReg) + register(AmalReg) + register(AmalReg)
    6. 4 bytes:\
    6.1 Opcode(u8) + register(AmalReg) + label(i16)\
    6.2 Opcode(u8) + register(AmalReg) + intermediate(u16)\
    6.3 Opcode(u8) + register(AmalReg) + data(u16)\
    6.4 Opcode(u8) + flags(u8) + num_local_var_reg(u16)\
    6.5 Opcode(u8) + index(u8) + index(u16)

    # Registers
    Registers have a range of 128. Parameters have the most significant bit set (REG_FLAG_PARAM) while local variables don't.
    Registers have the scope of functions and reset after instructions reach a new function (AMAL_OP_FUNC_START).

    If the import index for call and calle is 0, the function resides in the same file as the function call
    itself. This means that import index 1 is actually index 0 into the import list.

    @AmalReg is an alias for u8.
*/
/*
    Opcode of every bytecode instruction. The comment above each opcode shows the
    mnemonic with its operands, followed by what the instruction does.

    Important: The number of fields in this enum can't exceed 255.

    The values are written out so the number of each opcode can be read directly;
    they are the same values the enumerators get when numbered implicitly from 0.
*/
typedef enum {
    /* No operation (do nothing). This can be used for patching code */
    AMAL_OP_NOP         = 0,
    /* setz reg - Set the register value to 0 */
    AMAL_OP_SETZ        = 1,
    /* mov dst, src - Move the src register to the dst register */
    AMAL_OP_MOV         = 2,
    /* movi dst, src - Move the src intermediate to the dst register */
    AMAL_OP_MOVI        = 3,
    /* movd dst, src - Move the src data to the dst register */
    AMAL_OP_MOVD        = 4,

    /* add dst, reg1, reg2 - Add reg1 and reg2 and put the result in dst */
    AMAL_OP_ADD         = 5,
    /* sub dst, reg1, reg2 - Subtract reg2 from reg1 and put the result in dst */
    AMAL_OP_SUB         = 6,
    /* imul dst, reg1, reg2 - Signed multiplication */
    AMAL_OP_IMUL        = 7,
    /* mul dst, reg1, reg2 - Unsigned multiplication */
    AMAL_OP_MUL         = 8,
    /* idiv dst, reg1, reg2 - Signed division */
    AMAL_OP_IDIV        = 9,
    /* div dst, reg1, reg2 - Unsigned division */
    AMAL_OP_DIV         = 10,

    /*
        push reg - Push a register for CALL.
        Values are pushed right before a CALL, with no other instructions between.
    */
    AMAL_OP_PUSH        = 11,
    /*
        pushi int - Push an intermediate for CALL.
        Values are pushed right before a CALL, with no other instructions between.
    */
    AMAL_OP_PUSHI       = 12,
    /*
        pushd data - Push data for CALL.
        Values are pushed right before a CALL, with no other instructions between.
    */
    AMAL_OP_PUSHD       = 13,
    /* push_ret reg - Push a register as a return value of the next function call */
    AMAL_OP_PUSH_RET    = 14,
    /*
        call_start num_args - Start of a CALL with @num_args number of arguments.
        The arguments for the next CALL are pushed immediately after this, followed by a CALL.
        @num_args is u8.
    */
    AMAL_OP_CALL_START  = 15,
    /*
        call ii, fi - Call a function in an imported file (ii, import index) using a function index (fi).
        The number of arguments is the number of values pushed to the stack.
        ii is u8, fi is u16.
    */
    AMAL_OP_CALL        = 16,
    /*
        callr reg - Call a function using a register. Used for function pointers.
        The number of arguments is the number of values pushed to the stack.
    */
    AMAL_OP_CALLR       = 17,
    /*
        calle ii, efi - Call an extern function in an imported file (ii, import index) using an
        extern function index (efi).
        The number of arguments is the number of values pushed to the stack.
        ii is u8, efi is u16.
    */
    AMAL_OP_CALLE       = 18,

    /* eq dst, reg1, reg2 - Set dst to 1 if reg1 equals reg2, otherwise set it to 0 */
    AMAL_OP_EQ          = 19,
    /* neq dst, reg1, reg2 - Set dst to 1 if reg1 is not equal to reg2, otherwise set it to 0 */
    AMAL_OP_NEQ         = 20,
    /* ilt dst, reg1, reg2 - Set dst to 1 if reg1 is less than reg2 (signed comparison), otherwise set it to 0 */
    AMAL_OP_ILT         = 21,
    /* ile dst, reg1, reg2 - Set dst to 1 if reg1 is less or equal to reg2 (signed comparison), otherwise set it to 0 */
    AMAL_OP_ILE         = 22,
    /* igt dst, reg1, reg2 - Set dst to 1 if reg1 is greater than reg2 (signed comparison), otherwise set it to 0 */
    AMAL_OP_IGT         = 23,
    /* ige dst, reg1, reg2 - Set dst to 1 if reg1 is greater or equal to reg2 (signed comparison), otherwise set it to 0 */
    AMAL_OP_IGE         = 24,
    /* lt dst, reg1, reg2 - Set dst to 1 if reg1 is less than reg2 (unsigned comparison), otherwise set it to 0 */
    AMAL_OP_LT          = 25,
    /* le dst, reg1, reg2 - Set dst to 1 if reg1 is less or equal to reg2 (unsigned comparison), otherwise set it to 0 */
    AMAL_OP_LE          = 26,
    /* gt dst, reg1, reg2 - Set dst to 1 if reg1 is greater than reg2 (unsigned comparison), otherwise set it to 0 */
    AMAL_OP_GT          = 27,
    /* ge dst, reg1, reg2 - Set dst to 1 if reg1 is greater or equal to reg2 (unsigned comparison), otherwise set it to 0 */
    AMAL_OP_GE          = 28,

    /* and dst, reg1, reg2 - Perform bit and on reg1 and reg2, store the result in dst */
    AMAL_OP_BIT_AND     = 29,

    /* jz reg, label - Jump to label in the current function if reg is zero. label is u16 */
    AMAL_OP_JZ          = 30,
    /* jmp label - Unconditional jump to label in the current function. label is u16 */
    AMAL_OP_JMP         = 31,
    /* ret reg - Return from the function with reg as the result */
    AMAL_OP_RET         = 32,

    /*
        func_start flags, num_local_var_reg - Start of a function which has @num_local_var_reg
        local variable registers allocated and has the flag @flag.
        @flag is u8 and @num_local_var_reg is u16.
    */
    AMAL_OP_FUNC_START  = 33,
    /* func_end - End of a function. The implementation should do a ret here */
    AMAL_OP_FUNC_END    = 34,
    /*
        label - Label. This is the target of a jump instruction.
        Jump instructions only jump to labels in the same function scope.
    */
    AMAL_OP_LABEL       = 35
} AmalOpcode;

/*
    Magic number of the bytecode format and the number of bytes it occupies.
    The bytes are de c0 5e ba: dec05eba = ba5ec0de = basecode.
    The size does not count the terminating null byte of the string literal.
*/
#define AMAL_BYTECODE_MAGIC_NUMBER       "\xde\xc0\x5e\xba"
#define AMAL_BYTECODE_MAGIC_NUMBER_SIZE  4

/* Version of the bytecode format, split into major, minor and patch */
#define AMAL_BYTECODE_MAJOR_VERSION      1
#define AMAL_BYTECODE_MINOR_VERSION      0
#define AMAL_BYTECODE_PATCH_VERSION      0

/*
    Magic number for a bytecode section, as a u32.
    The original note on this value reads: "section\0" in ascii.
    NOTE(review): 0x004005e4 is not the ASCII encoding of that string - confirm what the value stands for.

    The whole replacement list is parenthesized so the cast can't combine with operators
    at the place the macro is used (for example `sizeof AMAL_BYTECODE_SECTION_MAGIC_NUMBER`).
*/
#define AMAL_BYTECODE_SECTION_MAGIC_NUMBER ((u32)0x004005e4)

/*
    Number of distinct register operands. A register operand (AmalReg) is a u8, where the
    most significant bit marks a parameter and the remaining 7 bits are the register value.
*/
#define AMAL_BYTECODE_NUM_REGISTERS 256

/*
    Function flags. Every flag except FUNC_FLAG_NONE is a single bit.
    NOTE(review): presumably these are the values of the u8 flags operand of
    AMAL_OP_FUNC_START - confirm against the bytecode generator.
*/
typedef enum {
    FUNC_FLAG_NONE     = 0x00,
    FUNC_FLAG_EXPORTED = 0x01,
    FUNC_FLAG_VARARGS  = 0x02
} amal_func_flag;

/*
    Flags stored in a register operand.
    REG_FLAG_PARAM is the most significant bit of an 8-bit value; it is the bit
    that AMAL_REG_VALUE masks away.
*/
typedef enum {
    REG_FLAG_NONE  = 0x00,
    REG_FLAG_PARAM = 0x80
} amal_reg_flag;

/* Type of an opcode inside an instruction; the instruction formats encode the opcode as u8 */
typedef u8 AmalOpcodeType;
/*
    Register operand of an instruction. The most significant bit is REG_FLAG_PARAM and
    the lower 7 bits are the register value (see AMAL_REG_VALUE).
*/
typedef u8 AmalReg;

/* Value of the register operand @reg with the most significant bit (the parameter flag) masked away */
#define AMAL_REG_VALUE(reg) ((reg) & 0x7f)

/*
    Header with the magic number and the version of the bytecode format.
    The struct is declared with 1-byte packing so no padding is added between the fields.
    NOTE(review): presumably the version fields hold AMAL_BYTECODE_MAJOR_VERSION,
    AMAL_BYTECODE_MINOR_VERSION and AMAL_BYTECODE_PATCH_VERSION - confirm against the code that writes the header.
*/
/* TODO: Make sure this pragma pack works on all platforms */
#pragma pack(push, 1)
typedef struct {
    u8 magic_number[AMAL_BYTECODE_MAGIC_NUMBER_SIZE]; /* @AMAL_BYTECODE_MAGIC_NUMBER */
    u8 major_version;
    u8 minor_version;
    u8 patch_version;
} BytecodeHeader;
#pragma pack(pop)

/*
    Header entry describing one function: where it is and the shape of its parameters and return types.
    The struct is declared with 1-byte packing, so the u32 fields that follow a u8 field are not aligned.
    NOTE(review): the meaning of the *_num_pointers and *_fixed_size fields is not visible in this file;
    presumably they are the number of pointer values and the total size of the non-pointer values - confirm.
*/
/* TODO: Make sure this pragma pack works on all platforms */
#pragma pack(push, 1)
typedef struct {
    u32 func_offset; /* NOTE(review): presumably the offset of the function's instructions - confirm what it is relative to */
    u8 num_params;
    u32 params_num_pointers;
    u32 params_fixed_size;

    u8 num_return_types;
    u32 return_types_num_pointers;
    u32 return_types_fixed_size;
} BytecodeHeaderFunction;
#pragma pack(pop)

/*
    Header entry describing one extern function: the shape of its parameters and return types,
    the length of its name and its flags.
    The struct is declared with 1-byte packing so no padding is added between the fields.
    NOTE(review): the meaning of the *_num_pointers and *_fixed_size fields is not visible in this file;
    presumably they are the number of pointer values and the total size of the non-pointer values - confirm.
*/
/* TODO: Make sure this pragma pack works on all platforms */
#pragma pack(push, 1)
typedef struct {
    u8 num_params;
    u8 num_return_types;
    u8 name_len; /* NOTE(review): presumably the name itself is stored outside this struct - confirm where */
    u8 flags;    /* NOTE(review): presumably amal_func_flag bits - confirm */

    u32 params_num_pointers;
    u32 params_fixed_size;

    u32 return_types_num_pointers;
    u32 return_types_fixed_size;
} BytecodeHeaderExternFunction;
#pragma pack(pop)

/*
    Header entry for one import: a function index and an extern function index.
    The struct is declared with 1-byte packing so no padding is added between the fields.

    parser_index is a second name for the function_index field, made with a macro.
    NOTE(review): the macro is not limited to this struct. Every later use of the identifier
    parser_index in a file that includes this header is replaced with function_index.
*/
/* TODO: Make sure this pragma pack works on all platforms */
#pragma pack(push, 1)
typedef struct {
    u32 function_index;
    #define parser_index function_index
    u32 extern_function_index;
} BytecodeHeaderImport;
#pragma pack(pop)

/*
    Bytecode kept in a single buffer (headers followed by instruction data), together with
    the positions inside that buffer where the imports, funcs and extern funcs start.
    NOTE(review): import_index is usize while funcs_index and extern_funcs_index are u32 -
    confirm whether the difference is intended.
*/
struct Bytecode {
    Buffer/*<headers + instruction data>*/ data;
    usize import_index;     /* Reference inside @data where imports start */
    u32 funcs_index;        /* Reference inside @data where funcs start */
    u32 extern_funcs_index; /* Reference inside @data where extern funcs start */
    u32 offset;             /* Offset that this bytecode starts from in the final program (all bytecodes combined) */
};

/*
    State passed to generate_bytecode_from_ir.
    @env is the jump buffer that generate_bytecode_from_ir jumps to on failure.
*/
typedef struct {
    jmp_buf env;        /* Target of the longjmp done on failure */
    Bytecode *bytecode;
    Parser *parser; /* borrowed */
} BytecodeCompilerContext;

/*
    Initialize the bytecode @self using @allocator.
    NOTE(review): the meaning of the returned int is not visible in this header;
    presumably 0 means success - confirm against the implementation.
*/
CHECK_RESULT int bytecode_init(Bytecode *self, ArenaAllocator *allocator);

/*
    NOTE(review): presumably appends a BytecodeHeader (magic number and version) to @program_data - confirm
    against the implementation. The meaning of the returned int is not visible in this header;
    presumably 0 means success.
*/
CHECK_RESULT int buffer_append_header(Buffer *program_data);

/*
    Does a longjmp to self->env on failure instead of returning an error code.
    Since the failure path is a longjmp, self->env has to be set with setjmp before this is called.
*/
void generate_bytecode_from_ir(BytecodeCompilerContext *self);

#endif