From 840a3c6c5aa2400ce80d8ec7bb8b1a8d6e25770b Mon Sep 17 00:00:00 2001 From: dec05eba Date: Wed, 22 Jan 2020 06:14:42 +0100 Subject: Simplify bytecode --- IMPLEMENTATION.md | 2 ++ example.tsl | 29 +++++++++++++++++++++++++++++ include/bytecode.h | 26 +++++++++++++------------- src/bytecode.c | 32 ++++++++++++++------------------ src/parser.c | 40 +++++++++++----------------------------- src/tokenizer.c | 14 ++++++++++++++ 6 files changed, 83 insertions(+), 60 deletions(-) create mode 100644 IMPLEMENTATION.md diff --git a/IMPLEMENTATION.md b/IMPLEMENTATION.md new file mode 100644 index 0000000..4761c1b --- /dev/null +++ b/IMPLEMENTATION.md @@ -0,0 +1,2 @@ +The bytecode format is inspired by lua, where instead of using registers, data is pushed/poped to/from a stack. +This reduces the size of instructions and make the code cleaner and easier to use. \ No newline at end of file diff --git a/example.tsl b/example.tsl index 629cf46..6f165ed 100644 --- a/example.tsl +++ b/example.tsl @@ -1,10 +1,39 @@ +# loadn 1 +# setv "value1" value1 = 1 + +# loadn 1 +# setv "value1" value1 = 2 + +# loadb true +# setv "value2" value2 = true + +# loadnull +# setv "value3" value3 = null + +# loads "hello world" +# setv "value4" value4 = "hello world" + +# loads "hello" +# loads "world" +# loadn 5 +# list "value5", 3 +# setv "value5" value5 = ["hello", "world", 5] + +# loads "hello" +# loads "world" +# loads "value" +# loadn 23 +# map 4 +# setv "value6" value6 = {"hello": "world", "value": 23} + + value7 = fn () {} value8 = fn (value) {} value9 = { diff --git a/include/bytecode.h b/include/bytecode.h index 4a9d49c..6ce1dc9 100644 --- a/include/bytecode.h +++ b/include/bytecode.h @@ -2,42 +2,42 @@ #define TSL_BYTECODE_H #include "std/buffer.h" +#include "std/string_view.h" +#include "value.h" #include typedef uint8_t TslOpcodeType; -/* Registers are positive if they refer to local variables and negative when they refer to parameters */ -typedef int16_t TslRegister; -typedef uint16_t TslValueIndex; typedef enum { TSL_OPCODE_LOAD_NUMBER, - TSL_OPCODE_MOV_REG + TSL_OPCODE_LOAD_BOOL, + TSL_OPCODE_SETV } TslOpcode; typedef struct { TslBuffer buffer; - TslRegister register_counter; } TslBytecodeWriter; typedef struct { - TslRegister dst_reg; double number; TslOpcodeType opcode; } TslInstructionType1; typedef struct { - TslRegister dst_reg; - TslRegister src_reg; + TslBool value; TslOpcodeType opcode; } TslInstructionType2; +typedef struct { + TslStringView key; + TslOpcodeType opcode; +} TslInstructionType3; + void tsl_bytecode_writer_init(TslBytecodeWriter *self); void tsl_bytecode_writer_deinit(TslBytecodeWriter *self); -void tsl_bytecode_writer_reset_register_counter(TslBytecodeWriter *self); -/* Returns -1 on error (too many registers used (more than 2^15)) */ -TslRegister tsl_bytecode_writer_get_unique_register(TslBytecodeWriter *self); -int tsl_bytecode_writer_load_number(TslBytecodeWriter *self, TslRegister dst, double number); -int tsl_bytecode_writer_mov_reg(TslBytecodeWriter *self, TslRegister dst, TslRegister src); +int tsl_bytecode_writer_loadn(TslBytecodeWriter *self, double number); +int tsl_bytecode_writer_loadb(TslBytecodeWriter *self, TslBool value); +int tsl_bytecode_writer_setv(TslBytecodeWriter *self, TslStringView *key); #endif /* TSL_BYTECODE_H */ diff --git a/src/bytecode.c b/src/bytecode.c index 9406852..d15df9e 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -4,36 +4,32 @@ void tsl_bytecode_writer_init(TslBytecodeWriter *self) { tsl_buffer_init(&self->buffer); - self->register_counter = 0; } void tsl_bytecode_writer_deinit(TslBytecodeWriter *self) { tsl_buffer_deinit(&self->buffer); } -void tsl_bytecode_writer_reset_register_counter(TslBytecodeWriter *self) { - self->register_counter = 0; -} - -TslRegister tsl_bytecode_writer_get_unique_register(TslBytecodeWriter *self) { - if(self->register_counter < INT16_MAX) - return self->register_counter++; - fprintf(stderr, "Error: Too many variables in the same scope\n"); - return -1; -} - -int tsl_bytecode_writer_load_number(TslBytecodeWriter *self, TslRegister dst, double number) { +int tsl_bytecode_writer_loadn(TslBytecodeWriter *self, double number) { TslInstructionType1 instruction; instruction.opcode = TSL_OPCODE_LOAD_NUMBER; - instruction.dst_reg = dst; instruction.number = number; + fprintf(stderr, "loadn %f\n", number); return tsl_buffer_append(&self->buffer, &instruction, sizeof(instruction)); } -int tsl_bytecode_writer_mov_reg(TslBytecodeWriter *self, TslRegister dst, TslRegister src) { +int tsl_bytecode_writer_loadb(TslBytecodeWriter *self, TslBool value) { TslInstructionType2 instruction; - instruction.opcode = TSL_OPCODE_MOV_REG; - instruction.dst_reg = dst; - instruction.src_reg = src; + instruction.opcode = TSL_OPCODE_LOAD_BOOL; + instruction.value = value; + fprintf(stderr, "loadb %s\n", value ? "true" : "false"); + return tsl_buffer_append(&self->buffer, &instruction, sizeof(instruction)); +} + +int tsl_bytecode_writer_setv(TslBytecodeWriter *self, TslStringView *key) { + TslInstructionType3 instruction; + instruction.opcode = TSL_OPCODE_SETV; + instruction.key = *key; + fprintf(stderr, "setv \"%.*s\"\n", (int)key->size, key->data); return tsl_buffer_append(&self->buffer, &instruction, sizeof(instruction)); } diff --git a/src/parser.c b/src/parser.c index bad630e..39fff0c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,7 +16,7 @@ typedef struct { TslHashMap variables; } TslParser; -static TslParseResult tsl_parser_parse_rhs(TslParser *self, TslRegister *reg_result); +static TslParseResult tsl_parser_parse_rhs(TslParser *self); static TslParseResult tsl_parser_parse_expressions(TslParser *self, TslToken end_token); static void tsl_parser_init(TslParser *self, const char *code, size_t code_size) { @@ -44,10 +44,9 @@ static uint64_t hash_string_view(const void *data, size_t size) { #endif static TslParseResult tsl_parser_parse_map(TslParser *self) { - TslRegister src_reg; #define parse_map_element_separator \ return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_COLON)); \ - return_if_error(tsl_parser_parse_rhs(self, &src_reg)); \ + return_if_error(tsl_parser_parse_rhs(self)); \ token = tsl_tokenizer_next(&self->tokenizer); \ if(token == TSL_TOKEN_COMMA) { \ continue; \ @@ -155,9 +154,8 @@ static TslParseResult tsl_parser_parse_fn(TslParser *self) { /* VAR_INDEX = '[' RHS ']' */ static TslParseResult tsl_parser_parse_var_indexing(TslParser *self) { - TslRegister src_reg; return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LBRACKET)); - return_if_error(tsl_parser_parse_rhs(self, &src_reg)); + return_if_error(tsl_parser_parse_rhs(self)); return tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_RBRACKET); } @@ -170,8 +168,7 @@ static TslParseResult tsl_parser_parse_func_call(TslParser *self) { tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_RPAREN */ return TSL_PARSE_RESULT_OK; } else { - TslRegister src_reg; - return_if_error(tsl_parser_parse_rhs(self, &src_reg)); + return_if_error(tsl_parser_parse_rhs(self)); token = tsl_tokenizer_next(&self->tokenizer); if(token == TSL_TOKEN_COMMA) { continue; @@ -214,11 +211,10 @@ static TslParseResult tsl_parser_parse_rhs_sub(TslParser *self) { return_if_error(tsl_parser_parse_func_call(self)); return tsl_parser_parse_rhs_sub(self); } else if(token == TSL_TOKEN_ARITHMETIC) { - TslRegister src_reg; tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_ARITHMETIC */ printf("rhs sub arithmetic symbol: %c\n", self->tokenizer.arithmetic_symbol); /* TODO: Handle arithmetic expression precedence */ - return tsl_parser_parse_rhs(self, &src_reg); + return tsl_parser_parse_rhs(self); } /* No sub expression found, possibly a new expression after this (a new expression on a new line), let that @@ -231,11 +227,10 @@ static TslParseResult tsl_parser_parse_rhs_sub(TslParser *self) { static TslParseResult tsl_parser_parse_rhs_sub_arithmetic(TslParser *self) { TslToken token = tsl_tokenizer_peek(&self->tokenizer); if(token == TSL_TOKEN_ARITHMETIC) { - TslRegister src_reg; tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_ARITHMETIC */ printf("rhs sub arithmetic symbol: %c\n", self->tokenizer.arithmetic_symbol); /* TODO: Handle arithmetic expression precedence */ - return tsl_parser_parse_rhs(self, &src_reg); + return tsl_parser_parse_rhs(self); } /* No sub expression found, possibly a new expression after this (a new expression on a new line), let that @@ -245,7 +240,7 @@ static TslParseResult tsl_parser_parse_rhs_sub_arithmetic(TslParser *self) { } /* RHS = (IDENTIFIER|NUM|BOOL|NULL|STRING|MAP|LIST|('fn' FN)|('$' COMMAND)) RHS_SUB? */ -TslParseResult tsl_parser_parse_rhs(TslParser *self, TslRegister *reg_result) { +TslParseResult tsl_parser_parse_rhs(TslParser *self) { TslToken token = tsl_tokenizer_next(&self->tokenizer); if(token == TSL_TOKEN_IDENTIFIER) { TslStringView var_name = self->tokenizer.identifier; @@ -253,18 +248,12 @@ TslParseResult tsl_parser_parse_rhs(TslParser *self, TslRegister *reg_result) { printf("rhs var: %.*s\n", (int)var_name.size, var_name.data); return tsl_parser_parse_rhs_sub(self); } else if(token == TSL_TOKEN_NUM) { - *reg_result = tsl_bytecode_writer_get_unique_register(&self->bytecode_writer); - if(*reg_result == -1) { - fprintf(stderr, "Error: Too many registers used\n"); - return TSL_PARSE_RESULT_ERR; - } - - return_if_error(tsl_bytecode_writer_load_number(&self->bytecode_writer, *reg_result, self->tokenizer.number_value)); + return_if_error(tsl_bytecode_writer_loadn(&self->bytecode_writer, self->tokenizer.number_value)); printf("rhs num: %f\n", self->tokenizer.number_value); return tsl_parser_parse_rhs_sub_arithmetic(self); } else if(token == TSL_TOKEN_BOOL) { - assert(0 && "TODO: Implement"); printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false"); + return tsl_bytecode_writer_loadb(&self->bytecode_writer, self->tokenizer.bool_value); } else if(token == TSL_TOKEN_NULL) { assert(0 && "TODO: Implement"); printf("rhs null\n"); @@ -301,16 +290,9 @@ TslParseResult tsl_parser_parse_expressions(TslParser *self, TslToken end_token) TslStringView identifier = self->tokenizer.identifier; printf("identifier: %.*s\n", (int)identifier.size, identifier.data); if(tsl_tokenizer_peek(&self->tokenizer) == TSL_TOKEN_EQUAL) { - TslRegister dst_reg; - TslRegister src_reg; - tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_EQUAL */ - return_if_error(tsl_parser_parse_rhs(self, &src_reg)); - - dst_reg = tsl_bytecode_writer_get_unique_register(&self->bytecode_writer); - if(dst_reg < 0) - return TSL_PARSE_RESULT_ERR; - return_if_error(tsl_bytecode_writer_mov_reg(&self->bytecode_writer, dst_reg, src_reg)); + return_if_error(tsl_parser_parse_rhs(self)); + return_if_error(tsl_bytecode_writer_setv(&self->bytecode_writer, &identifier)); } else { return_if_error(tsl_parser_parse_rhs_sub(self)); } diff --git a/src/tokenizer.c b/src/tokenizer.c index 889152b..02592a9 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -224,6 +224,20 @@ static TslToken tsl_tokenizer_next_internal(TslTokenizer *self) { self->arithmetic_symbol = c; ++self->code_index; return TSL_TOKEN_ARITHMETIC; + } else if(c == '#') { + /* Comment */ + ++self->code_index; + for(;;) { + c = tsl_tokenizer_get_char(self); + if(c == '\n') { + ++self->code_index; + break; + } else if(c == '\0') { + break; + } + ++self->code_index; + } + return tsl_tokenizer_next_internal(self); } else if(c == '\0') { return TSL_TOKEN_END_OF_FILE; } else { -- cgit v1.2.3