From b7f056a73ad4053eb2284c54873dfb3888dcb430 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Wed, 22 Jan 2020 19:14:06 +0100 Subject: Correctly parse and produce bytecode for example --- src/parser.c | 201 ++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 136 insertions(+), 65 deletions(-) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index 39fff0c..1dbfe48 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,8 +1,8 @@ #include "../include/parser.h" #include "../include/bytecode.h" -#include "../include/std/hash_map.h" #include #include +#include #define return_if_error(expr) \ { \ @@ -12,22 +12,56 @@ typedef struct { TslTokenizer tokenizer; - TslBytecodeWriter bytecode_writer; - TslHashMap variables; + TslBuffer /*TslBytecodeWriter*/ function_bytecode_list; + TslBuffer /*int*/ function_bytecode_list_index; } TslParser; +static int tsl_parser_get_current_function_index(TslParser *self) { + char *last_item = (char*)tsl_buffer_end(&self->function_bytecode_list_index) - sizeof(int); + return *(int*)last_item; +} + +#define get_function_bytecode(self) ((TslBytecodeWriter*)tsl_buffer_begin(&self->function_bytecode_list) + tsl_parser_get_current_function_index(self)) + static TslParseResult tsl_parser_parse_rhs(TslParser *self); static TslParseResult tsl_parser_parse_expressions(TslParser *self, TslToken end_token); -static void tsl_parser_init(TslParser *self, const char *code, size_t code_size) { +static int tsl_parser_init(TslParser *self, const char *code, size_t code_size) { + TslBytecodeWriter bytecode_writer; + int result = 1; + size_t bytecode_index = 0; tsl_tokenizer_init(&self->tokenizer, code, code_size); - tsl_bytecode_writer_init(&self->bytecode_writer); - tsl_hash_map_init(&self->variables); + tsl_bytecode_writer_init(&bytecode_writer); + tsl_buffer_init(&self->function_bytecode_list); + result = tsl_buffer_append(&self->function_bytecode_list, &bytecode_writer, sizeof(bytecode_writer)); + tsl_buffer_init(&self->function_bytecode_list_index); + result = tsl_buffer_append(&self->function_bytecode_list_index, &bytecode_index, sizeof(bytecode_index)); + return result; } static void tsl_parser_deinit(TslParser *self) { - tsl_bytecode_writer_deinit(&self->bytecode_writer); - tsl_hash_map_deinit(&self->variables); + TslBytecodeWriter *bytecode_writer = tsl_buffer_begin(&self->function_bytecode_list); + TslBytecodeWriter *bytecode_writer_end = tsl_buffer_end(&self->function_bytecode_list); + while(bytecode_writer != bytecode_writer_end) { + tsl_bytecode_writer_deinit(bytecode_writer); + ++bytecode_writer; + } + tsl_buffer_deinit(&self->function_bytecode_list); + tsl_buffer_deinit(&self->function_bytecode_list_index); +} + +static TslParseResult tsl_parser_push_function(TslParser *self) { + int function_index; + TslBytecodeWriter bytecode_writer; + tsl_bytecode_writer_init(&bytecode_writer); + /* TODO: Use bitshift ? */ + function_index = self->function_bytecode_list.size / sizeof(TslBytecodeWriter); + return_if_error(tsl_buffer_append(&self->function_bytecode_list, &bytecode_writer, sizeof(bytecode_writer))); + return tsl_buffer_append(&self->function_bytecode_list_index, &function_index, sizeof(function_index)); +} + +static void tsl_parser_pop_function(TslParser *self) { + tsl_buffer_pop(&self->function_bytecode_list_index, sizeof(int)); } #if 0 @@ -43,10 +77,11 @@ static uint64_t hash_string_view(const void *data, size_t size) { } #endif -static TslParseResult tsl_parser_parse_map(TslParser *self) { +static TslParseResult tsl_parser_parse_map(TslParser *self, int *num_items) { #define parse_map_element_separator \ return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_COLON)); \ return_if_error(tsl_parser_parse_rhs(self)); \ + ++*num_items; \ token = tsl_tokenizer_next(&self->tokenizer); \ if(token == TSL_TOKEN_COMMA) { \ continue; \ @@ -57,19 +92,24 @@ static TslParseResult tsl_parser_parse_map(TslParser *self) { return TSL_PARSE_RESULT_ERR; \ } + *num_items = 0; for(;;) { TslToken token = tsl_tokenizer_next(&self->tokenizer); if(token == TSL_TOKEN_NUM) { - printf("rhs num: %f\n", self->tokenizer.number_value); + ++*num_items; + return_if_error(tsl_bytecode_writer_add_ins2(get_function_bytecode(self), TSL_OPCODE_LOADN, self->tokenizer.number_value)); parse_map_element_separator } else if(token == TSL_TOKEN_BOOL) { - printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false"); + ++*num_items; + return_if_error(tsl_bytecode_writer_add_ins3(get_function_bytecode(self), TSL_OPCODE_LOADB, self->tokenizer.bool_value)); parse_map_element_separator } else if(token == TSL_TOKEN_NULL) { - printf("rhs null\n"); + ++*num_items; + return_if_error(tsl_bytecode_writer_add_ins5(get_function_bytecode(self), TSL_OPCODE_LOADNULL)); parse_map_element_separator } else if(token == TSL_TOKEN_STRING) { - printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data); + ++*num_items; + return_if_error(tsl_bytecode_writer_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADS, &self->tokenizer.string)); parse_map_element_separator } else if(token == TSL_TOKEN_RBRACE) { /* '}' after trailing ',' or an empty map */ @@ -78,12 +118,10 @@ static TslParseResult tsl_parser_parse_map(TslParser *self) { fprintf(stderr, "Error: Expected '}', got TODO\n"); return TSL_PARSE_RESULT_ERR; } - - token = tsl_tokenizer_next(&self->tokenizer); } } -static TslParseResult tsl_parser_parse_list(TslParser *self) { +static TslParseResult tsl_parser_parse_list(TslParser *self, int *num_items) { #define parse_list_element_separator \ token = tsl_tokenizer_next(&self->tokenizer); \ if(token == TSL_TOKEN_COMMA) { \ @@ -95,20 +133,25 @@ static TslParseResult tsl_parser_parse_list(TslParser *self) { return TSL_PARSE_RESULT_ERR; \ } + *num_items = 0; for(;;) { /* TODO: Use tsl_parser_parse_rhs instead */ TslToken token = tsl_tokenizer_next(&self->tokenizer); if(token == TSL_TOKEN_NUM) { - printf("rhs num: %f\n", self->tokenizer.number_value); + ++*num_items; + return_if_error(tsl_bytecode_writer_add_ins2(get_function_bytecode(self), TSL_OPCODE_LOADN, self->tokenizer.number_value)); parse_list_element_separator } else if(token == TSL_TOKEN_BOOL) { - printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false"); + ++*num_items; + return_if_error(tsl_bytecode_writer_add_ins3(get_function_bytecode(self), TSL_OPCODE_LOADB, self->tokenizer.bool_value)); parse_list_element_separator } else if(token == TSL_TOKEN_NULL) { - printf("rhs null\n"); + ++*num_items; + return_if_error(tsl_bytecode_writer_add_ins5(get_function_bytecode(self), TSL_OPCODE_LOADNULL)); parse_list_element_separator } else if(token == TSL_TOKEN_STRING) { - printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data); + ++*num_items; + return_if_error(tsl_bytecode_writer_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADS, &self->tokenizer.string)); parse_list_element_separator } else if(token == TSL_TOKEN_RBRACKET) { /* ']' after trailing ',' or an empty list */ @@ -156,11 +199,13 @@ static TslParseResult tsl_parser_parse_fn(TslParser *self) { static TslParseResult tsl_parser_parse_var_indexing(TslParser *self) { return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LBRACKET)); return_if_error(tsl_parser_parse_rhs(self)); - return tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_RBRACKET); + return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_RBRACKET)); + return tsl_bytecode_writer_add_ins5(get_function_bytecode(self), TSL_OPCODE_MINDEX); } /* FUNC_CALL = '(' (RHS ',')* ')' */ -static TslParseResult tsl_parser_parse_func_call(TslParser *self) { +static TslParseResult tsl_parser_parse_func_call(TslParser *self, int *num_args) { + *num_args = 0; return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN)); for(;;) { TslToken token = tsl_tokenizer_peek(&self->tokenizer); @@ -168,6 +213,7 @@ static TslParseResult tsl_parser_parse_func_call(TslParser *self) { tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_RPAREN */ return TSL_PARSE_RESULT_OK; } else { + ++*num_args; return_if_error(tsl_parser_parse_rhs(self)); token = tsl_tokenizer_next(&self->tokenizer); if(token == TSL_TOKEN_COMMA) { @@ -184,14 +230,16 @@ static TslParseResult tsl_parser_parse_func_call(TslParser *self) { /* TODO: Do not allow empty command */ /* TODO: Allow command inside another command */ -/* COMMAND = TODO */ -static TslParseResult tsl_parser_parse_command(TslParser *self) { +/* COMMAND = '(' TSL_COMMAND_TOKEN_ARG* ')' */ +static TslParseResult tsl_parser_parse_command(TslParser *self, int *num_args) { + *num_args = 0; return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN)); for(;;) { TslStringView command_arg; TslCommandToken command_token = tsl_tokenizer_next_command_arg(&self->tokenizer, &command_arg); if(command_token == TSL_COMMAND_TOKEN_ARG) { - printf("command arg: |%.*s|\n", (int)command_arg.size, command_arg.data); + ++*num_args; + return_if_error(tsl_bytecode_writer_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADCA, &command_arg)); } else if(command_token == TSL_COMMAND_TOKEN_END) { return TSL_PARSE_RESULT_OK; } else { @@ -201,6 +249,18 @@ static TslParseResult tsl_parser_parse_command(TslParser *self) { } } +static TslOpcode arithmetic_symbol_to_opcode(char arithmetic_symbol) { + switch(arithmetic_symbol) { + case '+': return TSL_OPCODE_ADD; + case '-': return TSL_OPCODE_SUB; + case '*': return TSL_OPCODE_MUL; + case '/': return TSL_OPCODE_DIV; + default: + fprintf(stderr, "Unexpected arithmetic symbol: %c\n", arithmetic_symbol); + abort(); + } +} + /* RHS_SUB = (VAR_INDEX|FUNC_CALL RHS_SUB?)|(TOKEN_ARITHMETIC RHS) */ static TslParseResult tsl_parser_parse_rhs_sub(TslParser *self) { TslToken token = tsl_tokenizer_peek(&self->tokenizer); @@ -208,13 +268,16 @@ static TslParseResult tsl_parser_parse_rhs_sub(TslParser *self) { return_if_error(tsl_parser_parse_var_indexing(self)); return tsl_parser_parse_rhs_sub(self); } else if(token == TSL_TOKEN_LPAREN) { - return_if_error(tsl_parser_parse_func_call(self)); + int num_args; + return_if_error(tsl_parser_parse_func_call(self, &num_args)); + return_if_error(tsl_bytecode_writer_add_ins1(get_function_bytecode(self), TSL_OPCODE_CALLF, num_args)); return tsl_parser_parse_rhs_sub(self); } else if(token == TSL_TOKEN_ARITHMETIC) { + TslOpcode arithmetic_opcode; tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_ARITHMETIC */ - printf("rhs sub arithmetic symbol: %c\n", self->tokenizer.arithmetic_symbol); + arithmetic_opcode = arithmetic_symbol_to_opcode(self->tokenizer.arithmetic_symbol); /* TODO: Handle arithmetic expression precedence */ - return tsl_parser_parse_rhs(self); + return tsl_parser_parse_rhs(self) && tsl_bytecode_writer_add_ins5(get_function_bytecode(self), arithmetic_opcode); } /* No sub expression found, possibly a new expression after this (a new expression on a new line), let that @@ -242,41 +305,48 @@ static TslParseResult tsl_parser_parse_rhs_sub_arithmetic(TslParser *self) { /* RHS = (IDENTIFIER|NUM|BOOL|NULL|STRING|MAP|LIST|('fn' FN)|('$' COMMAND)) RHS_SUB? */ TslParseResult tsl_parser_parse_rhs(TslParser *self) { TslToken token = tsl_tokenizer_next(&self->tokenizer); - if(token == TSL_TOKEN_IDENTIFIER) { - TslStringView var_name = self->tokenizer.identifier; - assert(0 && "TODO: Implement"); - printf("rhs var: %.*s\n", (int)var_name.size, var_name.data); - return tsl_parser_parse_rhs_sub(self); - } else if(token == TSL_TOKEN_NUM) { - return_if_error(tsl_bytecode_writer_loadn(&self->bytecode_writer, self->tokenizer.number_value)); - printf("rhs num: %f\n", self->tokenizer.number_value); - return tsl_parser_parse_rhs_sub_arithmetic(self); - } else if(token == TSL_TOKEN_BOOL) { - printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false"); - return tsl_bytecode_writer_loadb(&self->bytecode_writer, self->tokenizer.bool_value); - } else if(token == TSL_TOKEN_NULL) { - assert(0 && "TODO: Implement"); - printf("rhs null\n"); - } else if(token == TSL_TOKEN_STRING) { - assert(0 && "TODO: Implement"); - printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data); - } else if(token == TSL_TOKEN_LBRACE) { - assert(0 && "TODO: Implement"); - return tsl_parser_parse_map(self); - } else if(token == TSL_TOKEN_LBRACKET) { - assert(0 && "TODO: Implement"); - return tsl_parser_parse_list(self); - } else if(token == TSL_TOKEN_FN) { - assert(0 && "TODO: Implement"); - return tsl_parser_parse_fn(self); - } else if(token == TSL_TOKEN_DOLLAR_SIGN) { - assert(0 && "TODO: Implement"); - return tsl_parser_parse_command(self); - } else { - fprintf(stderr, "Error: Expected variable, number, bool, null, map, list, function or command, got TODO (%d) (line: %d)\n", token, tsl_tokenizer_get_line_by_index(&self->tokenizer, self->tokenizer.prev_code_index)); - return TSL_PARSE_RESULT_ERR; + switch(token) { + case TSL_TOKEN_IDENTIFIER: { + TslStringView var_name = self->tokenizer.identifier; + return_if_error(tsl_bytecode_writer_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADV, &var_name)); + return tsl_parser_parse_rhs_sub(self); + } + case TSL_TOKEN_NUM: + return_if_error(tsl_bytecode_writer_add_ins2(get_function_bytecode(self), TSL_OPCODE_LOADN, self->tokenizer.number_value)); + return tsl_parser_parse_rhs_sub_arithmetic(self); + case TSL_TOKEN_BOOL: + return tsl_bytecode_writer_add_ins3(get_function_bytecode(self), TSL_OPCODE_LOADB, self->tokenizer.bool_value); + case TSL_TOKEN_NULL: + return tsl_bytecode_writer_add_ins5(get_function_bytecode(self), TSL_OPCODE_LOADNULL); + case TSL_TOKEN_STRING: + return tsl_bytecode_writer_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADS, &self->tokenizer.string); + case TSL_TOKEN_LBRACE: { + int num_items; + return tsl_parser_parse_map(self, &num_items) && + tsl_bytecode_writer_add_ins1(get_function_bytecode(self), TSL_OPCODE_MAP, num_items); + } + case TSL_TOKEN_LBRACKET: { + int num_items; + return tsl_parser_parse_list(self, &num_items) && + tsl_bytecode_writer_add_ins1(get_function_bytecode(self), TSL_OPCODE_LIST, num_items); + } + case TSL_TOKEN_FN: { + int function_index; + return_if_error(tsl_parser_push_function(self)); + return_if_error(tsl_parser_parse_fn(self)); + function_index = tsl_parser_get_current_function_index(self); + tsl_parser_pop_function(self); + return tsl_bytecode_writer_add_ins1(get_function_bytecode(self), TSL_OPCODE_LOADF, function_index); + } + case TSL_TOKEN_DOLLAR_SIGN: { + int num_args; + return tsl_parser_parse_command(self, &num_args) && + tsl_bytecode_writer_add_ins1(get_function_bytecode(self), TSL_OPCODE_CALLC, num_args); + } + default: + fprintf(stderr, "Error: Expected variable, number, bool, null, map, list, function or command, got TODO (%d) (line: %d)\n", token, tsl_tokenizer_get_line_by_index(&self->tokenizer, self->tokenizer.prev_code_index)); + return TSL_PARSE_RESULT_ERR; } - return TSL_PARSE_RESULT_OK; } /* @@ -288,12 +358,13 @@ TslParseResult tsl_parser_parse_expressions(TslParser *self, TslToken end_token) TslToken token = tsl_tokenizer_next(&self->tokenizer); if(token == TSL_TOKEN_IDENTIFIER) { TslStringView identifier = self->tokenizer.identifier; - printf("identifier: %.*s\n", (int)identifier.size, identifier.data); if(tsl_tokenizer_peek(&self->tokenizer) == TSL_TOKEN_EQUAL) { tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_EQUAL */ return_if_error(tsl_parser_parse_rhs(self)); - return_if_error(tsl_bytecode_writer_setv(&self->bytecode_writer, &identifier)); + return_if_error(tsl_bytecode_writer_add_ins4(get_function_bytecode(self), TSL_OPCODE_SETV, &identifier)); + /* TODO: Assert if there are load bytecode opcodes not followed by set */ } else { + return_if_error(tsl_bytecode_writer_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADV, &identifier)); return_if_error(tsl_parser_parse_rhs_sub(self)); } } else if(token == end_token) { @@ -309,7 +380,7 @@ TslParseResult tsl_parser_parse_expressions(TslParser *self, TslToken end_token) TslParseResult tsl_parse(const char *code, size_t code_size) { TslParseResult result; TslParser parser; - tsl_parser_init(&parser, code, code_size); + return_if_error(tsl_parser_init(&parser, code, code_size)); result = tsl_parser_parse_expressions(&parser, TSL_TOKEN_END_OF_FILE); tsl_parser_deinit(&parser); return result; -- cgit v1.2.3