#include "../include/parser.h"
#include "../include/bytecode.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/*
    Return TSL_PARSE_RESULT_ERR from the enclosing function when |expr| evaluates to zero.
    Both TslParseResult values and the int results of the buffer/bytecode helpers are tested
    this way, so this file relies on failure being represented by zero.
    Wrapped in do/while(0) so that the macro behaves like a single statement.
*/
#define return_if_error(expr) \
    do { \
        if(!(expr)) \
            return TSL_PARSE_RESULT_ERR; \
    } while(0)

typedef struct {
    TslTokenizer tokenizer;
    /* One bytecode writer per function; the first item is created in tsl_parser_init */
    TslBuffer /*TslBytecode*/ function_bytecode_list;
    /* Stack of indices into |function_bytecode_list|; the last item is the function currently being written to */
    TslBuffer /*int*/ function_bytecode_list_index;
} TslParser;

/* Returns the top of the function index stack. The stack must not be empty */
static int tsl_parser_get_current_function_index(TslParser *self) {
    char *last_item = (char*)tsl_buffer_end(&self->function_bytecode_list_index) - sizeof(int);
    return *(int*)last_item;
}

/*
    The bytecode writer of the function that is currently being parsed.
    The pointer is recomputed on every use instead of being cached, since
    |function_bytecode_list| is appended to while parsing nested functions.
*/
#define get_function_bytecode(self) \
    ((TslBytecode*)tsl_buffer_begin(&(self)->function_bytecode_list) + tsl_parser_get_current_function_index(self))

static TslParseResult tsl_parser_parse_rhs(TslParser *self);
static TslParseResult tsl_parser_parse_expressions(TslParser *self, TslToken end_token);

/*
    Initializes the tokenizer and both buffers and pushes the bytecode writer with index 0.
    Returns 1 on success and 0 if one of the buffer appends fails.
    The buffers are initialized even on failure, so tsl_parser_deinit can be called either way.
*/
static int tsl_parser_init(TslParser *self, char *code, size_t code_size) {
    TslBytecode bytecode_writer;
    /* Has to be an int: the index stack is read and popped in units of sizeof(int) */
    int bytecode_index = 0;

    tsl_tokenizer_init(&self->tokenizer, code, code_size);
    tsl_bytecode_init(&bytecode_writer);
    tsl_buffer_init(&self->function_bytecode_list);
    tsl_buffer_init(&self->function_bytecode_list_index);

    if(!tsl_buffer_append(&self->function_bytecode_list, &bytecode_writer, sizeof(bytecode_writer))) {
        /* The writer never made it into the list, so tsl_parser_deinit would not reach it */
        tsl_bytecode_deinit(&bytecode_writer);
        return 0;
    }

    if(!tsl_buffer_append(&self->function_bytecode_list_index, &bytecode_index, sizeof(bytecode_index))) {
        return 0;
    }

    return 1;
}

/* Deinitializes every bytecode writer still stored in the parser, then both buffers */
static void tsl_parser_deinit(TslParser *self) {
    TslBytecode *bytecode_writer = tsl_buffer_begin(&self->function_bytecode_list);
    TslBytecode *bytecode_writer_end = tsl_buffer_end(&self->function_bytecode_list);
    for(; bytecode_writer != bytecode_writer_end; ++bytecode_writer) {
        tsl_bytecode_deinit(bytecode_writer);
    }

    tsl_buffer_deinit(&self->function_bytecode_list);
    tsl_buffer_deinit(&self->function_bytecode_list_index);
}

/*
    Appends a new, empty bytecode writer and makes it the current function,
    so that instructions added after this call end up in the new function.
*/
static TslParseResult tsl_parser_push_function(TslParser *self) {
    int function_index;
    TslBytecode bytecode_writer;
    tsl_bytecode_init(&bytecode_writer);

    /* TODO: Use bitshift ? */
    function_index = (int)(self->function_bytecode_list.size / sizeof(TslBytecode));
    if(!tsl_buffer_append(&self->function_bytecode_list, &bytecode_writer, sizeof(bytecode_writer))) {
        /* The writer never made it into the list, so tsl_parser_deinit would not reach it */
        tsl_bytecode_deinit(&bytecode_writer);
        return TSL_PARSE_RESULT_ERR;
    }

    return_if_error(tsl_buffer_append(&self->function_bytecode_list_index, &function_index, sizeof(function_index)));
    return TSL_PARSE_RESULT_OK;
}

/*
    Makes the enclosing function the current function again.
    The bytecode of the popped function stays in |function_bytecode_list|.
*/
static void tsl_parser_pop_function(TslParser *self) {
    tsl_buffer_pop(&self->function_bytecode_list_index, sizeof(int));
}

/*
    Parses the items of a map. The opening '{' has already been consumed by the caller.
    A trailing ',' before the closing '}' is allowed.
    |num_items| is set to the number of values loaded. Keys and values are counted
    separately, so a map with two entries gives 4.
*/
static TslParseResult tsl_parser_parse_map(TslParser *self, int *num_items) {
    *num_items = 0;
    for(;;) {
        TslToken token = tsl_tokenizer_next(&self->tokenizer);
        switch(token) {
            case TSL_TOKEN_STRING:
                return_if_error(tsl_bytecode_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADS, &self->tokenizer.string));
                break;
            case TSL_TOKEN_IDENTIFIER:
                /* Use the identifier as a key for the map. This is not a variable, but a key (string) without spaces */
                /* This allows syntax like: variable = { key: "value" } */
                return_if_error(tsl_bytecode_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADS, &self->tokenizer.identifier));
                break;
            case TSL_TOKEN_NUM:
                return_if_error(tsl_bytecode_add_ins2(get_function_bytecode(self), TSL_OPCODE_LOADN, self->tokenizer.number_value));
                break;
            case TSL_TOKEN_BOOL:
                return_if_error(tsl_bytecode_add_ins3(get_function_bytecode(self), TSL_OPCODE_LOADB, self->tokenizer.bool_value));
                break;
            case TSL_TOKEN_NULL:
                return_if_error(tsl_bytecode_add_ins5(get_function_bytecode(self), TSL_OPCODE_LOADNULL));
                break;
            case TSL_TOKEN_RBRACE:
                /* '}' after trailing ',' or an empty map */
                return TSL_PARSE_RESULT_OK;
            default:
                fprintf(stderr, "Error: Expected '}', got TODO\n");
                return TSL_PARSE_RESULT_ERR;
        }
        /* The key */
        ++*num_items;

        return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_COLON));
        return_if_error(tsl_parser_parse_rhs(self));
        /* The value */
        ++*num_items;

        token = tsl_tokenizer_next(&self->tokenizer);
        if(token == TSL_TOKEN_RBRACE) {
            return TSL_PARSE_RESULT_OK;
        }
        if(token != TSL_TOKEN_COMMA) {
            fprintf(stderr, "Error: Expected ',' or '}', got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }
    }
}

/*
    Parses the items of a list. The opening '[' has already been consumed by the caller.
    A trailing ',' before the closing ']' is allowed.
    |num_items| is set to the number of items loaded.
*/
static TslParseResult tsl_parser_parse_list(TslParser *self, int *num_items) {
    *num_items = 0;
    for(;;) {
        /* TODO: Use tsl_parser_parse_rhs instead */
        TslToken token = tsl_tokenizer_next(&self->tokenizer);
        switch(token) {
            case TSL_TOKEN_IDENTIFIER:
                /* Unlike in a map, an identifier in a list is a variable */
                return_if_error(tsl_bytecode_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADV, &self->tokenizer.identifier));
                break;
            case TSL_TOKEN_NUM:
                return_if_error(tsl_bytecode_add_ins2(get_function_bytecode(self), TSL_OPCODE_LOADN, self->tokenizer.number_value));
                break;
            case TSL_TOKEN_BOOL:
                return_if_error(tsl_bytecode_add_ins3(get_function_bytecode(self), TSL_OPCODE_LOADB, self->tokenizer.bool_value));
                break;
            case TSL_TOKEN_NULL:
                return_if_error(tsl_bytecode_add_ins5(get_function_bytecode(self), TSL_OPCODE_LOADNULL));
                break;
            case TSL_TOKEN_STRING:
                return_if_error(tsl_bytecode_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADS, &self->tokenizer.string));
                break;
            case TSL_TOKEN_RBRACKET:
                /* ']' after trailing ',' or an empty list */
                return TSL_PARSE_RESULT_OK;
            default:
                fprintf(stderr, "Error: Expected ']', got TODO\n");
                return TSL_PARSE_RESULT_ERR;
        }
        ++*num_items;

        token = tsl_tokenizer_next(&self->tokenizer);
        if(token == TSL_TOKEN_RBRACKET) {
            return TSL_PARSE_RESULT_OK;
        }
        if(token != TSL_TOKEN_COMMA) {
            fprintf(stderr, "Error: Expected ',' or ']', got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }
    }
}

/* FN_BODY = '{' EXPRS '}' */
static TslParseResult tsl_parser_parse_fn_body(TslParser *self) {
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LBRACE));
    return tsl_parser_parse_expressions(self, TSL_TOKEN_RBRACE);
}

/*
    FN = '(' (IDENTIFIER ',')* ')' FN_BODY
    The parameter names are only printed for now, no bytecode is generated for them.
*/
static TslParseResult tsl_parser_parse_fn(TslParser *self) {
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN));
    for(;;) {
        TslStringView param_name;
        TslToken token = tsl_tokenizer_next(&self->tokenizer);
        if(token == TSL_TOKEN_RPAREN) {
            break;
        }
        if(token != TSL_TOKEN_IDENTIFIER) {
            fprintf(stderr, "Error: Expected parameter name or ')', got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }

        param_name = self->tokenizer.identifier;
        printf("function parameter: |%.*s|\n", (int)param_name.size, param_name.data);

        token = tsl_tokenizer_next(&self->tokenizer);
        if(token == TSL_TOKEN_RPAREN) {
            break;
        }
        if(token != TSL_TOKEN_COMMA) {
            fprintf(stderr, "Error: Expected ',' or ')', got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }
    }
    return tsl_parser_parse_fn_body(self);
}

/* VAR_INDEX = '[' RHS ']' */
static TslParseResult tsl_parser_parse_var_indexing(TslParser *self) {
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LBRACKET));
    return_if_error(tsl_parser_parse_rhs(self));
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_RBRACKET));
    return_if_error(tsl_bytecode_add_ins5(get_function_bytecode(self), TSL_OPCODE_INDEX));
    return TSL_PARSE_RESULT_OK;
}

/*
    FUNC_CALL = '(' (RHS ',')* ')'
    |num_args| is set to the number of arguments parsed.
*/
static TslParseResult tsl_parser_parse_func_call(TslParser *self, int *num_args) {
    *num_args = 0;
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN));
    for(;;) {
        TslToken token = tsl_tokenizer_peek(&self->tokenizer);
        if(token == TSL_TOKEN_RPAREN) {
            /* ')' after trailing ',' or an empty argument list */
            tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_RPAREN */
            return TSL_PARSE_RESULT_OK;
        }

        ++*num_args;
        return_if_error(tsl_parser_parse_rhs(self));

        token = tsl_tokenizer_next(&self->tokenizer);
        if(token == TSL_TOKEN_RPAREN) {
            return TSL_PARSE_RESULT_OK;
        }
        if(token != TSL_TOKEN_COMMA) {
            fprintf(stderr, "Error: Expected ',' or ')', got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }
    }
}

/* TODO: Allow command inside another command */
/*
    COMMAND = '(' TSL_COMMAND_TOKEN_ARG* ')'
    An argument that starts with '$' is loaded as a variable (without the '$'),
    every other argument is loaded as a string.
    |num_args| is set to the number of arguments parsed.
*/
static TslParseResult tsl_parser_parse_command(TslParser *self, int *num_args) {
    *num_args = 0;
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN));
    for(;;) {
        TslStringView command_arg;
        TslCommandToken command_token = tsl_tokenizer_next_command_arg(&self->tokenizer, &command_arg);
        if(command_token == TSL_COMMAND_TOKEN_END) {
            return TSL_PARSE_RESULT_OK;
        }
        if(command_token != TSL_COMMAND_TOKEN_ARG) {
            fprintf(stderr, "Error: Expected command argument or ')', got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }

        ++*num_args;
        /* The size is checked first so that the first byte of an empty argument is never read */
        if(command_arg.size > 0 && command_arg.data[0] == '$') {
            command_arg.data += 1;
            command_arg.size -= 1;
            if(command_arg.size == 0) {
                fprintf(stderr, "Error: Expected variable name after '$'\n");
                return TSL_PARSE_RESULT_ERR;
            }
            return_if_error(tsl_bytecode_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADV, &command_arg));
        } else {
            return_if_error(tsl_bytecode_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADS, &command_arg));
        }
    }
}

/* Aborts the program if |arithmetic_symbol| is not one of '+', '-', '*' or '/' */
static TslOpcode arithmetic_symbol_to_opcode(char arithmetic_symbol) {
    switch(arithmetic_symbol) {
        case '+': return TSL_OPCODE_ADD;
        case '-': return TSL_OPCODE_SUB;
        case '*': return TSL_OPCODE_MUL;
        case '/': return TSL_OPCODE_DIV;
        default:
            fprintf(stderr, "Unexpected arithmetic symbol: %c\n", arithmetic_symbol);
            abort();
    }
}

/* RHS_SUB = (VAR_INDEX|FUNC_CALL RHS_SUB?)|(TOKEN_ARITHMETIC RHS) */
static TslParseResult tsl_parser_parse_rhs_sub(TslParser *self) {
    TslToken token = tsl_tokenizer_peek(&self->tokenizer);
    if(token == TSL_TOKEN_LBRACKET) {
        return_if_error(tsl_parser_parse_var_indexing(self));
        return tsl_parser_parse_rhs_sub(self);
    } else if(token == TSL_TOKEN_LPAREN) {
        int num_args;
        return_if_error(tsl_parser_parse_func_call(self, &num_args));
        return_if_error(tsl_bytecode_add_ins1(get_function_bytecode(self), TSL_OPCODE_CALLF, num_args));
        return tsl_parser_parse_rhs_sub(self);
    } else if(token == TSL_TOKEN_ARITHMETIC) {
        TslOpcode arithmetic_opcode;
        tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_ARITHMETIC */
        /* Has to be saved before the right side is parsed, since the right side can contain another arithmetic symbol */
        arithmetic_opcode = arithmetic_symbol_to_opcode(self->tokenizer.arithmetic_symbol);
        /* TODO: Handle arithmetic expression precedence */
        return_if_error(tsl_parser_parse_rhs(self));
        return_if_error(tsl_bytecode_add_ins5(get_function_bytecode(self), arithmetic_opcode));
        return TSL_PARSE_RESULT_OK;
    }
    /*
        No sub expression found, possibly a new expression after this (a new expression on a new line), let that
        part of the code handle error if there is any instead.
    */
    return TSL_PARSE_RESULT_OK;
}

/*
    RHS_SUB_ARITHMETIC = TOKEN_ARITHMETIC RHS
    Used after a number, which can't be indexed or called.
*/
static TslParseResult tsl_parser_parse_rhs_sub_arithmetic(TslParser *self) {
    TslToken token = tsl_tokenizer_peek(&self->tokenizer);
    if(token == TSL_TOKEN_ARITHMETIC) {
        TslOpcode arithmetic_opcode;
        tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_ARITHMETIC */
        printf("rhs sub arithmetic symbol: %c\n", self->tokenizer.arithmetic_symbol);
        /* Has to be saved before the right side is parsed, since the right side can contain another arithmetic symbol */
        arithmetic_opcode = arithmetic_symbol_to_opcode(self->tokenizer.arithmetic_symbol);
        /* TODO: Handle arithmetic expression precedence */
        return_if_error(tsl_parser_parse_rhs(self));
        /* Both operands are loaded at this point, the same order as in tsl_parser_parse_rhs_sub */
        return_if_error(tsl_bytecode_add_ins5(get_function_bytecode(self), arithmetic_opcode));
        return TSL_PARSE_RESULT_OK;
    }
    /*
        No sub expression found, possibly a new expression after this (a new expression on a new line), let that
        part of the code handle error if there is any instead.
    */
    return TSL_PARSE_RESULT_OK;
}

/* RHS = (IDENTIFIER|NUM|BOOL|NULL|STRING|MAP|LIST|('fn' FN)|('$' COMMAND)) RHS_SUB? */
static TslParseResult tsl_parser_parse_rhs(TslParser *self) {
    TslToken token = tsl_tokenizer_next(&self->tokenizer);
    switch(token) {
        case TSL_TOKEN_IDENTIFIER: {
            TslStringView var_name = self->tokenizer.identifier;
            return_if_error(tsl_bytecode_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADV, &var_name));
            return tsl_parser_parse_rhs_sub(self);
        }
        case TSL_TOKEN_NUM: {
            return_if_error(tsl_bytecode_add_ins2(get_function_bytecode(self), TSL_OPCODE_LOADN, self->tokenizer.number_value));
            return tsl_parser_parse_rhs_sub_arithmetic(self);
        }
        case TSL_TOKEN_BOOL: {
            return_if_error(tsl_bytecode_add_ins3(get_function_bytecode(self), TSL_OPCODE_LOADB, self->tokenizer.bool_value));
            return TSL_PARSE_RESULT_OK;
        }
        case TSL_TOKEN_NULL: {
            return_if_error(tsl_bytecode_add_ins5(get_function_bytecode(self), TSL_OPCODE_LOADNULL));
            return TSL_PARSE_RESULT_OK;
        }
        case TSL_TOKEN_STRING: {
            return_if_error(tsl_bytecode_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADS, &self->tokenizer.string));
            return TSL_PARSE_RESULT_OK;
        }
        case TSL_TOKEN_LBRACE: {
            int num_items;
            return_if_error(tsl_parser_parse_map(self, &num_items));
            return_if_error(tsl_bytecode_add_ins1(get_function_bytecode(self), TSL_OPCODE_MAP, num_items));
            return TSL_PARSE_RESULT_OK;
        }
        case TSL_TOKEN_LBRACKET: {
            int num_items;
            return_if_error(tsl_parser_parse_list(self, &num_items));
            return_if_error(tsl_bytecode_add_ins1(get_function_bytecode(self), TSL_OPCODE_LIST, num_items));
            return TSL_PARSE_RESULT_OK;
        }
        case TSL_TOKEN_FN: {
            int function_index;
            return_if_error(tsl_parser_push_function(self));
            return_if_error(tsl_parser_parse_fn(self));
            /* The index has to be read before the pop. After the pop the current function is the enclosing function */
            function_index = tsl_parser_get_current_function_index(self);
            tsl_parser_pop_function(self);
            /* The load instruction is added to the enclosing function, not to the function that was just parsed */
            return_if_error(tsl_bytecode_add_ins1(get_function_bytecode(self), TSL_OPCODE_LOADF, function_index));
            return TSL_PARSE_RESULT_OK;
        }
        case TSL_TOKEN_DOLLAR_SIGN: {
            int num_args;
            return_if_error(tsl_parser_parse_command(self, &num_args));
            if(num_args == 0) {
                fprintf(stderr, "Error: Command can't be empty\n");
                return TSL_PARSE_RESULT_ERR;
            }
            return_if_error(tsl_bytecode_add_ins1(get_function_bytecode(self), TSL_OPCODE_CALLC, num_args));
            return TSL_PARSE_RESULT_OK;
        }
        default:
            fprintf(stderr, "Error: Expected variable, number, bool, null, map, list, function or command, got TODO (%d) (line: %d)\n", (int)token, tsl_tokenizer_get_line_by_index(&self->tokenizer, self->tokenizer.prev_code_index));
            return TSL_PARSE_RESULT_ERR;
    }
}

/*
    EXPR = IDENTIFIER ('=' RHS)|RHS_SUB
    EXPRS = EXPR*
    Parses expressions until |end_token| is found. |end_token| is consumed.
*/
static TslParseResult tsl_parser_parse_expressions(TslParser *self, TslToken end_token) {
    for(;;) {
        TslToken token = tsl_tokenizer_next(&self->tokenizer);
        if(token == TSL_TOKEN_IDENTIFIER) {
            /* Copied since the tokenizer is used again before the identifier is */
            TslStringView identifier = self->tokenizer.identifier;
            if(tsl_tokenizer_peek(&self->tokenizer) == TSL_TOKEN_EQUAL) {
                tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_EQUAL */
                return_if_error(tsl_parser_parse_rhs(self));
                return_if_error(tsl_bytecode_add_ins4(get_function_bytecode(self), TSL_OPCODE_SETV, &identifier));
                /* TODO: Assert if there are load bytecode opcodes not followed by set */
            } else {
                return_if_error(tsl_bytecode_add_ins4(get_function_bytecode(self), TSL_OPCODE_LOADV, &identifier));
                return_if_error(tsl_parser_parse_rhs_sub(self));
            }
        } else if(token == end_token) {
            return TSL_PARSE_RESULT_OK;
        } else {
            fprintf(stderr, "Error: Expected identifier, got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }
    }
}

/*
    Parses |code| (which is |code_size| bytes long) as a list of expressions that ends at the end of the file.
    |code| and |program_output| must not be NULL.
    |program_output| is always initialized. The bytecode of the parsed functions is moved into
    it only if TSL_PARSE_RESULT_OK is returned.
*/
TslParseResult tsl_parse(char *code, size_t code_size, TslProgram *program_output) {
    TslParseResult result;
    TslParser parser;
    assert(code);
    assert(program_output);

    tsl_program_init(program_output);
    if(!tsl_parser_init(&parser, code, code_size)) {
        /* tsl_parser_init initializes both buffers before it can fail, so they need to be released here as well */
        tsl_parser_deinit(&parser);
        return TSL_PARSE_RESULT_ERR;
    }

    result = tsl_parser_parse_expressions(&parser, TSL_TOKEN_END_OF_FILE);
    if(result == TSL_PARSE_RESULT_OK) {
        /* NOTE(review): assumes that tsl_buffer_move leaves the parser's list empty, otherwise the deinit below would deinit bytecode that |program_output| now owns - confirm in the buffer implementation */
        tsl_buffer_move(&program_output->function_bytecode_list, &parser.function_bytecode_list);
    }
    tsl_parser_deinit(&parser);
    return result;
}