#include "../include/parser.h"
#include "../include/bytecode.h"
#include "../include/std/hash_map.h"
#include <stdio.h>
#include <assert.h>

/*
    Returns TSL_PARSE_RESULT_ERR from the enclosing function when |expr| evaluates to false.
    Wrapped in do/while(0) so that the macro behaves as a single statement, including
    inside an unbraced if/else.
*/
#define return_if_error(expr) \
    do { \
        if(!(expr)) \
            return TSL_PARSE_RESULT_ERR; \
    } while(0)

/* All state used while parsing one piece of source code */
typedef struct {
    TslTokenizer tokenizer;
    TslBytecodeWriter bytecode_writer;
    TslHashMap variables;
} TslParser;

static TslParseResult tsl_parser_parse_rhs(TslParser *self, TslRegister *reg_result);
static TslParseResult tsl_parser_parse_expressions(TslParser *self, TslToken end_token);

/* Initializes the tokenizer over |code| (of |code_size| bytes), the bytecode writer and the variable map */
static void tsl_parser_init(TslParser *self, const char *code, size_t code_size) {
    tsl_tokenizer_init(&self->tokenizer, code, code_size);
    tsl_bytecode_writer_init(&self->bytecode_writer);
    tsl_hash_map_init(&self->variables);
}

/* Releases the bytecode writer and the variable map that were set up by tsl_parser_init */
static void tsl_parser_deinit(TslParser *self) {
    tsl_bytecode_writer_deinit(&self->bytecode_writer);
    tsl_hash_map_deinit(&self->variables);
}

/* Currently disabled: byte-wise string hash (result = result * 33 + byte) */
#if 0
static uint64_t hash_string_view(const void *data, size_t size) {
    uint64_t result = 0xdec05eba;
    const uint8_t *p = data;
    while(size) {
        result = ((result << 5) + result) + *p;
        ++p;
        --size;
    }
    return result;
}
#endif

/*
    Parses the entries of a map up to and including the closing '}'.
    Each entry is a key (number, bool, null or string), a ':' and then a RHS value.
    Entries are separated by ',' and a trailing ',' before '}' is accepted.
    The opening '{' has already been consumed by the caller.
*/
static TslParseResult tsl_parser_parse_map(TslParser *self) {
    TslRegister src_reg;

    /*
        Parses ':' RHS and then the token that follows the entry.
        Not wrapped in do/while(0) on purpose: the 'continue' has to target the for loop below.
        Every path through this macro either continues the loop or returns.
    */
    #define parse_map_element_separator \
        return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_COLON)); \
        return_if_error(tsl_parser_parse_rhs(self, &src_reg)); \
        token = tsl_tokenizer_next(&self->tokenizer); \
        if(token == TSL_TOKEN_COMMA) { \
            continue; \
        } else if(token == TSL_TOKEN_RBRACE) { \
            return TSL_PARSE_RESULT_OK; \
        } else { \
            fprintf(stderr, "Error: Expected ',' or '}', got TODO\n"); \
            return TSL_PARSE_RESULT_ERR; \
        }

    for(;;) {
        TslToken token = tsl_tokenizer_next(&self->tokenizer);
        if(token == TSL_TOKEN_NUM) {
            printf("rhs num: %f\n", self->tokenizer.number_value);
            parse_map_element_separator
        } else if(token == TSL_TOKEN_BOOL) {
            printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
            parse_map_element_separator
        } else if(token == TSL_TOKEN_NULL) {
            printf("rhs null\n");
            parse_map_element_separator
        } else if(token == TSL_TOKEN_STRING) {
            printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data);
            parse_map_element_separator
        } else if(token == TSL_TOKEN_RBRACE) {
            /* '}' after trailing ',' or an empty map */
            return TSL_PARSE_RESULT_OK;
        } else {
            fprintf(stderr, "Error: Expected '}', got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }
        /* Not reached: every branch above either continues the loop or returns */
    }

    #undef parse_map_element_separator
}

/*
    Parses the elements of a list up to and including the closing ']'.
    Elements are number, bool, null or string tokens separated by ','.
    A trailing ',' before ']' is accepted.
    The opening '[' has already been consumed by the caller.
*/
static TslParseResult tsl_parser_parse_list(TslParser *self) {
    /*
        Parses the token that follows a list element.
        Not wrapped in do/while(0) on purpose: the 'continue' has to target the for loop below.
        Every path through this macro either continues the loop or returns.
    */
    #define parse_list_element_separator \
        token = tsl_tokenizer_next(&self->tokenizer); \
        if(token == TSL_TOKEN_COMMA) { \
            continue; \
        } else if(token == TSL_TOKEN_RBRACKET) { \
            return TSL_PARSE_RESULT_OK; \
        } else { \
            fprintf(stderr, "Error: Expected ',' or ']', got TODO\n"); \
            return TSL_PARSE_RESULT_ERR; \
        }

    for(;;) {
        /* TODO: Use tsl_parser_parse_rhs instead */
        TslToken token = tsl_tokenizer_next(&self->tokenizer);
        if(token == TSL_TOKEN_NUM) {
            printf("rhs num: %f\n", self->tokenizer.number_value);
            parse_list_element_separator
        } else if(token == TSL_TOKEN_BOOL) {
            printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
            parse_list_element_separator
        } else if(token == TSL_TOKEN_NULL) {
            printf("rhs null\n");
            parse_list_element_separator
        } else if(token == TSL_TOKEN_STRING) {
            printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data);
            parse_list_element_separator
        } else if(token == TSL_TOKEN_RBRACKET) {
            /* ']' after trailing ',' or an empty list */
            return TSL_PARSE_RESULT_OK;
        } else {
            /* A list is terminated by ']', not '}' */
            fprintf(stderr, "Error: Expected ']', got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }
    }

    #undef parse_list_element_separator
}

/* FN_BODY = '{' EXPRS '}' */
static TslParseResult tsl_parser_parse_fn_body(TslParser *self) {
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LBRACE));
    return tsl_parser_parse_expressions(self, TSL_TOKEN_RBRACE);
}

/* FN = '(' (IDENTIFIER ',')* ')' FN_BODY */
static TslParseResult tsl_parser_parse_fn(TslParser *self) {
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN));
    for(;;) {
        TslToken token = tsl_tokenizer_next(&self->tokenizer);
        if(token == TSL_TOKEN_RPAREN) {
            /* ')' directly after '(' or after a trailing ',' */
            return tsl_parser_parse_fn_body(self);
        } else if(token == TSL_TOKEN_IDENTIFIER) {
            TslStringView param_name = self->tokenizer.identifier;
            printf("function parameter: |%.*s|\n", (int)param_name.size, param_name.data);
            token = tsl_tokenizer_next(&self->tokenizer);
            if(token == TSL_TOKEN_COMMA) {
                continue;
            } else if(token == TSL_TOKEN_RPAREN) {
                return tsl_parser_parse_fn_body(self);
            } else {
                fprintf(stderr, "Error: Expected ',' or ')', got TODO\n");
                return TSL_PARSE_RESULT_ERR;
            }
        } else {
            fprintf(stderr, "Error: Expected parameter name or ')', got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }
    }
}

/* VAR_INDEX = '[' RHS ']' */
static TslParseResult tsl_parser_parse_var_indexing(TslParser *self) {
    TslRegister src_reg;
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LBRACKET));
    return_if_error(tsl_parser_parse_rhs(self, &src_reg));
    return tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_RBRACKET);
}

/* FUNC_CALL = '(' (RHS ',')* ')' */
static TslParseResult tsl_parser_parse_func_call(TslParser *self) {
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN));
    for(;;) {
        /* Peek first so that a non-')' token is left for tsl_parser_parse_rhs to read */
        TslToken token = tsl_tokenizer_peek(&self->tokenizer);
        if(token == TSL_TOKEN_RPAREN) {
            tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_RPAREN */
            return TSL_PARSE_RESULT_OK;
        } else {
            TslRegister src_reg;
            return_if_error(tsl_parser_parse_rhs(self, &src_reg));
            token = tsl_tokenizer_next(&self->tokenizer);
            if(token == TSL_TOKEN_COMMA) {
                continue;
            } else if(token == TSL_TOKEN_RPAREN) {
                return TSL_PARSE_RESULT_OK;
            } else {
                fprintf(stderr, "Error: Expected ',' or ')', got TODO\n");
                return TSL_PARSE_RESULT_ERR;
            }
        }
    }
}

/* TODO: Do not allow empty command */
/* TODO: Allow command inside another command */
/* COMMAND = TODO */
/* Reads command arguments after '(' until the tokenizer reports the end of the command */
static TslParseResult tsl_parser_parse_command(TslParser *self) {
    return_if_error(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN));
    for(;;) {
        TslStringView command_arg;
        TslCommandToken command_token = tsl_tokenizer_next_command_arg(&self->tokenizer, &command_arg);
        if(command_token == TSL_COMMAND_TOKEN_ARG) {
            printf("command arg: |%.*s|\n", (int)command_arg.size, command_arg.data);
        } else if(command_token == TSL_COMMAND_TOKEN_END) {
            return TSL_PARSE_RESULT_OK;
        } else {
            fprintf(stderr, "Error: Expected command argument or ')', got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }
    }
}

/* RHS_SUB = (VAR_INDEX|FUNC_CALL RHS_SUB?)|(TOKEN_ARITHMETIC RHS) */
static TslParseResult tsl_parser_parse_rhs_sub(TslParser *self) {
    TslToken token = tsl_tokenizer_peek(&self->tokenizer);
    if(token == TSL_TOKEN_LBRACKET) {
        return_if_error(tsl_parser_parse_var_indexing(self));
        return tsl_parser_parse_rhs_sub(self);
    } else if(token == TSL_TOKEN_LPAREN) {
        return_if_error(tsl_parser_parse_func_call(self));
        return tsl_parser_parse_rhs_sub(self);
    } else if(token == TSL_TOKEN_ARITHMETIC) {
        TslRegister src_reg;
        tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_ARITHMETIC */
        printf("rhs sub arithmetic symbol: %c\n", self->tokenizer.arithmetic_symbol);
        /* TODO: Handle arithmetic expression precedence */
        return tsl_parser_parse_rhs(self, &src_reg);
    }
    /*
        No sub expression found, possibly a new expression after this (a new expression on a new line), let that
        part of the code handle error if there is any instead.
    */
    return TSL_PARSE_RESULT_OK;
}

/* RHS_SUB_ARITHMETIC = TOKEN_ARITHMETIC RHS */
static TslParseResult tsl_parser_parse_rhs_sub_arithmetic(TslParser *self) {
    TslToken token = tsl_tokenizer_peek(&self->tokenizer);
    if(token == TSL_TOKEN_ARITHMETIC) {
        TslRegister src_reg;
        tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_ARITHMETIC */
        printf("rhs sub arithmetic symbol: %c\n", self->tokenizer.arithmetic_symbol);
        /* TODO: Handle arithmetic expression precedence */
        return tsl_parser_parse_rhs(self, &src_reg);
    }
    /*
        No sub expression found, possibly a new expression after this (a new expression on a new line), let that
        part of the code handle error if there is any instead.
    */
    return TSL_PARSE_RESULT_OK;
}

/*
    RHS = (IDENTIFIER|NUM|BOOL|NULL|STRING|MAP|LIST|('fn' FN)|('$' COMMAND)) RHS_SUB?

    Only the TSL_TOKEN_NUM branch is implemented: it reserves a register, stores it in
    |reg_result| and emits a load of the number into that register.
    All other value kinds hit assert(0).

    NOTE(review): when asserts are compiled out (NDEBUG), the non-number branches return
    without writing |reg_result|, so a caller that goes on to use the register
    (tsl_parser_parse_expressions does) reads an uninitialized value. Confirm the intended
    handling when these branches get implemented.
*/
static TslParseResult tsl_parser_parse_rhs(TslParser *self, TslRegister *reg_result) {
    TslToken token = tsl_tokenizer_next(&self->tokenizer);
    if(token == TSL_TOKEN_IDENTIFIER) {
        TslStringView var_name = self->tokenizer.identifier;
        assert(0 && "TODO: Implement");
        printf("rhs var: %.*s\n", (int)var_name.size, var_name.data);
        return tsl_parser_parse_rhs_sub(self);
    } else if(token == TSL_TOKEN_NUM) {
        *reg_result = tsl_bytecode_writer_get_unique_register(&self->bytecode_writer);
        if(*reg_result == -1) {
            fprintf(stderr, "Error: Too many registers used\n");
            return TSL_PARSE_RESULT_ERR;
        }
        return_if_error(tsl_bytecode_writer_load_number(&self->bytecode_writer, *reg_result, self->tokenizer.number_value));
        printf("rhs num: %f\n", self->tokenizer.number_value);
        return tsl_parser_parse_rhs_sub_arithmetic(self);
    } else if(token == TSL_TOKEN_BOOL) {
        assert(0 && "TODO: Implement");
        printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
    } else if(token == TSL_TOKEN_NULL) {
        assert(0 && "TODO: Implement");
        printf("rhs null\n");
    } else if(token == TSL_TOKEN_STRING) {
        assert(0 && "TODO: Implement");
        printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data);
    } else if(token == TSL_TOKEN_LBRACE) {
        assert(0 && "TODO: Implement");
        return tsl_parser_parse_map(self);
    } else if(token == TSL_TOKEN_LBRACKET) {
        assert(0 && "TODO: Implement");
        return tsl_parser_parse_list(self);
    } else if(token == TSL_TOKEN_FN) {
        assert(0 && "TODO: Implement");
        return tsl_parser_parse_fn(self);
    } else if(token == TSL_TOKEN_DOLLAR_SIGN) {
        assert(0 && "TODO: Implement");
        return tsl_parser_parse_command(self);
    } else {
        fprintf(stderr, "Error: Expected variable, number, bool, null, map, list, function or command, got TODO (%d) (line: %d)\n", token, tsl_tokenizer_get_line_by_index(&self->tokenizer, self->tokenizer.prev_code_index));
        return TSL_PARSE_RESULT_ERR;
    }
    return TSL_PARSE_RESULT_OK;
}

/*
    EXPR = IDENTIFIER ('=' RHS)|RHS_SUB
    EXPRS = EXPR*

    Parses expressions until |end_token| is read.
    For an assignment, the RHS is parsed into a register and a move from that register
    into a newly reserved destination register is emitted.
*/
static TslParseResult tsl_parser_parse_expressions(TslParser *self, TslToken end_token) {
    for(;;) {
        TslToken token = tsl_tokenizer_next(&self->tokenizer);
        if(token == TSL_TOKEN_IDENTIFIER) {
            TslStringView identifier = self->tokenizer.identifier;
            printf("identifier: %.*s\n", (int)identifier.size, identifier.data);
            if(tsl_tokenizer_peek(&self->tokenizer) == TSL_TOKEN_EQUAL) {
                TslRegister dst_reg;
                TslRegister src_reg;
                tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_EQUAL */
                return_if_error(tsl_parser_parse_rhs(self, &src_reg));
                dst_reg = tsl_bytecode_writer_get_unique_register(&self->bytecode_writer);
                if(dst_reg < 0) {
                    /* Report the failure the same way tsl_parser_parse_rhs does instead of failing silently */
                    fprintf(stderr, "Error: Too many registers used\n");
                    return TSL_PARSE_RESULT_ERR;
                }
                return_if_error(tsl_bytecode_writer_mov_reg(&self->bytecode_writer, dst_reg, src_reg));
            } else {
                return_if_error(tsl_parser_parse_rhs_sub(self));
            }
        } else if(token == end_token) {
            break;
        } else {
            fprintf(stderr, "Error: Expected identifier, got TODO\n");
            return TSL_PARSE_RESULT_ERR;
        }
    }
    return TSL_PARSE_RESULT_OK;
}

/*
    Parses |code| (of |code_size| bytes) as a sequence of expressions up to the end of the input.
    Returns the result of parsing the expressions. The parser state, including the
    bytecode writer, is released before returning.
*/
TslParseResult tsl_parse(const char *code, size_t code_size) {
    TslParseResult result;
    TslParser parser;
    tsl_parser_init(&parser, code, code_size);
    result = tsl_parser_parse_expressions(&parser, TSL_TOKEN_END_OF_FILE);
    tsl_parser_deinit(&parser);
    return result;
}