From 1dbef1bfdefe8d7967a360f00d350db307d344e2 Mon Sep 17 00:00:00 2001
From: dec05eba
Date: Sat, 18 Jan 2020 08:35:24 +0100
Subject: Add list, map and command parsing

---
 LICENSE             |  13 ++++
 Makefile            |   6 ++
 README.md           |   2 +
 SYNTAX.md           |  19 -----
 example.tsl         |  19 +++++
 include/tokenizer.h |  26 ++++++-
 src/main.c          |  87 ++++++++++++++++------
 src/parser.c        | 204 ++++++++++++++++++++++++++++++++++++++++++----------
 src/tokenizer.c     | 151 ++++++++++++++++++++++++++++++++++----
 9 files changed, 438 insertions(+), 89 deletions(-)
 create mode 100644 LICENSE
 create mode 100644 README.md
 delete mode 100644 SYNTAX.md
 create mode 100644 example.tsl

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..456c488
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,13 @@
+            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+                    Version 2, December 2004
+
+ Copyright (C) 2004 Sam Hocevar
+
+ Everyone is permitted to copy and distribute verbatim or modified
+ copies of this license document, and changing it is allowed as long
+ as the name is changed.
+
+            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. You just DO WHAT THE FUCK YOU WANT TO.
diff --git a/Makefile b/Makefile
index 8b6cb84..9ab88c1 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,12 @@
 all: $(OBJ)
 	cc -o tsl $(OBJ) -fPIE
 
+clean:
+	rm $(OBJ) tsl
+
+compiledb:
+	make clean; bear make
+
 main.o: src/main.c include/tokenizer.h
 	cc -c src/main.c $(CFLAGS)
 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7710e4b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+A tiny scripting language that is designed to be a replacement for small shell/python scripts.\
+Written in ANSI C to allow embedding everywhere, and licensed under the WTFPL so it can be used anywhere without any restrictions.
diff --git a/SYNTAX.md b/SYNTAX.md
deleted file mode 100644
index 149633e..0000000
--- a/SYNTAX.md
+++ /dev/null
@@ -1,19 +0,0 @@
-```
-value1 = 1
-value2 = true
-value3 = null
-value4 = "hello world"
-value5 = {"hello", "world", 5}
-value6 = {"hello": "world", "value": 23}
-value7 = fn () {}
-value8 = fn (value) {}
-value9 = {
-    "hello": "world",
-    "sayHello": fn() {
-
-    }
-}
-
-str = value9["hello"]
-value9["sayHello"]()
-```
diff --git a/example.tsl b/example.tsl
new file mode 100644
index 0000000..cfb3f5a
--- /dev/null
+++ b/example.tsl
@@ -0,0 +1,19 @@
+value1 = 1
+value2 = true
+value3 = null
+value4 = "hello world"
+value5 = ["hello", "world", 5]
+value6 = {"hello": "world", "value": 23}
+value7 = fn () {}
+value8 = fn (value) {}
+value9 = {
+    "hello": "world",
+    "sayHello": fn() {
+
+    }
+}
+
+str = value9["hello"]
+value9["sayHello"]()
+
+response = $(curl https://example.com)
\ No newline at end of file
diff --git a/include/tokenizer.h b/include/tokenizer.h
index fcb9721..98491c7 100644
--- a/include/tokenizer.h
+++ b/include/tokenizer.h
@@ -22,14 +22,33 @@ typedef enum {
     TSL_TOKEN_RBRACE,
     TSL_TOKEN_LBRACKET,
     TSL_TOKEN_RBRACKET,
+    TSL_TOKEN_LPAREN,
+    TSL_TOKEN_RPAREN,
     TSL_TOKEN_COLON,
-    TSL_TOKEN_COMMA
+    TSL_TOKEN_COMMA,
+    TSL_TOKEN_FN,
+    TSL_TOKEN_DOLLAR_SIGN
 } TslToken;
 
+typedef enum {
+    TSL_COMMAND_TOKEN_END_OF_FILE,
+    TSL_COMMAND_TOKEN_ARG,
+    TSL_COMMAND_TOKEN_END
+} TslCommandToken;
+
+typedef struct {
+    TslToken token;
+    size_t code_index;
+    size_t prev_code_index;
+} TslTokenizerPeek;
+
 typedef struct {
     const char *code;
     size_t code_size;
     size_t code_index;
+    size_t prev_code_index;
+
+    TslTokenizerPeek peek;
 
     TslStringView identifier;
     TslStringView string;
@@ -41,6 +60,11 @@ void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size);
 
 TslToken tsl_tokenizer_next(TslTokenizer *self);
 int tsl_tokenizer_accept(TslTokenizer *self, TslToken expected_token);
+TslToken tsl_tokenizer_peek(TslTokenizer *self);
+
+TslCommandToken tsl_tokenizer_next_command_arg(TslTokenizer *self, TslStringView *arg);
+
+int tsl_tokenizer_get_line_by_index(TslTokenizer *self, size_t index);
 
 #endif /* TSL_TOKENIZER_H */
 
diff --git a/src/main.c b/src/main.c
index 3cc01a9..b9d64a7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,24 +1,71 @@
 #include "../include/parser.h"
 #include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
 
-int main() {
-    const char *code =
-"value1 = 1\n"
-"value2 = true\n"
-"value3 = null\n"
-"value4 = \"hello world\"\n"
-"value5 = [\"hello\", \"world\", 5]\n"
-"value6 = {\"hello\": \"world\", \"value\": 23}\n"
-"value7 = fn () {}\n"
-"value8 = fn (value) {}\n"
-"value9 = {\n"
-"    \"hello\": \"world\",\n"
-"    \"sayHello\": fn() {\n"
-"    \n"
-"    }\n"
-"}\n"
-"\n"
-"str = value9[\"hello\"]\n"
-"value9[\"sayHello\"]()";
-    return tsl_parse(code, strlen(code));
+static char* file_get_content(const char *filepath, size_t *filesize) {
+    struct stat file_stat;
+    int fd = open(filepath, O_RDONLY);
+    char *result = NULL;
+    *filesize = 0;
+    if(fd == -1) {
+        perror(filepath);
+        return NULL;
+    }
+
+    if(fstat(fd, &file_stat) == -1) {
+        perror(filepath);
+        goto cleanup;
+    }
+
+    if(!S_ISREG(file_stat.st_mode)) {
+        fprintf(stderr, "Error: %s is not a file\n", filepath);
+        goto cleanup;
+    }
+
+    *filesize = file_stat.st_size;
+    result = malloc(*filesize + 1);
+    if(!result) {
+        fprintf(stderr, "Error: Failed to malloc %lu bytes from file %s\n", *filesize, filepath);
+        *filesize = 0;
+        goto cleanup;
+    }
+
+    result[*filesize] = '\0';
+    if((size_t)read(fd, result, *filesize) != *filesize) {
+        free(result); result = NULL;
+        *filesize = 0;
+        fprintf(stderr, "Error: Failed to read all data from file %s\n", filepath);
+        goto cleanup;
+    }
+
+    cleanup:
+    close(fd);
+    return result;
+}
+
+static void usage() {
+    puts("usage: tsl [file]");
+}
+
+int main(int argc, char **argv) {
+    int result;
+    size_t filesize;
+    char *file_content;
+
+    if(argc != 2) {
+        usage();
+        return 1;
+    }
+
+    file_content = file_get_content(argv[1], &filesize);
+    if(!file_content)
+        return 1;
+    result = tsl_parse(file_content, filesize);
+    free(file_content); /* Not needed, but it makes valgrind happy */
+    return result;
 }
diff --git a/src/parser.c b/src/parser.c
index 3f9c030..1324305 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -6,26 +6,27 @@ typedef struct {
 } TslParser;
 
 static int tsl_parser_parse_rhs(TslParser *self);
+static int tsl_parser_parse_expressions(TslParser *self, TslToken end_token);
 
 static void tsl_parser_init(TslParser *self, const char *code, size_t code_size) {
     tsl_tokenizer_init(&self->tokenizer, code, code_size);
 }
 
 static int tsl_parser_parse_map(TslParser *self) {
-#define parse_map_element_separator \
-    if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_COLON)) \
-        return -1; \
-    if(tsl_parser_parse_rhs(self) != 0) \
-        return -1; \
-    token = tsl_tokenizer_next(&self->tokenizer); \
-    if(token == TSL_TOKEN_COMMA) { \
-        continue; \
-    } else if(token == TSL_TOKEN_RBRACE) { \
-        return 0; \
-    } else { \
-        fprintf(stderr, "Error: Expected ',' or '}', got TODO\n"); \
-        return -1; \
-    }
+    #define parse_map_element_separator \
+        if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_COLON)) \
+            return -1; \
+        if(tsl_parser_parse_rhs(self) != 0) \
+            return -1; \
+        token = tsl_tokenizer_next(&self->tokenizer); \
+        if(token == TSL_TOKEN_COMMA) { \
+            continue; \
+        } else if(token == TSL_TOKEN_RBRACE) { \
+            return 0; \
+        } else { \
+            fprintf(stderr, "Error: Expected ',' or '}', got TODO\n"); \
+            return -1; \
+        }
 
     for(;;) {
         TslToken token = tsl_tokenizer_next(&self->tokenizer);
@@ -54,18 +55,19 @@ static int tsl_parser_parse_map(TslParser *self) {
 }
 
 static int tsl_parser_parse_list(TslParser *self) {
-#define parse_list_element_separator \
-    token = tsl_tokenizer_next(&self->tokenizer); \
-    if(token == TSL_TOKEN_COMMA) { \
-        continue; \
-    } else if(token == TSL_TOKEN_RBRACKET) { \
-        return 0; \
-    } else { \
-        fprintf(stderr, "Error: Expected ',' or ']', got TODO\n"); \
-        return -1; \
-    }
+    #define parse_list_element_separator \
+        token = tsl_tokenizer_next(&self->tokenizer); \
+        if(token == TSL_TOKEN_COMMA) { \
+            continue; \
+        } else if(token == TSL_TOKEN_RBRACKET) { \
+            return 0; \
+        } else { \
+            fprintf(stderr, "Error: Expected ',' or ']', got TODO\n"); \
+            return -1; \
+        }
 
     for(;;) {
+        /* TODO: Use tsl_parser_parse_rhs instead */
         TslToken token = tsl_tokenizer_next(&self->tokenizer);
         if(token == TSL_TOKEN_NUM) {
             printf("rhs num: %ld\n", self->tokenizer.number_value);
@@ -89,9 +91,128 @@ static int tsl_parser_parse_list(TslParser *self) {
     }
 }
 
+/* FN_BODY = '{' EXPRS '}' */
+static int tsl_parser_parse_fn_body(TslParser *self) {
+    if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LBRACE))
+        return -1;
+    return tsl_parser_parse_expressions(self, TSL_TOKEN_RBRACE);
+}
+
+/* FN = '(' (IDENTIFIER ',')* ')' FN_BODY */
+static int tsl_parser_parse_fn(TslParser *self) {
+    if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN))
+        return -1;
+
+    for(;;) {
+        TslToken token = tsl_tokenizer_next(&self->tokenizer);
+        if(token == TSL_TOKEN_RPAREN) {
+            return tsl_parser_parse_fn_body(self);
+        } else if(token == TSL_TOKEN_IDENTIFIER) {
+            TslStringView param_name = self->tokenizer.identifier;
+            token = tsl_tokenizer_next(&self->tokenizer);
+            if(token == TSL_TOKEN_COMMA) {
+                continue;
+            } else if(token == TSL_TOKEN_RPAREN) {
+                return tsl_parser_parse_fn_body(self);
+            } else {
+                fprintf(stderr, "Error: Expected ',' or ')', got TODO\n");
+                return -1;
+            }
+        } else {
+            fprintf(stderr, "Error: Expected parameter name or ')', got TODO\n");
+            return -1;
+        }
+    }
+}
+
+/* VAR_INDEX = '[' RHS ']' */
+static int tsl_parser_parse_var_indexing(TslParser *self) {
+    if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LBRACKET))
+        return -1;
+
+    if(tsl_parser_parse_rhs(self) != 0)
+        return -1;
+
+    if(tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_RBRACKET))
+        return 0;
+    else
+        return -1;
+}
+
+/* FUNC_CALL = '(' (RHS ',')* ')' */
+static int tsl_parser_parse_func_call(TslParser *self) {
+    if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN))
+        return -1;
+
+    for(;;) {
+        TslToken token = tsl_tokenizer_peek(&self->tokenizer);
+        if(token == TSL_TOKEN_RPAREN) {
+            tsl_tokenizer_next(&self->tokenizer); /* consume the peeked TSL_TOKEN_RPAREN */
+            return 0;
+        } else {
+            if(tsl_parser_parse_rhs(self) != 0)
+                return -1;
+            token = tsl_tokenizer_next(&self->tokenizer);
+            if(token == TSL_TOKEN_COMMA) {
+                continue;
+            } else if(token == TSL_TOKEN_RPAREN) {
+                return 0;
+            } else {
+                fprintf(stderr, "Error: Expected ',' or ')', got TODO\n");
+                return -1;
+            }
+        }
+    }
+}
+
+/* TODO: Do not allow empty command */
+/* TODO: Allow command inside another command */
+/* COMMAND = TODO */
+static int tsl_parser_parse_command(TslParser *self) {
+    if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN))
+        return -1;
+
+    for(;;) {
+        TslStringView command_arg;
+        TslCommandToken command_token = tsl_tokenizer_next_command_arg(&self->tokenizer, &command_arg);
+        if(command_token == TSL_COMMAND_TOKEN_ARG) {
+            printf("command arg: |%.*s|\n", (int)command_arg.size, command_arg.data);
+        } else if(command_token == TSL_COMMAND_TOKEN_END) {
+            return 0;
+        } else {
+            fprintf(stderr, "Error: Expected command argument or ')', got TODO\n");
+            return -1;
+        }
+    }
+}
+
+/* RHS_SUB = VAR_INDEX|FUNC_CALL RHS_SUB? */
+static int tsl_parser_parse_rhs_sub(TslParser *self) {
+    TslToken token = tsl_tokenizer_peek(&self->tokenizer);
+    if(token == TSL_TOKEN_LBRACKET) {
+        if(tsl_parser_parse_var_indexing(self) != 0)
+            return -1;
+        return tsl_parser_parse_rhs_sub(self);
+    } else if(token == TSL_TOKEN_LPAREN) {
+        if(tsl_parser_parse_func_call(self) != 0)
+            return -1;
+        return tsl_parser_parse_rhs_sub(self);
+    }
+    /*
+        No sub expression found, possibly a new expression after this (a new expression on a new line), let that
+        part of the code handle the error if there is any instead.
+    */
+    return 0;
+}
+
+/* RHS = (IDENTIFIER|NUM|BOOL|NULL|STRING|MAP|LIST|('fn' FN)|('$' COMMAND)) RHS_SUB? */
 int tsl_parser_parse_rhs(TslParser *self) {
     TslToken token = tsl_tokenizer_next(&self->tokenizer);
-    if(token == TSL_TOKEN_NUM) {
+    if(token == TSL_TOKEN_IDENTIFIER) {
+        TslStringView var_name = self->tokenizer.identifier;
+        printf("var: %.*s\n", (int)var_name.size, var_name.data);
+        return tsl_parser_parse_rhs_sub(self);
+    } else if(token == TSL_TOKEN_NUM) {
         printf("rhs num: %ld\n", self->tokenizer.number_value);
     } else if(token == TSL_TOKEN_BOOL) {
         printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
@@ -100,29 +221,39 @@ int tsl_parser_parse_rhs(TslParser *self) {
     } else if(token == TSL_TOKEN_STRING) {
         printf("rhs string: |%.*s|\n", self->tokenizer.string.size, self->tokenizer.string.data);
     } else if(token == TSL_TOKEN_LBRACE) {
-        tsl_parser_parse_map(self);
+        return tsl_parser_parse_map(self);
     } else if(token == TSL_TOKEN_LBRACKET) {
-        tsl_parser_parse_list(self);
+        return tsl_parser_parse_list(self);
+    } else if(token == TSL_TOKEN_FN) {
+        return tsl_parser_parse_fn(self);
+    } else if(token == TSL_TOKEN_DOLLAR_SIGN) {
+        return tsl_parser_parse_command(self);
     } else {
-        fprintf(stderr, "Error: Expected number, bool or null, got TODO\n");
+        fprintf(stderr, "Error: Expected variable, number, bool, null, map, list, function or command, got TODO (%d) (line: %d)\n", token, tsl_tokenizer_get_line_by_index(&self->tokenizer, self->tokenizer.prev_code_index));
         return -1;
     }
     return 0;
 }
 
-static int tsl_parser_parse(TslParser *self) {
+/*
+    EXPR = IDENTIFIER ('=' RHS)|RHS_SUB
+    EXPRS = EXPR*
+*/
+int tsl_parser_parse_expressions(TslParser *self, TslToken end_token) {
     for(;;) {
         TslToken token = tsl_tokenizer_next(&self->tokenizer);
         if(token == TSL_TOKEN_IDENTIFIER) {
             TslStringView identifier = self->tokenizer.identifier;
             printf("identifier: %.*s\n", identifier.size, identifier.data);
-            if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_EQUAL)) {
-                return -1;
-            }
-            if(tsl_parser_parse_rhs(self) != 0) {
-                return -1;
+            if(tsl_tokenizer_peek(&self->tokenizer) == TSL_TOKEN_EQUAL) {
+                tsl_tokenizer_next(&self->tokenizer); /* consume the peeked TSL_TOKEN_EQUAL */
+                if(tsl_parser_parse_rhs(self) != 0)
+                    return -1;
+            } else {
+                if(tsl_parser_parse_rhs_sub(self) != 0)
+                    return -1;
             }
-        } else if(token == TSL_TOKEN_END_OF_FILE) {
+        } else if(token == end_token) {
             break;
         } else {
             fprintf(stderr, "Error: Expected identifier, got TODO\n");
@@ -132,8 +263,9 @@ static int tsl_parser_parse(TslParser *self) {
     return 0;
 }
 
+/* EXPRS */
 int tsl_parse(const char *code, size_t code_size) {
     TslParser parser;
     tsl_parser_init(&parser, code, code_size);
-    return tsl_parser_parse(&parser);
+    return tsl_parser_parse_expressions(&parser, TSL_TOKEN_END_OF_FILE);
 }
diff --git a/src/tokenizer.c b/src/tokenizer.c
index b310aae..89c40cb 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -1,11 +1,17 @@
 #include "../include/tokenizer.h"
 #include <stdio.h>
 #include <string.h>
+#include <assert.h>
 
 void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) {
     self->code = code;
     self->code_size = code_size;
     self->code_index = 0;
+    self->prev_code_index = 0;
+
+    self->peek.token = -1;
+    self->peek.code_index = 0;
+    self->peek.prev_code_index = 0;
 
     self->identifier.data = NULL;
     self->identifier.size = 0;
@@ -21,19 +27,25 @@ static char tsl_tokenizer_get_char(TslTokenizer *self) {
     return '\0';
 }
 
+static int is_whitespace(char c) {
+    switch(c) {
+        case ' ':
+        case '\n':
+        case '\t':
+        case '\r':
+            return 1;
+        default:
+            return 0;
+    }
+}
+
 static void tsl_tokenizer_skip_whitespace(TslTokenizer *self) {
     for(;;) {
         char c = tsl_tokenizer_get_char(self);
-        switch(c) {
-            case ' ':
-            case '\n':
-            case '\t':
-            case '\r':
-                ++self->code_index;
-                break;
-            default:
-                return;
-        }
+        if(is_whitespace(c))
+            ++self->code_index;
+        else
+            return;
     }
 }
 
@@ -107,8 +119,9 @@ static int tsl_tokenizer_goto_end_of_string(TslTokenizer *self, char string_star
     }
 }
 
-TslToken tsl_tokenizer_next(TslTokenizer *self) {
+static TslToken tsl_tokenizer_next_internal(TslTokenizer *self) {
     char c;
+    self->prev_code_index = self->code_index;
     tsl_tokenizer_skip_whitespace(self);
 
     c = tsl_tokenizer_get_char(self);
@@ -125,6 +138,12 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
         self->identifier.size = self->code_index - identifier_start;
 
         switch(self->identifier.size) {
+            case 2: {
+                if(memcmp(self->identifier.data, "fn", 2) == 0) {
+                    return TSL_TOKEN_FN;
+                }
+                break;
+            }
             case 4: {
                 if(memcmp(self->identifier.data, "true", 4) == 0) {
                     self->bool_value = 1;
@@ -183,12 +202,21 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
     } else if(c == ']') {
         ++self->code_index;
         return TSL_TOKEN_RBRACKET;
+    } else if(c == '(') {
+        ++self->code_index;
+        return TSL_TOKEN_LPAREN;
+    } else if(c == ')') {
+        ++self->code_index;
+        return TSL_TOKEN_RPAREN;
     } else if(c == ',') {
         ++self->code_index;
         return TSL_TOKEN_COMMA;
     } else if(c == ':') {
         ++self->code_index;
         return TSL_TOKEN_COLON;
+    } else if(c == '$') {
+        ++self->code_index;
+        return TSL_TOKEN_DOLLAR_SIGN;
     } else if(c == '\0') {
         return TSL_TOKEN_END_OF_FILE;
     } else {
@@ -197,11 +225,108 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
     }
 }
 
+static TslToken tsl_tokenizer_consume_peek(TslTokenizer *self) {
+    TslToken token = self->peek.token;
+    self->code_index = self->peek.code_index;
+    self->prev_code_index = self->peek.prev_code_index;
+    self->peek.token = -1;
+    return token;
+}
+
+TslToken tsl_tokenizer_next(TslTokenizer *self) {
+    if((int)self->peek.token == -1) {
+        return tsl_tokenizer_next_internal(self);
+    } else {
+        return tsl_tokenizer_consume_peek(self);
+    }
+}
+
 int tsl_tokenizer_accept(TslTokenizer *self, TslToken expected_token) {
-    TslToken actual_token = tsl_tokenizer_next(self);
+    TslToken actual_token;
+    if((int)self->peek.token == -1) {
+        actual_token = tsl_tokenizer_next_internal(self);
+    } else {
+        actual_token = tsl_tokenizer_consume_peek(self);
+    }
     if(actual_token != expected_token) {
-        fprintf(stderr, "Error: Expected TODO, got TODO\n");
+        fprintf(stderr, "Error: Expected TODO(%d), got TODO(%d)\n", expected_token, actual_token);
         return 0;
     }
     return 1;
 }
+
+TslToken tsl_tokenizer_peek(TslTokenizer *self) {
+    size_t p_prev_code_index = self->prev_code_index;
+    size_t p_code_index = self->code_index;
+
+    self->peek.token = tsl_tokenizer_next_internal(self);
+    self->peek.code_index = self->code_index;
+    self->peek.prev_code_index = self->prev_code_index;
+
+    self->prev_code_index = p_prev_code_index;
+    self->code_index = p_code_index;
+    return self->peek.token;
+}
+
+TslCommandToken tsl_tokenizer_next_command_arg(TslTokenizer *self, TslStringView *arg) {
+    char c;
+    assert((int)self->peek.token == -1);
+    self->prev_code_index = self->code_index;
+    tsl_tokenizer_skip_whitespace(self);
+
+    c = tsl_tokenizer_get_char(self);
+    if(c == ')') {
+        ++self->code_index;
+        return TSL_COMMAND_TOKEN_END;
+    } else if(c == '"') {
+        char string_start_symbol = c;
+        size_t string_start;
+        ++self->code_index;
+        string_start = self->code_index;
+        if(tsl_tokenizer_goto_end_of_string(self, string_start_symbol)) {
+            arg->data = self->code + string_start;
+            arg->size = self->code_index - 1 - string_start;
+            return TSL_COMMAND_TOKEN_ARG;
+        } else {
+            return TSL_COMMAND_TOKEN_END_OF_FILE;
+        }
+    } else if(c == '\0') {
+        return TSL_COMMAND_TOKEN_END_OF_FILE;
+    } else {
+        /*
+            TODO: When hitting ", parse to the end of it and make it part of this arg instead of
+            separating them into two args
+        */
+        size_t arg_start = self->code_index;
+        int escape_char = tsl_tokenizer_get_char(self) == '\\';
+        ++self->code_index;
+        for(;;) {
+            c = tsl_tokenizer_get_char(self);
+            if(is_whitespace(c) || c == ')' || c == '\0') {
+                break;
+            } else if(c == '"') {
+                if(!escape_char)
+                    break;
+                escape_char = 0;
+            } else if(c == '\\') {
+                escape_char = !escape_char;
+            } else {
+                escape_char = 0;
+            }
+            ++self->code_index;
+        }
+        arg->data = self->code + arg_start;
+        arg->size = self->code_index - arg_start;
+        return TSL_COMMAND_TOKEN_ARG;
+    }
+}
+
+int tsl_tokenizer_get_line_by_index(TslTokenizer *self, size_t index) {
+    size_t i = 0;
+    int line = 1;
+    for(; i < index; ++i) {
+        if(self->code[i] == '\n')
+            ++line;
+    }
+    return line;
+}
-- 
cgit v1.2.3