aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2020-01-18 08:35:24 +0100
committer dec05eba <dec05eba@protonmail.com> 2020-01-18 08:35:24 +0100
commit 1dbef1bfdefe8d7967a360f00d350db307d344e2 (patch)
tree 3e7859156e1930488234fbb76c0120250218c5f3 /src
parent 1d69979eef57e313ca35b3610cfedf84ca39ae63 (diff)
Add list, map and command parsing
Diffstat (limited to 'src')
-rw-r--r-- src/main.c 87
-rw-r--r-- src/parser.c 204
-rw-r--r-- src/tokenizer.c 151
3 files changed, 373 insertions, 69 deletions
diff --git a/src/main.c b/src/main.c
index 3cc01a9..b9d64a7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,24 +1,71 @@
#include "../include/parser.h"
#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
-int main() {
- const char *code =
-"value1 = 1\n"
-"value2 = true\n"
-"value3 = null\n"
-"value4 = \"hello world\"\n"
-"value5 = [\"hello\", \"world\", 5]\n"
-"value6 = {\"hello\": \"world\", \"value\": 23}\n"
-"value7 = fn () {}\n"
-"value8 = fn (value) {}\n"
-"value9 = {\n"
-" \"hello\": \"world\",\n"
-" \"sayHello\": fn() {\n"
-" \n"
-" }\n"
-"}\n"
-"\n"
-"str = value9[\"hello\"]\n"
-"value9[\"sayHello\"]()";
- return tsl_parse(code, strlen(code));
+/*
+ * Reads the whole regular file at |filepath| into a freshly malloc'd buffer.
+ *
+ * On success returns the buffer (always terminated with an extra '\0' byte that is
+ * not counted) and stores the number of bytes read in |*filesize|. The caller owns
+ * the buffer and must free() it.
+ * On failure prints a diagnostic to stderr, sets |*filesize| to 0 and returns NULL.
+ */
+static char* file_get_content(const char *filepath, size_t *filesize) {
+    struct stat file_stat;
+    char *result = NULL;
+    size_t size;
+    size_t bytes_read = 0;
+    int fd;
+
+    *filesize = 0;
+    fd = open(filepath, O_RDONLY);
+    if(fd == -1) {
+        perror(filepath);
+        return NULL;
+    }
+
+    if(fstat(fd, &file_stat) == -1) {
+        perror(filepath);
+        goto cleanup;
+    }
+
+    if(!S_ISREG(file_stat.st_mode)) {
+        fprintf(stderr, "Error: %s is not a file\n", filepath);
+        goto cleanup;
+    }
+
+    size = (size_t)file_stat.st_size;
+    result = malloc(size + 1);
+    if(!result) {
+        /* Report the size that was actually requested; size_t is cast to match %lu */
+        fprintf(stderr, "Error: Failed to malloc %lu bytes from file %s\n", (unsigned long)(size + 1), filepath);
+        goto cleanup;
+    }
+
+    /* read() is allowed to return fewer bytes than requested, so keep reading until done */
+    while(bytes_read < size) {
+        ssize_t n = read(fd, result + bytes_read, size - bytes_read);
+        if(n <= 0)
+            break; /* error or unexpected end of file */
+        bytes_read += (size_t)n;
+    }
+
+    if(bytes_read != size) {
+        fprintf(stderr, "Error: Failed to read all data from file %s\n", filepath);
+        free(result);
+        result = NULL; /* never hand a freed pointer back to the caller */
+        goto cleanup;
+    }
+
+    result[size] = '\0';
+    *filesize = size;
+
+    cleanup:
+    close(fd);
+    return result;
+}
+
+/* Writes the one-line command-line synopsis to stdout. */
+static void usage() {
+    fputs("usage: tsl [file]\n", stdout);
+}
+
+/*
+ * Program entry point. Expects exactly one argument: the path of the script to parse.
+ * Returns 1 when the argument count is wrong or the file could not be loaded,
+ * otherwise returns whatever tsl_parse reports for the file's content.
+ */
+int main(int argc, char **argv) {
+    size_t source_size;
+    char *source;
+    int status;
+
+    if(argc != 2) {
+        usage();
+        return 1;
+    }
+
+    source = file_get_content(argv[1], &source_size);
+    if(source == NULL)
+        return 1;
+
+    status = tsl_parse(source, source_size);
+    /* The process exits right after this, so the free is only here to keep valgrind quiet */
+    free(source);
+    return status;
}
diff --git a/src/parser.c b/src/parser.c
index 3f9c030..1324305 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -6,26 +6,27 @@ typedef struct {
} TslParser;
static int tsl_parser_parse_rhs(TslParser *self);
+static int tsl_parser_parse_expressions(TslParser *self, TslToken end_token);
/* Prepares |self| for parsing by initializing its tokenizer with |code| and |code_size| */
static void tsl_parser_init(TslParser *self, const char *code, size_t code_size) {
tsl_tokenizer_init(&self->tokenizer, code, code_size);
}
static int tsl_parser_parse_map(TslParser *self) {
-#define parse_map_element_separator \
- if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_COLON)) \
- return -1; \
- if(tsl_parser_parse_rhs(self) != 0) \
- return -1; \
- token = tsl_tokenizer_next(&self->tokenizer); \
- if(token == TSL_TOKEN_COMMA) { \
- continue; \
- } else if(token == TSL_TOKEN_RBRACE) { \
- return 0; \
- } else { \
- fprintf(stderr, "Error: Expected ',' or '}', got TODO\n"); \
- return -1; \
- }
+ #define parse_map_element_separator \
+ if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_COLON)) \
+ return -1; \
+ if(tsl_parser_parse_rhs(self) != 0) \
+ return -1; \
+ token = tsl_tokenizer_next(&self->tokenizer); \
+ if(token == TSL_TOKEN_COMMA) { \
+ continue; \
+ } else if(token == TSL_TOKEN_RBRACE) { \
+ return 0; \
+ } else { \
+ fprintf(stderr, "Error: Expected ',' or '}', got TODO\n"); \
+ return -1; \
+ }
for(;;) {
TslToken token = tsl_tokenizer_next(&self->tokenizer);
@@ -54,18 +55,19 @@ static int tsl_parser_parse_map(TslParser *self) {
}
static int tsl_parser_parse_list(TslParser *self) {
-#define parse_list_element_separator \
- token = tsl_tokenizer_next(&self->tokenizer); \
- if(token == TSL_TOKEN_COMMA) { \
- continue; \
- } else if(token == TSL_TOKEN_RBRACKET) { \
- return 0; \
- } else { \
- fprintf(stderr, "Error: Expected ',' or ']', got TODO\n"); \
- return -1; \
- }
+ #define parse_list_element_separator \
+ token = tsl_tokenizer_next(&self->tokenizer); \
+ if(token == TSL_TOKEN_COMMA) { \
+ continue; \
+ } else if(token == TSL_TOKEN_RBRACKET) { \
+ return 0; \
+ } else { \
+ fprintf(stderr, "Error: Expected ',' or ']', got TODO\n"); \
+ return -1; \
+ }
for(;;) {
+ /* TODO: Use tsl_parser_parse_rhs instead */
TslToken token = tsl_tokenizer_next(&self->tokenizer);
if(token == TSL_TOKEN_NUM) {
printf("rhs num: %ld\n", self->tokenizer.number_value);
@@ -89,9 +91,128 @@ static int tsl_parser_parse_list(TslParser *self) {
}
}
+/*
+ * FN_BODY = '{' EXPRS '}'
+ * Requires the next token to be '{', then parses expressions until the matching '}'.
+ * Returns -1 if '{' is missing, otherwise the result of parsing the expressions.
+ */
+static int tsl_parser_parse_fn_body(TslParser *self) {
+    int has_open_brace = tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LBRACE);
+    if(has_open_brace)
+        return tsl_parser_parse_expressions(self, TSL_TOKEN_RBRACE);
+    return -1;
+}
+
+/*
+ * FN = '(' (IDENTIFIER ',')* ')' FN_BODY
+ * Parses a parenthesized parameter list followed by the function body.
+ * Parameter names are only validated for now; nothing stores them yet.
+ * Returns 0 on success and -1 on a syntax error (after printing a message to stderr).
+ */
+static int tsl_parser_parse_fn(TslParser *self) {
+    if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN))
+        return -1;
+
+    for(;;) {
+        TslToken token = tsl_tokenizer_next(&self->tokenizer);
+        if(token == TSL_TOKEN_IDENTIFIER) {
+            /* The parameter name is in self->tokenizer.identifier at this point; it is not used yet */
+            token = tsl_tokenizer_next(&self->tokenizer);
+            if(token == TSL_TOKEN_COMMA)
+                continue;
+            if(token != TSL_TOKEN_RPAREN) {
+                fprintf(stderr, "Error: Expected ',' or ')', got TODO\n");
+                return -1;
+            }
+        } else if(token != TSL_TOKEN_RPAREN) {
+            fprintf(stderr, "Error: Expected parameter name or ')', got TODO\n");
+            return -1;
+        }
+
+        /* Only reachable once ')' has been consumed: the body comes next */
+        return tsl_parser_parse_fn_body(self);
+    }
+}
+
+/*
+ * VAR_INDEX = '[' RHS ']'
+ * Returns 0 when an opening bracket, a right-hand-side value and a closing bracket
+ * were parsed in that order, -1 as soon as any of the three is missing or invalid.
+ */
+static int tsl_parser_parse_var_indexing(TslParser *self) {
+    if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LBRACKET))
+        return -1;
+
+    if(tsl_parser_parse_rhs(self) != 0)
+        return -1;
+
+    return tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_RBRACKET) ? 0 : -1;
+}
+
+/*
+ * FUNC_CALL = '(' (RHS ',')* ')'
+ * Parses a call's argument list. Returns 0 once the closing ')' has been consumed,
+ * -1 on a syntax error.
+ */
+static int tsl_parser_parse_func_call(TslParser *self) {
+    TslToken separator;
+
+    if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN))
+        return -1;
+
+    for(;;) {
+        /* Peek first so that an argument's leading token is left for tsl_parser_parse_rhs */
+        if(tsl_tokenizer_peek(&self->tokenizer) == TSL_TOKEN_RPAREN) {
+            tsl_tokenizer_next(&self->tokenizer); /* consume the peeked TSL_TOKEN_RPAREN */
+            return 0;
+        }
+
+        if(tsl_parser_parse_rhs(self) != 0)
+            return -1;
+
+        separator = tsl_tokenizer_next(&self->tokenizer);
+        if(separator == TSL_TOKEN_RPAREN)
+            return 0;
+
+        if(separator != TSL_TOKEN_COMMA) {
+            fprintf(stderr, "Error: Expected ',' or ')', got TODO\n");
+            return -1;
+        }
+    }
+}
+
+/* TODO: Do not allow empty command */
+/* TODO: Allow command inside another command */
+/* COMMAND = TODO */
+/*
+ * Parses '(' followed by command arguments up to the closing ')'.
+ * Each argument is printed to stdout. Returns 0 when the command is closed,
+ * -1 when '(' is missing or the tokenizer reports anything other than an argument or the end.
+ */
+static int tsl_parser_parse_command(TslParser *self) {
+    TslStringView command_arg;
+    TslCommandToken command_token;
+
+    if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_LPAREN))
+        return -1;
+
+    while((command_token = tsl_tokenizer_next_command_arg(&self->tokenizer, &command_arg)) == TSL_COMMAND_TOKEN_ARG)
+        printf("command arg: |%.*s|\n", (int)command_arg.size, command_arg.data);
+
+    if(command_token == TSL_COMMAND_TOKEN_END)
+        return 0;
+
+    fprintf(stderr, "Error: Expected command argument or ')', got TODO\n");
+    return -1;
+}
+
+/*
+ * RHS_SUB = VAR_INDEX|FUNC_CALL RHS_SUB?
+ * Parses any chain of indexing and call suffixes, for example a["x"](1)[2].
+ * Returns 0 when the chain ends (including an empty chain), -1 if a suffix is malformed.
+ */
+static int tsl_parser_parse_rhs_sub(TslParser *self) {
+    for(;;) {
+        int status;
+        TslToken token = tsl_tokenizer_peek(&self->tokenizer);
+
+        if(token == TSL_TOKEN_LBRACKET) {
+            status = tsl_parser_parse_var_indexing(self);
+        } else if(token == TSL_TOKEN_LPAREN) {
+            status = tsl_parser_parse_func_call(self);
+        } else {
+            /*
+                Not a sub expression. This may be the start of a new expression (on a new line),
+                so any error is left for that part of the parser to report.
+            */
+            return 0;
+        }
+
+        if(status != 0)
+            return -1;
+    }
+}
+
+/* RHS = (IDENTIFIER|NUM|BOOL|NULL|STRING|MAP|LIST|('fn' FN)|('$' COMMAND)) RHS_SUB? */
int tsl_parser_parse_rhs(TslParser *self) {
TslToken token = tsl_tokenizer_next(&self->tokenizer);
- if(token == TSL_TOKEN_NUM) {
+ if(token == TSL_TOKEN_IDENTIFIER) {
+ TslStringView var_name = self->tokenizer.identifier;
+ printf("var: %.*s\n", (int)var_name.size, var_name.data);
+ return tsl_parser_parse_rhs_sub(self);
+ } else if(token == TSL_TOKEN_NUM) {
printf("rhs num: %ld\n", self->tokenizer.number_value);
} else if(token == TSL_TOKEN_BOOL) {
printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
@@ -100,29 +221,39 @@ int tsl_parser_parse_rhs(TslParser *self) {
} else if(token == TSL_TOKEN_STRING) {
printf("rhs string: |%.*s|\n", self->tokenizer.string.size, self->tokenizer.string.data);
} else if(token == TSL_TOKEN_LBRACE) {
- tsl_parser_parse_map(self);
+ return tsl_parser_parse_map(self);
} else if(token == TSL_TOKEN_LBRACKET) {
- tsl_parser_parse_list(self);
+ return tsl_parser_parse_list(self);
+ } else if(token == TSL_TOKEN_FN) {
+ return tsl_parser_parse_fn(self);
+ } else if(token == TSL_TOKEN_DOLLAR_SIGN) {
+ return tsl_parser_parse_command(self);
} else {
- fprintf(stderr, "Error: Expected number, bool or null, got TODO\n");
+ fprintf(stderr, "Error: Expected variable, number, bool, null, map, list, function or command, got TODO (%d) (line: %d)\n", token, tsl_tokenizer_get_line_by_index(&self->tokenizer, self->tokenizer.prev_code_index));
return -1;
}
return 0;
}
-static int tsl_parser_parse(TslParser *self) {
+/*
+ EXPR = IDENTIFIER ('=' RHS)|RHS_SUB
+ EXPRS = EXPR*
+*/
+int tsl_parser_parse_expressions(TslParser *self, TslToken end_token) {
for(;;) {
TslToken token = tsl_tokenizer_next(&self->tokenizer);
if(token == TSL_TOKEN_IDENTIFIER) {
TslStringView identifier = self->tokenizer.identifier;
printf("identifier: %.*s\n", identifier.size, identifier.data);
- if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_EQUAL)) {
- return -1;
- }
- if(tsl_parser_parse_rhs(self) != 0) {
- return -1;
+ if(tsl_tokenizer_peek(&self->tokenizer) == TSL_TOKEN_EQUAL) {
+ tsl_tokenizer_next(&self->tokenizer); /* consume previous TSL_TOKEN_EQUAL */
+ if(tsl_parser_parse_rhs(self) != 0)
+ return -1;
+ } else {
+ if(tsl_parser_parse_rhs_sub(self) != 0)
+ return -1;
}
- } else if(token == TSL_TOKEN_END_OF_FILE) {
+ } else if(token == end_token) {
break;
} else {
fprintf(stderr, "Error: Expected identifier, got TODO\n");
@@ -132,8 +263,9 @@ static int tsl_parser_parse(TslParser *self) {
return 0;
}
+/* EXPRS */
int tsl_parse(const char *code, size_t code_size) {
TslParser parser;
tsl_parser_init(&parser, code, code_size);
- return tsl_parser_parse(&parser);
+ return tsl_parser_parse_expressions(&parser, TSL_TOKEN_END_OF_FILE);
}
diff --git a/src/tokenizer.c b/src/tokenizer.c
index b310aae..89c40cb 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -1,11 +1,17 @@
#include "../include/tokenizer.h"
#include <string.h>
#include <stdio.h>
+#include <assert.h>
void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) {
self->code = code;
self->code_size = code_size;
self->code_index = 0;
+ self->prev_code_index = 0;
+
+ self->peek.token = -1;
+ self->peek.code_index = 0;
+ self->peek.prev_code_index = 0;
self->identifier.data = NULL;
self->identifier.size = 0;
@@ -21,19 +27,25 @@ static char tsl_tokenizer_get_char(TslTokenizer *self) {
return '\0';
}
+/* Returns 1 if |c| is a space, newline, tab or carriage return, otherwise 0 */
+static int is_whitespace(char c) {
+    return c == ' ' || c == '\n' || c == '\t' || c == '\r';
+}
+
static void tsl_tokenizer_skip_whitespace(TslTokenizer *self) {
for(;;) {
char c = tsl_tokenizer_get_char(self);
- switch(c) {
- case ' ':
- case '\n':
- case '\t':
- case '\r':
- ++self->code_index;
- break;
- default:
- return;
- }
+ if(is_whitespace(c))
+ ++self->code_index;
+ else
+ return;
}
}
@@ -107,8 +119,9 @@ static int tsl_tokenizer_goto_end_of_string(TslTokenizer *self, char string_star
}
}
-TslToken tsl_tokenizer_next(TslTokenizer *self) {
+static TslToken tsl_tokenizer_next_internal(TslTokenizer *self) {
char c;
+ self->prev_code_index = self->code_index;
tsl_tokenizer_skip_whitespace(self);
c = tsl_tokenizer_get_char(self);
@@ -125,6 +138,12 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
self->identifier.size = self->code_index - identifier_start;
switch(self->identifier.size) {
+ case 2: {
+ if(memcmp(self->identifier.data, "fn", 2) == 0) {
+ return TSL_TOKEN_FN;
+ }
+ break;
+ }
case 4: {
if(memcmp(self->identifier.data, "true", 4) == 0) {
self->bool_value = 1;
@@ -183,12 +202,21 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
} else if(c == ']') {
++self->code_index;
return TSL_TOKEN_RBRACKET;
+ } else if(c == '(') {
+ ++self->code_index;
+ return TSL_TOKEN_LPAREN;
+ } else if(c == ')') {
+ ++self->code_index;
+ return TSL_TOKEN_RPAREN;
} else if(c == ',') {
++self->code_index;
return TSL_TOKEN_COMMA;
} else if(c == ':') {
++self->code_index;
return TSL_TOKEN_COLON;
+ } else if(c == '$') {
+ ++self->code_index;
+ return TSL_TOKEN_DOLLAR_SIGN;
} else if(c == '\0') {
return TSL_TOKEN_END_OF_FILE;
} else {
@@ -197,11 +225,108 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
}
}
+/*
+ * Turns the token stored by tsl_tokenizer_peek into the current token: moves the
+ * tokenizer to the positions recorded with the peek, marks the peek slot as
+ * empty (-1) and returns the token that was stored.
+ */
+static TslToken tsl_tokenizer_consume_peek(TslTokenizer *self) {
+    TslToken peeked = self->peek.token;
+    self->peek.token = -1;
+    self->prev_code_index = self->peek.prev_code_index;
+    self->code_index = self->peek.code_index;
+    return peeked;
+}
+
+/*
+ * Returns the next token and advances past it. A token stored by an earlier
+ * tsl_tokenizer_peek is handed out first; otherwise the code is tokenized directly.
+ */
+TslToken tsl_tokenizer_next(TslTokenizer *self) {
+    if((int)self->peek.token != -1)
+        return tsl_tokenizer_consume_peek(self);
+    return tsl_tokenizer_next_internal(self);
+}
+
int tsl_tokenizer_accept(TslTokenizer *self, TslToken expected_token) {
- TslToken actual_token = tsl_tokenizer_next(self);
+ TslToken actual_token;
+ if((int)self->peek.token == -1) {
+ actual_token = tsl_tokenizer_next_internal(self);
+ } else {
+ actual_token = tsl_tokenizer_consume_peek(self);
+ }
if(actual_token != expected_token) {
- fprintf(stderr, "Error: Expected TODO, got TODO\n");
+ fprintf(stderr, "Error: Expected TODO(%d), got TODO(%d)\n", expected_token, actual_token);
return 0;
}
return 1;
}
+
+/*
+ * Returns the upcoming token without advancing the tokenizer position.
+ * The token and the positions reached while reading it are stored in self->peek so the
+ * next tsl_tokenizer_next/tsl_tokenizer_accept can consume it without tokenizing again.
+ * Only code_index and prev_code_index are restored afterwards.
+ */
+TslToken tsl_tokenizer_peek(TslTokenizer *self) {
+    const size_t saved_code_index = self->code_index;
+    const size_t saved_prev_code_index = self->prev_code_index;
+    TslToken upcoming = tsl_tokenizer_next_internal(self);
+
+    self->peek.token = upcoming;
+    self->peek.prev_code_index = self->prev_code_index;
+    self->peek.code_index = self->code_index;
+
+    /* Rewind so the caller still sees the position from before the peek */
+    self->code_index = saved_code_index;
+    self->prev_code_index = saved_prev_code_index;
+    return upcoming;
+}
+
+/*
+ Reads the next argument of a command, starting at the current code position.
+ Returns:
+ TSL_COMMAND_TOKEN_END when the next non-whitespace character is ')' (it is consumed).
+ TSL_COMMAND_TOKEN_ARG when an argument was read; |arg| then refers to its characters inside
+ the source code (nothing is copied and escape sequences are left as written).
+ TSL_COMMAND_TOKEN_END_OF_FILE when the code ends, or when a quoted argument could not be
+ read to its end.
+ Must not be called while a peeked token is pending (see the assert).
+*/
+TslCommandToken tsl_tokenizer_next_command_arg(TslTokenizer *self, TslStringView *arg) {
+ char c;
+ /* This function reads from code_index directly and does not look at a stored peek */
+ assert((int)self->peek.token == -1);
+ self->prev_code_index = self->code_index;
+ tsl_tokenizer_skip_whitespace(self);
+
+ c = tsl_tokenizer_get_char(self);
+ if(c == ')') {
+ ++self->code_index;
+ return TSL_COMMAND_TOKEN_END;
+ } else if(c == '"') {
+ /* Quoted argument: the result excludes the surrounding quotes */
+ char string_start_symbol = c;
+ size_t string_start;
+ ++self->code_index;
+ string_start = self->code_index;
+ if(tsl_tokenizer_goto_end_of_string(self, string_start_symbol)) {
+ arg->data = self->code + string_start;
+ /* The -1 presumably drops the closing quote that code_index was moved past (TODO: confirm) */
+ arg->size = self->code_index - 1 - string_start;
+ return TSL_COMMAND_TOKEN_ARG;
+ } else {
+ return TSL_COMMAND_TOKEN_END_OF_FILE;
+ }
+ } else if(c == '\0') {
+ return TSL_COMMAND_TOKEN_END_OF_FILE;
+ } else {
+ /*
+ TODO: When hitting ", parse to the end of it and make it part of this arg instead of
+ separating them into two args
+ */
+ /* Unquoted argument: runs until whitespace, ')', end of code or an unescaped '"' */
+ size_t arg_start = self->code_index;
+ /* escape_char is non-zero while the previous character was an unescaped backslash */
+ int escape_char = tsl_tokenizer_get_char(self) == '\\';
+ ++self->code_index;
+ for(;;) {
+ c = tsl_tokenizer_get_char(self);
+ if(is_whitespace(c) || c == ')' || c == '\0') {
+ break;
+ } else if(c == '"') {
+ /* An unescaped quote ends this argument; the quote itself is left for the next call */
+ if(!escape_char)
+ break;
+ escape_char = 0;
+ } else if(c == '\\') {
+ /* Two backslashes in a row cancel each other out */
+ escape_char = !escape_char;
+ } else {
+ escape_char = 0;
+ }
+ ++self->code_index;
+ }
+ arg->data = self->code + arg_start;
+ arg->size = self->code_index - arg_start;
+ return TSL_COMMAND_TOKEN_ARG;
+ }
+}
+
+/*
+ * Returns the 1-based line number of the character at |index| in the tokenizer's code,
+ * found by counting the '\n' characters before it.
+ * An |index| past the end of the code is clamped to the code size so the scan never
+ * reads outside the buffer.
+ */
+int tsl_tokenizer_get_line_by_index(TslTokenizer *self, size_t index) {
+    size_t i;
+    int line = 1;
+
+    if(index > self->code_size)
+        index = self->code_size;
+
+    for(i = 0; i < index; ++i) {
+        if(self->code[i] == '\n')
+            ++line;
+    }
+    return line;
+}