diff options
Diffstat (limited to 'src/tokenizer.c')
-rw-r--r-- | src/tokenizer.c | 151 |
1 files changed, 138 insertions, 13 deletions
diff --git a/src/tokenizer.c b/src/tokenizer.c index b310aae..89c40cb 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -1,11 +1,17 @@ #include "../include/tokenizer.h" #include <string.h> #include <stdio.h> +#include <assert.h> void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) { self->code = code; self->code_size = code_size; self->code_index = 0; + self->prev_code_index = 0; + + self->peek.token = -1; + self->peek.code_index = 0; + self->peek.prev_code_index = 0; self->identifier.data = NULL; self->identifier.size = 0; @@ -21,19 +27,25 @@ static char tsl_tokenizer_get_char(TslTokenizer *self) { return '\0'; } +static int is_whitespace(char c) { + switch(c) { + case ' ': + case '\n': + case '\t': + case '\r': + return 1; + default: + return 0; + } +} + static void tsl_tokenizer_skip_whitespace(TslTokenizer *self) { for(;;) { char c = tsl_tokenizer_get_char(self); - switch(c) { - case ' ': - case '\n': - case '\t': - case '\r': - ++self->code_index; - break; - default: - return; - } + if(is_whitespace(c)) + ++self->code_index; + else + return; } } @@ -107,8 +119,9 @@ static int tsl_tokenizer_goto_end_of_string(TslTokenizer *self, char string_star } } -TslToken tsl_tokenizer_next(TslTokenizer *self) { +static TslToken tsl_tokenizer_next_internal(TslTokenizer *self) { char c; + self->prev_code_index = self->code_index; tsl_tokenizer_skip_whitespace(self); c = tsl_tokenizer_get_char(self); @@ -125,6 +138,12 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) { self->identifier.size = self->code_index - identifier_start; switch(self->identifier.size) { + case 2: { + if(memcmp(self->identifier.data, "fn", 2) == 0) { + return TSL_TOKEN_FN; + } + break; + } case 4: { if(memcmp(self->identifier.data, "true", 4) == 0) { self->bool_value = 1; @@ -183,12 +202,21 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) { } else if(c == ']') { ++self->code_index; return TSL_TOKEN_RBRACKET; + } else if(c == '(') { + ++self->code_index; + return TSL_TOKEN_LPAREN; + } else if(c == ')') { + ++self->code_index; + return TSL_TOKEN_RPAREN; } else if(c == ',') { ++self->code_index; return TSL_TOKEN_COMMA; } else if(c == ':') { ++self->code_index; return TSL_TOKEN_COLON; + } else if(c == '$') { + ++self->code_index; + return TSL_TOKEN_DOLLAR_SIGN; } else if(c == '\0') { return TSL_TOKEN_END_OF_FILE; } else { @@ -197,11 +225,108 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) { } } +static TslToken tsl_tokenizer_consume_peek(TslTokenizer *self) { + TslToken token = self->peek.token; + self->code_index = self->peek.code_index; + self->prev_code_index = self->peek.prev_code_index; + self->peek.token = -1; + return token; +} + +TslToken tsl_tokenizer_next(TslTokenizer *self) { + if((int)self->peek.token == -1) { + return tsl_tokenizer_next_internal(self); + } else { + return tsl_tokenizer_consume_peek(self); + } +} + int tsl_tokenizer_accept(TslTokenizer *self, TslToken expected_token) { - TslToken actual_token = tsl_tokenizer_next(self); + TslToken actual_token; + if((int)self->peek.token == -1) { + actual_token = tsl_tokenizer_next_internal(self); + } else { + actual_token = tsl_tokenizer_consume_peek(self); + } if(actual_token != expected_token) { - fprintf(stderr, "Error: Expected TODO, got TODO\n"); + fprintf(stderr, "Error: Expected TODO(%d), got TODO(%d)\n", expected_token, actual_token); return 0; } return 1; } + +TslToken tsl_tokenizer_peek(TslTokenizer *self) { + size_t p_prev_code_index = self->prev_code_index; + size_t p_code_index = self->code_index; + + self->peek.token = tsl_tokenizer_next_internal(self); + self->peek.code_index = self->code_index; + self->peek.prev_code_index = self->prev_code_index; + + self->prev_code_index = p_prev_code_index; + self->code_index = p_code_index; + return self->peek.token; +} + +TslCommandToken tsl_tokenizer_next_command_arg(TslTokenizer *self, TslStringView *arg) { + char c; + assert((int)self->peek.token == -1); + self->prev_code_index = self->code_index; + tsl_tokenizer_skip_whitespace(self); + + c = tsl_tokenizer_get_char(self); + if(c == ')') { + ++self->code_index; + return TSL_COMMAND_TOKEN_END; + } else if(c == '"') { + char string_start_symbol = c; + size_t string_start; + ++self->code_index; + string_start = self->code_index; + if(tsl_tokenizer_goto_end_of_string(self, string_start_symbol)) { + arg->data = self->code + string_start; + arg->size = self->code_index - 1 - string_start; + return TSL_COMMAND_TOKEN_ARG; + } else { + return TSL_COMMAND_TOKEN_END_OF_FILE; + } + } else if(c == '\0') { + return TSL_COMMAND_TOKEN_END_OF_FILE; + } else { + /* + TODO: When hitting ", parse to the end of it and make it part of this arg instead of + separating them into two args + */ + size_t arg_start = self->code_index; + int escape_char = tsl_tokenizer_get_char(self) == '\\'; + ++self->code_index; + for(;;) { + c = tsl_tokenizer_get_char(self); + if(is_whitespace(c) || c == ')' || c == '\0') { + break; + } else if(c == '"') { + if(!escape_char) + break; + escape_char = 0; + } else if(c == '\\') { + escape_char = !escape_char; + } else { + escape_char = 0; + } + ++self->code_index; + } + arg->data = self->code + arg_start; + arg->size = self->code_index - arg_start; + return TSL_COMMAND_TOKEN_ARG; + } +} + +int tsl_tokenizer_get_line_by_index(TslTokenizer *self, size_t index) { + size_t i = 0; + int line = 1; + for(; i < index; ++i) { + if(self->code[i] == '\n') + ++line; + } + return line; +} |