aboutsummaryrefslogtreecommitdiff
path: root/src/tokenizer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizer.c')
-rw-r--r--src/tokenizer.c151
1 files changed, 138 insertions, 13 deletions
diff --git a/src/tokenizer.c b/src/tokenizer.c
index b310aae..89c40cb 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -1,11 +1,17 @@
#include "../include/tokenizer.h"
#include <string.h>
#include <stdio.h>
+#include <assert.h>
void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) {
self->code = code;
self->code_size = code_size;
self->code_index = 0;
+ self->prev_code_index = 0;
+
+ self->peek.token = -1;
+ self->peek.code_index = 0;
+ self->peek.prev_code_index = 0;
self->identifier.data = NULL;
self->identifier.size = 0;
@@ -21,19 +27,25 @@ static char tsl_tokenizer_get_char(TslTokenizer *self) {
return '\0';
}
/* Return 1 if c is one of the whitespace characters the tokenizer skips
   (space, newline, tab, carriage return), otherwise 0. */
static int is_whitespace(char c) {
    return c == ' ' || c == '\n' || c == '\t' || c == '\r';
}
+
/* Advance code_index past any run of whitespace. Stops at the first
   non-whitespace character (including '\0' at end of input). */
static void tsl_tokenizer_skip_whitespace(TslTokenizer *self) {
    while (is_whitespace(tsl_tokenizer_get_char(self))) {
        ++self->code_index;
    }
}
@@ -107,8 +119,9 @@ static int tsl_tokenizer_goto_end_of_string(TslTokenizer *self, char string_star
}
}
-TslToken tsl_tokenizer_next(TslTokenizer *self) {
+static TslToken tsl_tokenizer_next_internal(TslTokenizer *self) {
char c;
+ self->prev_code_index = self->code_index;
tsl_tokenizer_skip_whitespace(self);
c = tsl_tokenizer_get_char(self);
@@ -125,6 +138,12 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
self->identifier.size = self->code_index - identifier_start;
switch(self->identifier.size) {
+ case 2: {
+ if(memcmp(self->identifier.data, "fn", 2) == 0) {
+ return TSL_TOKEN_FN;
+ }
+ break;
+ }
case 4: {
if(memcmp(self->identifier.data, "true", 4) == 0) {
self->bool_value = 1;
@@ -183,12 +202,21 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
} else if(c == ']') {
++self->code_index;
return TSL_TOKEN_RBRACKET;
+ } else if(c == '(') {
+ ++self->code_index;
+ return TSL_TOKEN_LPAREN;
+ } else if(c == ')') {
+ ++self->code_index;
+ return TSL_TOKEN_RPAREN;
} else if(c == ',') {
++self->code_index;
return TSL_TOKEN_COMMA;
} else if(c == ':') {
++self->code_index;
return TSL_TOKEN_COLON;
+ } else if(c == '$') {
+ ++self->code_index;
+ return TSL_TOKEN_DOLLAR_SIGN;
} else if(c == '\0') {
return TSL_TOKEN_END_OF_FILE;
} else {
@@ -197,11 +225,108 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
}
}
+static TslToken tsl_tokenizer_consume_peek(TslTokenizer *self) {
+ TslToken token = self->peek.token;
+ self->code_index = self->peek.code_index;
+ self->prev_code_index = self->peek.prev_code_index;
+ self->peek.token = -1;
+ return token;
+}
+
+TslToken tsl_tokenizer_next(TslTokenizer *self) {
+ if((int)self->peek.token == -1) {
+ return tsl_tokenizer_next_internal(self);
+ } else {
+ return tsl_tokenizer_consume_peek(self);
+ }
+}
+
/*
 * Consume the next token and verify it matches expected_token.
 * Returns 1 on match; on mismatch prints an error to stderr and returns 0.
 * The consumed token is gone either way.
 */
int tsl_tokenizer_accept(TslTokenizer *self, TslToken expected_token) {
    /* tsl_tokenizer_next already dispatches between a pending peek and fresh
       tokenization, so the peek-check branch previously duplicated here is
       unnecessary — call it directly to keep the logic in one place. */
    TslToken actual_token = tsl_tokenizer_next(self);
    if(actual_token != expected_token) {
        fprintf(stderr, "Error: Expected TODO(%d), got TODO(%d)\n", expected_token, actual_token);
        return 0;
    }
    return 1;
}
+
+TslToken tsl_tokenizer_peek(TslTokenizer *self) {
+ size_t p_prev_code_index = self->prev_code_index;
+ size_t p_code_index = self->code_index;
+
+ self->peek.token = tsl_tokenizer_next_internal(self);
+ self->peek.code_index = self->code_index;
+ self->peek.prev_code_index = self->prev_code_index;
+
+ self->prev_code_index = p_prev_code_index;
+ self->code_index = p_code_index;
+ return self->peek.token;
+}
+
/*
 * Tokenize the next argument of a command invocation (the text before the
 * closing ')'). On TSL_COMMAND_TOKEN_ARG, *arg is set to a view into
 * self->code — not NUL-terminated and not owned by the caller.
 * NOTE(review): must not be called while a peeked token is buffered
 * (enforced by the assert below).
 */
TslCommandToken tsl_tokenizer_next_command_arg(TslTokenizer *self, TslStringView *arg) {
    char c;
    assert((int)self->peek.token == -1);
    self->prev_code_index = self->code_index;
    tsl_tokenizer_skip_whitespace(self);

    c = tsl_tokenizer_get_char(self);
    if(c == ')') {
        /* End of the argument list. */
        ++self->code_index;
        return TSL_COMMAND_TOKEN_END;
    } else if(c == '"') {
        /* Quoted argument: the resulting view excludes the surrounding quotes. */
        char string_start_symbol = c;
        size_t string_start;
        ++self->code_index;
        string_start = self->code_index;
        if(tsl_tokenizer_goto_end_of_string(self, string_start_symbol)) {
            arg->data = self->code + string_start;
            /* -1 trims the closing quote — presumably goto_end_of_string leaves
               code_index one past it; TODO confirm against its definition. */
            arg->size = self->code_index - 1 - string_start;
            return TSL_COMMAND_TOKEN_ARG;
        } else {
            /* Unterminated string literal. */
            return TSL_COMMAND_TOKEN_END_OF_FILE;
        }
    } else if(c == '\0') {
        return TSL_COMMAND_TOKEN_END_OF_FILE;
    } else {
        /*
        TODO: When hitting ", parse to the end of it and make it part of this arg instead of
        separating them into two args
        */
        /* Bare (unquoted) argument: consume until whitespace, ')', an
           unescaped '"', or end of input. escape_char tracks whether the
           previous character was an active backslash. */
        size_t arg_start = self->code_index;
        int escape_char = tsl_tokenizer_get_char(self) == '\\';
        ++self->code_index;
        for(;;) {
            c = tsl_tokenizer_get_char(self);
            if(is_whitespace(c) || c == ')' || c == '\0') {
                break;
            } else if(c == '"') {
                if(!escape_char)
                    break;
                /* Escaped quote stays part of the argument. */
                escape_char = 0;
            } else if(c == '\\') {
                /* A backslash escapes the next char; two in a row cancel out. */
                escape_char = !escape_char;
            } else {
                escape_char = 0;
            }
            ++self->code_index;
        }
        arg->data = self->code + arg_start;
        arg->size = self->code_index - arg_start;
        return TSL_COMMAND_TOKEN_ARG;
    }
}
+
+int tsl_tokenizer_get_line_by_index(TslTokenizer *self, size_t index) {
+ size_t i = 0;
+ int line = 1;
+ for(; i < index; ++i) {
+ if(self->code[i] == '\n')
+ ++line;
+ }
+ return line;
+}