aboutsummaryrefslogtreecommitdiff
path: root/src/tokenizer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizer.c')
-rw-r--r--  src/tokenizer.c  87
1 file changed, 82 insertions, 5 deletions
diff --git a/src/tokenizer.c b/src/tokenizer.c
index fda3e40..e85c952 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -1,5 +1,7 @@
#include "../include/tokenizer.h"
-#include "../include/mem.h"
+#include "../include/std/mem.h"
+#include "../include/std/log.h"
+#include "../include/std/thread.h"
#include <assert.h>
#include <limits.h>
#include <stdio.h>
@@ -17,12 +19,13 @@ static int isAlphaDigit(int c) {
return isAlpha(c) || isDigit(c);
}
-int tokenizer_init(Tokenizer *self, BufferView code) {
+int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name) {
assert(code.size <= INT_MAX);
self->code = code;
self->index = 0;
self->prev_index = 0;
self->line = 1;
+ self->code_name = code_name.data ? code_name : create_buffer_view("<buffer>", 8);
return 0;
}
@@ -52,9 +55,28 @@ static Token tokenizer_skip_whitespace(Tokenizer *self) {
}
}
+/* Returns -1 if end of string can't be found */
+static int find_end_of_string(BufferView buf, int index) {
+ int c;
+ bool escape_quote;
+ escape_quote = bool_false;
+
+ for(; index < (int)buf.size; ++index) {
+ c = buf.data[index];
+ if(c == '\\')
+ escape_quote = !escape_quote;
+ else if(!escape_quote && c == '"')
+ return index;
+ else
+ escape_quote = bool_false;
+ }
+ return -1;
+}
+
int tokenizer_next(Tokenizer *self, Token *token) {
Token last_token;
int c;
+ int result;
last_token = tokenizer_skip_whitespace(self);
if(last_token == TOK_END_OF_FILE) {
@@ -85,6 +107,20 @@ int tokenizer_next(Tokenizer *self, Token *token) {
*token = TOK_VAR;
else
*token = TOK_IDENTIFIER;
+ } else if(c == '"') {
+ int string_end;
+ ++self->index;
+ string_end = find_end_of_string(self->code, self->index);
+ if(string_end == -1) {
+ tokenizer_print_error(self, "String end not found. Did you forget '\"' or did you have a mismatch of number of '\"'?");
+ return TOKENIZER_ERR;
+ }
+
+ self->value.string.data = &self->code.data[self->index];
+ self->value.string.size = string_end - self->index;
+ self->index = string_end + 1;
+ *token = TOK_STRING;
+ return TOKENIZER_OK;
} else if(c == '=') {
++self->index;
*token = TOK_EQUALS;
@@ -100,8 +136,45 @@ int tokenizer_next(Tokenizer *self, Token *token) {
} else if(c == '}') {
++self->index;
*token = TOK_CLOSING_BRACE;
+ } else if(c == '@') {
+ const char *err_msg;
+ ++self->index;
+ if(self->index + 6 >= (int)self->code.size || !am_memeql(self->code.data + self->index, "import", 6)) {
+ err_msg = "Expected '@import(path)'";
+ goto import_error;
+ }
+ self->index += 6;
+
+ result = tokenizer_next(self, &last_token);
+ if(result != 0 || last_token != TOK_OPEN_PAREN) {
+ err_msg = "Expected '(' after @import";
+ goto import_error;
+ }
+
+ result = tokenizer_next(self, &last_token);
+ if(result != 0 || last_token != TOK_STRING) {
+ err_msg = "Expected string after @import(";
+ goto import_error;
+ }
+
+ if(self->value.string.size == 0) {
+ err_msg = "Path in @import can't be empty";
+ goto import_error;
+ }
+
+ result = tokenizer_next(self, &last_token);
+ if(result != 0 || last_token != TOK_CLOSING_PAREN) {
+ err_msg = "Expected ')' after @import(path";
+ goto import_error;
+ }
+
+ *token = TOK_IMPORT;
+ return TOKENIZER_OK;
+
+ import_error:
+ tokenizer_print_error(self, err_msg);
+ return TOKENIZER_ERR;
} else {
- /*self.printError("Unexpected symbol '{c}'", c);*/
tokenizer_print_error(self, "Unexpected symbol '%c'", c);
return TOKENIZER_UNEXPECTED_TOKEN;
}
@@ -130,8 +203,8 @@ int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result) {
if(actual_token == expected_token) {
*result = bool_true;
} else {
- /* No need to restore self.prev_index as it's updated on the next call to tokenizer_next */
self->index = index;
+ self->prev_index = index;
self->line = line;
*result = bool_false;
}
@@ -167,16 +240,20 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
int line_end;
int prev_column;
int i;
+ amal_mutex *mutex;
+ mutex = amal_log_get_mutex();
+ ignore_result_int(amal_mutex_lock(mutex, "tokenizer_print_error"));
va_start(args, fmt);
line_start = tokenizer_get_start_of_line_from_index(self, self->prev_index);
line_end = tokenizer_get_end_of_line_from_index(self, self->prev_index);
prev_column = self->prev_index - line_start;
- fprintf(stderr, "\x1b[1;37m%s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", "file.am", self->line, 1 + prev_column);
+ fprintf(stderr, "\x1b[1;37m%.*s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", (int)self->code_name.size, self->code_name.data, self->line, 1 + prev_column);
vfprintf(stderr, fmt, args);
fprintf(stderr, "\n%.*s\n", line_end - line_start, self->code.data + line_start);
for(i = 0; i < prev_column; ++i)
fprintf(stderr, " ");
fprintf(stderr, "\x1b[1;32m^\x1b[0m\n");
va_end(args);
+ ignore_result_int(amal_mutex_unlock(mutex));
} \ No newline at end of file