#include "../include/tokenizer.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Prepare @self to tokenize the @code_size bytes starting at @code.
 *
 * The buffer is not copied: only the pointer is stored, and the identifier and
 * string views produced later point directly into it. All token values are
 * reset to empty/zero.
 */
void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) {
    self->code = code;
    self->code_size = code_size;
    self->code_index = 0;
    self->identifier.data = NULL;
    self->identifier.size = 0;
    self->string.data = NULL;
    self->string.size = 0;
    self->bool_value = 0;
    self->number_value = 0;
}

/*
 * Return the character at the current position without consuming it, or '\0'
 * once the position has reached the end of the input. A NUL byte inside the
 * input is therefore indistinguishable from end of input for the callers.
 */
static char tsl_tokenizer_get_char(TslTokenizer *self) {
    if(self->code_index < self->code_size)
        return self->code[self->code_index];
    return '\0';
}

/* Advance past any run of spaces, newlines, tabs and carriage returns. */
static void tsl_tokenizer_skip_whitespace(TslTokenizer *self) {
    for(;;) {
        char c = tsl_tokenizer_get_char(self);
        switch(c) {
            case ' ':
            case '\n':
            case '\t':
            case '\r':
                ++self->code_index;
                break;
            default:
                return;
        }
    }
}

/* Non-zero if @c is an ASCII letter. */
static int is_alpha(char c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/* Non-zero if @c is an ASCII decimal digit. */
static int is_num(char c) {
    return c >= '0' && c <= '9';
}

/* Non-zero if @c may begin an identifier (letter or underscore). */
static int is_identifier_start(char c) {
    return is_alpha(c) || c == '_';
}

/* Non-zero if @c may appear after the first character of an identifier. */
static int is_identifier_continue(char c) {
    return is_alpha(c) || is_num(c) || c == '_';
}

/*
 * Powers of ten from 10^0 to 10^18.
 * No longer used by string_to_int; the definition is retained unchanged because
 * it has external linkage and may be referenced from elsewhere in the project.
 */
const int64_t num_multipliers[] = {
    1,
    10,
    100,
    1000,
    10000,
    100000,
    1000000,
    10000000,
    100000000,
    1000000000,
    10000000000,
    100000000000,
    1000000000000,
    10000000000000,
    100000000000000,
    1000000000000000,
    10000000000000000,
    100000000000000000,
    1000000000000000000
};

/*
 * Convert a view consisting only of decimal digits to an integer.
 *
 * The value is accumulated left to right. If it would exceed INT64_MAX the
 * result saturates at INT64_MAX, so arbitrarily long digit sequences neither
 * overflow a signed integer nor index outside a lookup table.
 * An empty view yields 0.
 */
static int64_t string_to_int(const TslStringView *str) {
    int64_t num = 0;
    size_t i;
    for(i = 0; i < str->size; ++i) {
        int digit = str->data[i] - '0';
        /* Equivalent to num * 10 + digit > INT64_MAX, evaluated without overflowing */
        if(num > (INT64_MAX - digit) / 10)
            return INT64_MAX;
        num = num * 10 + digit;
    }
    return num;
}

/*
 * Advance past the remainder of a string literal whose opening quote has
 * already been consumed.
 *
 * A quote character preceded by an odd number of consecutive backslashes is
 * treated as escaped and does not end the string.
 *
 * Returns 1 with the position just after the closing @string_start_symbol, or
 * 0 if the end of the input (or a NUL byte) is reached first.
 */
static int tsl_tokenizer_goto_end_of_string(TslTokenizer *self, char string_start_symbol) {
    int string_escape = 0;
    for(;;) {
        char c = tsl_tokenizer_get_char(self);
        if(c == string_start_symbol) {
            ++self->code_index;
            if(!string_escape)
                return 1;
            string_escape = 0;
        } else if(c == '\\') {
            ++self->code_index;
            /* Toggling makes "\\\\" cancel out so that a following quote still closes the string */
            string_escape = !string_escape;
        } else if(c == '\0') {
            return 0;
        } else {
            ++self->code_index;
            string_escape = 0;
        }
    }
}

/*
 * Skip leading whitespace and read the next token.
 *
 * Depending on the returned token, the matching field of @self is updated:
 *   TSL_TOKEN_IDENTIFIER  identifier views the identifier text.
 *   TSL_TOKEN_BOOL        bool_value is 1 for "true" and 0 for "false".
 *   TSL_TOKEN_NULL        the text was "null".
 *   TSL_TOKEN_NUM         number_value holds the value (saturated at INT64_MAX);
 *                         identifier views the digits.
 *   TSL_TOKEN_STRING      string views the raw text between the double quotes;
 *                         escape sequences are not decoded.
 *
 * An unterminated string yields TSL_TOKEN_END_OF_FILE. An unrecognised
 * character is reported on stderr and yields TSL_TOKEN_UNEXPECTED_SYMBOL; the
 * character is not consumed.
 */
TslToken tsl_tokenizer_next(TslTokenizer *self) {
    char c;
    tsl_tokenizer_skip_whitespace(self);

    c = tsl_tokenizer_get_char(self);
    if(is_identifier_start(c)) {
        size_t identifier_start = self->code_index;
        ++self->code_index;
        while(is_identifier_continue(tsl_tokenizer_get_char(self)))
            ++self->code_index;

        self->identifier.data = self->code + identifier_start;
        self->identifier.size = self->code_index - identifier_start;

        /* Keywords are distinguished by length first so that memcmp never reads past the identifier */
        switch(self->identifier.size) {
            case 4: {
                if(memcmp(self->identifier.data, "true", 4) == 0) {
                    self->bool_value = 1;
                    return TSL_TOKEN_BOOL;
                } else if(memcmp(self->identifier.data, "null", 4) == 0) {
                    return TSL_TOKEN_NULL;
                }
                break;
            }
            case 5: {
                if(memcmp(self->identifier.data, "false", 5) == 0) {
                    self->bool_value = 0;
                    return TSL_TOKEN_BOOL;
                }
                break;
            }
            default:
                break;
        }
        return TSL_TOKEN_IDENTIFIER;
    }

    if(is_num(c)) {
        size_t num_start = self->code_index;
        ++self->code_index;
        while(is_num(tsl_tokenizer_get_char(self)))
            ++self->code_index;

        self->identifier.data = self->code + num_start;
        self->identifier.size = self->code_index - num_start;
        self->number_value = string_to_int(&self->identifier);
        return TSL_TOKEN_NUM;
    }

    if(c == '"') {
        char string_start_symbol = c;
        size_t string_start;
        ++self->code_index;
        string_start = self->code_index;
        if(!tsl_tokenizer_goto_end_of_string(self, string_start_symbol))
            return TSL_TOKEN_END_OF_FILE;

        self->string.data = self->code + string_start;
        /* code_index is one past the closing quote; the quote itself is excluded */
        self->string.size = self->code_index - 1 - string_start;
        return TSL_TOKEN_STRING;
    }

    switch(c) {
        case '=':
            ++self->code_index;
            return TSL_TOKEN_EQUAL;
        case '{':
            ++self->code_index;
            return TSL_TOKEN_LBRACE;
        case '}':
            ++self->code_index;
            return TSL_TOKEN_RBRACE;
        case '[':
            ++self->code_index;
            return TSL_TOKEN_LBRACKET;
        case ']':
            ++self->code_index;
            return TSL_TOKEN_RBRACKET;
        case '\0':
            return TSL_TOKEN_END_OF_FILE;
        default:
            fprintf(stderr, "Unexpected symbol '%c'\n", c);
            return TSL_TOKEN_UNEXPECTED_SYMBOL;
    }
}

/*
 * Read the next token and check that it is @expected_token.
 *
 * Returns 1 on a match. Otherwise reports the numeric values of the expected
 * and actual tokens on stderr and returns 0. The token is consumed either way.
 */
int tsl_tokenizer_accept(TslTokenizer *self, TslToken expected_token) {
    TslToken actual_token = tsl_tokenizer_next(self);
    if(actual_token != expected_token) {
        fprintf(stderr, "Error: Expected token %d, got token %d\n", (int)expected_token, (int)actual_token);
        return 0;
    }
    return 1;
}