#include "../include/tokenizer.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Prepare a tokenizer to read |code_size| bytes starting at |code|.
 * The buffer is not copied; the tokenizer only stores the pointer, so the
 * buffer must stay valid for as long as the tokenizer is used.
 */
void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) {
    self->code = code;
    self->code_size = code_size;
    self->code_index = 0;
    self->identifier.data = NULL;
    self->identifier.size = 0;
    self->bool_value = 0;
    self->number_value = 0;
}

/*
 * Return the character at the current position without consuming it,
 * or '\0' when the position is at or past the end of the input.
 */
static char tsl_tokenizer_get_char(TslTokenizer *self) {
    if(self->code_index < self->code_size)
        return self->code[self->code_index];
    return '\0';
}

/* Advance past any run of spaces, tabs, newlines and carriage returns. */
static void tsl_tokenizer_skip_whitespace(TslTokenizer *self) {
    for(;;) {
        char c = tsl_tokenizer_get_char(self);
        switch(c) {
            case ' ':
            case '\n':
            case '\t':
            case '\r':
                ++self->code_index;
                break;
            default:
                return;
        }
    }
}

/* Non-zero if |c| is an ASCII letter. */
static int is_alpha(char c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/* Non-zero if |c| is an ASCII decimal digit. */
static int is_num(char c) {
    return c >= '0' && c <= '9';
}

/* Non-zero if |c| may begin an identifier: a letter or underscore. */
static int is_identifier_start(char c) {
    return is_alpha(c) || c == '_';
}

/* Non-zero if |c| may follow the first character of an identifier. */
static int is_identifier_continue(char c) {
    return is_alpha(c) || is_num(c) || c == '_';
}

/*
 * Powers of ten from 10^0 to 10^18.
 * No longer used by string_to_int (see below). Kept because it has external
 * linkage and other translation units may reference it; that cannot be
 * verified from this file alone.
 */
const int64_t num_multipliers[] = {
    1,
    10,
    100,
    1000,
    10000,
    100000,
    1000000,
    10000000,
    100000000,
    1000000000,
    10000000000,
    100000000000,
    1000000000000,
    10000000000000,
    100000000000000,
    1000000000000000,
    10000000000000000,
    100000000000000000,
    1000000000000000000
};

/*
 * Convert a string of ASCII decimal digits to an integer.
 * |str| must contain only characters '0'..'9'; the caller guarantees this.
 *
 * Values that do not fit in an int64_t saturate to INT64_MAX. The previous
 * implementation indexed a 19-entry table by digit position, which read out
 * of bounds for longer inputs and could overflow (undefined behaviour) for
 * 19-digit inputs.
 * TODO: Report the overflow to the caller instead of silently saturating.
 */
static int64_t string_to_int(const TslStringView *str) {
    int64_t num = 0;
    for(size_t i = 0; i < str->size; ++i) {
        int digit = str->data[i] - '0';
        /* num * 10 + digit would exceed INT64_MAX: clamp instead of overflowing */
        if(num > (INT64_MAX - digit) / 10)
            return INT64_MAX;
        num = num * 10 + digit;
    }
    return num;
}

/* Non-zero if |id| is exactly the NUL-terminated string |keyword|. */
static int identifier_equals(const TslStringView *id, const char *keyword) {
    size_t keyword_size = strlen(keyword);
    return id->size == keyword_size && memcmp(id->data, keyword, keyword_size) == 0;
}

/*
 * Read the next token from the input, skipping leading whitespace.
 *
 * Returns:
 *   TSL_TOKEN_NULL               for the keyword "null".
 *   TSL_TOKEN_BOOL               for "true" / "false"; self->bool_value is set to 1 / 0.
 *   TSL_TOKEN_IDENTIFIER         for any other identifier.
 *   TSL_TOKEN_NUM                for a run of decimal digits; self->number_value holds the value.
 *   TSL_TOKEN_EQUAL              for '='.
 *   TSL_TOKEN_END_OF_FILE        at the end of the input (or at a '\0' byte).
 *   TSL_TOKEN_UNEXPECTED_SYMBOL  for anything else; a message is written to stderr
 *                                and the offending character is not consumed.
 *
 * For identifier, keyword and number tokens, self->identifier is set to a view
 * of the token text inside the code buffer.
 */
TslToken tsl_tokenizer_next(TslTokenizer *self) {
    char c;
    tsl_tokenizer_skip_whitespace(self);

    c = tsl_tokenizer_get_char(self);
    if(is_identifier_start(c)) {
        size_t identifier_start = self->code_index;
        ++self->code_index;
        while(is_identifier_continue(tsl_tokenizer_get_char(self)))
            ++self->code_index;

        self->identifier.data = self->code + identifier_start;
        self->identifier.size = self->code_index - identifier_start;

        /*
         * Keywords are matched on their full length. The old code compared only
         * the first 3 bytes of "null" against 3-character identifiers, so "nul"
         * was treated as null and "null" itself was returned as an identifier.
         */
        if(identifier_equals(&self->identifier, "null"))
            return TSL_TOKEN_NULL;

        if(identifier_equals(&self->identifier, "true")) {
            self->bool_value = 1;
            return TSL_TOKEN_BOOL;
        }

        if(identifier_equals(&self->identifier, "false")) {
            self->bool_value = 0;
            return TSL_TOKEN_BOOL;
        }

        return TSL_TOKEN_IDENTIFIER;
    } else if(is_num(c)) {
        size_t num_start = self->code_index;
        ++self->code_index;
        while(is_num(tsl_tokenizer_get_char(self)))
            ++self->code_index;

        self->identifier.data = self->code + num_start;
        self->identifier.size = self->code_index - num_start;
        self->number_value = string_to_int(&self->identifier);
        return TSL_TOKEN_NUM;
    } else if(c == '=') {
        ++self->code_index;
        return TSL_TOKEN_EQUAL;
    } else if(c == '\0') {
        return TSL_TOKEN_END_OF_FILE;
    } else {
        fprintf(stderr, "Unexpected symbol '%c'\n", c);
        return TSL_TOKEN_UNEXPECTED_SYMBOL;
    }
}