From 108018e3e7326dabbbef568ab08bc5cebf5d427b Mon Sep 17 00:00:00 2001 From: dec05eba Date: Mon, 20 Jan 2020 23:00:39 +0100 Subject: Add arithmetic, implement hash map --- Makefile | 21 +++--- README.md | 3 + example.tsl | 2 + include/bytecode.h | 35 +++++----- include/std/buffer.h | 4 +- include/std/hash_map.h | 24 +++++++ include/std/string_view.h | 11 +++ include/tokenizer.h | 19 +++--- include/value.h | 49 +++++++++++++ src/bytecode.c | 30 ++++++-- src/parser.c | 41 +++++++++-- src/std/buffer.c | 19 ++++-- src/std/hash_map.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++ src/tokenizer.c | 44 +++++++----- 14 files changed, 407 insertions(+), 65 deletions(-) create mode 100644 include/std/hash_map.h create mode 100644 include/std/string_view.h create mode 100644 include/value.h create mode 100644 src/std/hash_map.c diff --git a/Makefile b/Makefile index e19c75e..399128b 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,10 @@ -CFLAGS = -Wall -Wextra -g3 -ansi -pedantic -OBJ = build/main.o build/tokenizer.o build/parser.o build/bytecode.o build/buffer.o +CFLAGS = -Wall -Wextra -g3 -ansi -pedantic -fPIE +LIBS = -Wl,-Bdynamic -lcurl +OBJ = build/main.o build/tokenizer.o build/parser.o build/bytecode.o build/buffer.o build/hash_map.o +CC = cc all: build_dir $(OBJ) - cc -o build/tsl $(OBJ) -fPIE + $(CC) -o build/tsl $(OBJ) $(LIBS) clean: rm $(OBJ) build/tsl @@ -14,16 +16,19 @@ compiledb: make clean; bear make build/main.o: src/main.c include/tokenizer.h - cc -c src/main.c -o build/main.o $(CFLAGS) + $(CC) -c src/main.c -o build/main.o $(CFLAGS) build/tokenizer.o: src/tokenizer.c include/tokenizer.h - cc -c src/tokenizer.c -o build/tokenizer.o $(CFLAGS) + $(CC) -c src/tokenizer.c -o build/tokenizer.o $(CFLAGS) build/parser.o: src/parser.c include/parser.h - cc -c src/parser.c -o build/parser.o $(CFLAGS) + $(CC) -c src/parser.c -o build/parser.o $(CFLAGS) build/bytecode.o: src/bytecode.c include/bytecode.h - cc -c src/bytecode.c -o build/bytecode.o $(CFLAGS) + $(CC) 
-c src/bytecode.c -o build/bytecode.o $(CFLAGS) build/buffer.o: src/std/buffer.c include/std/buffer.h - cc -c src/std/buffer.c -o build/buffer.o $(CFLAGS) + $(CC) -c src/std/buffer.c -o build/buffer.o $(CFLAGS) + +build/hash_map.o: src/std/hash_map.c include/std/hash_map.h + $(CC) -c src/std/hash_map.c -o build/hash_map.o $(CFLAGS) diff --git a/README.md b/README.md index 7710e4b..c9c3a82 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,5 @@ A tiny scripting language that is designed to be a replacement for small shell/python scripts.\ Written in ANSI C to allow embedding everywhere and a WTFPL license that allows it to be used anywhere without any restrictions. +# TODO +* Remove dependency on `gc`. Write our own gc instead. +* Implement big int, which numbers should automatically switch to when they are too large to fit into double. \ No newline at end of file diff --git a/example.tsl b/example.tsl index cfb3f5a..b81af3d 100644 --- a/example.tsl +++ b/example.tsl @@ -16,4 +16,6 @@ value9 = { str = value9["hello"] value9["sayHello"]() +value1 = value1 + 23 + response = $(curl https://example.com) \ No newline at end of file diff --git a/include/bytecode.h b/include/bytecode.h index 4a14b8d..4a9d49c 100644 --- a/include/bytecode.h +++ b/include/bytecode.h @@ -4,37 +4,40 @@ #include "std/buffer.h" #include -/* - All instructions are 4 bytes in size. 
-*/ - typedef uint8_t TslOpcodeType; -typedef uint8_t TslRegister; +/* Registers are positive if they refer to local variables and negative when they refer to parameters */ +typedef int16_t TslRegister; typedef uint16_t TslValueIndex; typedef enum { - TSL_OPCODE_ASSIGN + TSL_OPCODE_LOAD_NUMBER, + TSL_OPCODE_MOV_REG } TslOpcode; typedef struct { TslBuffer buffer; + TslRegister register_counter; } TslBytecodeWriter; typedef struct { + TslRegister dst_reg; + double number; + TslOpcodeType opcode; +} TslInstructionType1; + +typedef struct { + TslRegister dst_reg; + TslRegister src_reg; TslOpcodeType opcode; - union { - TslRegister dst_reg; - TslRegister src_reg; - } type1; - union { - TslRegister dst_reg; - TslValueIndex value_index; - } type2; -} TslInstruction; +} TslInstructionType2; void tsl_bytecode_writer_init(TslBytecodeWriter *self); void tsl_bytecode_writer_deinit(TslBytecodeWriter *self); +void tsl_bytecode_writer_reset_register_counter(TslBytecodeWriter *self); +/* Returns -1 on error (too many registers used (more than 2^15)) */ +TslRegister tsl_bytecode_writer_get_unique_register(TslBytecodeWriter *self); -int tsl_bytecode_writer_assign(TslBytecodeWriter *self, TslRegister reg, double value); +int tsl_bytecode_writer_load_number(TslBytecodeWriter *self, TslRegister dst, double number); +int tsl_bytecode_writer_mov_reg(TslBytecodeWriter *self, TslRegister dst, TslRegister src); #endif /* TSL_BYTECODE_H */ diff --git a/include/std/buffer.h b/include/std/buffer.h index dea7dbd..db6474e 100644 --- a/include/std/buffer.h +++ b/include/std/buffer.h @@ -16,6 +16,8 @@ typedef struct { void tsl_buffer_init(TslBuffer *self); void tsl_buffer_deinit(TslBuffer *self); -int tsl_buffer_append(TslBuffer *self, void *data, size_t size); +int tsl_buffer_append(TslBuffer *self, const void *data, size_t size); +void* tsl_buffer_begin(TslBuffer *self); +void* tsl_buffer_end(TslBuffer *self); #endif /* TSL_BUFFER_H */ diff --git a/include/std/hash_map.h b/include/std/hash_map.h 
new file mode 100644 index 0000000..f3a3a6d --- /dev/null +++ b/include/std/hash_map.h @@ -0,0 +1,24 @@ +#ifndef TSL_HASH_MAP_H +#define TSL_HASH_MAP_H + +#include "string_view.h" +#include + +typedef uint64_t (*TslHashFunc)(const void *data, size_t size); + +/* TODO: Optimize small hash map by using the members of the struct instead of allocating on heap */ +typedef struct { + void *buckets_data; /* value=TslHashMapNode, data=|hash(uint64_t) + key_size(size_t) + key_data(...) data_size(size_t) + data_data(...)| */ + size_t buckets_size; + size_t buckets_capacity; + size_t num_items; +} TslHashMap; + +void tsl_hash_map_init(TslHashMap *self); +void tsl_hash_map_deinit(TslHashMap *self); + +int tsl_hash_map_insert(TslHashMap *self, const TslStringView *key, const void *data, size_t size, TslHashFunc hash_func); +/* Get a reference to the value by key @key */ +void* tsl_hash_map_get(TslHashMap *self, const TslStringView *key, TslHashFunc hash_func); + +#endif /* TSL_HASH_MAP_H */ diff --git a/include/std/string_view.h b/include/std/string_view.h new file mode 100644 index 0000000..4f9552a --- /dev/null +++ b/include/std/string_view.h @@ -0,0 +1,11 @@ +#ifndef TSL_STRING_VIEW_H +#define TSL_STRING_VIEW_H + +#include + +typedef struct { + const char *data; + size_t size; +} TslStringView; + +#endif /* TSL_STRING_VIEW_H */ diff --git a/include/tokenizer.h b/include/tokenizer.h index 98491c7..2e7d42b 100644 --- a/include/tokenizer.h +++ b/include/tokenizer.h @@ -1,13 +1,7 @@ #ifndef TSL_TOKENIZER_H #define TSL_TOKENIZER_H -#include -#include - -typedef struct { - const char *data; - size_t size; -} TslStringView; +#include "std/string_view.h" typedef enum { TSL_TOKEN_END_OF_FILE, @@ -27,7 +21,8 @@ typedef enum { TSL_TOKEN_COLON, TSL_TOKEN_COMMA, TSL_TOKEN_FN, - TSL_TOKEN_DOLLAR_SIGN + TSL_TOKEN_DOLLAR_SIGN, + TSL_TOKEN_ARITHMETIC } TslToken; typedef enum { @@ -53,14 +48,20 @@ typedef struct { TslStringView identifier; TslStringView string; int bool_value; - int64_t 
number_value; + double number_value; + char arithmetic_symbol; } TslTokenizer; void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size); TslToken tsl_tokenizer_next(TslTokenizer *self); int tsl_tokenizer_accept(TslTokenizer *self, TslToken expected_token); +/* + If peek was previously called without consuming the token, then the previous value peek token is returned. + In other words, calling tsl_tokenizer_peek twice in a row will return the same token without progressing. +*/ TslToken tsl_tokenizer_peek(TslTokenizer *self); +TslToken tsl_tokenizer_consume_peek(TslTokenizer *self); TslCommandToken tsl_tokenizer_next_command_arg(TslTokenizer *self, TslStringView *arg); diff --git a/include/value.h b/include/value.h new file mode 100644 index 0000000..1d053b7 --- /dev/null +++ b/include/value.h @@ -0,0 +1,49 @@ +#ifndef TSL_VALUE_H +#define TSL_VALUE_H + +#include +#include + +typedef enum { + TSL_TYPE_NULL, + TSL_TYPE_NUMBER, + TSL_TYPE_STRING, + TSL_TYPE_BOOL, + TSL_TYPE_LIST, + TSL_TYPE_MAP, + TSL_TYPE_USERDATA +} TslType; + +typedef enum { + TSL_FALSE, + TSL_TRUE +} TslBool; + +typedef struct { + char *data; + size_t size; +} TslString; + +/* TODO: Implement this */ +typedef struct { + void *data; +} TslList; + +/* TODO: Implement this */ +typedef struct { + void *data; +} TslMap; + +typedef struct { + union { + double number; + TslString *string; + TslBool boolean; + TslList *list; + TslMap *map; + void *userdata; + } data; + uint8_t type; +} TslValue; + +#endif /* TSL_VALUE_H */ diff --git a/src/bytecode.c b/src/bytecode.c index 3e9a6e8..a0b5406 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -2,18 +2,36 @@ #include void tsl_bytecode_writer_init(TslBytecodeWriter *self) { - assert(sizeof(TslInstruction) == 4); tsl_buffer_init(&self->buffer); + self->register_counter = 0; } void tsl_bytecode_writer_deinit(TslBytecodeWriter *self) { tsl_buffer_deinit(&self->buffer); } -int tsl_bytecode_writer_assign(TslBytecodeWriter *self, TslRegister 
reg, double value) { - TslInstruction instruction; - instruction.opcode = TSL_OPCODE_ASSIGN; - instruction.type2.dst_reg = reg; - instruction.type2.value_index = value; +void tsl_bytecode_writer_reset_register_counter(TslBytecodeWriter *self) { + self->register_counter = 0; +} + +TslRegister tsl_bytecode_writer_get_unique_register(TslBytecodeWriter *self) { + if(self->register_counter < INT16_MAX) + return self->register_counter++; + return -1; +} + +int tsl_bytecode_writer_load_number(TslBytecodeWriter *self, TslRegister dst, double number) { + TslInstructionType1 instruction; + instruction.opcode = TSL_OPCODE_LOAD_NUMBER; + instruction.dst_reg = dst; + instruction.number = number; + return tsl_buffer_append(&self->buffer, &instruction, sizeof(instruction)); +} + +int tsl_bytecode_writer_mov_reg(TslBytecodeWriter *self, TslRegister dst, TslRegister src) { /* Appends a TSL_OPCODE_MOV_REG instruction with dst_reg=dst and src_reg=src; returns the result of tsl_buffer_append. The explicit int return type matches the prototype in bytecode.h */ + TslInstructionType2 instruction; + instruction.opcode = TSL_OPCODE_MOV_REG; + instruction.dst_reg = dst; + instruction.src_reg = src; return tsl_buffer_append(&self->buffer, &instruction, sizeof(instruction)); } diff --git a/src/parser.c b/src/parser.c index 176be63..45b4412 100644 --- a/src/parser.c +++ b/src/parser.c @@ -150,7 +150,7 @@ static int tsl_parser_parse_func_call(TslParser *self) { for(;;) { TslToken token = tsl_tokenizer_peek(&self->tokenizer); if(token == TSL_TOKEN_RPAREN) { - tsl_tokenizer_next(&self->tokenizer); /* consume previous TSL_TOKEN_RPAREN */ + tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_RPAREN */ return 0; } else { if(tsl_parser_parse_rhs(self) != 0) @@ -189,7 +189,7 @@ static int tsl_parser_parse_command(TslParser *self) { } } -/* RHS_SUB = VAR_INDEX|FUNC_CALL RHS_SUB?
*/ +/* RHS_SUB = (VAR_INDEX|FUNC_CALL RHS_SUB?)|(TOKEN_ARITHMETIC RHS) */ static int tsl_parser_parse_rhs_sub(TslParser *self) { TslToken token = tsl_tokenizer_peek(&self->tokenizer); if(token == TSL_TOKEN_LBRACKET) { @@ -200,6 +200,27 @@ static int tsl_parser_parse_rhs_sub(TslParser *self) { if(tsl_parser_parse_func_call(self) != 0) return -1; return tsl_parser_parse_rhs_sub(self); + } else if(token == TSL_TOKEN_ARITHMETIC) { + tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_ARITHMETIC */ + printf("rhs sub arithmetic symbol: %c\n", self->tokenizer.arithmetic_symbol); + /* TODO: Handle arithmetic expression precedence */ + return tsl_parser_parse_rhs(self); + } + /* + No sub expression found, possibly a new expression after this (a new expression on a new line), let that + part of the code handle error if there is any instead. + */ + return 0; +} + +/* RHS_SUB_ARITHMETIC = TOKEN_ARITHMETIC RHS */ +static int tsl_parser_parse_rhs_sub_arithmetic(TslParser *self) { + TslToken token = tsl_tokenizer_peek(&self->tokenizer); + if(token == TSL_TOKEN_ARITHMETIC) { + tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_ARITHMETIC */ + printf("rhs sub arithmetic symbol: %c\n", self->tokenizer.arithmetic_symbol); + /* TODO: Handle arithmetic expression precedence */ + return tsl_parser_parse_rhs(self); } /* No sub expression found, possibly a new expression after this (a new expression on a new line), let that @@ -213,10 +234,20 @@ int tsl_parser_parse_rhs(TslParser *self) { TslToken token = tsl_tokenizer_next(&self->tokenizer); if(token == TSL_TOKEN_IDENTIFIER) { TslStringView var_name = self->tokenizer.identifier; - printf("var: %.*s\n", (int)var_name.size, var_name.data); + printf("rhs var: %.*s\n", (int)var_name.size, var_name.data); return tsl_parser_parse_rhs_sub(self); } else if(token == TSL_TOKEN_NUM) { - printf("rhs num: %ld\n", self->tokenizer.number_value); + TslRegister dst_reg = 
tsl_bytecode_writer_get_unique_register(&self->bytecode_writer); + if(dst_reg == -1) { + fprintf(stderr, "Error: Too many registers used\n"); + return -1; + } + + if(!tsl_bytecode_writer_load_number(&self->bytecode_writer, dst_reg, self->tokenizer.number_value)) + return -1; + + printf("rhs num: %f\n", self->tokenizer.number_value); + return tsl_parser_parse_rhs_sub_arithmetic(self); } else if(token == TSL_TOKEN_BOOL) { printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false"); } else if(token == TSL_TOKEN_NULL) { @@ -249,7 +280,7 @@ int tsl_parser_parse_expressions(TslParser *self, TslToken end_token) { TslStringView identifier = self->tokenizer.identifier; printf("identifier: %.*s\n", identifier.size, identifier.data); if(tsl_tokenizer_peek(&self->tokenizer) == TSL_TOKEN_EQUAL) { - tsl_tokenizer_next(&self->tokenizer); /* consume previous TSL_TOKEN_EQUAL */ + tsl_tokenizer_consume_peek(&self->tokenizer); /* consume previous TSL_TOKEN_EQUAL */ if(tsl_parser_parse_rhs(self) != 0) return -1; } else { diff --git a/src/std/buffer.c b/src/std/buffer.c index 3fc5184..42b1a0f 100644 --- a/src/std/buffer.c +++ b/src/std/buffer.c @@ -1,4 +1,5 @@ #include "../../include/std/buffer.h" +#include #include #include @@ -25,18 +26,28 @@ static int tsl_buffer_ensure_capacity(TslBuffer *self, size_t new_size) { new_size = new_capacity; } new_ptr = realloc(self->data, new_size); - if(!new_ptr) + if(!new_ptr) { + fprintf(stderr, "Error: buffer append failed. 
Reason: out of memory\n"); return 0; + } self->data = new_ptr; self->capacity = new_size; return 1; } -int tsl_buffer_append(TslBuffer *self, void *data, size_t size) { +int tsl_buffer_append(TslBuffer *self, const void *data, size_t size) { if(!tsl_buffer_ensure_capacity(self, self->size + size)) - return 1; + return 0; memcpy((char*)self->data + self->size, data, size); self->size += size; - return 0; + return 1; +} + +void* tsl_buffer_begin(TslBuffer *self) { + return self->data; +} + +void* tsl_buffer_end(TslBuffer *self) { + return (char*)self->data + self->size; } diff --git a/src/std/hash_map.c b/src/std/hash_map.c new file mode 100644 index 0000000..a0d656d --- /dev/null +++ b/src/std/hash_map.c @@ -0,0 +1,170 @@ +#include "../../include/std/hash_map.h" +#include +#include +#include +#include + +void tsl_hash_map_init(TslHashMap *self) { + self->buckets_data = NULL; + self->buckets_size = 0; + self->buckets_capacity = 0; + self->num_items = 0; +} + +void tsl_hash_map_deinit(TslHashMap *self) { + free(self->buckets_data); +} + +typedef struct TslHashMapNode TslHashMapNode; +struct TslHashMapNode { + void *data; + TslHashMapNode *next; +}; + +static void hash_map_node_init(TslHashMapNode *self) { + self->data = NULL; + self->next = NULL; +} + +static void hash_map_node_get(TslHashMapNode *self, uint64_t *hash, TslStringView *key, size_t *size, uint8_t **data) { + memcpy(hash, (uint8_t*)self->data, sizeof(uint64_t)); + memcpy(&key->size, (uint8_t*)self->data + sizeof(uint64_t), sizeof(key->size)); + key->data = (const char*)self->data + sizeof(uint64_t) + sizeof(key->size); + memcpy(size, (uint8_t*)self->data + sizeof(uint64_t) + sizeof(key->size) + key->size, sizeof(size_t)); + *data = (uint8_t*)self->data + sizeof(uint64_t) + sizeof(key->size) + key->size + sizeof(size_t); +} + +static int tsl_hash_map_append_bucket(TslHashMapNode **head_node, uint64_t hash, const TslStringView *key, size_t size, const void *data) { + TslHashMapNode *next_node; + uint8_t 
*node_data = malloc(sizeof(hash) + sizeof(key->size) + key->size + sizeof(size) + size); /* must cover every field copied below: |hash|key_size|key_data|data_size|data| */ + if(!node_data) { + fprintf(stderr, "Error: hash map append failed. Reason: out of memory\n"); + return 0; + } + + next_node = malloc(sizeof(TslHashMapNode)); + if(!next_node) { + free(node_data); + fprintf(stderr, "Error: hash map append failed. Reason: out of memory\n"); + return 0; + } + hash_map_node_init(next_node); + + memcpy(node_data, &hash, sizeof(hash)); + memcpy(node_data + sizeof(hash), &key->size, sizeof(key->size)); + memcpy(node_data + sizeof(hash) + sizeof(key->size), key->data, key->size); + memcpy(node_data + sizeof(hash) + sizeof(key->size) + key->size, &size, sizeof(size)); + memcpy(node_data + sizeof(hash) + sizeof(key->size) + key->size + sizeof(size), data, size); + + /* The new node owns the payload and becomes the head of the bucket chain; */ + /* lookup walks node->next and reads node->data of every node it visits. */ + next_node->data = node_data; + next_node->next = *head_node; + *head_node = next_node; + return 1; +} + +static int tsl_hash_map_ensure_bucket_capacity(TslHashMap *self, size_t new_size) { + size_t new_capacity = self->num_items; /* NOTE(review): capacity is seeded from num_items, which no visible code increments; confirm buckets_capacity was not intended */ + void *new_ptr; + if(new_size <= self->num_items) + return 1; + + if(new_capacity == 0) + new_capacity = 8; + + while(new_capacity < new_size) { + new_capacity *= 2; + } + + new_ptr = realloc(self->buckets_data, new_capacity); /* NOTE(review): new_capacity is a byte count here but is used as a bucket count in hash & (buckets_capacity - 1); confirm the intended unit */ + if(!new_ptr) { + fprintf(stderr, "Error: hash map realloc failed.
Reason: out of memory\n"); + return 0; + } + + self->buckets_data = new_ptr; + self->buckets_capacity = new_capacity; + { /* Clear the bucket slots from byte offset buckets_size up to byte offset buckets_capacity */ + TslHashMapNode **bucket = (TslHashMapNode**)((char*)self->buckets_data + self->buckets_size); + TslHashMapNode **bucket_end = (TslHashMapNode**)((char*)self->buckets_data + self->buckets_capacity); /* NOTE(review): the end is a byte offset, while buckets are indexed as pointer slots further down; confirm the unit of buckets_capacity */ + while(bucket != bucket_end) { + *bucket = NULL; + ++bucket; + } + } + + { /* Re-bucket pass: a node whose hash no longer selects the bucket it sits in is relinked into the bucket picked by hash & (buckets_capacity - 1) */ + TslHashMapNode **bucket = (TslHashMapNode**)self->buckets_data; + TslHashMapNode **bucket_end = (TslHashMapNode**)((char*)self->buckets_data + self->buckets_capacity); + size_t bucket_index = 0; + while(bucket != bucket_end) { + TslHashMapNode *node = *bucket; + TslHashMapNode *prev_node = node; /* Set to node for optimization reason, where prev_node->next = node->next; which becomes no-op */ + while(node) { + uint64_t hash; + TslStringView key; + size_t size; + uint8_t *data; + size_t index; + hash_map_node_get(node, &hash, &key, &size, &data); + + index = hash & (self->buckets_capacity - 1); + if(index != bucket_index) { + TslHashMapNode **new_bucket = (TslHashMapNode**)self->buckets_data + index; + prev_node->next = node->next; /* NOTE(review): when node is the bucket head this is a no-op and *bucket still points at node; confirm the head is meant to stay linked */ + if(*new_bucket) { + node->next = (*new_bucket)->next; + (*new_bucket)->next = node; + } else { + node->next = NULL; + *new_bucket = node; + } + } + + prev_node = node; + node = node->next; /* NOTE(review): if node was relinked above, node->next was already rewritten, so the walk continues in the destination chain; confirm */ + } + ++bucket_index; + ++bucket; + } + } + return 1; +} + +int tsl_hash_map_insert(TslHashMap *self, const TslStringView *key, const void *data, size_t size, TslHashFunc hash_func) { /* Stores a copy of @data (@size bytes) under @key; the key must not already be present (asserted). Returns 1 on success, 0 when growing the table or allocating the node fails */ + uint64_t hash = hash_func(key->data, key->size); + size_t index; + TslHashMapNode **bucket; + assert(!tsl_hash_map_get(self, key, hash_func)); + if(!tsl_hash_map_ensure_bucket_capacity(self, self->buckets_size + size)) /* NOTE(review): the request is buckets_size plus the byte size of the value, and num_items is not updated anywhere in the visible code; confirm the intended growth policy */ + return 0; + + index = hash & (self->buckets_capacity - 1); /* mask-based bucket selection; presumes buckets_capacity is a power of two */ + bucket = (TslHashMapNode**)self->buckets_data + index; + return tsl_hash_map_append_bucket(bucket, hash, key, size, data); +} + +void* tsl_hash_map_get(TslHashMap *self, const TslStringView *key,
TslHashFunc hash_func) { + uint64_t hash; + size_t index; + TslHashMapNode *node; + if(self->buckets_capacity == 0) + return NULL; + + hash = hash_func(key->data, key->size); + index = hash & (self->buckets_capacity - 1); + node = *((TslHashMapNode**)self->buckets_data + index); + while(node) { + uint64_t node_hash; + TslStringView node_key; + size_t node_size; + uint8_t *node_data; + hash_map_node_get(node, &node_hash, &node_key, &node_size, &node_data); + if(hash == node_hash && key->size == node_key.size && memcmp(key->data, node_key.data, node_key.size) == 0) + return node_data; + node = node->next; + } + + return NULL; +} diff --git a/src/tokenizer.c b/src/tokenizer.c index 89c40cb..889152b 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -2,6 +2,7 @@ #include #include #include +#include void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) { self->code = code; @@ -19,6 +20,7 @@ void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) self->string.size = 0; self->bool_value = 0; self->number_value = 0; + self->arithmetic_symbol = '\0'; } static char tsl_tokenizer_get_char(TslTokenizer *self) { @@ -173,6 +175,7 @@ static TslToken tsl_tokenizer_next_internal(TslTokenizer *self) { } self->identifier.data = self->code + num_start; self->identifier.size = self->code_index - num_start; + /* TODO: Check if the result of string_to_int is too large to fit into double */ self->number_value = string_to_int(&self->identifier); return TSL_TOKEN_NUM; } else if(c == '"') { @@ -217,6 +220,10 @@ static TslToken tsl_tokenizer_next_internal(TslTokenizer *self) { } else if(c == '$') { ++self->code_index; return TSL_TOKEN_DOLLAR_SIGN; + } else if(c == '+' || c == '-' || c == '*' || c == '/') { + self->arithmetic_symbol = c; + ++self->code_index; + return TSL_TOKEN_ARITHMETIC; } else if(c == '\0') { return TSL_TOKEN_END_OF_FILE; } else { @@ -225,14 +232,6 @@ static TslToken tsl_tokenizer_next_internal(TslTokenizer *self) { } } 
-static TslToken tsl_tokenizer_consume_peek(TslTokenizer *self) { - TslToken token = self->peek.token; - self->code_index = self->peek.code_index; - self->prev_code_index = self->peek.prev_code_index; - self->peek.token = -1; - return token; -} - TslToken tsl_tokenizer_next(TslTokenizer *self) { if((int)self->peek.token == -1) { return tsl_tokenizer_next_internal(self); @@ -256,16 +255,29 @@ int tsl_tokenizer_accept(TslTokenizer *self, TslToken expected_token) { } TslToken tsl_tokenizer_peek(TslTokenizer *self) { - size_t p_prev_code_index = self->prev_code_index; - size_t p_code_index = self->code_index; + if((int)self->peek.token == -1) { + size_t p_prev_code_index = self->prev_code_index; + size_t p_code_index = self->code_index; - self->peek.token = tsl_tokenizer_next_internal(self); - self->peek.code_index = self->code_index; - self->peek.prev_code_index = self->prev_code_index; + self->peek.token = tsl_tokenizer_next_internal(self); + self->peek.code_index = self->code_index; + self->peek.prev_code_index = self->prev_code_index; - self->prev_code_index = p_prev_code_index; - self->code_index = p_code_index; - return self->peek.token; + self->prev_code_index = p_prev_code_index; + self->code_index = p_code_index; + return self->peek.token; + } else { + return self->peek.token; + } +} + +TslToken tsl_tokenizer_consume_peek(TslTokenizer *self) { + TslToken token = self->peek.token; + assert((int)token != -1); + self->code_index = self->peek.code_index; + self->prev_code_index = self->peek.prev_code_index; + self->peek.token = -1; + return token; } TslCommandToken tsl_tokenizer_next_command_arg(TslTokenizer *self, TslStringView *arg) { -- cgit v1.2.3