author    dec05eba <dec05eba@protonmail.com>    2019-03-02 21:20:33 +0100
committer dec05eba <dec05eba@protonmail.com>    2020-07-25 14:36:46 +0200
commit    5e240bdab90c45f935e7d2b33181de13295e7e6b (patch)
tree      0718d7ca2386292c5b3646d0cb1ae499bb7ba818 /src
parent    2a17f5225a09c01eb04225d0241c686ea553f912 (diff)
Add string, variable and number. Fix identifier match against const and var
Diffstat (limited to 'src')
-rw-r--r--  src/ast.c        14
-rw-r--r--  src/parser.c    175
-rw-r--r--  src/tokenizer.c 269
3 files changed, 391 insertions(+), 67 deletions(-)
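
For orientation, here is a minimal usage sketch (not part of this commit) that exercises the new paths: a string, a number, a variable reference and a function call with comma-separated arguments. The amal source inside the string literal is hypothetical and only inferred from the grammar comments in parser.c below; the Parser is assumed to have been set up elsewhere with parser_init.

int parse_example(Parser *parser) {
    BufferView code_view;
    BufferView name_view;
    /* Hypothetical amal source: const/var declarations, a string, a number,
       a closure and a call that passes a variable and a number as arguments. */
    const char code[] =
        "const greeting = \"hello\"\n"
        "var answer = 42\n"
        "const run = () { print(greeting, answer) }\n";

    code_view = create_buffer_view(code, sizeof(code) - 1);
    name_view = create_buffer_view("example", 7);
    return parser_parse_buffer(parser, code_view, name_view);
}
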
diff --git a/src/ast.c b/src/ast.c
index 56eb34f..75c8afc 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -17,8 +17,9 @@ int funcdecl_add_to_body(FunctionDecl *self, Ast ast) {
return BUFFER_OK;
}
-void funccall_init(FunctionCall *self, BufferView name) {
+int funccall_init(FunctionCall *self, BufferView name, ScopedAllocator *allocator) {
self->name = name;
+ return buffer_init(&self->args, allocator);
}
void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) {
@@ -30,3 +31,14 @@ void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) {
void import_init(Import *self, BufferView path) {
self->path = path;
}
+
+int string_init(String *self, BufferView str) {
+ /* TODO: Convert escape sequences; for example the two-character sequence \n should be converted to an actual newline character. */
+ self->str = str;
+ return 0;
+}
+
+void number_init(Number *self, i64 value, bool is_integer) {
+ self->value.integer = value;
+ self->is_integer = is_integer;
+}
\ No newline at end of file
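
The TODO in string_init is left open by this commit. As a rough, hypothetical sketch (not part of the codebase), the conversion could rewrite escape sequences into a destination buffer along these lines:

/* Hypothetical helper for the string_init TODO above: rewrites the common
   escape sequences into dst and returns the number of bytes written.
   dst must be at least size bytes large. Not part of this commit. */
static usize convert_escape_sequences(const char *src, usize size, char *dst) {
    usize i;
    usize out;
    out = 0;
    for(i = 0; i < size; ++i) {
        if(src[i] == '\\' && i + 1 < size) {
            ++i;
            switch(src[i]) {
                case 'n':  dst[out++] = '\n'; break;
                case 't':  dst[out++] = '\t'; break;
                case '"':  dst[out++] = '"';  break;
                case '\\': dst[out++] = '\\'; break;
                default:   dst[out++] = src[i]; break;
            }
        } else {
            dst[out++] = src[i];
        }
    }
    return out;
}
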
diff --git a/src/parser.c b/src/parser.c
index e63814f..539ebd3 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -10,8 +10,9 @@
#include <stdlib.h>
#include <assert.h>
-static CHECK_RESULT int parser_queue_file(Parser *self, BufferView path);
+static CHECK_RESULT int parser_parse_rhs_start(Parser *self, Ast *rhs_expr);
static CHECK_RESULT int parser_parse_body(Parser *self, Ast *ast);
+static CHECK_RESULT int parser_queue_file(Parser *self, BufferView path);
int parser_thread_data_init(ParserThreadData *self) {
am_memset(&self->allocator, 0, sizeof(self->allocator));
@@ -42,9 +43,32 @@ int parser_init(Parser *self, amal_compiler *compiler, ScopedAllocator *allocato
self->allocator = allocator;
self->compiler = compiler;
self->started = bool_false;
+ self->error.index = 0;
+ self->error.str = NULL;
return buffer_init(&self->ast_objects, self->allocator);
}
+static bool parser_got_overwritable_error(Parser *self) {
+ return self->error.str != NULL;
+}
+
+/*
+BODY_LOOP = BODY* @end_token
+*/
+static CHECK_RESULT int parser_parse_body_loop(Parser *self, Buffer *body_list, Token end_token) {
+ for(;;) {
+ Ast body_obj;
+ bool is_end_token;
+ return_if_error(tokenizer_consume_if(&self->tokenizer, end_token, &is_end_token));
+ if(is_end_token)
+ break;
+
+ return_if_error(parser_parse_body(self, &body_obj));
+ return_if_error(buffer_append(body_list, &body_obj, sizeof(body_obj)));
+ }
+ return PARSER_OK;
+}
+
/*
LHS = 'const'|'var' IDENTIFIER
*/
@@ -70,7 +94,7 @@ static CHECK_RESULT int parser_parse_lhs(Parser *self, LhsExpr **result) {
}
/*
-CLOSURE = '(' PARAM* ')' '{' BODY* '}'
+CLOSURE = '(' PARAM* ')' '{' BODY_LOOP '}'
*/
static CHECK_RESULT int parser_parse_function_decl(Parser *self, FunctionDecl **func_decl) {
bool result;
@@ -88,38 +112,65 @@ static CHECK_RESULT int parser_parse_function_decl(Parser *self, FunctionDecl **
return_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionDecl), (void**)func_decl));
return_if_error(funcdecl_init(*func_decl, self->allocator));
+ return parser_parse_body_loop(self, &(*func_decl)->body, TOK_CLOSING_BRACE);
+}
+
+/*
+FUNC_ARGS = (RHS_START (',' RHS_START)*)? ')'
+*/
+static CHECK_RESULT int parser_parse_function_args(Parser *self, FunctionCall *func_call) {
+ bool first_arg;
+ first_arg = bool_true;
+
for(;;) {
- Ast body_obj;
- return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_BRACE, &result));
- if(result)
+ Ast arg_expr;
+ bool is_end_token;
+ arg_expr = ast_none();
+
+ return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_PAREN, &is_end_token));
+ if(is_end_token)
break;
- return_if_error(parser_parse_body(self, &body_obj));
- return_if_error(funcdecl_add_to_body(*func_decl, body_obj));
+ if(!first_arg)
+ return_if_error(tokenizer_accept(&self->tokenizer, TOK_COMMA));
+ first_arg = bool_false;
+
+ return_if_error(parser_parse_rhs_start(self, &arg_expr));
+ return_if_error(buffer_append(&func_call->args, &arg_expr, sizeof(arg_expr)));
}
+
return PARSER_OK;
}
/*
-FUNC_CALL = IDENTIFIER '(' RHS* ')'
+VARIABLE = IDENTIFIER
+FUNC_CALL_OR_VARIABLE = VARIABLE ('(' FUNC_ARGS)?
*/
-static CHECK_RESULT int parser_parse_function_call(Parser *self, FunctionCall **func_call) {
+static CHECK_RESULT int parser_parse_function_call_or_variable(Parser *self, Ast *expr) {
bool result;
- BufferView func_name;
- *func_call = NULL;
+ BufferView identifier;
+ FunctionCall *func_call;
return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IDENTIFIER, &result));
if(!result)
return PARSER_OK;
- func_name = self->tokenizer.value.identifier;
- return_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_PAREN));
- /* TODO: Parse arguments */
- return_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
-
- return_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionCall), (void**)func_call));
- funccall_init(*func_call, func_name);
- return PARSER_OK;
+ identifier = self->tokenizer.value.identifier;
+ return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &result));
+ if(!result) {
+ Variable *variable;
+ return_if_error(scoped_allocator_alloc(self->allocator, sizeof(Variable), (void**)&variable));
+ variable->name = identifier;
+ expr->type = AST_VARIABLE;
+ expr->value.variable = variable;
+ return PARSER_OK;
+ }
+
+ return_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionCall), (void**)&func_call));
+ return_if_error(funccall_init(func_call, identifier, self->allocator));
+ expr->type = AST_FUNCTION_CALL;
+ expr->value.func_call = func_call;
+ return parser_parse_function_args(self, func_call);
}
/*
@@ -138,32 +189,56 @@ static CHECK_RESULT int parser_parse_import(Parser *self, Import **import) {
return PARSER_OK;
}
+static CHECK_RESULT int parser_parse_number(Parser *self, Ast *rhs_expr) {
+ bool result;
+ Number *number;
+
+ return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_NUMBER, &result));
+ if(!result)
+ return PARSER_OK;
+
+ return_if_error(scoped_allocator_alloc(self->allocator, sizeof(Number), (void**)&number));
+ number_init(number, self->tokenizer.value.integer, self->tokenizer.number_is_integer);
+ rhs_expr->type = AST_NUMBER;
+ rhs_expr->value.number = number;
+ return PARSER_OK;
+}
+
/*
-RHS = CLOSURE | FUNC_CALL | IMPORT
+RHS = STRING | NUMBER | FUNC_CALL_OR_VARIABLE
*/
static CHECK_RESULT int parser_parse_rhs(Parser *self, Ast *rhs_expr) {
- FunctionDecl *func_decl;
- FunctionCall *func_call;
- Import *import;
- /* bool result;*/
+ bool result;
-/*
return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_STRING, &result));
if(result) {
String *string;
return_if_error(scoped_allocator_alloc(self->allocator, sizeof(String), (void**)&string));
- string_init(string, self->tokenizer.value.string);
+ return_if_error(string_init(string, self->tokenizer.value.string));
rhs_expr->type = AST_STRING;
- rhs_expr->value.string = func_call;
+ rhs_expr->value.string = string;
return PARSER_OK;
}
-*/
- return_if_error(parser_parse_function_call(self, &func_call));
- if(func_call) {
- rhs_expr->type = AST_FUNCTION_CALL;
- rhs_expr->value.func_call = func_call;
+
+ return_if_error(parser_parse_number(self, rhs_expr));
+ if(rhs_expr->type != AST_NONE)
return PARSER_OK;
- }
+
+ return_if_error(parser_parse_function_call_or_variable(self, rhs_expr));
+ if(rhs_expr->type != AST_NONE)
+ return PARSER_OK;
+
+ self->error = tokenizer_create_error(&self->tokenizer, "Expected string, number, variable or function call");
+ return PARSER_UNEXPECTED_TOKEN;
+}
+
+/*
+RHS_START = CLOSURE | IMPORT | RHS
+*/
+int parser_parse_rhs_start(Parser *self, Ast *rhs_expr) {
+ FunctionDecl *func_decl;
+ Import *import;
+ int result;
return_if_error(parser_parse_function_decl(self, &func_decl));
if(func_decl) {
@@ -180,24 +255,26 @@ static CHECK_RESULT int parser_parse_rhs(Parser *self, Ast *rhs_expr) {
return PARSER_OK;
}
- /* TODO: Convert token to string */
- tokenizer_print_error(&self->tokenizer, "Expected function declaration or function call");
- return PARSER_UNEXPECTED_TOKEN;
+ result = parser_parse_rhs(self, rhs_expr);
+ if(result == PARSER_UNEXPECTED_TOKEN && parser_got_overwritable_error(self))
+ self->error = tokenizer_create_error(&self->tokenizer, "Expected string, number, variable, closure, function call or import");
+ return result;
}
/*
-BODY = (LHS '=' RHS) |
- RHS
+BODY = (LHS '=' RHS_START) |
+ RHS_START
*/
int parser_parse_body(Parser *self, Ast *ast) {
LhsExpr *lhs_expr;
Ast rhs_expr;
+ rhs_expr = ast_none();
return_if_error(parser_parse_lhs(self, &lhs_expr));
if(lhs_expr)
return_if_error(tokenizer_accept(&self->tokenizer, TOK_EQUALS));
- return_if_error(parser_parse_rhs(self, &rhs_expr));
+ return_if_error(parser_parse_rhs_start(self, &rhs_expr));
if(lhs_expr) {
lhs_expr->rhs_expr = rhs_expr;
ast->type = AST_LHS;
@@ -209,23 +286,15 @@ int parser_parse_body(Parser *self, Ast *ast) {
}
/*
-ROOT = BODY*
+ROOT = BODY_LOOP
*/
int parser_parse_buffer(Parser *self, BufferView code_buffer, BufferView buffer_name) {
- Ast ast;
+ int result;
return_if_error(tokenizer_init(&self->tokenizer, code_buffer, buffer_name));
-
- for(;;) {
- bool isEof;
- return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_END_OF_FILE, &isEof));
- if(isEof)
- break;
-
- return_if_error(parser_parse_body(self, &ast));
- return_if_error(buffer_append(&self->ast_objects, &ast, sizeof(ast)));
- }
-
- return PARSER_OK;
+ result = parser_parse_body_loop(self, &self->ast_objects, TOK_END_OF_FILE);
+ if(self->error.str != NULL)
+ tokenizer_print_error_object(&self->tokenizer, &self->error);
+ return result;
}
int parser_parse_file(Parser *self, BufferView filepath) {
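
The deferred error reporting added above stores a TokenizerError on the Parser and only prints it once parsing of the buffer has finished. The struct definition lives in a header that is not part of this diff; judging from the fields used here, it is presumably along these lines:

/* Presumed shape of TokenizerError (an assumption; the real definition is in a
   header outside this diff). Only the two fields used above are shown. */
typedef struct {
    int index;       /* tokenizer->prev_index at the point the error was created */
    const char *str; /* static message; NULL means no pending error */
} TokenizerError;
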
diff --git a/src/tokenizer.c b/src/tokenizer.c
index fdb06cd..41d46fb 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -28,6 +28,7 @@ int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name) {
self->token = TOK_NONE;
self->needs_update = bool_true;
self->code_name = code_name.data ? code_name : create_buffer_view("<buffer>", 8);
+ self->number_is_integer = bool_false;
return 0;
}
@@ -60,7 +61,7 @@ static Token tokenizer_skip_whitespace(Tokenizer *self) {
}
/* Returns -1 if end of string can't be found */
-static int find_end_of_string(BufferView buf, int index) {
+static CHECK_RESULT int find_end_of_string(BufferView buf, int index) {
int c;
bool escape_quote;
escape_quote = bool_false;
@@ -77,6 +78,44 @@ static int find_end_of_string(BufferView buf, int index) {
return -1;
}
+#define I64_OVERFLOW_ERROR -1
+static CHECK_RESULT int string_to_integer_unchecked(BufferView str, i64 *result) {
+ int i;
+ i64 value;
+
+ value = 0;
+ for(i = 0; i < (int)str.size; ++i) {
+ i64 value_before;
+ value_before = value;
+ value = value * 10 + (str.data[i] - '0');
+ /* overflow */
+ if(value < value_before)
+ return I64_OVERFLOW_ERROR;
+ }
+
+ *result = value;
+ return 0;
+}
+
+#define F64_OVERFLOW_ERROR -1
+static CHECK_RESULT int string_to_float_unchecked(BufferView str, f64 *result) {
+ int i;
+ f64 value;
+ f64 divisor;
+ bool seen_dot;
+
+ value = 0.0;
+ divisor = 1.0;
+ seen_dot = bool_false;
+ for(i = 0; i < (int)str.size; ++i) {
+ f64 value_before;
+ /* a single '.' separates the integer digits from the fractional digits */
+ if(str.data[i] == '.') {
+ seen_dot = bool_true;
+ continue;
+ }
+ value_before = value;
+ value = value * 10.0 + (f64)(str.data[i] - '0');
+ if(seen_dot)
+ divisor *= 10.0;
+ /* overflow */
+ if(value < value_before)
+ return F64_OVERFLOW_ERROR;
+ }
+
+ *result = value / divisor;
+ return 0;
+}
+
static CHECK_RESULT int tokenizer_next(Tokenizer *self, Token *token);
static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
@@ -106,12 +145,23 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
self->value.identifier = create_buffer_view(self->code.data + identifier_start, self->index - identifier_start);
- if(am_memeql(self->value.identifier.data, "const", 5))
- *token = TOK_CONST;
- else if(am_memeql(self->value.identifier.data, "var", 3))
- *token = TOK_VAR;
- else
- *token = TOK_IDENTIFIER;
+ switch(self->value.identifier.size) {
+ case 3: {
+ if(am_memeql(self->value.identifier.data, "var", 3)) {
+ *token = TOK_VAR;
+ return TOKENIZER_OK;
+ }
+ break;
+ }
+ case 5: {
+ if(am_memeql(self->value.identifier.data, "const", 5)) {
+ *token = TOK_CONST;
+ return TOKENIZER_OK;
+ }
+ break;
+ }
+ }
+ *token = TOK_IDENTIFIER;
} else if(c == '"') {
int string_end;
++self->index;
@@ -126,6 +176,52 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
self->index = string_end + 1;
*token = TOK_STRING;
return TOKENIZER_OK;
+ } else if(isDigit(c)) {
+ int number_start;
+ int dot_index;
+ BufferView number_str;
+ number_start = self->index;
+ dot_index = -1;
+ ++self->index;
+
+ while(self->index < (int)self->code.size) {
+ c = tokenizer_get_char(self);
+ if(isDigit(c))
+ ++self->index;
+ else if(c == '.' && dot_index == -1) {
+ dot_index = self->index;
+ ++self->index;
+ } else {
+ break;
+ }
+ }
+
+ number_str = create_buffer_view(self->code.data + number_start, self->index - number_start);
+
+ if(dot_index == -1) {
+ int result;
+ result = string_to_integer_unchecked(number_str, &self->value.integer);
+ if(result != 0) {
+ /* TODO */
+ tokenizer_print_error(self, "Integer value %.*s is too large to fit in a signed 64-bit integer. Large integer literals are not supported yet.", number_str.size, number_str.data);
+ return TOKENIZER_ERR;
+ }
+ self->number_is_integer = bool_true;
+ } else {
+ int result;
+ result = string_to_float_unchecked(number_str, &self->value.floating);
+ if(result != 0) {
+ /* TODO */
+ tokenizer_print_error(self, "Float value %.*s is too large to fit in a 64-bit float. Large floating point literals are not supported yet.", number_str.size, number_str.data);
+ return TOKENIZER_ERR;
+ }
+ self->number_is_integer = bool_false;
+ }
+ *token = TOK_NUMBER;
+ return TOKENIZER_OK;
+ } else if(c == '.') {
+ ++self->index;
+ *token = TOK_DOT;
} else if(c == '=') {
++self->index;
*token = TOK_EQUALS;
@@ -135,6 +231,9 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
} else if(c == ')') {
++self->index;
*token = TOK_CLOSING_PAREN;
+ } else if(c == ',') {
+ ++self->index;
+ *token = TOK_COMMA;
} else if(c == '{') {
++self->index;
*token = TOK_OPEN_BRACE;
@@ -194,16 +293,148 @@ int tokenizer_next(Tokenizer *self, Token *token) {
return result;
}
+static usize strlen(const char *str) {
+ usize len;
+ len = 0;
+ while(*str != '\0') {
+ ++len;
+ ++str;
+ }
+ return len;
+}
+
+static BufferView tokenizer_expected_token_as_string(Token token) {
+ const char *str;
+ switch(token) {
+ case TOK_NONE:
+ str = "none";
+ break;
+ case TOK_END_OF_FILE:
+ str = "<eof>";
+ break;
+ case TOK_IDENTIFIER:
+ str = "identifier";
+ break;
+ case TOK_CONST:
+ str = "const";
+ break;
+ case TOK_VAR:
+ str = "var";
+ break;
+ case TOK_STRING:
+ str = "string";
+ break;
+ case TOK_EQUALS:
+ str = "=";
+ break;
+ case TOK_OPEN_PAREN:
+ str = "(";
+ break;
+ case TOK_CLOSING_PAREN:
+ str = ")";
+ break;
+ case TOK_COMMA:
+ str = ",";
+ break;
+ case TOK_OPEN_BRACE:
+ str = "{";
+ break;
+ case TOK_CLOSING_BRACE:
+ str = "}";
+ break;
+ case TOK_IMPORT:
+ str = "import";
+ break;
+ case TOK_NUMBER:
+ str = "number";
+ break;
+ case TOK_DOT:
+ str = ".";
+ break;
+ default:
+ str = "Unknown token";
+ break;
+ }
+ return create_buffer_view(str, strlen(str));
+}
+
+static BufferView tokenizer_actual_token_as_string(Tokenizer *self) {
+ #if 0
+ const char *str;
+ switch(self->token) {
+ case TOK_NONE:
+ str = "none";
+ break;
+ case TOK_END_OF_FILE:
+ str = "<eof>";
+ break;
+ case TOK_IDENTIFIER:
+ /*return self->value.identifier;
+ */
+ str = "identifier";
+ break;
+ case TOK_CONST:
+ str = "const";
+ break;
+ case TOK_VAR:
+ str = "var";
+ break;
+ case TOK_STRING:
+ /*return self->value.string;*/
+ str = "string";
+ break;
+ case TOK_EQUALS:
+ str = "=";
+ break;
+ case TOK_OPEN_PAREN:
+ str = "(";
+ break;
+ case TOK_CLOSING_PAREN:
+ str = ")";
+ break;
+ case TOK_OPEN_BRACE:
+ str = "{";
+ break;
+ case TOK_CLOSING_BRACE:
+ str = "}";
+ break;
+ case TOK_IMPORT:
+ str = "import";
+ break;
+ case TOK_COMMA:
+ str = ",";
+ break;
+ default:
+ str = "Unknown token";
+ break;
+ }
+ return create_buffer_view(str, strlen(str));
+ #endif
+ return tokenizer_expected_token_as_string(self->token);
+}
+
int tokenizer_accept(Tokenizer *self, Token expected_token) {
Token actual_token;
- return_if_error(tokenizer_next(self, &actual_token));
- if(actual_token == expected_token) {
- self->needs_update = bool_true;
- return TOKENIZER_OK;
+ BufferView actual_token_str;
+ BufferView expected_token_str;
+
+ if(!self->needs_update) {
+ bool match;
+ match = (self->token == expected_token);
+ self->needs_update = match;
+ if(match)
+ return TOKENIZER_OK;
+ } else {
+ return_if_error(tokenizer_next(self, &actual_token));
+ if(actual_token == expected_token) {
+ self->needs_update = bool_true;
+ return TOKENIZER_OK;
+ }
}
- /* Todo: convert token to string */
- tokenizer_print_error(self, "Expected %d, got %d", expected_token, actual_token);
+ expected_token_str = tokenizer_expected_token_as_string(expected_token);
+ actual_token_str = tokenizer_actual_token_as_string(self);
+ tokenizer_print_error(self, "Expected \"%.*s\", got \"%.*s\"", expected_token_str.size, expected_token_str.data, actual_token_str.size, actual_token_str.data);
return TOKENIZER_UNEXPECTED_TOKEN;
}
@@ -268,3 +499,15 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
va_end(args);
ignore_result_int(amal_mutex_unlock(mutex));
}
+
+void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error) {
+ self->prev_index = error->index;
+ tokenizer_print_error(self, "%s", error->str);
+}
+
+TokenizerError tokenizer_create_error(Tokenizer *tokenizer, const char *err_str) {
+ TokenizerError result;
+ result.index = tokenizer->prev_index;
+ result.str = err_str;
+ return result;
+}
\ No newline at end of file
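
To make the number handling above concrete, here is a small sanity sketch (not part of this commit) of the conversion helpers: a literal without a dot goes through the integer path, a literal with a dot through the float path.

/* Sanity sketch for the conversion helpers above (not part of this commit). */
static void number_conversion_examples(void) {
    i64 int_value;
    f64 float_value;
    /* "123" contains no dot, so the tokenizer takes the integer path. */
    if(string_to_integer_unchecked(create_buffer_view("123", 3), &int_value) == 0) {
        /* int_value == 123, and number_is_integer is set to bool_true. */
    }
    /* "3.25" contains a dot, so the float path is taken instead. */
    if(string_to_float_unchecked(create_buffer_view("3.25", 4), &float_value) == 0) {
        /* float_value == 3.25, and number_is_integer is set to bool_false. */
    }
}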