From 5df7f92e715ba764ee57f65d78e73111492bb64c Mon Sep 17 00:00:00 2001 From: dec05eba Date: Wed, 20 Mar 2019 18:53:47 +0100 Subject: Add pub keyword, more import stuff, optimize hash map Hash map now stores hash of keys to reduce the number of hash operations. Positive: faster insert/get. Negative: more space required (to store usize hash). --- doc/DESIGN.md | 28 ++++++++++++++++++++++++-- include/ast.h | 8 ++++---- include/tokenizer.h | 3 ++- src/ast.c | 26 +++++++++--------------- src/compiler.c | 4 ++-- src/parser.c | 17 +++++++++++++--- src/std/hash_map.c | 58 +++++++++++++++++++++++++++++++++++------------------ src/tokenizer.c | 6 ++++++ tests/io.amal | 2 +- tests/main.amal | 3 ++- tests/main.c | 1 + 11 files changed, 106 insertions(+), 50 deletions(-) diff --git a/doc/DESIGN.md b/doc/DESIGN.md index 7781cd1..fae9ef2 100644 --- a/doc/DESIGN.md +++ b/doc/DESIGN.md @@ -62,7 +62,7 @@ const main = fn() !void { var str1 = "hello"; var str2 = "world"; - var str3 = try str1 + " " + str2; + var str3 = try str1 + " " + str2; // `try` is needed here, because str concat can fail var str4 = try str1 + 20; // error, can't add number to string. Preferable use str.fmt or explicitly cast to string var str5 = try str1 + str(20); // ok, number explicitly cast to string @@ -144,6 +144,30 @@ const main = fn { } ``` +## Encapsulation +By default declared data can't be used outside the file they were declared in. To make the declared data accessible +from other files you can use the `pub` keyword. +json.amal +``` +pub const Object = struct { + // fields... +} + +pub const to_object = fn(data: &str) Object { + // code to convert string to Object... 
+} +``` + +main.amal: +``` +const json = @import("json.amal"); + +const main = fn { + // Type is json.Object + var json_obj = json.to_object("{ \"key\": 42 }"); +} +``` + ## Generic programming ``` const add = fn(comptime T: type, a: T, b: T) !T { @@ -163,7 +187,7 @@ Rust doesn't handle this but Amalgam does it using #reallocatable(instance). Reallocatable should be ignored if the reference that taken from the reallocatable memory doesn't change location after realloc, which would be the case for pointers. ``` -const ArrayList = @import("std.array.ArrayList") +const ArrayList = @import("std.array.ArrayList"); const User = struct { name: str, diff --git a/include/ast.h b/include/ast.h index e49ad08..7b16796 100644 --- a/include/ast.h +++ b/include/ast.h @@ -65,12 +65,12 @@ typedef struct { AstValue value; AstType type; AstResolveStatus resolve_status; - StructDecl *resolved_type; + LhsExpr *resolved_type; } Ast; struct Scope { Buffer ast_objects; - HashMap/*(key=BufferView, value=Ast)*/ named_objects; + HashMap/*(key=BufferView, value=Ast)*/ named_objects; Scope *parent; }; @@ -81,7 +81,6 @@ struct FileScopeReference { struct Variable { BufferView name; - Ast resolved_variable; }; struct FunctionDecl { @@ -104,6 +103,7 @@ struct StructField { }; struct LhsExpr { + bool is_pub; bool is_const; BufferView var_name; Variable type; @@ -149,7 +149,7 @@ CHECK_RESULT int funcdecl_init(FunctionDecl *self, Scope *parent, ScopedAllocato CHECK_RESULT int funccall_init(FunctionCall *self, BufferView name, ScopedAllocator *allocator); CHECK_RESULT int structdecl_init(StructDecl *self, Scope *parent, ScopedAllocator *allocator); void structfield_init(StructField *self, BufferView name, BufferView type_name); -void lhsexpr_init(LhsExpr *self, bool is_const, BufferView var_name); +void lhsexpr_init(LhsExpr *self, bool is_pub, bool is_const, BufferView var_name); void import_init(Import *self, BufferView path); CHECK_RESULT int string_init(String *self, BufferView str); void 
number_init(Number *self, i64 value, bool is_integer); diff --git a/include/tokenizer.h b/include/tokenizer.h index c4c3725..3944ed1 100644 --- a/include/tokenizer.h +++ b/include/tokenizer.h @@ -30,7 +30,8 @@ typedef enum { TOK_NUMBER, TOK_SEMICOLON, TOK_COLON, - TOK_BINOP + TOK_BINOP, + TOK_PUB } Token; typedef struct { diff --git a/src/ast.c b/src/ast.c index a51ae71..1bb1eb3 100644 --- a/src/ast.c +++ b/src/ast.c @@ -82,7 +82,8 @@ void structfield_init(StructField *self, BufferView name, BufferView type_name) variable_init(&self->type, type_name); } -void lhsexpr_init(LhsExpr *self, bool is_const, BufferView var_name) { +void lhsexpr_init(LhsExpr *self, bool is_pub, bool is_const, BufferView var_name) { + self->is_pub = is_pub; self->is_const = is_const; variable_init(&self->type, create_buffer_view_null()); self->var_name = var_name; @@ -107,7 +108,6 @@ void number_init(Number *self, i64 value, bool is_integer) { void variable_init(Variable *self, BufferView name) { self->name = name; - self->resolved_variable = ast_none(); } void binop_init(Binop *self) { @@ -171,7 +171,7 @@ void scope_resolve(Scope *self, AstCompilerContext *context) { context->scope = self->parent; } -static Ast scope_get_resolved_variable(Scope *self, AstCompilerContext *context, BufferView name) { +static LhsExpr* scope_get_resolved_variable(Scope *self, AstCompilerContext *context, BufferView name) { Ast result; bool exists; Scope *prev_scope; @@ -199,20 +199,13 @@ static Ast scope_get_resolved_variable(Scope *self, AstCompilerContext *context, context->scope = prev_scope; assert(result.type == AST_LHS); - return result.value.lhs_expr->rhs_expr; + return result.value.lhs_expr; } -static void variable_resolve(Variable *self, AstCompilerContext *context, StructDecl **resolved_type) { - self->resolved_variable = scope_get_resolved_variable(context->scope, context, self->name); - /* TODO: Implement */ - if(self->resolved_variable.type == AST_STRUCT_DECL) { - *resolved_type = 
self->resolved_variable.value.struct_decl; - } else if(self->resolved_variable.type == AST_FUNCTION_DECL) { - /* TODO: Set resolved type to function declaration return type */ - *resolved_type = NULL; - } else { - *resolved_type = NULL; - } +/* @resolved_type is the same field as the ast resolved_type for the variable @self */ +static void variable_resolve(Variable *self, AstCompilerContext *context, LhsExpr **resolved_type) { + /* TODO: Verify this is correct in all cases */ + *resolved_type = scope_get_resolved_variable(context->scope, context, self->name); } static void lhs_resolve(Ast *self, AstCompilerContext *context) { @@ -269,10 +262,9 @@ static void funccall_resolve(Ast *self, AstCompilerContext *context) { } static void structdecl_resolve(Ast *self, AstCompilerContext *context) { - /* TODO: Implement */ + /* TODO: What to do with resolved_type? */ StructDecl *struct_decl; struct_decl = self->value.struct_decl; - self->resolved_type = struct_decl; scope_resolve(&struct_decl->body, context); } diff --git a/src/compiler.c b/src/compiler.c index 8510a2c..6f9cc92 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -47,7 +47,7 @@ static CHECK_RESULT int create_default_type(amal_compiler *compiler, const char return_if_error(structdecl_init(struct_decl, &compiler->root_scope, &compiler->allocator)); return_if_error(scoped_allocator_alloc(&compiler->allocator, sizeof(LhsExpr), (void**)&lhs_expr)); - lhsexpr_init(lhs_expr, bool_true, create_buffer_view(name, strnlen(name, PATH_MAX))); + lhsexpr_init(lhs_expr, bool_true, bool_true, create_buffer_view(name, strnlen(name, PATH_MAX))); ast_init(&lhs_expr->rhs_expr, struct_decl, AST_STRUCT_DECL); ast_init(&expr, lhs_expr, AST_LHS); return scope_add_child(&compiler->root_scope, &expr); @@ -351,7 +351,6 @@ static CHECK_RESULT int amal_compiler_load_file_join_threads(amal_compiler *self ParserThreadData *parser_thread_data; bool work_failed; - result = AMAL_COMPILER_ERR; assert(amal_thread_is_main()); thread_return_data = 
NULL; work_failed = bool_false; @@ -449,6 +448,7 @@ int amal_compiler_load_file(amal_compiler *self, const char *filepath, FileScope bool new_entry; return_if_error(try_create_file_scope(self, filepath, file_scope, &new_entry)); + assert(file_scope && *file_scope && (*file_scope)->canonical_path.data); filepath_view = create_buffer_view((*file_scope)->canonical_path.data, (*file_scope)->canonical_path.size); if(!new_entry) { amal_log_info("amal_compiler_load_file: file already parsed: %.*s", filepath_view.size, filepath_view.data); diff --git a/src/parser.c b/src/parser.c index 5396b72..57606b5 100644 --- a/src/parser.c +++ b/src/parser.c @@ -75,6 +75,10 @@ int parser_init(Parser *self, amal_compiler *compiler, ScopedAllocator *allocato return scope_init(&self->scope, &compiler->root_scope, self->allocator); } +static bool parser_is_global_scope(Parser *self) { + return self->current_scope == &self->scope; +} + /* BODY_LOOP = BODY* @end_token */ @@ -158,12 +162,21 @@ LHS = ('const' TOK_IDENTIFIER VAR_TYPE_DEF? '=') | ('var' TOK_IDENTIFIER VAR_TYPE_DEF? 
'='|';') */ static THROWABLE parser_parse_lhs(Parser *self, LhsExpr **result, bool *assignment_or_rhs) { + bool is_pub; bool is_const; bool match; BufferView var_name; *result = NULL; *assignment_or_rhs = bool_true; + throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_PUB, &is_pub)); + if(is_pub && !parser_is_global_scope(self)) { + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_code_reference_index(&self->tokenizer, self->tokenizer.value.identifier.data), + "Only declarations in global scope can be public"); + throw(PARSER_UNEXPECTED_TOKEN); + } + throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CONST, &is_const)); if(!is_const) { bool isVar; @@ -175,7 +188,7 @@ static THROWABLE parser_parse_lhs(Parser *self, LhsExpr **result, bool *assignme throw_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER)); var_name = self->tokenizer.value.identifier; throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(LhsExpr), (void**)result)); - lhsexpr_init(*result, is_const, var_name); + lhsexpr_init(*result, is_pub, is_const, var_name); try(parser_parse_var_type_def(self, &(*result)->type.name)); @@ -574,7 +587,6 @@ int parser_parse_buffer(Parser *self, BufferView code_buffer, BufferView buffer_ if(result != 0) { amal_log_info("Failed, reason: %d", result); } - assert(self->current_scope == &self->scope); return result; } @@ -620,7 +632,6 @@ int parser_queue_file(Parser *self, BufferView path, FileScopeReference **file_s self->error = tokenizer_create_error(&self->tokenizer, tokenizer_get_code_reference_index(&self->tokenizer, path.data), "Failed to while parsing path %s (invalid path?)", path_relative); - self->error_context = ERROR_CONTEXT_NONE; am_free(path_relative); throw(result); } diff --git a/src/std/hash_map.c b/src/std/hash_map.c index 61030da..ab580ba 100644 --- a/src/std/hash_map.c +++ b/src/std/hash_map.c @@ -9,6 +9,13 @@ Basic hash map implementation. 
TODO: Improve performance #define HASH_MAP_INITIAL_SIZE 8 +/* Structure: +HashMapBucketNode *next; +usize hash; +u32 key_size; +u8[..] key; +u8[..] value; +*/ typedef struct HashMapBucketNode HashMapBucketNode; typedef struct { @@ -22,37 +29,47 @@ static void bucket_node_set_next(HashMapBucketNode *self, HashMapBucketNode *nex static HashMapBucketNode* bucket_node_get_next(HashMapBucketNode *self) { HashMapBucketNode *next; - am_memcpy(&next, self, sizeof(HashMapBucketNode*)); + am_memcpy(&next, self, sizeof(next)); return next; } +static void bucket_node_set_hash(HashMapBucketNode *self, usize hash) { + am_memcpy((char*)self + sizeof(HashMapBucketNode*), &hash, sizeof(hash)); +} + +static usize bucket_node_get_hash(HashMapBucketNode *self) { + usize hash; + am_memcpy(&hash, (char*)self + sizeof(HashMapBucketNode*), sizeof(hash)); + return hash; +} + static void bucket_node_set_key(HashMapBucketNode *self, BufferView key) { u32 key_size; key_size = (u32)key.size; - am_memcpy((char*)self + sizeof(HashMapBucketNode*), &key_size, sizeof(u32)); - am_memcpy((char*)self + sizeof(HashMapBucketNode*) + sizeof(u32), key.data, key_size); + am_memcpy((char*)self + sizeof(HashMapBucketNode*) + sizeof(usize), &key_size, sizeof(u32)); + am_memcpy((char*)self + sizeof(HashMapBucketNode*) + sizeof(usize) + sizeof(u32), key.data, key_size); } static BufferView bucket_node_get_key(HashMapBucketNode *self) { BufferView key; u32 key_size; - am_memcpy(&key_size, (char*)self + sizeof(HashMapBucketNode*), sizeof(u32)); + am_memcpy(&key_size, (char*)self + sizeof(HashMapBucketNode*) + sizeof(usize), sizeof(u32)); key.size = key_size; - key.data = (char*)self + sizeof(HashMapBucketNode*) + sizeof(u32); + key.data = (char*)self + sizeof(HashMapBucketNode*) + sizeof(usize) + sizeof(u32); return key; } static void bucket_node_set_value(HashMapBucketNode *self, void *value, usize value_type_size) { u32 key_size; - am_memcpy(&key_size, (char*)self + sizeof(HashMapBucketNode*), 
sizeof(key_size)); - am_memcpy((char*)self + sizeof(HashMapBucketNode*) + sizeof(u32) + key_size, value, value_type_size); + am_memcpy(&key_size, (char*)self + sizeof(HashMapBucketNode*) + sizeof(usize), sizeof(key_size)); + am_memcpy((char*)self + sizeof(HashMapBucketNode*) + sizeof(usize) + sizeof(u32) + key_size, value, value_type_size); } static void* bucket_node_get_value(HashMapBucketNode *self) { u32 key_size; void *value; - am_memcpy(&key_size, (char*)self + sizeof(HashMapBucketNode*), sizeof(key_size)); - value = (char*)self + sizeof(HashMapBucketNode*) + sizeof(u32) + key_size; + am_memcpy(&key_size, (char*)self + sizeof(HashMapBucketNode*) + sizeof(usize), sizeof(key_size)); + value = (char*)self + sizeof(HashMapBucketNode*) + sizeof(usize) + sizeof(u32) + key_size; return value; } @@ -72,12 +89,13 @@ int hash_map_init(HashMap *self, ScopedAllocator *allocator, usize value_type_si return 0; } -static CHECK_RESULT int hash_map_bucket_add(HashMap *self, HashMapBucket *bucket, BufferView key, void *value) { +static CHECK_RESULT int hash_map_bucket_add(HashMap *self, HashMapBucket *bucket, BufferView key, void *value, usize hash) { HashMapBucketNode *new_bucket_node; return_if_error(scoped_allocator_alloc(self->allocator, - sizeof(HashMapBucketNode*) + sizeof(u32) + key.size + self->value_type_size, + sizeof(HashMapBucketNode*) + sizeof(hash) + sizeof(u32) + key.size + self->value_type_size, (void**)&new_bucket_node)); bucket_node_set_next(new_bucket_node, bucket->start); + bucket_node_set_hash(new_bucket_node, hash); bucket_node_set_key(new_bucket_node, key); bucket_node_set_value(new_bucket_node, value, self->value_type_size); bucket->start = new_bucket_node; @@ -102,11 +120,9 @@ static void hash_map_reorder_nodes(HashMap *self, usize end_index) { prev_bucket_node = NULL; bucket_node = bucket->start; while(bucket_node) { - BufferView bucket_key; usize bucket_index; - - bucket_key = bucket_node_get_key(bucket_node); - bucket_index = self->hash_func((const 
u8*)bucket_key.data, bucket_key.size) % bucket_size; + bucket_index = bucket_node_get_hash(bucket_node) % bucket_size; + if(bucket_index != index) { /* Add node to new bucket */ HashMapBucketNode *moved_node; @@ -145,6 +161,7 @@ static CHECK_RESULT int hash_map_increase_buckets(HashMap *self) { } int hash_map_insert(HashMap *self, BufferView key, void *value) { + usize hash; usize bucket_index; usize bucket_size; HashMapBucket *bucket; @@ -155,9 +172,10 @@ int hash_map_insert(HashMap *self, BufferView key, void *value) { bucket_size = buffer_get_size(&self->buckets, HashMapBucket); } - bucket_index = self->hash_func((const u8*)key.data, key.size) % bucket_size; + hash = self->hash_func((const u8*)key.data, key.size); + bucket_index = hash % bucket_size; bucket = ((HashMapBucket*)self->buckets.data) + bucket_index; - return_if_error(hash_map_bucket_add(self, bucket, key, value)); + return_if_error(hash_map_bucket_add(self, bucket, key, value, hash)); ++self->num_elements; return 0; } @@ -165,17 +183,19 @@ int hash_map_insert(HashMap *self, BufferView key, void *value) { bool hash_map_get(HashMap *self, BufferView key, void *value) { usize bucket_size; usize bucket_index; + usize hash; HashMapBucket *bucket; HashMapBucketNode *bucket_node; bucket_size = buffer_get_size(&self->buckets, HashMapBucket); - bucket_index = self->hash_func((const u8*)key.data, key.size) % bucket_size; + hash = self->hash_func((const u8*)key.data, key.size); + bucket_index = hash % bucket_size; bucket = ((HashMapBucket*)self->buckets.data) + bucket_index; for(bucket_node = bucket->start; bucket_node; bucket_node = bucket_node_get_next(bucket_node)) { BufferView bucket_key; bucket_key = bucket_node_get_key(bucket_node); - if(self->compare_func(&key, &bucket_key) == 0) { + if(hash == bucket_node_get_hash(bucket_node) && self->compare_func(&key, &bucket_key) == 0) { am_memcpy(value, bucket_node_get_value(bucket_node), self->value_type_size); return bool_true; } diff --git a/src/tokenizer.c 
b/src/tokenizer.c index 4403251..c59f80c 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -175,6 +175,9 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { if(am_memeql(self->value.identifier.data, "var", 3)) { *token = TOK_VAR; return TOKENIZER_OK; + } else if(am_memeql(self->value.identifier.data, "pub", 3)) { + *token = TOK_PUB; + return TOKENIZER_OK; } break; } @@ -442,6 +445,9 @@ static BufferView tokenizer_expected_token_as_string(Token token) { case TOK_COLON: str = ":"; break; + case TOK_PUB: + str = "pub"; + break; } return create_buffer_view(str, strlen(str)); } diff --git a/tests/io.amal b/tests/io.amal index 5505d44..0e1a682 100644 --- a/tests/io.amal +++ b/tests/io.amal @@ -1,5 +1,5 @@ const a = @import("sub/a.amal"); -const puts = fn { +pub const puts = fn { } \ No newline at end of file diff --git a/tests/main.amal b/tests/main.amal index d085501..65ac43b 100644 --- a/tests/main.amal +++ b/tests/main.amal @@ -15,7 +15,8 @@ const main = fn { const num2 = 23232; const num3 = num1 + num2 * 30; const num4 = (num1 + num2) * num3 * ((34 + 32) / 234.345); - //const num4 = 23; + // pub const num34 = 45; // error, only declarations in global scope can be public + //const num4 = 23; // error, variable redeclaration /* episfjpseifipesf */ diff --git a/tests/main.c b/tests/main.c index 2b57bcf..0dfd878 100644 --- a/tests/main.c +++ b/tests/main.c @@ -41,6 +41,7 @@ static CHECK_RESULT int test_hash_map() { return 0; } +/* TODO: Restrict variables in global scope to const */ int main() { amal_compiler compiler; FileScopeReference *file_scope; -- cgit v1.2.3