#include "../../include/std/hash_map.h" #include "../../include/std/arena_allocator.h" #include "../../include/std/mem.h" #include /* Basic hash map implementation */ /* TODO: Improve performance. For example replace % with & by first making sure size of bucket is a power of two. TODO: Instead of allocating space for the key, use the key data pointer and size directly. TODO: Instead of allocating space for the data, allow the user to pass a pointer in the insert method and use that directly. */ #define HASH_MAP_INITIAL_SIZE 8 /* Structure: HashMapBucketNode *next; usize hash; u32 key_size; u8[..] key; u8[..] value; */ typedef struct HashMapBucketNode HashMapBucketNode; typedef struct { HashMapBucketNode *start; } HashMapBucket; static void bucket_node_set_next(HashMapBucketNode *self, HashMapBucketNode *next) { assert(next != self); am_memcpy(self, &next, sizeof(next)); } static HashMapBucketNode* bucket_node_get_next(HashMapBucketNode *self) { HashMapBucketNode *next; am_memcpy(&next, self, sizeof(next)); return next; } static void bucket_node_set_hash(HashMapBucketNode *self, usize hash) { am_memcpy((char*)self + sizeof(HashMapBucketNode*), &hash, sizeof(hash)); } static usize bucket_node_get_hash(HashMapBucketNode *self) { usize hash; am_memcpy(&hash, (char*)self + sizeof(HashMapBucketNode*), sizeof(hash)); return hash; } static void bucket_node_set_key(HashMapBucketNode *self, BufferView key) { u32 key_size; key_size = (u32)key.size; am_memcpy((char*)self + sizeof(HashMapBucketNode*) + sizeof(usize), &key_size, sizeof(u32)); am_memcpy((char*)self + sizeof(HashMapBucketNode*) + sizeof(usize) + sizeof(u32), key.data, key_size); } static BufferView bucket_node_get_key(HashMapBucketNode *self) { BufferView key; u32 key_size; am_memcpy(&key_size, (char*)self + sizeof(HashMapBucketNode*) + sizeof(usize), sizeof(u32)); key.size = key_size; key.data = (char*)self + sizeof(HashMapBucketNode*) + sizeof(usize) + sizeof(u32); return key; } static void bucket_node_set_value(HashMapBucketNode *self, void *value, usize value_type_size) { u32 key_size; am_memcpy(&key_size, (char*)self + sizeof(HashMapBucketNode*) + sizeof(usize), sizeof(key_size)); am_memcpy((char*)self + sizeof(HashMapBucketNode*) + sizeof(usize) + sizeof(u32) + key_size, value, value_type_size); } static void* bucket_node_get_value(HashMapBucketNode *self) { u32 key_size; void *value; am_memcpy(&key_size, (char*)self + sizeof(HashMapBucketNode*) + sizeof(usize), sizeof(key_size)); value = (char*)self + sizeof(HashMapBucketNode*) + sizeof(usize) + sizeof(u32) + key_size; return value; } int hash_map_init(HashMap *self, ArenaAllocator *allocator, usize value_type_size, HashMapCompare key_compare_func, HashMapHash key_hash_func) { assert(key_compare_func); assert(key_hash_func); self->allocator = allocator; self->value_type_size = value_type_size; self->num_elements = 0; self->compare_func = key_compare_func; self->hash_func = key_hash_func; return_if_error(buffer_init(&self->buckets, self->allocator)); assert(self->buckets.size == 0); return_if_error(buffer_append_empty(&self->buckets, sizeof(HashMapBucket) * HASH_MAP_INITIAL_SIZE)); assert(HASH_MAP_INITIAL_SIZE != 0); am_memset(self->buckets.data, 0, self->buckets.size); return 0; } static CHECK_RESULT int hash_map_bucket_add(HashMap *self, HashMapBucket *bucket, BufferView key, void *value, usize hash) { HashMapBucketNode *new_bucket_node; return_if_error(arena_allocator_alloc(self->allocator, sizeof(HashMapBucketNode*) + sizeof(hash) + sizeof(u32) + key.size + self->value_type_size, (void**)&new_bucket_node)); bucket_node_set_next(new_bucket_node, bucket->start); bucket_node_set_hash(new_bucket_node, hash); bucket_node_set_key(new_bucket_node, key); bucket_node_set_value(new_bucket_node, value, self->value_type_size); bucket->start = new_bucket_node; return 0; } static void hash_map_reorder_nodes(HashMap *self, usize end_index) { HashMapBucket *bucket; HashMapBucket *bucket_end; usize bucket_size; usize index; /* TODO: bucket_end should be the old capacity. There is no need to reorder the new buckets */ bucket = (HashMapBucket*)self->buckets.data; bucket_end = ((HashMapBucket*)self->buckets.data) + end_index; bucket_size = buffer_get_size(&self->buckets, HashMapBucket); index = 0; for(; bucket != bucket_end; ++bucket, ++index) { HashMapBucketNode *prev_bucket_node; HashMapBucketNode *bucket_node; prev_bucket_node = NULL; bucket_node = bucket->start; while(bucket_node) { usize bucket_index; bucket_index = bucket_node_get_hash(bucket_node) % bucket_size; if(bucket_index != index) { /* Add node to new bucket */ HashMapBucketNode *moved_node; HashMapBucket *new_bucket; new_bucket = ((HashMapBucket*)self->buckets.data) + bucket_index; moved_node = bucket_node; bucket_node = bucket_node_get_next(bucket_node); bucket_node_set_next(moved_node, new_bucket->start); new_bucket->start = moved_node; /* Remove current from chain by chaining previous and next */ if(prev_bucket_node) bucket_node_set_next(prev_bucket_node, bucket_node); else bucket->start = bucket_node; } else { prev_bucket_node = bucket_node; bucket_node = bucket_node_get_next(bucket_node); } } } } static CHECK_RESULT int hash_map_increase_buckets(HashMap *self) { usize prev_num_elements; usize bytes_to_append; prev_num_elements = buffer_get_size(&self->buckets, HashMapBucket); bytes_to_append = (usize)(prev_num_elements * 1.5) * sizeof(HashMapBucket); return_if_error(buffer_append_empty(&self->buckets, bytes_to_append)); am_memset(((HashMapBucket*)self->buckets.data) + prev_num_elements, 0, bytes_to_append); hash_map_reorder_nodes(self, prev_num_elements); return 0; } int hash_map_insert(HashMap *self, BufferView key, void *value) { usize hash; usize bucket_index; usize bucket_size; HashMapBucket *bucket; bucket_size = buffer_get_size(&self->buckets, HashMapBucket); if(self->num_elements == bucket_size) { return_if_error(hash_map_increase_buckets(self)); bucket_size = buffer_get_size(&self->buckets, HashMapBucket); } hash = self->hash_func((const u8*)key.data, key.size); bucket_index = hash % bucket_size; bucket = ((HashMapBucket*)self->buckets.data) + bucket_index; return_if_error(hash_map_bucket_add(self, bucket, key, value, hash)); ++self->num_elements; return 0; } /* TODO: Remove this. Use hash_map_get_ref instead */ bool hash_map_get(HashMap *self, BufferView key, void *value) { #if 0 usize bucket_size; usize bucket_index; usize hash; HashMapBucket *bucket; HashMapBucketNode *bucket_node; bucket_size = buffer_get_size(&self->buckets, HashMapBucket); hash = self->hash_func((const u8*)key.data, key.size); bucket_index = hash % bucket_size; bucket = ((HashMapBucket*)self->buckets.data) + bucket_index; for(bucket_node = bucket->start; bucket_node; bucket_node = bucket_node_get_next(bucket_node)) { BufferView bucket_key; bucket_key = bucket_node_get_key(bucket_node); if(hash == bucket_node_get_hash(bucket_node) && self->compare_func(&key, &bucket_key) == 0) { if(value) am_memcpy(value, bucket_node_get_value(bucket_node), self->value_type_size); return bool_true; } } return bool_false; #endif void *ref; if(!hash_map_get_ref(self, key, &ref)) return bool_false; am_memcpy(value, ref, self->value_type_size); return bool_true; } bool hash_map_get_ref(HashMap *self, BufferView key, void **value) { usize bucket_size; usize bucket_index; usize hash; HashMapBucket *bucket; HashMapBucketNode *bucket_node; bucket_size = buffer_get_size(&self->buckets, HashMapBucket); if(bucket_size == 0) return bool_false; hash = self->hash_func((const u8*)key.data, key.size); bucket_index = hash % bucket_size; bucket = ((HashMapBucket*)self->buckets.data) + bucket_index; for(bucket_node = bucket->start; bucket_node; bucket_node = bucket_node_get_next(bucket_node)) { BufferView bucket_key; bucket_key = bucket_node_get_key(bucket_node); if(hash == bucket_node_get_hash(bucket_node) && self->compare_func(&key, &bucket_key) == 0) { if(value) *value = bucket_node_get_value(bucket_node); return bool_true; } } return bool_false; } bool hash_map_contains(HashMap *self, BufferView key) { return hash_map_get_ref(self, key, NULL); } #define MIN(a, b) ((a) < (b) ? (a) : (b)) int hash_map_compare_string(const void *a, const void *b) { const BufferView *lhs; const BufferView *rhs; lhs = a; rhs = b; if(lhs->size != rhs->size) return -1; return am_memcmp(lhs->data, rhs->data, MIN(lhs->size, rhs->size)); }