apfl/src/hashmap.c

480 lines
12 KiB
C
Raw Normal View History

2021-12-10 20:22:16 +00:00
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "hashmap.h"
#include "resizable.h"
/*
 * One bucket of the hashmap. Keys and values are stored in two separate,
 * parallel arrays: the entry at index i consists of the i-th element of
 * `keys` and the i-th element of `values`.
 */
struct bucket {
/* Array of keys; each element is keysize bytes of the owning map. */
void *keys;
/* Array of values; each element is valsize bytes of the owning map. */
void *values;
/* Number of entries currently stored in the bucket. */
size_t len;
/* Number of entries the arrays have room for. */
size_t cap;
};
/*
 * The hashmap itself: a fixed-size array of buckets plus the element sizes
 * and the (optional) user callbacks used to hash, compare, copy and destroy
 * keys and values.
 */
struct apfl_hashmap_struct {
/* User callbacks; a NULL entry selects the built-in default behaviour. */
struct apfl_hashmap_callbacks callbacks;
/* Size in bytes of a single key. */
size_t keysize;
/* Size in bytes of a single value. */
size_t valsize;
/* Number of elements in `buckets`. */
size_t nbuckets;
/* Array of `nbuckets` buckets; a key lives in bucket (hash % nbuckets). */
struct bucket *buckets;
};
/*
 * Iteration state over a hashmap: the position is the pair
 * (bucket index, entry index inside that bucket).
 */
struct apfl_hashmap_cursor_struct {
/* The map being iterated. */
apfl_hashmap map;
/* Index of the current bucket; a value >= map->nbuckets means "end". */
size_t bucket;
/* Index of the current entry inside the current bucket. */
size_t i;
};
#define FNV_PRIME 1099511628211U

/*
 * Feeds `len` bytes starting at `data` into a running FNV-1a style hash.
 *
 * For every input byte the hash is first XORed with the byte and then
 * multiplied by FNV_PRIME. `hash` is the state to continue from; the updated
 * state is returned, so calls can be chained to hash several buffers.
 */
apfl_hash
apfl_hash_fnv1a_add(const void *data, size_t len, apfl_hash hash)
{
    const uint8_t *bytes = data;
    size_t remaining = len;

    while (remaining > 0) {
        hash ^= *bytes;
        hash *= FNV_PRIME;

        bytes++;
        remaining--;
    }

    return hash;
}
/*
 * Hashes `len` bytes at `data`, starting from the initial state
 * APFL_HASH_FNV1A_INIT.
 */
apfl_hash
apfl_hash_fnv1a(const void *data, size_t len)
{
    apfl_hash seed = APFL_HASH_FNV1A_INIT;

    return apfl_hash_fnv1a_add(data, len, seed);
}
/* Evaluates to true if the callback named `cb` is set (non-NULL) in the map's callback table. */
#define HAS_CALLBACK(map, cb) ((map)->callbacks.cb != NULL)
/*
 * Calls the map's callback `cb`, passing callbacks.opaque as the first
 * argument followed by the remaining macro arguments. Performs no NULL
 * check itself; guard the call with HAS_CALLBACK.
 */
#define INVOKE_CALLBACK(map, cb, ...) (map)->callbacks.cb((map)->callbacks.opaque, __VA_ARGS__)
static bool
keys_eq(const apfl_hashmap map, const void *a, const void *b)
2021-12-10 20:22:16 +00:00
{
if (HAS_CALLBACK(map, keys_eq)) {
return INVOKE_CALLBACK(map, keys_eq, a, b);
} else {
return memcmp(a, b, map->keysize) == 0;
}
}
/*
 * Computes the hash of a key.
 *
 * Uses the map's calc_hash callback when one is set; otherwise the raw
 * map->keysize bytes of the key are hashed with apfl_hash_fnv1a.
 */
static apfl_hash
calc_hash(apfl_hashmap map, const void *key)
{
    if (!HAS_CALLBACK(map, calc_hash)) {
        return apfl_hash_fnv1a(key, map->keysize);
    }

    return INVOKE_CALLBACK(map, calc_hash, key);
}
/* Runs the map's destroy_key callback on `key`; does nothing if none is set. */
static void
destroy_key(apfl_hashmap map, void *key)
{
    if (!HAS_CALLBACK(map, destroy_key)) {
        return;
    }

    INVOKE_CALLBACK(map, destroy_key, key);
}
/* Runs the map's destroy_value callback on `value`; does nothing if none is set. */
static void
destroy_value(apfl_hashmap map, void *value)
{
    if (!HAS_CALLBACK(map, destroy_value)) {
        return;
    }

    INVOKE_CALLBACK(map, destroy_value, value);
}
/*
 * Copies the key at `src` into the storage at `dest`.
 *
 * Delegates to the map's copy_key callback when one is set and returns its
 * result; otherwise map->keysize bytes are copied with memcpy and true is
 * returned.
 */
static bool
copy_key(apfl_hashmap map, void *dest, void *src)
{
    if (!HAS_CALLBACK(map, copy_key)) {
        memcpy(dest, src, map->keysize);
        return true;
    }

    return INVOKE_CALLBACK(map, copy_key, dest, src);
}
/*
 * Copies the value at `src` into the storage at `dest`.
 *
 * Delegates to the map's copy_value callback when one is set and returns its
 * result; otherwise map->valsize bytes are copied with memcpy and true is
 * returned.
 */
static bool
copy_value(apfl_hashmap map, void *dest, void *src)
{
    if (!HAS_CALLBACK(map, copy_value)) {
        memcpy(dest, src, map->valsize);
        return true;
    }

    return INVOKE_CALLBACK(map, copy_value, dest, src);
}
/* Number of additional entry slots a bucket gains each time it grows. */
#define CAP_GROW 5

static_assert(CAP_GROW >= 1, "CAP_GROW must be at least 1");

/* Returns the capacity a bucket of capacity `old_cap` should grow to. */
static size_t
calc_new_cap(size_t old_cap)
{
    size_t grown = old_cap;

    grown += CAP_GROW;
    return grown;
}
/*
 * Address (as char *) of element number `off` in the array starting at
 * `base` whose elements are `elemsize` bytes each.
 */
#define KVADDR(base, elemsize, off) (((char*)(base)) + ((elemsize)*(off)))
static bool
find_key_in_bucket(const apfl_hashmap map, struct bucket *bucket, const void *key, size_t *off)
2021-12-10 20:22:16 +00:00
{
size_t keysize = map->keysize;
for (size_t i = 0; i < bucket->len; i++) {
if (keys_eq(map, key, KVADDR(bucket->keys, keysize, i))) {
*off = i;
return true;
}
}
return false;
}
/*
 * Stores `value` under `key` in `bucket`.
 *
 * If the key is already present, its old value is destroyed and replaced by
 * a copy of `value`. Otherwise a copy of the key and a copy of the value are
 * appended to the bucket, growing its arrays first if they are full.
 *
 * Returns true on success. Returns false if growing the arrays failed or a
 * copy callback reported failure; for a new key the bucket's entry count is
 * left unchanged in that case.
 */
static bool
set_in_bucket(apfl_hashmap map, struct bucket *bucket, void *key, void *value)
{
    size_t keysize = map->keysize;
    size_t valsize = map->valsize;

    size_t i;
    if (find_key_in_bucket(map, bucket, key, &i)) {
        // Existing key: overwrite the value in place.
        // NOTE(review): the old value is destroyed before the new one is
        // copied, so if copy_value fails the slot still counts as an entry
        // but holds an already-destroyed value.
        void *dest = KVADDR(bucket->values, valsize, i);
        destroy_value(map, dest);
        return copy_value(map, dest, value);
    }

    // Grow only when there is no free slot left. (len <= cap always holds,
    // so testing for that would reallocate on every single insertion.)
    if (bucket->len >= bucket->cap) {
        size_t new_cap = calc_new_cap(bucket->cap);

        void *newmem;

        newmem = realloc(bucket->keys, new_cap * keysize);
        if (newmem == NULL) {
            return false;
        }
        bucket->keys = newmem;

        // If this second realloc fails, keys is merely larger than cap
        // claims, which is harmless; cap is only raised once both succeeded.
        newmem = realloc(bucket->values, new_cap * valsize);
        if (newmem == NULL) {
            return false;
        }
        bucket->values = newmem;

        bucket->cap = new_cap;
    }

    if (!copy_key(map, KVADDR(bucket->keys, keysize, bucket->len), key)) {
        return false;
    }

    if (!copy_value(map, KVADDR(bucket->values, valsize, bucket->len), value)) {
        // Undo the key copy so no half-initialised entry is left behind.
        destroy_key(map, KVADDR(bucket->keys, keysize, bucket->len));
        return false;
    }

    bucket->len++;

    return true;
}
static bool
get_in_bucket(const apfl_hashmap map, struct bucket *bucket, const void *key, void *value)
2021-12-10 20:22:16 +00:00
{
size_t i;
if (!find_key_in_bucket(map, bucket, key, &i)) {
return false;
}
if (value != NULL) {
size_t valsize = map->valsize;
if (!copy_value(map, value, KVADDR(bucket->values, valsize, i))) {
return false; // TODO: This way, we cant distinguish an error in copy_value from a non-set key
}
}
return true;
}
/* Returns the bucket responsible for `key`: the one at index (hash of key) % nbuckets. */
static struct bucket *
bucket_by_key(apfl_hashmap map, const void *key)
{
    size_t slot = calc_hash(map, key) % map->nbuckets;

    return map->buckets + slot;
}
/*
 * Removes the entry for `key` from `bucket`, if there is one.
 *
 * The stored key and value are destroyed, then all following entries are
 * shifted down by one slot so the arrays stay densely packed. The capacity
 * of the bucket is not reduced.
 */
static void
delete_in_bucket(apfl_hashmap map, struct bucket *bucket, const void *key)
{
    size_t pos;
    if (!find_key_in_bucket(map, bucket, key, &pos)) {
        return;
    }

    const size_t keysize = map->keysize;
    const size_t valsize = map->valsize;

    char *keyslot = KVADDR(bucket->keys, keysize, pos);
    char *valslot = KVADDR(bucket->values, valsize, pos);

    destroy_key(map, keyslot);
    destroy_value(map, valslot);

    assert(bucket->len >= (pos + 1));

    // Number of entries stored behind the removed one.
    const size_t trailing = bucket->len - (pos + 1);

    memmove(keyslot, keyslot + keysize, trailing * keysize);
    memmove(valslot, valslot + valsize, trailing * valsize);

    assert(bucket->len > 0); // an entry was found, so the bucket cannot be empty
    bucket->len--;
}
/*
 * Number of buckets a map created by apfl_hashmap_new starts with.
 * NOTE(review): bucket selection in this file uses `hash % nbuckets`, which
 * itself only needs a non-zero count; the power-of-2 requirement below is
 * kept as stated by the original author - confirm whether something else
 * relies on it.
 */
#define INITIAL_NBUCKETS 16 // Must be a power of 2
static apfl_hashmap
hashmap_new(struct apfl_hashmap_callbacks callbacks, size_t nbuckets, size_t keysize, size_t valsize)
2021-12-10 20:22:16 +00:00
{
apfl_hashmap map = malloc(sizeof(struct apfl_hashmap_struct));
if (map == NULL) {
goto fail;
}
map->callbacks = callbacks;
map->keysize = keysize;
map->valsize = valsize;
map->nbuckets = nbuckets;
map->buckets = malloc(sizeof(struct bucket) * nbuckets);
2021-12-10 20:22:16 +00:00
if (map->buckets == NULL) {
goto fail;
}
for (size_t i = 0; i < nbuckets; i++) {
2021-12-10 20:22:16 +00:00
map->buckets[i] = (struct bucket) {
.keys = NULL,
.values = NULL,
.len = 0,
.cap = 0,
};
}
return map;
fail:
free(map);
return NULL;
}
/*
 * Creates an empty hashmap with INITIAL_NBUCKETS buckets for keys of
 * `keysize` bytes and values of `valsize` bytes. Returns NULL on allocation
 * failure.
 */
apfl_hashmap
apfl_hashmap_new(struct apfl_hashmap_callbacks callbacks, size_t keysize, size_t valsize)
{
    const size_t nbuckets = INITIAL_NBUCKETS;

    return hashmap_new(callbacks, nbuckets, keysize, valsize);
}
2021-12-10 20:22:16 +00:00
/* Removes the entry stored under `key` from the map; a no-op if the key is absent. */
void
apfl_hashmap_delete(apfl_hashmap map, const void *key)
{
    struct bucket *target = bucket_by_key(map, key);

    delete_in_bucket(map, target, key);
}
/*
 * Looks up `key` in the map. Returns false if it is not present (or if
 * copying the value out failed). If `value` is non-NULL, the stored value
 * is copied into it; passing NULL only tests for presence.
 */
bool
apfl_hashmap_get(const apfl_hashmap map, const void *key, void *value)
{
    struct bucket *target = bucket_by_key(map, key);

    return get_in_bucket(map, target, key, value);
}
/*
 * Stores a copy of `value` under a copy of `key`, replacing any value
 * already stored for that key. Returns false on failure.
 */
bool
apfl_hashmap_set(apfl_hashmap map, void *key, void *value)
{
    struct bucket *target = bucket_by_key(map, key);

    return set_in_bucket(map, target, key, value);
}
/*
 * Destroys every key and value stored in `bucket`, releases the bucket's
 * two arrays and resets len and cap to 0. The bucket struct itself is not
 * freed.
 */
static void
destroy_bucket(apfl_hashmap map, struct bucket *bucket)
{
    const size_t keysize = map->keysize;
    const size_t valsize = map->valsize;

    size_t idx = 0;
    while (idx < bucket->len) {
        destroy_key(map, KVADDR(bucket->keys, keysize, idx));
        destroy_value(map, KVADDR(bucket->values, valsize, idx));
        idx++;
    }

    free(bucket->keys);
    free(bucket->values);

    bucket->cap = 0;
    bucket->len = 0;
}
/* Returns the number of entries in the map, i.e. the sum of all bucket lengths. */
size_t
apfl_hashmap_count(const apfl_hashmap map)
{
    size_t total = 0;
    size_t remaining = map->nbuckets;

    while (remaining > 0) {
        remaining--;
        total += map->buckets[remaining].len;
    }

    return total;
}
2021-12-10 20:22:16 +00:00
/*
 * Destroys the map: every stored key and value, all bucket storage, the
 * bucket array and the map itself. Passing NULL is allowed and does nothing.
 */
void
apfl_hashmap_destroy(apfl_hashmap map)
{
    if (map == NULL) {
        return;
    }

    if (map->buckets == NULL) {
        free(map);
        return;
    }

    for (size_t idx = 0; idx < map->nbuckets; idx++) {
        destroy_bucket(map, map->buckets + idx);
    }

    free(map->buckets);
    free(map);
}
/*
 * Creates a copy of `src`.
 *
 * The copy has the same callbacks, element sizes and bucket count as `src`,
 * and every entry is duplicated with copy_key / copy_value into the bucket
 * with the same index.
 *
 * Returns the new map, or NULL if an allocation or a copy callback failed.
 * On failure everything copied so far is destroyed again; `src` is never
 * modified.
 */
apfl_hashmap
apfl_hashmap_copy(apfl_hashmap src)
{
    size_t keysize = src->keysize;
    size_t valsize = src->valsize;

    apfl_hashmap dst = hashmap_new(src->callbacks, src->nbuckets, keysize, valsize);
    if (dst == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < dst->nbuckets; i++) {
        struct bucket *srcbucket = &src->buckets[i];
        struct bucket *dstbucket = &dst->buckets[i];

        size_t len = srcbucket->len;
        if (len == 0) {
            // hashmap_new already left this bucket empty. Skipping it also
            // avoids malloc(0), which may legitimately return NULL and
            // would then be mistaken for an out-of-memory condition.
            continue;
        }

        dstbucket->keys = malloc(keysize * len);
        dstbucket->values = malloc(valsize * len);
        if (dstbucket->keys == NULL || dstbucket->values == NULL) {
            free(dstbucket->keys);
            dstbucket->keys = NULL;
            free(dstbucket->values);
            dstbucket->values = NULL;
            goto fail;
        }

        dstbucket->cap = len;

        // dstbucket->len doubles as the running index and is only advanced
        // once both halves of an entry were copied, so the cleanup in
        // apfl_hashmap_destroy sees fully initialised entries only.
        for (; dstbucket->len < len; dstbucket->len++) {
            size_t j = dstbucket->len;
            void *keyaddr = KVADDR(dstbucket->keys, keysize, j);

            if (!copy_key(
                dst,
                keyaddr,
                KVADDR(srcbucket->keys, keysize, j)
            )) {
                goto fail;
            }

            if (!copy_value(
                dst,
                KVADDR(dstbucket->values, valsize, j),
                KVADDR(srcbucket->values, valsize, j)
            )) {
                destroy_key(dst, keyaddr);
                goto fail;
            }
        }
    }

    return dst;

fail:
    apfl_hashmap_destroy(dst);
    return NULL;
}
2021-12-10 20:22:16 +00:00
/*
 * Advances the cursor's bucket index until it refers to a non-empty bucket
 * or has moved past the last bucket. The entry index is not touched.
 */
static void
cursor_skip_empty_buckets(apfl_hashmap_cursor cur)
{
    const apfl_hashmap map = cur->map;

    for (; cur->bucket < map->nbuckets; cur->bucket++) {
        if (map->buckets[cur->bucket].len != 0) {
            break;
        }
    }
}
apfl_hashmap_cursor
apfl_hashmap_get_cursor(const apfl_hashmap map)
2021-12-10 20:22:16 +00:00
{
apfl_hashmap_cursor cursor = malloc(sizeof(struct apfl_hashmap_cursor_struct));
if (cursor != NULL) {
cursor->map = map;
cursor->i = 0;
cursor->bucket = 0;
cursor_skip_empty_buckets(cursor);
}
return cursor;
}
/* Returns true once the cursor has moved past the last bucket of its map. */
bool
apfl_hashmap_cursor_is_end(apfl_hashmap_cursor cursor)
{
    const size_t total = cursor->map->nbuckets;

    return !(cursor->bucket < total);
}
/* Returns the bucket the cursor currently points into, or NULL if the cursor is at the end. */
static struct bucket *
cursor_get_bucket(apfl_hashmap_cursor cursor)
{
    if (apfl_hashmap_cursor_is_end(cursor)) {
        return NULL;
    }

    return cursor->map->buckets + cursor->bucket;
}
/*
 * Moves the cursor to the next entry. Within a bucket the entry index is
 * incremented; after the last entry of a bucket the cursor jumps to the
 * first entry of the next non-empty bucket, or to the end. Does nothing if
 * the cursor is already at the end.
 */
void
apfl_hashmap_cursor_next(apfl_hashmap_cursor cursor)
{
    struct bucket *current = cursor_get_bucket(cursor);
    if (current == NULL) {
        return; // already at the end
    }

    size_t next = cursor->i + 1;
    if (next < current->len) {
        cursor->i = next;
        return;
    }

    // Current bucket exhausted: restart at entry 0 of the next bucket that
    // actually holds entries.
    cursor->i = 0;
    cursor->bucket++;
    cursor_skip_empty_buckets(cursor);
}
/*
 * Shared function body of the two cursor getters below.
 *
 * Expands to statements that return false if the cursor is at the end or
 * its entry index is out of range for the current bucket, and otherwise
 * return the result of `copy(map, out, <address of current element>)`.
 *
 *   bucketmemb - member of struct bucket to read from (keys / values)
 *   sizememb   - member of the map holding the element size
 *   copy       - copy function to use (copy_key / copy_value)
 *
 * The definition ends on the line with the closing `);` - deliberately
 * without a trailing backslash, so it cannot absorb whatever line follows.
 */
#define CURSOR_GET(cursor, out, bucketmemb, sizememb, copy) \
    struct bucket *bucket = cursor_get_bucket(cursor); \
    \
    if (bucket == NULL) { \
        return false; /* End already reached */ \
    } \
    \
    if ((cursor)->i >= bucket->len) { \
        return false; \
    } \
    \
    size_t size = (cursor)->map->sizememb; \
    \
    return copy( \
        (cursor)->map, \
        (out), \
        KVADDR(bucket->bucketmemb, size, (cursor)->i) \
    );

/*
 * Copies the key of the entry the cursor points at into `key`.
 * Returns false if the cursor is at the end or the copy failed.
 */
bool
apfl_hashmap_cursor_get_key(apfl_hashmap_cursor cursor, void *key)
{
    CURSOR_GET(cursor, key, keys, keysize, copy_key)
}

/*
 * Copies the value of the entry the cursor points at into `value`.
 * Returns false if the cursor is at the end or the copy failed.
 */
bool
apfl_hashmap_cursor_get_value(apfl_hashmap_cursor cursor, void *value)
{
    CURSOR_GET(cursor, value, values, valsize, copy_value)
}