By user10362540


2018-11-08 23:02:11 8 Comments

Hi I am attempting to implement a really simple hashmap in regular C with a string as key and a void pointer as value as I wish to use the map for multiple data types.

So far I have this

struct node{
    void * value;
    char * key;
};

unsigned long strhash(char *string)
{   
    unsigned long hash = 5381;
    int c;

    while ((c = *string++))
    {   
        hash = ((hash << 5) + hash) + c;
    }   
    return hash;
}


map_t *map_create(int maxSize){

    map_t *map = malloc(sizeof(map_t));
    map->curSize = 0;
    map->maxSize = maxSize;
    map->nodes = calloc(map->maxSize, sizeof(node_t *));

    return map;
}


node_t *node_create(char *key, void *value){

    node_t *node = malloc(sizeof(node_t));
    node->key = key;
    node->value = value;
    return node;
}

void map_insert(map_t *map, char *key, void *value){

    node_t *node = node_create(key, value);

    int idx = strhash(key) % map->maxSize;
    if(map->nodes[idx] == NULL){
        map->nodes[idx] = node;
    }else{
        while(map->nodes[idx] != NULL){
            idx++%map->maxSize;
        }
        map->nodes[idx] = node;
    }   
    return;
}

void map_print(map_t *map){

    for(int i = 0; i < map->maxSize; i++){
        if(map->nodes[i] != NULL){
            printf("index: %d\t value: %d\n",i, *(int*)map->nodes[i]->value);
        }
    }
    return;
}

void map_destroy(map_t *map){
     for(int i = 0; i < map->maxSize; i++){
        if(map->nodes[i] != NULL){
            free(map->nodes[i]);
        }
    }
    free(map->nodes);
    free(map);
    return;
}



int main(){

    map_t *map = map_create(32);
    for(int i = 0; i < 30; i++){
        map_insert(map, (char*)&i, &i);
    }
    map_print(map);
    map_destroy(map);
    return 0;
}

The problem is the output is not as I'd expect when the map gets printed all that is retrieved is the value "30" on all indexes which is the last number inserted into the map. If I change the value to type int the map works as expected, so is there must be something crucial I am missing in regards to pointers.

I am not the greatest at C so any light which could be shed on this would be most appreciated.

3 comments

@Nominal Animal 2018-11-09 02:15:14

OP stores a reference to the same value, so of course all lookups yield the same value (which is not even a string, but whatever the storage representation of the value of the variable i happens to be).

I prefer chaining the hash map entries, and keeping a copy of the hash in the entry:

struct entry {
    struct entry *next;
    size_t        hash;
    void         *data;
    size_t        data_size;
    int           data_type;
    unsigned char name[];
};

typedef struct {
    size_t         size;
    size_t         used;  /* Number of entries, total */
    struct entry **slot;  /* Array of entry pointers */
    size_t       (*hash)(const unsigned char *, size_t);
} hashmap;

int hashmap_new(hashmap *hmap, const size_t size,
                size_t (*hash)(const unsigned char *, size_t))
{
    if (!hmap)
        return -1; /* No hashmap specified */

    hmap->size = 0;
    hmap->used = 0;
    hmap->slot = NULL;
    hmap->hash = NULL;

    if (size < 1)
        return -1; /* Invalid size */
    if (!hash)
        return -1; /* No hash function specified. */

    hmap->slot = calloc(size, sizeof hmap->slot[0]);
    if (!hmap->slot)
        return -1; /* Not enough memory */

    hmap->size = size;
    hmap->hash = hash;

    return 0;
}

void hashmap_free(hashmap *hmap)
{
    if (hmap) {
        size_t  i = hmap->size;
        while (i-->0) {
            struct entry *next = hmap->slot[i];
            struct entry *curr;

            while (next) {
                curr = next;
                next = next->next;

                free(curr->data);

                /* Poison the entry, to help detect use-after-free bugs. */
                curr->next = NULL;
                curr->data = NULL;
                curr->hash = 0;
                curr->data_size = 0;
                curr->data_type = 0;
                curr->name[0] = '\0';

                free(curr);
            }
        }
    }

    free(hmap->slot);
    hmap->size = 0;
    hmap->used = 0;
    hmap->slot = NULL;
    hmap->hash = NULL;
}

To insert a key-value pair, the function either uses the data specified as-is, in which case it's the caller's responsibility to ensure each key has their own unique data not overwritten later; or we copy the user data. In the above hashmap_free() function, you'll see free(curr->data);; it assumes we allocated memory dynamically, and copied the user data there. So:

int hashmap_add(hashmap *hmap, const unsigned char *name,
                const void *data, const size_t data_size,
                const int data_type)
{
    const size_t  namelen = (name) ? strlen(name) : 0;
    struct entry *curr;
    size_t        i;

    if (!hmap)
        return -1; /* No hashmap specified. */

    if (name_len < 1)
        return -1; /* NULL or empty name. */

    /* Allocate memory for the hashmap entry,
       including enough room for the name, and end of string '\0'. */
    curr = malloc(sizeof (struct entry) + namelen + 1;
    if (!curr)
        return -1; /* Out of memory. */

    /* Copy data, if any. */
    if (data_size > 0) {
        curr->data = malloc(data_size);
        if (!curr->data) {
            free(curr);
            return -1; /* Out of memory. */
        }
        memcpy(curr->data, data, data_size);
    } else {
        curr->data      = NULL;
        curr->data_size = 0;
    }

    curr->data_type = data_type;

    /* Calculate the hash of the name. */
    curr->hash = hmap->hash(name, namelen);

    /* Copy name, including the trailing '\0'. */
    memcpy(curr->name, name, namelen + 1);

    /* Slot to prepend to. */
    i = curr->hash % hmap->size;

    curr->next = hmap->slot[i];
    hmap->slot[i] = curr;

    /* An additional node added. */
    hmap->used++;

    return 0;
}

The meaning of data_type is completely up to the user of the code. Lookup can be made based on the hash and the data type:

/* Returns 0 if found. */
int hashmap_find(hashmap *hmap, const unsigned char *name,
                 const int data_type,
                 void **dataptr_to, size_t *size_to)
{
    struct entry  *curr;
    size_t         hash;

    if (size_to)
        *size_to = 0;
    if (dataptr_to)
        *dataptr_to = NULL;

    if (!hmap)
        return -1; /* No hashmap specified. */
    if (!name || !*name)
        return -1; /* NULL or empty name. */

    hash = hmap->hash(name, strlen(name));
    curr = hmap->slot[hash % hmap->size];

    for (curr = hmap->slot[hash % hmap->size]; curr != NULL; curr = curr->next) {
        if (curr->data_type == data_type && curr->hash == hash &&
            !strcmp(curr->name, name)) {
            /* Data type an name matches. Save size if requested. */
            if (size_to)
                *size_to = curr->data_size;
            if (dataptr_to)
                *dataptr_to = curr->data;
            return 0; /* Found. */
        }
    }

    return -1; /* Not found. */
}

The above lookup returns 0 if found, and nonzero if error or not found. (This way, even zero-size NULL data can be stored in the hash map.)

If the number of data types supported is small, say 32, then using an unsigned int with each bit (1U<<0 == 1, 1U<<1 == 2, 1U<<2 == 4, and so on) reserved for a specific type, you can do the lookup using a mask, allowing only the specified types. Similarly, the data_type can be a mask, describing which types the value can be interpreted as (almost always will have just one bit set).

This scheme also allows one to dynamically resize the hashmap, by allocating a new slot array of pointers, and moving each old entry to the new one. The keys don't need to be rehashed, because the original hash is stored in each entry. For lookup efficiency, the chains (hanging off each slot) should be as short as possible. A common "rule of thumb" is that hashmap->size should be between hashmap->used and 2 * hashmap->used.

@Barmar 2018-11-08 23:14:29

The problem is that you're using the same pointer every time you call map_insert(). It just stores the pointer, it doesn't copy the data. Each time through the loop you change the contents of that memory, so all the hash map elements point to that same value.

There are two ways you can fix it. One way is to always make a dynamically-allocated copy of the data before calling map_insert():

for (int i = 0; i < 30; i++) {
    int *i_copy = malloc(sizeof *i_copy);
    *i_copy = i;
    map_insert(map, (char *)i_copy, (char *)i_copy);
}

The other option is to add the size of the value to the map_insert() and node_create() arguments. Then node_create call malloc() and memcpy() to copy the value to dynamic memory.

BTW, there's another problem. The key is supposed to be a null-terminated string (strhash() depends on this), but you're using &i, which is a pointer to an integer. Casting a pointer to an integer to char* doesn't return a string, it just returns a pointer to the same location with a different data type. I haven't fixed this above.

@Morpheus 2018-11-08 23:12:26

When you call map_insert(map, (char*)&i, &i); the value inserted into hasmap is the pointer to i variable, i.e. its address in memory, and not the value of i. So when you change i value inside the for loop there is the side-effect to all entries into the hashmap, and at the end of the loop you only see the last value assigned.

@John Bollinger 2018-11-08 23:16:22

More or less. The "less" is that changing the value of i has no effect at all on the map entries. The value of i is not part of them. This is in fact the key thing the OP should take away.

@Morpheus 2018-11-08 23:35:50

Yes I said that badly: I wanted to say that because all entries have the same values of &i, a change to i has the consequence that deferencing all hashmap values returns the last value setted to i.

Related Questions

Sponsored Content

36 Answered Questions

[SOLVED] Differences between HashMap and Hashtable?

14 Answered Questions

[SOLVED] What is a smart pointer and when should I use one?

33 Answered Questions

12 Answered Questions

[SOLVED] How do function pointers in C work?

  • 2009-05-08 15:49:17
  • Yuval Adam
  • 710696 View
  • 1036 Score
  • 12 Answer
  • Tags:   c function-pointers

22 Answered Questions

[SOLVED] Why should I use a pointer rather than the object itself?

  • 2014-03-03 11:54:16
  • gEdringer
  • 265868 View
  • 1379 Score
  • 22 Answer
  • Tags:   c++ pointers c++11

7 Answered Questions

[SOLVED] Iterate through a HashMap

16 Answered Questions

[SOLVED] How to update a value, given a key in a java hashmap?

  • 2010-11-11 18:34:01
  • laertis
  • 602055 View
  • 506 Score
  • 16 Answer
  • Tags:   java key hashmap

1 Answered Questions

[SOLVED] Why does the Book say I must cast malloc?

1 Answered Questions

[SOLVED] malloc, allocating a matrix

  • 2013-12-30 13:22:16
  • AlbertoZ
  • 303 View
  • 0 Score
  • 1 Answer
  • Tags:   c pointers

7 Answered Questions

[SOLVED] Pointer to a casted Pointer?

  • 2009-12-16 02:42:18
  • Grey
  • 447 View
  • 2 Score
  • 7 Answer
  • Tags:   c pointers

Sponsored Content