root/livinglogic.visitors/aht.c @ 0:57496fb3b5c1

Revision 0:57496fb3b5c1, 18.8 KB (checked in by Nikolas Tautenhahn <nik@…>, 10 years ago)

initial commit of visitors 0.7, beginning a LL fork

Line 
1/* An implementation of in-memory hash tables:
2 * Copyright (c) 2000-2004 Salvatore Sanfilippo <antirez@invece.org>
3 *
4 * -- VERSION 2004.05.22 --
5 *
6 * COPYRIGHT AND PERMISSION NOTICE
7 * -------------------------------
8 *
9 * Copyright (c) 2000 Salvatore Sanfilippo <antirez@invece.org>
10 * Copyright (c) 2001 Salvatore Sanfilippo <antirez@invece.org>
11 * Copyright (c) 2002 Salvatore Sanfilippo <antirez@invece.org>
12 * Copyright (c) 2003 Salvatore Sanfilippo <antirez@invece.org>
13 * Copyright (c) 2004 Salvatore Sanfilippo <antirez@invece.org>
14 *
15 * All rights reserved.
16 *
17 * Permission is hereby granted, free of charge, to any person obtaining a
18 * copy of this software and associated documentation files (the
19 * "Software"), to deal in the Software without restriction, including
20 * without limitation the rights to use, copy, modify, merge, publish,
21 * distribute, and/or sell copies of the Software, and to permit persons
22 * to whom the Software is furnished to do so, provided that the above
23 * copyright notice(s) and this permission notice appear in all copies of
24 * the Software and that both the above copyright notice(s) and this
25 * permission notice appear in supporting documentation.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
28 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
30 * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
31 * HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
32 * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
33 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
34 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
35 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
36 *
37 * Except as contained in this notice, the name of a copyright holder
38 * shall not be used in advertising or otherwise to promote the sale, use
39 * or other dealings in this Software without prior written authorization
40 * of the copyright holder.
41 *
42 * CHANGELOG
43 * ---------
44 *
45 * 22May2004 - Fixed a but in ht_destroy(). Now after this call the
46 * hashtable is really ready to be reused. Fixed also a memory leak
47 * in the same function. Luckly this function is only called at exit
48 * in many programs.
49 *
50 * OVERVIEW
51 * --------
52 *
53 * AHT is an implementation of a dictionary with support for
54 * INSERT, DELETE and SEARCH operations. It uses the hash table
55 * as base data structure to provide almost constant times for
56 * the three operations. AHT also automatically care about the
57 * size of the current key-values set increasing the hash table
58 * as needed.
59 *
60 * DESIGN PRINCIPLE
61 * ----------------
62 *
63 * - AHT try to resist to attacker-induced worst-case behaviour
64 *   trought the randomization of the hash-function. This is
65 *   optional.
66 *
67 * - AHT take care of the hash table expansion when needed.
68 *   The hash table load ranges from 0 to 0.5, the hash table
69 *   size is a power of two.
70 *
71 * - A simple implementation. The collisions resolution used
72 *   is a simple linear probing, that takes advantage of
73 *   the modern CPU caches, the low hash table max load and
74 *   the use of a strong hash function provided with this library
75 *   (ht_strong_hash), should mitigate the primary clustering
76 *   enough. Experimental results shown that double hashing
77 *   was a performance lost with common key types in modern
78 *   CPUs.
79 *
80 * - Moderatly method oriented, it is possible to define the hash
81 *   function, key/value destructors, key compare function, for a
82 *   given hash table, but not with a per-element base.
83 *
84 * === WARNING ===
85 * =    Before to use this library, think about the -fact- that the
86 * =    worst case is O(N). Like for the quick sort algorithm, it may
87 * =    be a bad idea to use this library in medical software, or other
88 * =    software for wich the worst case should be taken in account
89 * =    even if not likely to happen.
90 * =    Good alternatives are red-black trees, and other trees with
91 * =    a good worst-case behavior.
92 * ===============
93 *
94 * TODO
95 * ----
96 *
97 * - Write the documentation
98 * - ht_copy() to copy an element between hash tables
99 * - ht_dup() to duplicate an entire hash table
100 * - ht_merge() to add the content of one hash table to another
101 * - disk operations, the ability to save an hashtable from the
102 *   memory to the disk and the reverse operation.
103 *
104 * Most of this features needs additional methods, like one
105 * to copy an object, and should return an error if such methods
106 * are not defined.
107 *
108 */
109
110#include <sys/types.h>
111#include <stdio.h>
112#include <stdlib.h>
113#include <string.h>
114#include <assert.h>
115#include "aht.h"
116
117/* -------------------------- private prototypes ---------------------------- */
118static int ht_expand_if_needed(struct hashtable *t);
119static unsigned int next_power(unsigned int size);
120static int ht_insert(struct hashtable *t, void *key, unsigned int *avail_index);
121
122/* The special ht_free_element pointer is used to mark
123 * a freed element in the hash table (note that the elements
124 * neven used are just NULL pointers) */
125static struct ht_ele *ht_free_element = (void*) -1;
126
127/* -------------------------- hash functions -------------------------------- */
128/* The djb hash function, that's under public domain */
129u_int32_t djb_hash(unsigned char *buf, size_t len)
130{
131    u_int32_t h = 5381;
132    while(len--)
133        h = (h + (h << 5)) ^ *buf++;
134    return h;
135}
136
137u_int32_t djb_hashR(unsigned char *buf, size_t len)
138{
139    u_int32_t h = 5381;
140    buf += len-1;
141    while(len--)
142        h = (h + (h << 5)) ^ *buf--;
143    return h;
144}
145
146/* Another trivial hash function */
147#define ROT32R(x,n) (((x)>>n)|(x<<(32-n)))
148u_int32_t trivial_hash(unsigned char *buf, size_t len)
149{
150    u_int32_t h = 0;
151    while(len--) {
152        h = h + *buf++;
153        h = ROT32R(h, 3);
154    }
155    return h;
156}
157
158u_int32_t trivial_hashR(unsigned char *buf, size_t len)
159{
160    u_int32_t h = 0;
161    buf += len-1;
162    while(len--) {
163        h = h + *buf--;
164        h = ROT32R(h, 3);
165    }
166    return h;
167}
168
169/* A strong hash function that should be the default with this
170 * hashtable implementation. Our hash tables does not support
171 * double hashing for design: the idea is to avoid double
172 * hashing and use a bit slower but very strong hash function like
173 * this. This should provide quite good performances with
174 * all the kinds of keys if you take the default max load of 50%.
175 *
176 * For more information see: http://burtleburtle.net/bob/hash/evahash.html */
177
178/* The mixing step */
179#define mix(a,b,c) \
180{ \
181  a=a-b;  a=a-c;  a=a^(c>>13); \
182  b=b-c;  b=b-a;  b=b^(a<<8);  \
183  c=c-a;  c=c-b;  c=c^(b>>13); \
184  a=a-b;  a=a-c;  a=a^(c>>12); \
185  b=b-c;  b=b-a;  b=b^(a<<16); \
186  c=c-a;  c=c-b;  c=c^(b>>5);  \
187  a=a-b;  a=a-c;  a=a^(c>>3);  \
188  b=b-c;  b=b-a;  b=b^(a<<10); \
189  c=c-a;  c=c-b;  c=c^(b>>15); \
190}
191
192/* The whole new hash function */
193u_int32_t __ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval)
194{
195    u_int32_t a,b,c;    /* the internal state */
196    u_int32_t len;      /* how many key bytes still need mixing */
197
198    /* Set up the internal state */
199    len = length;
200    a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
201    c = initval;         /* variable initialization of internal state */
202
203    /*---------------------------------------- handle most of the key */
204    while (len >= 12)
205    {
206        a=a+(k[0]+((u_int32_t)k[1]<<8)+((u_int32_t)k[2]<<16)+
207                           ((u_int32_t)k[3]<<24));
208        b=b+(k[4]+((u_int32_t)k[5]<<8)+((u_int32_t)k[6]<<16)+
209                           ((u_int32_t)k[7]<<24));
210        c=c+(k[8]+((u_int32_t)k[9]<<8)+((u_int32_t)k[10]<<16)+
211                           ((u_int32_t)k[11]<<24));
212        mix(a,b,c);
213        k = k+12; len = len-12;
214    }
215
216    /*------------------------------------- handle the last 11 bytes */
217    c = c+length;
218    switch(len)              /* all the case statements fall through */
219    {
220        case 11: c=c+((u_int32_t)k[10]<<24);
221        case 10: c=c+((u_int32_t)k[9]<<16);
222        case 9 : c=c+((u_int32_t)k[8]<<8);
223        /* the first byte of c is reserved for the length */
224        case 8 : b=b+((u_int32_t)k[7]<<24);
225        case 7 : b=b+((u_int32_t)k[6]<<16);
226        case 6 : b=b+((u_int32_t)k[5]<<8);
227        case 5 : b=b+k[4];
228        case 4 : a=a+((u_int32_t)k[3]<<24);
229        case 3 : a=a+((u_int32_t)k[2]<<16);
230        case 2 : a=a+((u_int32_t)k[1]<<8);
231        case 1 : a=a+k[0];
232        /* case 0: nothing left to add */
233    }
234    mix(a,b,c);
235    /*-------------------------------------------- report the result */
236    return c;
237}
238
239/* ----------------------------- API implementation ------------------------- */
240/* reset an hashtable already initialized with ht_init().
241 * NOTE: This function should only called by ht_destroy(). */
242static void ht_reset(struct hashtable *t)
243{
244    t->table = NULL;
245    t->size = 0;
246    t->sizemask = 0;
247    t->used = 0;
248    t->collisions = 0;
249}
250
251/* Initialize the hash table */
252int ht_init(struct hashtable *t)
253{
254    ht_reset(t);
255    t->hashf = ht_hash_pointer;
256    t->key_destructor = ht_no_destructor;
257    t->val_destructor = ht_no_destructor;
258    t->key_compare = ht_compare_ptr;
259    return HT_OK;
260}
261
262/* Resize the table to the minimal size that contains all the elements */
263int ht_resize(struct hashtable *t)
264{
265    int minimal = (t->used * 2)+1;
266
267    if (minimal < HT_INITIAL_SIZE)
268        minimal = HT_INITIAL_SIZE;
269    return ht_expand(t, minimal);
270}
271
272/* Move an element accross hash tables */
273int ht_move(struct hashtable *orig, struct hashtable *dest, unsigned int index)
274{
275    int ret;
276    unsigned int new_index;
277
278    /* If the element isn't in the table ht_search will store
279     * the index of the free ht_ele in the integer pointer by *index */
280    ret = ht_insert(dest, orig->table[index]->key, &new_index);
281    if (ret != HT_OK)
282        return ret;
283
284    /* Move the element */
285    dest->table[new_index] = orig->table[index];
286    orig->table[index] = ht_free_element;
287    orig->used--;
288    dest->used++;
289    return HT_OK;
290}
291
292/* Expand or create the hashtable */
293int ht_expand(struct hashtable *t, size_t size)
294{
295    struct hashtable n; /* the new hashtable */
296    unsigned int realsize = next_power(size), i;
297
298    /* the size is invalid if it is smaller than the number of
299     * elements already inside the hashtable */
300    if (t->used >= size)
301        return HT_INVALID;
302
303    ht_init(&n);
304    n.size = realsize;
305    n.sizemask = realsize-1;
306    n.table = malloc(realsize*sizeof(struct ht_ele*));
307    if (n.table == NULL)
308        return HT_NOMEM;
309    /* Copy methods */
310    n.hashf = t->hashf;
311    n.key_destructor = t->key_destructor;
312    n.val_destructor = t->val_destructor;
313    n.key_compare= t->key_compare;
314
315    /* Initialize all the pointers to NULL */
316    memset(n.table, 0, realsize*sizeof(struct ht_ele*));
317
318    /* Copy all the elements from the old to the new table:
319     * note that if the old hash table is empty t->size is zero,
320     * so ht_expand() acts like an ht_create() */
321    n.used = t->used;
322    for (i = 0; i < t->size && t->used > 0; i++) {
323        if (t->table[i] != NULL && t->table[i] != ht_free_element) {
324            u_int32_t h;
325
326            /* Get the new element index: note that we
327             * know that there aren't freed elements in 'n' */
328            h = n.hashf(t->table[i]->key) & n.sizemask;
329            if (n.table[h]) {
330                n.collisions++;
331                while(1) {
332                    h = (h+1) & n.sizemask;
333                    if (!n.table[h])
334                        break;
335                    n.collisions++;
336                }
337            }
338            /* Move the element */
339            n.table[h] = t->table[i];
340            t->used--;
341        }
342    }
343    assert(t->used == 0);
344    free(t->table);
345
346    /* Remap the new hashtable in the old */
347    *t = n;
348    return HT_OK;
349}
350
351/* Add an element, discarding the old if the key already exists */
352int ht_replace(struct hashtable *t, void *key, void *data)
353{
354    int ret;
355    unsigned int index;
356
357    /* Try to add the element */
358    ret = ht_add(t, key, data);
359    if (ret == HT_OK || ret != HT_BUSY)
360        return ret;
361    /* It already exists, get the index */
362    ret = ht_search(t, key, &index);
363    assert(ret == HT_FOUND);
364    /* Remove the old */
365    ret = ht_free(t, index);
366    assert(ret == HT_OK);
367    /* And add the new */
368    return ht_add(t, key, data);
369}
370
371/* Add an element to the target hash table */
372int ht_add(struct hashtable *t, void *key, void *data)
373{
374    int ret;
375    unsigned int index;
376
377    /* If the element isn't in the table ht_insert() will store
378     * the index of the free ht_ele in the integer pointer by *index */
379    ret = ht_insert(t, key, &index);
380    if (ret != HT_OK)
381        return ret;
382
383    /* Allocates the memory and stores key */
384    if ((t->table[index] = malloc(sizeof(struct ht_ele))) == NULL)
385        return HT_NOMEM;
386    /* Store the pointers */
387    t->table[index]->key = key;
388    t->table[index]->data = data;
389    t->used++;
390    return HT_OK;
391}
392
393/* search and remove an element */
394int ht_rm(struct hashtable *t, void *key)
395{
396    int ret;
397    unsigned int index;
398
399    if ((ret = ht_search(t, key, &index)) != HT_FOUND)
400        return ret;
401    return ht_free(t, index);
402}
403
404/* Destroy an entire hash table */
405int ht_destroy(struct hashtable *t)
406{
407    unsigned int i;
408
409    /* Free all the elements */
410    for (i = 0; i < t->size && t->used > 0; i++) {
411        if (t->table[i] != NULL && t->table[i] != ht_free_element) {
412            if (t->key_destructor)
413                t->key_destructor(t->table[i]->key);
414            if (t->val_destructor)
415                t->val_destructor(t->table[i]->data);
416            free(t->table[i]);
417            t->used--;
418        }
419    }
420    /* Free the table and the allocated cache structure */
421    free(t->table);
422    /* Re-initialize the table */
423    ht_reset(t);
424    return HT_OK; /* Actually ht_destroy never fails */
425}
426
427/* Free an element in the hash table */
428int ht_free(struct hashtable *t, unsigned int index)
429{
430    if (index >= t->size)
431        return HT_IOVERFLOW; /* Index overflow */
432    /* ht_free() calls against non-existent elements are ignored */
433    if (t->table[index] != NULL && t->table[index] != ht_free_element) {
434        /* release the key */
435        if (t->key_destructor)
436            t->key_destructor(t->table[index]->key);
437        /* release the value */
438        if (t->val_destructor)
439            t->val_destructor(t->table[index]->data);
440        /* free the element structure */
441        free(t->table[index]);
442        /* mark the element as freed */
443        t->table[index] = ht_free_element;
444        t->used--;
445    }
446    return HT_OK;
447}
448
449/* Search the element with the given key */
450int ht_search(struct hashtable *t, void *key, unsigned int *found_index)
451{
452    int ret;
453    u_int32_t h;
454
455    /* Expand the hashtable if needed */
456    if (t->size == 0) {
457        if ((ret = ht_expand_if_needed(t)) != HT_OK)
458            return ret;
459    }
460
461    /* Try using the first hash functions */
462    h = t->hashf(key) & t->sizemask;
463    /* this handles the removed elements */
464    if (!t->table[h])
465        return HT_NOTFOUND;
466    if (t->table[h] != ht_free_element &&
467        t->key_compare(key, t->table[h]->key))
468    {
469        *found_index = h;
470        return HT_FOUND;
471    }
472
473    while(1) {
474        h = (h+1) & t->sizemask;
475        /* this handles the removed elements */
476        if (t->table[h] == ht_free_element)
477            continue;
478        if (!t->table[h])
479            return HT_NOTFOUND;
480        if (t->key_compare(key, t->table[h]->key)) {
481            *found_index = h;
482            return HT_FOUND;
483        }
484    }
485}
486
487/* This function is used to run the entire hash table,
488 * it returns:
489 * 1  if the element with the given index is valid
490 * 0  if the element with the given index is empty or marked free
491 * -1 if the element if out of the range */
492int ht_get_byindex(struct hashtable *t, unsigned int index)
493{
494    if (index >= t->size)
495        return -1;
496    if (t->table[index] == NULL || t->table[index] == ht_free_element)
497        return 0;
498    return 1;
499}
500
501/* Returns the hash table as an array of paris of key/value void* pointers.
502 * The array is allocated with malloc() and should be freed when no
503 * longer useful. The key and value pointers should not be freed or
504 * altered in any way, they will be handled by the hash table structure.
505 *
506 * This function is mainly useful to sort the hashtable's content
507 * without to alter the hashtable itself.
508 *
509 * Returns NULL on out of memory. */
510void **ht_get_array(struct hashtable *t)
511{
512    int used = ht_used(t);
513    void **table, **tptr;
514    long idx;
515
516    if ((table = (void**) malloc(sizeof(void*)*(used*2))) == NULL)
517        return NULL;
518    tptr = table;
519    for (idx = 0; ;idx++) {
520        int type = ht_get_byindex(t, idx);
521        if (type == -1) break;
522        if (type == 0) continue;
523        *tptr++ = ht_key(t, idx);
524        *tptr++ = ht_value(t, idx);
525    }
526    return table;
527}
528/* ------------------------- private functions ------------------------------ */
529
530/* Expand the hash table if needed */
531static int ht_expand_if_needed(struct hashtable *t)
532{
533    /* If the hash table is empty expand it to the intial size,
534     * if the table is half-full redobule its size. */
535    if (t->size == 0)
536        return ht_expand(t, HT_INITIAL_SIZE);
537    if (t->size <= t->used*2)
538        return ht_expand(t, t->size * 2);
539    return HT_OK;
540}
541
542/* Our hash table capability is a power of two */
543static unsigned int next_power(unsigned int size)
544{
545    unsigned int i = 256;
546
547    if (size >= 2147483648U)
548        return 2147483648U;
549    while(1) {
550        if (i >= size)
551            return i;
552        i *= 2;
553    }
554}
555
556/* the insert function to add elements out of ht expansion */
557static int ht_insert(struct hashtable *t, void *key, unsigned int *avail_index)
558{
559    int ret;
560    u_int32_t h;
561
562    /* Expand the hashtable if needed */
563    if ((ret = ht_expand_if_needed(t)) != HT_OK)
564        return ret;
565
566    /* Try using the first hash functions */
567    h = t->hashf(key) & t->sizemask;
568    /* this handles the removed elements */
569    if (!t->table[h] || t->table[h] == ht_free_element) {
570        *avail_index = h;
571        return HT_OK;
572    }
573    t->collisions++;
574    if (t->key_compare(key, t->table[h]->key))
575        return HT_BUSY;
576
577    while(1) {
578        h = (h+1) & t->sizemask;
579        /* this handles the removed elements */
580        if (!t->table[h] || t->table[h] == ht_free_element) {
581            *avail_index = h;
582            return HT_OK;
583        }
584        t->collisions++;
585        if (t->key_compare(key, t->table[h]->key))
586            return HT_BUSY;
587    }
588}
589
590/* ------------------------- provided destructors --------------------------- */
591
592/* destructor for heap allocated keys/values */
593void ht_destructor_free(void *obj)
594{
595    free(obj);
596}
597
598/* ------------------------- provided comparators --------------------------- */
599
600/* default key_compare method */
601int ht_compare_ptr(void *key1, void *key2)
602{
603    return (key1 == key2);
604}
605
606/* key compare for nul-terminated strings */
607int ht_compare_string(void *key1, void *key2)
608{
609    return (strcmp(key1, key2) == 0) ? 1 : 0;
610}
611
612/* -------------------- hash functions for common data types --------------- */
613
614/* We make this global to allow hash function randomization,
615 * as security measure against attacker-induced worst case behaviuor.
616 *
617 * Note that being H_i the strong hash function with init value of i
618 * and H_i' the same hash function with init value of i' than:
619 *
620 * if H_i(StringOne) is equal to H_i(CollidingStringTwo)
621 *
622 *    it is NOT true that
623 *
624 *  H_i'(StringOne) is equal to H_i''(CollidingStringTwo)
625 */
626static u_int32_t strong_hash_init_val = 0xF937A21;
627
628/* Set the secret initialization value. It should be set from
629 * a secure PRNG like /dev/urandom at program initialization time */
630void ht_set_strong_hash_init_val(u_int32_t secret)
631{
632    strong_hash_init_val = secret;
633}
634
635/* __ht_strong_hash wrapper that mix a user-provided initval
636 * with the global strong_hash_init_val. __ht_strong_hash is
637 * even exported directly. */
638u_int32_t ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval)
639{
640    return __ht_strong_hash(k, length, initval^strong_hash_init_val);
641}
642
643/* Hash function suitable for C strings and other data types using
644 * a 0-byte as terminator */
645u_int32_t ht_hash_string(void *key)
646{
647    return __ht_strong_hash(key, strlen(key), strong_hash_init_val);
648}
649
650/* This one is to hash the value of the pointer itself. */
651u_int32_t ht_hash_pointer(void *key)
652{
653    return __ht_strong_hash((void*)&key, sizeof(void*), strong_hash_init_val);
654}
Note: See TracBrowser for help on using the browser.