diff options
Diffstat (limited to 'ext/tdb/murmur1.c')
-rw-r--r-- | ext/tdb/murmur1.c | 151 |
1 files changed, 151 insertions, 0 deletions
diff --git a/ext/tdb/murmur1.c b/ext/tdb/murmur1.c new file mode 100644 index 0000000..1880cc6 --- /dev/null +++ b/ext/tdb/murmur1.c @@ -0,0 +1,151 @@ +#include "rbtdb.h" +#include <assert.h> + +/* + * https://sites.google.com/site/murmurhash/ + * + * Public Domain hash functions by Austin Appleby. + * + * Trivially adapted for use with Ruby TDB by Eric Wong. + */ + +/* + * 'm' and 'r' are mixing constants generated offline. + * They're not really 'magic', they just happen to work well. + */ +static const unsigned int m = 0xc6a4a793; +static const int r = 16; +static const unsigned int seed; + +unsigned int rbtdb_murmur1(TDB_DATA * key) +{ + const unsigned char *data = key->dptr; + int len = (int)key->dsize; + /* Initialize the hash to a 'random' value */ + unsigned int h = seed ^ (len * m); + + while (len >= 4) { + h += *(const unsigned int *)data; + h *= m; + h ^= h >> r; + + data += 4; + len -= 4; + } + + /* Handle the last few bytes of the input array */ + switch (len) { + case 3: + h += data[2] << 16; + case 2: + h += data[1] << 8; + case 1: + h += data[0]; + h *= m; + h ^= h >> r; + }; + + /* + * Do a few final mixes of the hash to ensure the last few + * bytes are well-incorporated. + */ + h *= m; + h ^= h >> 10; + h *= m; + h ^= h >> 17; + + return h; +} + +/* adapted from MurmurHashAligned */ +unsigned int rbtdb_murmur1_aligned(TDB_DATA * key) +{ + const unsigned char *data = key->dptr; + int len = (int)key->dsize; + unsigned int h = seed ^ (len * m); + union { const unsigned char *byte; int integer; } cast = { data }; + int align = cast.integer & 3; + + if (align & (len >= 4)) { + /* Pre-load the temp registers */ + unsigned int t = 0, d = 0; + int sl, sr, pack; + + switch (align) { + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; + } + + t <<= (8 * align); + + data += 4 - align; + len -= 4 - align; + + sl = 8 * (4 - align); + sr = 8 * align; + + /* Mix */ + while (len >= 4) { + assert((cast.integer & 3) == 0); + + d = *(const unsigned int *)data; + t = (t >> sr) | (d << sl); + h += t; + h *= m; + h ^= h >> r; + t = d; + + data += 4; + len -= 4; + } + + /* Handle leftover data in temp registers */ + pack = len < align ? len : align; + d = 0; + + switch (pack) { + case 3: + d |= data[2] << 16; + case 2: + d |= data[1] << 8; + case 1: + d |= data[0]; + case 0: + h += (t >> sr) | (d << sl); + h *= m; + h ^= h >> r; + } + + data += pack; + len -= pack; + } else { + while (len >= 4) { + h += *(const unsigned int *)data; + h *= m; + h ^= h >> r; + + data += 4; + len -= 4; + } + } + + /* Handle tail bytes */ + switch (len) { + case 3: + h += data[2] << 16; + case 2: + h += data[1] << 8; + case 1: + h += data[0]; + h *= m; + h ^= h >> r; + }; + + h *= m; + h ^= h >> 10; + h *= m; + h ^= h >> 17; + + return h; +} |