about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <normalperson@yhbt.net>2014-02-09 09:22:52 +0000
committerEric Wong <normalperson@yhbt.net>2014-02-09 09:25:19 +0000
commit434a20bcbc306ffb484c4a38e99ca82de2c0e08b (patch)
tree0f5b8f93a30b26b526eba9c6fe063b9906d8d46a
parent1ed6aacf5cb3265191acc61449fe68abd7c8f503 (diff)
downloadruby-tdb-434a20bcbc306ffb484c4a38e99ca82de2c0e08b.tar.gz
It's the preferred hash function nowadays by Ruby itself, so it
probably makes sense to add it
-rw-r--r--Hash_Functions13
-rw-r--r--ext/tdb/hash_functions.c2
-rw-r--r--ext/tdb/rbtdb.h1
-rw-r--r--ext/tdb/siphash24.c328
-rw-r--r--ext/tdb/tdb.c1
-rw-r--r--test/test_tdb.rb2
6 files changed, 344 insertions, 3 deletions
diff --git a/Hash_Functions b/Hash_Functions
index 623bfab..e285120 100644
--- a/Hash_Functions
+++ b/Hash_Functions
@@ -19,6 +19,16 @@ corruption, so don't do it.
   {lookup3 hash}[http://www.burtleburtle.net/bob/c/lookup3.c].
   <code>:hash => :jenkins_lookup3</code>
 
+== SipHash
+
+https://en.wikipedia.org/wiki/Sip_Hash
+https://131002.net/siphash/
+
+Currently (as of Ruby 2.1) the favored hash in Ruby for hash-flooding
+protection.
+
+* :siphash24 - the reference implementation
+
 == Murmur family
 
 The {Murmur}[https://sites.google.com/site/murmurhash/] family of hashes
@@ -63,7 +73,6 @@ other architectures).
 
 == Bernstein hashes
 
-* :djb3 - The hash currently favored by Bernstein.
-  See http://www.cse.yorku.ca/~oz/hash.html
+* :djb3 - See http://www.cse.yorku.ca/~oz/hash.html
 
 * :djb2 - See http://www.cse.yorku.ca/~oz/hash.html
diff --git a/ext/tdb/hash_functions.c b/ext/tdb/hash_functions.c
index 6c93c4b..aab2fc5 100644
--- a/ext/tdb/hash_functions.c
+++ b/ext/tdb/hash_functions.c
@@ -10,6 +10,7 @@ static VALUE fn(VALUE self,VALUE str) \
         return UINT2NUM(rbtdb_##fn(&data)); \
 }
 
+HASH_FN(siphash24)
 HASH_FN(murmur1)
 HASH_FN(murmur1_aligned)
 HASH_FN(murmur2)
@@ -29,6 +30,7 @@ void rbtdb_init_tdb_hash_functions(void)
         VALUE cTDB = rb_const_get(rb_cObject, rb_intern("TDB"));
         VALUE mHashFunctions = rb_define_module_under(cTDB, "HashFunctions");
 
+        HASH_M(siphash24);
         HASH_M(murmur1);
         HASH_M(murmur1_aligned);
         HASH_M(murmur2);
diff --git a/ext/tdb/rbtdb.h b/ext/tdb/rbtdb.h
index 076a618..381d79a 100644
--- a/ext/tdb/rbtdb.h
+++ b/ext/tdb/rbtdb.h
@@ -3,6 +3,7 @@
 #include <ruby.h>
 #include <tdb.h>
 
+unsigned int rbtdb_siphash24(TDB_DATA *key);
 unsigned int rbtdb_murmur1(TDB_DATA *key);
 unsigned int rbtdb_murmur1_aligned(TDB_DATA *key);
 unsigned int rbtdb_murmur2(TDB_DATA *key);
diff --git a/ext/tdb/siphash24.c b/ext/tdb/siphash24.c
new file mode 100644
index 0000000..ab51406
--- /dev/null
+++ b/ext/tdb/siphash24.c
@@ -0,0 +1,328 @@
+#include "rbtdb.h"
+/*
+   SipHash reference C implementation
+   Trivially adapted by Eric Wong for ruby-tdb
+
+   Written in 2012 by
+   Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
+   Daniel J. Bernstein <djb@cr.yp.to>
+
+   To the extent possible under law, the author(s) have dedicated all copyright
+   and related and neighboring rights to this software to the public domain
+   worldwide. This software is distributed without any warranty.
+
+   You should have received a copy of the CC0 Public Domain Dedication along with
+   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
+*/
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+typedef uint64_t u64;
+typedef uint32_t u32;
+typedef uint8_t u8;
+
+#define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) )
+
+#define U32TO8_LE(p, v)         \
+    (p)[0] = (u8)((v)      ); (p)[1] = (u8)((v) >>  8); \
+    (p)[2] = (u8)((v) >> 16); (p)[3] = (u8)((v) >> 24);
+
+#define U64TO8_LE(p, v)         \
+  U32TO8_LE((p),     (u32)((v)      ));   \
+  U32TO8_LE((p) + 4, (u32)((v) >> 32));
+
+#define U8TO64_LE(p) \
+  (((u64)((p)[0])      ) | \
+   ((u64)((p)[1]) <<  8) | \
+   ((u64)((p)[2]) << 16) | \
+   ((u64)((p)[3]) << 24) | \
+   ((u64)((p)[4]) << 32) | \
+   ((u64)((p)[5]) << 40) | \
+   ((u64)((p)[6]) << 48) | \
+   ((u64)((p)[7]) << 56))
+
+#define SIPROUND            \
+  do {              \
+    v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \
+    v2 += v3; v3=ROTL(v3,16); v3 ^= v2;     \
+    v0 += v3; v3=ROTL(v3,21); v3 ^= v0;     \
+    v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \
+  } while(0)
+
+/* SipHash-2-4 */
+static int
+crypto_auth(unsigned char *out, const unsigned char *in,
+            size_t inlen, const unsigned char *k)
+{
+        /* "somepseudorandomlygeneratedbytes" */
+        u64 v0 = 0x736f6d6570736575ULL;
+        u64 v1 = 0x646f72616e646f6dULL;
+        u64 v2 = 0x6c7967656e657261ULL;
+        u64 v3 = 0x7465646279746573ULL;
+        u64 b;
+        u64 k0 = U8TO64_LE(k);
+        u64 k1 = U8TO64_LE(k + 8);
+        u64 m;
+        const u8 *end = in + inlen - (inlen % sizeof(u64));
+        const int left = inlen & 7;
+        b = ((u64) inlen) << 56;
+        v3 ^= k1;
+        v2 ^= k0;
+        v1 ^= k1;
+        v0 ^= k0;
+
+        for (; in != end; in += 8) {
+                m = U8TO64_LE(in);
+#ifdef DEBUG
+                printf("(%3d) v0 %08x %08x\n", (int)inlen, (u32) (v0 >> 32),
+                       (u32) v0);
+                printf("(%3d) v1 %08x %08x\n", (int)inlen, (u32) (v1 >> 32),
+                       (u32) v1);
+                printf("(%3d) v2 %08x %08x\n", (int)inlen, (u32) (v2 >> 32),
+                       (u32) v2);
+                printf("(%3d) v3 %08x %08x\n", (int)inlen, (u32) (v3 >> 32),
+                       (u32) v3);
+                printf("(%3d) compress %08x %08x\n", (int)inlen,
+                       (u32) (m >> 32), (u32) m);
+#endif
+                v3 ^= m;
+                SIPROUND;
+                SIPROUND;
+                v0 ^= m;
+        }
+
+        switch (left) {
+        case 7:
+                b |= ((u64) in[6]) << 48;
+
+        case 6:
+                b |= ((u64) in[5]) << 40;
+
+        case 5:
+                b |= ((u64) in[4]) << 32;
+
+        case 4:
+                b |= ((u64) in[3]) << 24;
+
+        case 3:
+                b |= ((u64) in[2]) << 16;
+
+        case 2:
+                b |= ((u64) in[1]) << 8;
+
+        case 1:
+                b |= ((u64) in[0]);
+                break;
+
+        case 0:
+                break;
+        }
+
+#ifdef DEBUG
+        printf("(%3d) v0 %08x %08x\n", (int)inlen, (u32) (v0 >> 32), (u32) v0);
+        printf("(%3d) v1 %08x %08x\n", (int)inlen, (u32) (v1 >> 32), (u32) v1);
+        printf("(%3d) v2 %08x %08x\n", (int)inlen, (u32) (v2 >> 32), (u32) v2);
+        printf("(%3d) v3 %08x %08x\n", (int)inlen, (u32) (v3 >> 32), (u32) v3);
+        printf("(%3d) padding   %08x %08x\n", (int)inlen, (u32) (b >> 32),
+               (u32) b);
+#endif
+        v3 ^= b;
+        SIPROUND;
+        SIPROUND;
+        v0 ^= b;
+#ifdef DEBUG
+        printf("(%3d) v0 %08x %08x\n", (int)inlen, (u32) (v0 >> 32), (u32) v0);
+        printf("(%3d) v1 %08x %08x\n", (int)inlen, (u32) (v1 >> 32), (u32) v1);
+        printf("(%3d) v2 %08x %08x\n", (int)inlen, (u32) (v2 >> 32), (u32) v2);
+        printf("(%3d) v3 %08x %08x\n", (int)inlen, (u32) (v3 >> 32), (u32) v3);
+#endif
+        v2 ^= 0xff;
+        SIPROUND;
+        SIPROUND;
+        SIPROUND;
+        SIPROUND;
+        b = v0 ^ v1 ^ v2 ^ v3;
+        U64TO8_LE(out, b);
+        return 0;
+}
+
+/*
+   SipHash-2-4 output with
+   k = 00 01 02 ...
+   and
+   in = (empty string)
+   in = 00 (1 byte)
+   in = 00 01 (2 bytes)
+   in = 00 01 02 (3 bytes)
+   ...
+   in = 00 01 02 ... 3e (63 bytes)
+*/
+u8 vectors[64][8] = {
+        {0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72,}
+        ,
+        {0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74,}
+        ,
+        {0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d,}
+        ,
+        {0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85,}
+        ,
+        {0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf,}
+        ,
+        {0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18,}
+        ,
+        {0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb,}
+        ,
+        {0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab,}
+        ,
+        {0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93,}
+        ,
+        {0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e,}
+        ,
+        {0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a,}
+        ,
+        {0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4,}
+        ,
+        {0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75,}
+        ,
+        {0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14,}
+        ,
+        {0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7,}
+        ,
+        {0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1,}
+        ,
+        {0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f,}
+        ,
+        {0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69,}
+        ,
+        {0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b,}
+        ,
+        {0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb,}
+        ,
+        {0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe,}
+        ,
+        {0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0,}
+        ,
+        {0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93,}
+        ,
+        {0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8,}
+        ,
+        {0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8,}
+        ,
+        {0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc,}
+        ,
+        {0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17,}
+        ,
+        {0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f,}
+        ,
+        {0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde,}
+        ,
+        {0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6,}
+        ,
+        {0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad,}
+        ,
+        {0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32,}
+        ,
+        {0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71,}
+        ,
+        {0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7,}
+        ,
+        {0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12,}
+        ,
+        {0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15,}
+        ,
+        {0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31,}
+        ,
+        {0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02,}
+        ,
+        {0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca,}
+        ,
+        {0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a,}
+        ,
+        {0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e,}
+        ,
+        {0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad,}
+        ,
+        {0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18,}
+        ,
+        {0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4,}
+        ,
+        {0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9,}
+        ,
+        {0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9,}
+        ,
+        {0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb,}
+        ,
+        {0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0,}
+        ,
+        {0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6,}
+        ,
+        {0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7,}
+        ,
+        {0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee,}
+        ,
+        {0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1,}
+        ,
+        {0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a,}
+        ,
+        {0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81,}
+        ,
+        {0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f,}
+        ,
+        {0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24,}
+        ,
+        {0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7,}
+        ,
+        {0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea,}
+        ,
+        {0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60,}
+        ,
+        {0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66,}
+        ,
+        {0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c,}
+        ,
+        {0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f,}
+        ,
+        {0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5,}
+        ,
+        {0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95,}
+};
+
+int rbtdb_siphash_test_vectors(void)
+{
+#define MAXLEN 64
+        u8 in[MAXLEN], out[8], k[16];
+        int i;
+        int ok = 1;
+
+        for (i = 0; i < 16; ++i)
+                k[i] = i;
+
+        for (i = 0; i < MAXLEN; ++i) {
+                in[i] = i;
+                crypto_auth(out, in, i, k);
+
+                if (memcmp(out, vectors[i], 8)) {
+                        printf("test vector failed for %d bytes\n", i);
+                        ok = 0;
+                }
+        }
+
+        return ok;
+}
+
+unsigned int rbtdb_siphash24(TDB_DATA *data)
+{
+        /* n.b.: tdb is for persistent storage, so the key must be constant */
+        static uint8_t key[] = "trivial database";
+        /* static const uint8_t check[(sizeof(key) >= 16)]; */
+        union {
+                uint32_t x32[2];
+                uint64_t x64;
+        } out;
+        unsigned char *in = data->dptr;
+        size_t len = data->dsize;
+
+        crypto_auth((unsigned char *)&out, in, len, key);
+
+        return (unsigned int)(out.x32[0] ^ out.x32[1]);
+}
diff --git a/ext/tdb/tdb.c b/ext/tdb/tdb.c
index 754d712..58e16bf 100644
--- a/ext/tdb/tdb.c
+++ b/ext/tdb/tdb.c
@@ -81,6 +81,7 @@ static void init_hashes(void)
 {
 #define HF(x) \
 rb_hash_aset(hashes,ID2SYM(rb_intern(#x)),ULONG2NUM((unsigned long)rbtdb_##x))
+        HF(siphash24);
         HF(murmur1);
         HF(murmur1_aligned);
         HF(murmur2);
diff --git a/test/test_tdb.rb b/test/test_tdb.rb
index 9b2ed71..4b3f223 100644
--- a/test/test_tdb.rb
+++ b/test/test_tdb.rb
@@ -192,7 +192,7 @@ class TestTdb < Test::Unit::TestCase
   def test_alternate_hashes
     results = {}
     expect = TDB::HASHES.to_a.map { |k,v| [ k.to_s, v.to_s ] }.sort
-    %w(default jenkins_lookup3 djb2 djb3 fnv1a
+    %w(default siphash24 jenkins_lookup3 djb2 djb3 fnv1a
        murmur1 murmur1_aligned murmur2 murmur2a murmur2_aligned
        murmur3a murmur3f).each do |h|
       assert_nothing_raised do