From 5d6dc5c742f827350490d8f33c4c89b203ae7460 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 10 Dec 2010 19:49:12 -0800
Subject: String#hash is not stable across processes :<

We need to implement our own hash functions for splitting
databases across multiple files. This was totally fucking up
Rainbows!
---
 lib/metropolis.rb                   |  2 ++
 lib/metropolis/multi_hash.rb        | 24 ++++++++++++++++++++++++
 lib/metropolis/multi_hash/digest.rb | 23 +++++++++++++++++++++++
 lib/metropolis/multi_hash/to_i.rb   | 12 ++++++++++++
 lib/metropolis/tc/hdb.rb            |  6 ++++--
 lib/metropolis/tc/hdb/ex.rb         |  2 +-
 lib/metropolis/tc/hdb/ro.rb         |  2 +-
 lib/metropolis/tdb/multi.rb         |  4 +++-
 test/test_tdb.rb                    | 29 +++++++++++++++++++++++++++++
 9 files changed, 99 insertions(+), 5 deletions(-)
 create mode 100644 lib/metropolis/multi_hash.rb
 create mode 100644 lib/metropolis/multi_hash/digest.rb
 create mode 100644 lib/metropolis/multi_hash/to_i.rb

diff --git a/lib/metropolis.rb b/lib/metropolis.rb
index c38d4af..0af1465 100644
--- a/lib/metropolis.rb
+++ b/lib/metropolis.rb
@@ -9,6 +9,7 @@ module Metropolis
   autoload :TC, 'metropolis/tc'
   autoload :Hash, 'metropolis/hash'
   autoload :TDB, 'metropolis/tdb'
+  autoload :MultiHash, 'metropolis/multi_hash'
 
   def self.new(opts = {})
     opts = opts.dup
@@ -19,6 +20,7 @@ module Metropolis
       @query = @uri.query ? Rack::Utils.parse_query(@uri.query) : nil
       @path_pattern = opts[:path_pattern]
       @path = @uri.path if @uri.path != '/'
+      @multi_hash = opts[:multi_hash]
     end
 
     base = case uri.scheme
diff --git a/lib/metropolis/multi_hash.rb b/lib/metropolis/multi_hash.rb
new file mode 100644
index 0000000..65ed6fe
--- /dev/null
+++ b/lib/metropolis/multi_hash.rb
@@ -0,0 +1,24 @@
+# -*- encoding: binary -*-
+module Metropolis::MultiHash
+  autoload :Digest, 'metropolis/multi_hash/digest'
+  autoload :ToI, 'metropolis/multi_hash/to_i'
+
+  def self.extended(obj)
+    sym = obj.instance_eval {
+      case @multi_hash.to_s
+      when /\Ato_i/
+        extend Metropolis::MultiHash::ToI
+      when /\Adigest_/
+        extend Metropolis::MultiHash::Digest
+      when /\Atdb_hash_/
+        extend TDB::HashFunctions
+      end
+      @multi_hash
+    }
+    obj.respond_to?(sym) or
+      raise ArgumentError, "multi_hash=#{sym} not supported"
+    (class << obj; self; end).instance_eval do
+      alias_method :multi_hash, sym
+    end
+  end
+end
diff --git a/lib/metropolis/multi_hash/digest.rb b/lib/metropolis/multi_hash/digest.rb
new file mode 100644
index 0000000..974d7ac
--- /dev/null
+++ b/lib/metropolis/multi_hash/digest.rb
@@ -0,0 +1,23 @@
+# -*- encoding: binary -*-
+require 'digest'
+module Metropolis::MultiHash::Digest
+  def digest_sha1(key)
+    ::Digest::SHA1.digest(key)[0,4].unpack("N")[0]
+  end
+
+  def digest_md5(key)
+    ::Digest::MD5.digest(key)[0,4].unpack("N")[0]
+  end
+
+  def digest_sha256(key)
+    ::Digest::SHA256.digest(key)[0,4].unpack("N")[0]
+  end
+
+  def digest_sha384(key)
+    ::Digest::SHA384.digest(key)[0,4].unpack("N")[0]
+  end
+
+  def digest_sha512(key)
+    ::Digest::SHA512.digest(key)[0,4].unpack("N")[0]
+  end
+end
diff --git a/lib/metropolis/multi_hash/to_i.rb b/lib/metropolis/multi_hash/to_i.rb
new file mode 100644
index 0000000..69b2819
--- /dev/null
+++ b/lib/metropolis/multi_hash/to_i.rb
@@ -0,0 +1,12 @@
+# -*- encoding: binary -*-
+# simple "hashing" method which converts keys to integers,
+# this may be useful for databases that only store numeric keys
+module Metropolis::MultiHash::ToI
+  def to_i(key)
+    key.to_i
+  end
+
+  def to_i_16(key)
+    key.to_i(16)
+  end
+end
diff --git a/lib/metropolis/tc/hdb.rb b/lib/metropolis/tc/hdb.rb
index d0833f8..e63b015 100644
--- a/lib/metropolis/tc/hdb.rb
+++ b/lib/metropolis/tc/hdb.rb
@@ -68,6 +68,8 @@ module Metropolis::TC::HDB
       end
       [ hdb, path ]
     end
+    @multi_hash ||= :digest_sha1
+    extend Metropolis::MultiHash
     extend(RO) if @readonly
     extend(EX) if @exclusive
   end
@@ -77,7 +79,7 @@ module Metropolis::TC::HDB
   end
 
   def writer(key, &block)
-    hdb, path = @dbv[key.hash % @nr_slots]
+    hdb, path = @dbv[multi_hash(key) % @nr_slots]
     hdb.open(path, @wr_flags) or ex!(:open, hdb)
     yield hdb
   ensure
@@ -85,7 +87,7 @@ module Metropolis::TC::HDB
   end
 
   def reader(key)
-    hdb, path = @dbv[key.hash % @nr_slots]
+    hdb, path = @dbv[multi_hash(key) % @nr_slots]
     hdb.open(path, @rd_flags) or ex!(:open, hdb)
     yield hdb
   ensure
diff --git a/lib/metropolis/tc/hdb/ex.rb b/lib/metropolis/tc/hdb/ex.rb
index 5bc7f39..d205a76 100644
--- a/lib/metropolis/tc/hdb/ex.rb
+++ b/lib/metropolis/tc/hdb/ex.rb
@@ -11,7 +11,7 @@ module Metropolis::TC::HDB::EX
   end
 
   def reader(key)
-    yield @ex_dbv[key.hash % @nr_slots]
+    yield @ex_dbv[multi_hash(key) % @nr_slots]
   end
 
   alias_method :writer, :reader
diff --git a/lib/metropolis/tc/hdb/ro.rb b/lib/metropolis/tc/hdb/ro.rb
index 62ededc..72fa968 100644
--- a/lib/metropolis/tc/hdb/ro.rb
+++ b/lib/metropolis/tc/hdb/ro.rb
@@ -14,6 +14,6 @@ module Metropolis::TC::HDB::RO
   end
 
   def reader(key)
-    yield @ro_dbv[key.hash % @nr_slots]
+    yield @ro_dbv[multi_hash(key) % @nr_slots]
   end
 end
diff --git a/lib/metropolis/tdb/multi.rb b/lib/metropolis/tdb/multi.rb
index b22e63b..68c8bf5 100644
--- a/lib/metropolis/tdb/multi.rb
+++ b/lib/metropolis/tdb/multi.rb
@@ -2,6 +2,8 @@
 module Metropolis::TDB::Multi
   def self.extended(obj)
     obj.instance_eval do
+      @multi_hash ||= :tdb_hash_murmur2
+      extend Metropolis::MultiHash
       @dbv = (0...@nr_slots).to_a.map do |slot|
         path = sprintf(@path_pattern, slot)
         ::TDB.new(path, @tdb_opts)
@@ -10,7 +12,7 @@ module Metropolis::TDB::Multi
   end
 
   def db(key, &block)
-    yield @dbv[key.hash % @nr_slots]
+    yield @dbv[multi_hash(key) % @nr_slots]
   end
 
   def close!
diff --git a/test/test_tdb.rb b/test/test_tdb.rb
index 0fead86..3f607d1 100644
--- a/test/test_tdb.rb
+++ b/test/test_tdb.rb
@@ -15,6 +15,35 @@ class Test_TDB < Test::Unit::TestCase
     @app_opts = { :uri => @uri, :path_pattern => @path_pattern }
   end
 
+  def test_alternate_hash
+    n = 7
+    @app = Metropolis.new(@app_opts.merge(:nr_slots => n, :multi_hash => :to_i))
+    req = Rack::MockRequest.new(app)
+    o = { :lint => true, :fatal => true, :input => "." }
+    (1..8).each do |i|
+      r = req.put("/#{i * n}", o)
+      assert_equal 201, r.status
+      assert_equal "Created\n", r.body
+    end
+    tmp = Hash.new { |h,k| h[k] = {} }
+    @app.instance_eval do
+      @dbv.each_with_index { |db,i| db.each { |k,v| tmp[i][k] = v } }
+    end
+    expect = {
+      0 => {
+        "7" => ".",
+        "14" => ".",
+        "21" => ".",
+        "28" => ".",
+        "35" => ".",
+        "42" => ".",
+        "49" => ".",
+        "56" => ".",
+      }
+    }
+    assert_equal expect, tmp
+  end
+
   def teardown
     Dir[@path_pattern.sub!(/%\d*x/, '*')].each { |x| File.unlink(x) }
   end
-- 
cgit v1.2.3-24-ge0c7