diff options
-rw-r--r-- | .document | 7 | ||||
-rw-r--r-- | .gitignore | 19 | ||||
-rw-r--r-- | COPYING | 165 | ||||
-rwxr-xr-x | GIT-VERSION-GEN | 40 | ||||
-rw-r--r-- | GNUmakefile | 190 | ||||
-rw-r--r-- | Hash_Functions | 67 | ||||
-rw-r--r-- | LICENSE | 17 | ||||
-rw-r--r-- | README | 57 | ||||
-rw-r--r-- | Rakefile | 140 | ||||
-rw-r--r-- | TODO | 7 | ||||
-rw-r--r-- | ext/tdb/djb.c | 26 | ||||
-rw-r--r-- | ext/tdb/extconf.rb | 12 | ||||
-rw-r--r-- | ext/tdb/fnv.c | 28 | ||||
-rw-r--r-- | ext/tdb/lookup3.c | 429 | ||||
-rw-r--r-- | ext/tdb/murmur1.c | 151 | ||||
-rw-r--r-- | ext/tdb/murmur2.c | 290 | ||||
-rw-r--r-- | ext/tdb/rbtdb.h | 22 | ||||
-rw-r--r-- | ext/tdb/tdb.c | 679 | ||||
-rw-r--r-- | lib/tdb.rb | 2 | ||||
-rw-r--r-- | setup.rb | 1586 | ||||
-rw-r--r-- | tdb.gemspec | 36 | ||||
-rw-r--r-- | test/test_tdb.rb | 260 |
22 files changed, 4230 insertions, 0 deletions
diff --git a/.document b/.document new file mode 100644 index 0000000..e5c2c99 --- /dev/null +++ b/.document @@ -0,0 +1,7 @@ +LICENSE +README +TODO +NEWS +ChangeLog +lib +ext/tdb/tdb.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1a4b837 --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +/local.mk +*.o +*.log +*.so +*.rbc +/.config +/InstalledFiles +/doc +/local.mk +Makefile +log/ +pkg/ +/NEWS +/ChangeLog +/.manifest +/GIT-VERSION-FILE +/man +tags +TAGS @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". 
+ + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. 
+ + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN new file mode 100755 index 0000000..92e03e3 --- /dev/null +++ b/GIT-VERSION-GEN @@ -0,0 +1,40 @@ +#!/bin/sh + +GVF=GIT-VERSION-FILE +DEF_VER=v0.0.0.GIT + +LF=' +' + +# First see if there is a version file (included in release tarballs), +# then try git-describe, then default. 
+if test -f version +then + VN=$(cat version) || VN="$DEF_VER" +elif test -d .git -o -f .git && + VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && + case "$VN" in + *$LF*) (exit 1) ;; + v[0-9]*) + git update-index -q --refresh + test -z "$(git diff-index --name-only HEAD --)" || + VN="$VN-dirty" ;; + esac +then + VN=$(echo "$VN" | sed -e 's/-/./g'); +else + VN="$DEF_VER" +fi + +VN=$(expr "$VN" : v*'\(.*\)') + +if test -r $GVF +then + VC=$(sed -e 's/^GIT_VERSION = //' <$GVF) +else + VC=unset +fi +test "$VN" = "$VC" || { + echo >&2 "GIT_VERSION = $VN" + echo "GIT_VERSION = $VN" >$GVF +} diff --git a/GNUmakefile b/GNUmakefile new file mode 100644 index 0000000..384e286 --- /dev/null +++ b/GNUmakefile @@ -0,0 +1,190 @@ +# use GNU Make to run tests in parallel, and without depending on RubyGems +all:: +RUBY = ruby +RAKE = rake +RSYNC = rsync +GIT_URL = git://git.bogomips.org/ruby-tdb.git + +GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE + @./GIT-VERSION-GEN +-include GIT-VERSION-FILE +-include local.mk +ifeq ($(DLEXT),) # "so" for Linux + DLEXT := $(shell $(RUBY) -rrbconfig -e 'puts Config::CONFIG["DLEXT"]') +endif +ifeq ($(RUBY_VERSION),) + RUBY_VERSION := $(shell $(RUBY) -e 'puts RUBY_VERSION') +endif + +install: + $(prep_setup_rb) + $(RM) -r .install-tmp + mkdir .install-tmp + $(RUBY) setup.rb all + $(RM) $^ + $(RM) -r .install-tmp + $(prep_setup_rb) + +setup_rb_files := .config InstalledFiles +prep_setup_rb := @-$(RM) $(setup_rb_files);$(MAKE) -C $(ext) clean + +clean: + -$(MAKE) -C ext/tdb clean + $(RM) $(setup_rb_files) ext/tdb/Makefile + +pkg_extra := GIT-VERSION-FILE NEWS ChangeLog +manifest: $(pkg_extra) + $(RM) .manifest + $(MAKE) .manifest + +.manifest: + (git ls-files && \ + for i in $@ $(pkg_extra) $(man1_paths); \ + do echo $$i; done) | LC_ALL=C sort > $@+ + cmp $@+ $@ || mv $@+ $@ + $(RM) $@+ + +NEWS: GIT-VERSION-FILE + $(RAKE) -s news_rdoc > $@+ + mv $@+ $@ + +latest: NEWS + @awk 'BEGIN{RS="=== ";ORS=""}NR==2{sub(/\n$$/,"");print RS""$$0 }' $< + +SINCE = 
+ChangeLog: LOG_VERSION = \ + $(shell git rev-parse -q "$(GIT_VERSION)" >/dev/null 2>&1 && \ + echo $(GIT_VERSION) || git describe) +ifneq ($(SINCE),) +ChangeLog: log_range = v$(SINCE)..$(LOG_VERSION) +endif +ChangeLog: GIT-VERSION-FILE + @echo "ChangeLog from $(GIT_URL) ($(log_range))" > $@+ + @echo >> $@+ + git log $(log_range) | sed -e 's/^/ /' >> $@+ + mv $@+ $@ + +news_atom := http://bogomips.org/ruby-tdb/NEWS.atom.xml +cgit_atom := http://git.bogomips.org/cgit/ruby-tdb.git/atom/?h=master +atom = <link rel="alternate" title="Atom feed" href="$(1)" \ + type="application/atom+xml"/> + +# using rdoc 2.5.x +doc: .document NEWS ChangeLog + rdoc -t "$(shell sed -ne '1s/^= //p' README)" + install -m644 COPYING doc/COPYING + install -m644 $(shell grep '^[A-Z]' .document) doc/ + $(RUBY) -i -p -e \ + '$$_.gsub!("</title>",%q{\&$(call atom,$(cgit_atom))})' \ + doc/ChangeLog.html + $(RUBY) -i -p -e \ + '$$_.gsub!("</title>",%q{\&$(call atom,$(news_atom))})' \ + doc/NEWS.html doc/README.html + $(RAKE) -s news_atom > doc/NEWS.atom.xml + cd doc && ln README.html tmp && mv tmp index.html + +ifneq ($(VERSION),) +rfproject := qrp +rfpackage := tdb +pkggem := pkg/$(rfpackage)-$(VERSION).gem +pkgtgz := pkg/$(rfpackage)-$(VERSION).tgz +release_notes := release_notes-$(VERSION) +release_changes := release_changes-$(VERSION) + +release-notes: $(release_notes) +release-changes: $(release_changes) +$(release_changes): + $(RAKE) -s release_changes > $@+ + $(VISUAL) $@+ && test -s $@+ && mv $@+ $@ +$(release_notes): + GIT_URL=$(GIT_URL) $(RAKE) -s release_notes > $@+ + $(VISUAL) $@+ && test -s $@+ && mv $@+ $@ + +# ensures we're actually on the tagged $(VERSION), only used for release +verify: + test x"$(shell umask)" = x0022 + git rev-parse --verify refs/tags/v$(VERSION)^{} + git diff-index --quiet HEAD^0 + test `git rev-parse --verify HEAD^0` = \ + `git rev-parse --verify refs/tags/v$(VERSION)^{}` + +fix-perms: + -git ls-tree -r HEAD | awk '/^100644 / {print $$NF}' | xargs chmod 644 + 
-git ls-tree -r HEAD | awk '/^100755 / {print $$NF}' | xargs chmod 755 + +gem: $(pkggem) + +install-gem: $(pkggem) + gem install $(CURDIR)/$< + +$(pkggem): manifest fix-perms + gem build $(rfpackage).gemspec + mkdir -p pkg + mv $(@F) $@ + +$(pkgtgz): distdir = $(basename $@) +$(pkgtgz): HEAD = v$(VERSION) +$(pkgtgz): manifest fix-perms + @test -n "$(distdir)" + $(RM) -r $(distdir) + mkdir -p $(distdir) + tar cf - `cat .manifest` | (cd $(distdir) && tar xf -) + cd pkg && tar cf - $(basename $(@F)) | gzip -9 > $(@F)+ + mv $@+ $@ + +package: $(pkgtgz) $(pkggem) + +test-release: verify package $(release_notes) $(release_changes) +release: verify package $(release_notes) $(release_changes) + # make tgz release on RubyForge + rubyforge add_release -f -n $(release_notes) -a $(release_changes) \ + $(rfproject) $(rfpackage) $(VERSION) $(pkgtgz) + # push gem to RubyGems.org + gem push $(pkggem) + # in case of gem downloads from RubyForge releases page + -rubyforge add_file \ + $(rfproject) $(rfpackage) $(VERSION) $(pkggem) + $(RAKE) raa_update VERSION=$(VERSION) +else +gem install-gem: GIT-VERSION-FILE + $(MAKE) $@ VERSION=$(GIT_VERSION) +endif + +ext := ext/tdb/tdb_ext.$(DLEXT) +ext/tdb/Makefile: ext/tdb/extconf.rb + cd $(@D) && $(RUBY) extconf.rb $(EXTCONF_ARGS) + +$(ext): $(wildcard ext/tdb/*.[ch] ext/tdb/*/*.h) ext/tdb/Makefile + $(MAKE) -C $(@D) + +all:: test + +build: $(ext) +test_units := $(wildcard test/test_*.rb) +test: test-unit +test-unit: $(test_units) +$(test_units): build + $(RUBY) -I lib:ext/tdb $@ + +# this requires GNU coreutils variants +publish_doc: + -git set-file-times + $(RM) -r doc ChangeLog NEWS + $(MAKE) doc LOG_VERSION=$(shell git tag -l | tail -1) + $(MAKE) -s latest > doc/LATEST + find doc/images doc/js -type f | \ + TZ=UTC xargs touch -d '1970-01-01 00:00:00' doc/rdoc.css + $(MAKE) doc_gz + chmod 644 $$(find doc -type f) + $(RSYNC) -av doc/ bogomips.org:/srv/bogomips/ruby-tdb/ + git ls-files | xargs touch + +# Create gzip variants of the same 
timestamp as the original so nginx +# "gzip_static on" can serve the gzipped versions directly. +doc_gz: docs = $(shell find doc -type f ! -regex '^.*\.\(gif\|jpg\|png\|gz\)$$') +doc_gz: + touch doc/NEWS.atom.xml -d "$$(awk 'NR==1{print $$4,$$5,$$6}' NEWS)" + for i in $(docs); do \ + gzip --rsyncable -9 < $$i > $$i.gz; touch -r $$i $$i.gz; done + +.PHONY: .FORCE-GIT-VERSION-FILE doc manifest man test $(test_units) diff --git a/Hash_Functions b/Hash_Functions new file mode 100644 index 0000000..5289086 --- /dev/null +++ b/Hash_Functions @@ -0,0 +1,67 @@ += Brief overview of hash functions supported by Ruby TDB + +Ruby TDB supports several alternative hash functions in addition to the +defaults supported by TDB upstream. Hash functions behave and perform +differently depending on the key and type of keys you use. We support +several popular hash functions (and will accept patches to support +more). + +Changing hash functions on an already-created database will cause +corruption, so don't do it. + +== TDB Upstream Defaults + +* the default hash used by TDB is based on the hash algorithm from gdbm. + You may specify this by passing explicitly to TDB.new: + <code>:hash => :default</code> + +* the new default (available via TDB::INCOMPATIBLE_HASH) is the Jenkins + {lookup3 hash}[http://www.burtleburtle.net/bob/c/lookup3.c]. + <code>:hash => :jenkins_lookup3</code> + +== Murmur family + +The {Murmur}[https://sites.google.com/site/murmurhash/] family of hashes +is supported by Ruby TDB. MurmurHash3 will be supported as soon as it +becomes finalized. Most of these are not endian-neutral so databases +are not compatible between machines of different endianness and were +designed with x86 and x86_64 in mind (they may crash or not work on +other architectures). + +* :murmur2 - the simple and fast implementation + +* :murmur2a - words of the author: + + This is a variant of MurmurHash2 modified to use the + Merkle-Damgard construction. 
Bulk speed should be identical to + Murmur2, small-key speed will be 10%-20% slower due to the added + overhead at the end of the hash. + + This variant fixes a minor issue where null keys were more likely to + collide with each other than expected, and also makes the algorithm + more amenable to incremental implementations. All other caveats from + MurmurHash2 still apply. + +* :murmur2_aligned - a safer, but slower variant of :murmur2 designed + for platforms where unaligned 4-byte reads can crash the machine. + +* :murmur2_neutral - endian/alignment-neutral version of the simple + implementation, half as fast according to the author. + +* :murmur1 - simple and fast historical version + +* :murmur1_aligned - according to the author, the performance of this + one should be as good or better than the simple version. + +== FNV family + +* :fnv1a - the recommended variant of the popular + {Fowler-Noll-Vo}[http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash_function] + hash function + +== Bernstein hashes + +* :djb3 - The hash currently favored by Bernstein. + See [http://www.cse.yorku.ca/~oz/hash.html] + +* :djb2 - See [http://www.cse.yorku.ca/~oz/hash.html] @@ -0,0 +1,17 @@ +Ruby tdb is copyrighted Free Software by all contributors, see logs in +revision control for names and email addresses of all of them. + +This library is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version +{3}[http://www.gnu.org/licenses/lgpl-3.0.txt] of the License, or (at +your option) any later version. + +Ruby tdb is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with Ruby tdb; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 @@ -0,0 +1,57 @@ += tdb - Trivial Database bindings for Ruby + +TDB is much like other DBM implementations, except it allows concurrent +writer processes. TDB was initially developed for Samba, but is used by +other projects as well. These Ruby bindings allow Ruby apps to read and +write to the same databases used by Samba! + +== Features + +* Concurrent reader and writer processes may safely operate on the + same file. + +* Releases the GVL for slow disk operations under Ruby 1.9 + +* Includes several {hash functions}[link:Hash_Functions] + not included by upstream TDB. + +== Install + +The original tdb library from the main {site}[http://tdb.samba.org/] is +required. Debian users can just <code>apt-get install tdb-dev</code>. + +The library consists of a C extension so you'll need a C compiler +and Ruby development libraries/headers. + +You may download the tarball from our +{site}[http://bogomips.org/ruby-tdb/] and run setup.rb after unpacking +it: + + http://bogomips.org/ruby-tdb/files/ + +You may also install it via RubyGems on RubyGems.org: + + gem install tdb + +You can get the latest source via git from the following locations +(these versions may not be stable): + + git://git.bogomips.org/ruby-tdb.git + git://repo.or.cz/ruby-tdb.git (mirror) + +You may browse the code from the web and download the latest snapshot +tarballs here: + +* http://git.bogomips.org/cgit/ruby-tdb.git (cgit) +* http://repo.or.cz/w/ruby-tdb.git (gitweb) + +See the HACKING guide on how to contribute and build prerelease gems +from git. + +== Contact + +All feedback (bug reports, user/development discussion, patches, pull +requests) goes to the {mailing list}[mailto:ruby.tdb@librelist.org]. 
+ +For the latest on tdb releases, you may check our NEWS page (and +subscribe to our Atom feed). diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..d8158a0 --- /dev/null +++ b/Rakefile @@ -0,0 +1,140 @@ +# -*- encoding: binary -*- + +# most tasks are in the GNUmakefile which offers better parallelism + +def tags + timefmt = '%Y-%m-%dT%H:%M:%SZ' + @tags ||= `git tag -l`.split(/\n/).map do |tag| + if %r{\Av[\d\.]+} =~ tag + header, subject, body = `git cat-file tag #{tag}`.split(/\n\n/, 3) + header = header.split(/\n/) + tagger = header.grep(/\Atagger /).first + body ||= "initial" + { + :time => Time.at(tagger.split(/ /)[-2].to_i).utc.strftime(timefmt), + :tagger_name => %r{^tagger ([^<]+)}.match(tagger)[1].strip, + :tagger_email => %r{<([^>]+)>}.match(tagger)[1].strip, + :id => `git rev-parse refs/tags/#{tag}`.chomp!, + :tag => tag, + :subject => subject, + :body => body, + } + end + end.compact.sort { |a,b| b[:time] <=> a[:time] } +end + +cgit_url = "http://git.bogomips.org/cgit/ruby-tdb.git" +git_url = ENV['GIT_URL'] || 'git://git.bogomips.org/ruby-tdb.git' +web_url = "http://bogomips.org/ruby-tdb/" + +desc 'prints news as an Atom feed' +task :news_atom do + require 'nokogiri' + new_tags = tags[0,10] + puts(Nokogiri::XML::Builder.new do + feed :xmlns => "http://www.w3.org/2005/Atom" do + id! "#{web_url}NEWS.atom.xml" + title "Ruby tdb news" + subtitle "Trivial Database bindings for Ruby" + link! :rel => "alternate", :type => "text/html", + :href => "#{web_url}NEWS.html" + updated(new_tags.empty? ? "1970-01-01T00:00:00Z" : new_tags.first[:time]) + new_tags.each do |tag| + entry do + title tag[:subject] + updated tag[:time] + published tag[:time] + author { + name tag[:tagger_name] + email tag[:tagger_email] + } + url = "#{cgit_url}/tag/?id=#{tag[:tag]}" + link! :rel => "alternate", :type => "text/html", :href =>url + id! 
url + message_only = tag[:body].split(/\n.+\(\d+\):\n {6}/s).first.strip + content({:type =>:text}, message_only) + content(:type =>:xhtml) { pre tag[:body] } + end + end + end + end.to_xml) +end + +desc 'prints RDoc-formatted news' +task :news_rdoc do + tags.each do |tag| + time = tag[:time].tr!('T', ' ').gsub!(/:\d\dZ/, ' UTC') + puts "=== #{tag[:tag].sub(/^v/, '')} / #{time}" + puts "" + + body = tag[:body] + puts tag[:body].gsub(/^/sm, " ").gsub(/[ \t]+$/sm, "") + puts "" + end +end + +desc "print release changelog for Rubyforge" +task :release_changes do + version = ENV['VERSION'] or abort "VERSION= needed" + version = "v#{version}" + vtags = tags.map { |tag| tag[:tag] =~ /\Av/ and tag[:tag] }.sort + prev = vtags[vtags.index(version) - 1] + if prev + system('git', 'diff', '--stat', prev, version) or abort $? + puts "" + system('git', 'log', "#{prev}..#{version}") or abort $? + else + system('git', 'log', version) or abort $? + end +end + +desc "print release notes for Rubyforge" +task :release_notes do + spec = Gem::Specification.load('tdb.gemspec') + puts spec.description.strip + puts "" + puts "* #{spec.homepage}" + puts "* #{spec.email}" + puts "* #{git_url}" + + _, _, body = `git cat-file tag v#{spec.version}`.split(/\n\n/, 3) + print "\nChanges:\n\n" + puts body +end + +desc "post to RAA" +task :raa_update do + require 'net/http' + require 'net/netrc' + rc = Net::Netrc.locate('tdb-raa') or abort "~/.netrc not found" + password = rc.password + + s = Gem::Specification.load('tdb.gemspec') + desc = [ s.description.strip ] + desc << "" + desc << "* #{s.email}" + desc << "* #{git_url}" + desc << "* #{cgit_url}" + desc = desc.join("\n") + uri = URI.parse('http://raa.ruby-lang.org/regist.rhtml') + form = { + :name => s.name, + :short_description => s.summary, + :version => s.version.to_s, + :status => 'experimental', + :owner => s.authors.first, + :email => s.email, + :category_major => 'Library', + :category_minor => 'Database', + :url => s.homepage, + 
:download => 'http://bogomips.org/ruby-tdb/files/', + :license => "LGPL", + :description_style => 'Plain', + :description => desc, + :pass => password, + :submit => 'Update', + } + res = Net::HTTP.post_form(uri, form) + p res + puts res.body +end @@ -0,0 +1,7 @@ +* port the final Murmur3 hash implementation + +* support more TDB-specific features + +* make it reasonably API-compatible with other DBM bindings + +* RDoc documentation diff --git a/ext/tdb/djb.c b/ext/tdb/djb.c new file mode 100644 index 0000000..83abe34 --- /dev/null +++ b/ext/tdb/djb.c @@ -0,0 +1,26 @@ +#include "rbtdb.h" + +unsigned int rbtdb_djb2(TDB_DATA *data) +{ + unsigned char *key = data->dptr; + size_t len = data->dsize; + unsigned int hash = 5381; + unsigned int i; + + for (i = 0; i < len; ++i) + hash = ((hash << 5) + hash) + key[i]; /* (hash*33) + key[i] */ + + return hash; +} +unsigned int rbtdb_djb3(TDB_DATA *data) +{ + unsigned char *key = data->dptr; + size_t len = data->dsize; + unsigned int hash = 5381; + unsigned int i; + + for (i = 0; i < len; ++i) + hash = ((hash << 5) + hash) ^ key[i]; /* (hash*33) ^ key[i] */ + + return hash; +} diff --git a/ext/tdb/extconf.rb b/ext/tdb/extconf.rb new file mode 100644 index 0000000..32adafe --- /dev/null +++ b/ext/tdb/extconf.rb @@ -0,0 +1,12 @@ +require 'mkmf' + +have_func('rb_thread_blocking_region') +have_func('rb_thread_call_with_gvl') + +dir_config('tdb') +have_header('tdb.h') or abort 'tdb.h missing' +have_library('tdb') or abort 'libtdb missing' +have_func('tdb_jenkins_hash') +have_const('TDB_ERR_NESTING', 'tdb.h') + +create_makefile('tdb_ext') diff --git a/ext/tdb/fnv.c b/ext/tdb/fnv.c new file mode 100644 index 0000000..769a3d7 --- /dev/null +++ b/ext/tdb/fnv.c @@ -0,0 +1,28 @@ +#include "rbtdb.h" + +#define FNV1A_32A_INIT (unsigned int)0x811c9dc5 +#define FNV_32_PRIME (unsigned int)0x01000193 + +unsigned int rbtdb_fnv1a(TDB_DATA * data) +{ + unsigned char *bp = data->dptr; + unsigned char *be = bp + data->dsize; + unsigned int h = 
FNV1A_32A_INIT; + + /* FNV-1a hash each octet in the buffer */ + while (bp < be) { + + /* xor the bottom with the current octet */ + h ^= (unsigned)*bp++; + + /* multiply by the 32 bit FNV magic prime mod 2^32 */ +#if defined(NO_FNV_GCC_OPTIMIZATION) + h *= FNV_32_PRIME; +#else + h += (h << 1) + (h << 4) + (h << 7) + (h << 8) + (h << 24); +#endif + } + + /* return our new hash value */ + return h; +} diff --git a/ext/tdb/lookup3.c b/ext/tdb/lookup3.c new file mode 100644 index 0000000..23a9088 --- /dev/null +++ b/ext/tdb/lookup3.c @@ -0,0 +1,429 @@ +#include "rbtdb.h" + +/* + * lookup3 implementation copied from tdb.git + * (commit 3258cf3f11bf7c68a2e69e1808c4551cc899725a), + * as that tdb distribution isn't commonly available yet (as of 2010.11.29) + */ +#ifndef HAVE_TDB_JENKINS_HASH + +#ifndef WORDS_BIGENDIAN +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +#else +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#endif + +/* +------------------------------------------------------------------------------- +lookup3.c, by Bob Jenkins, May 2006, Public Domain. + +These are functions for producing 32-bit hashes for hash table lookup. +hash_word(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() +are externally useful functions. Routines to test the hash are included +if SELF_TEST is defined. You can use this free for any purpose. It's in +the public domain. It has no warranty. + +You probably want to use hashlittle(). hashlittle() and hashbig() +hash byte arrays. hashlittle() is is faster than hashbig() on +little-endian machines. Intel and AMD are little-endian machines. +On second thought, you probably want hashlittle2(), which is identical to +hashlittle() except it returns two 32-bit hashes for the price of one. +You could implement hashbig2() if you wanted but I haven't bothered here. 
+ +If you want to find a hash of, say, exactly 7 integers, do + a = i1; b = i2; c = i3; + mix(a,b,c); + a += i4; b += i5; c += i6; + mix(a,b,c); + a += i7; + final(a,b,c); +then use c as the hash value. If you have a variable length array of +4-byte integers to hash, use hash_word(). If you have a byte array (like +a character string), use hashlittle(). If you have several byte arrays, or +a mix of things, see the comments above hashlittle(). + +Why is this so big? I read 12 bytes at a time into 3 4-byte integers, +then mix those integers. This is fast (you can do a lot more thorough +mixing with 12*3 instructions on 3 integers than you can with 3 instructions +on 1 byte), but shoehorning those bytes into integers efficiently is messy. +*/ + +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. 
+ +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. 
+* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + val2 : IN: can be any 4-byte value OUT: second 32 bit hash. +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. Note that the return value is better +mixed than val2, so use that first. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); + +By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this +code any way you wish, private, educational, or commercial. It's free. + +Use for hash table lookup, or anything where one collision in 2^^32 is +acceptable. Do NOT use for cryptographic purposes. 
+------------------------------------------------------------------------------- +*/ + +static uint32_t hashlittle(const void *key, size_t length) +{ + uint32_t a, b, c; /* internal state */ + union { + const void *ptr; + size_t i; + } u; /* needed for Mac Powerbook G4 */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t) length); + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ +#ifdef VALGRIND + const uint8_t *k8; +#endif + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a, b, c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). 
+ */ +#ifndef VALGRIND + + switch (length) { + case 12: + c += k[2]; + b += k[1]; + a += k[0]; + break; + case 11: + c += k[2] & 0xffffff; + b += k[1]; + a += k[0]; + break; + case 10: + c += k[2] & 0xffff; + b += k[1]; + a += k[0]; + break; + case 9: + c += k[2] & 0xff; + b += k[1]; + a += k[0]; + break; + case 8: + b += k[1]; + a += k[0]; + break; + case 7: + b += k[1] & 0xffffff; + a += k[0]; + break; + case 6: + b += k[1] & 0xffff; + a += k[0]; + break; + case 5: + b += k[1] & 0xff; + a += k[0]; + break; + case 4: + a += k[0]; + break; + case 3: + a += k[0] & 0xffffff; + break; + case 2: + a += k[0] & 0xffff; + break; + case 1: + a += k[0] & 0xff; + break; + case 0: + return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch (length) { + case 12: + c += k[2]; + b += k[1]; + a += k[0]; + break; + case 11: + c += ((uint32_t) k8[10]) << 16; /* fall through */ + case 10: + c += ((uint32_t) k8[9]) << 8; /* fall through */ + case 9: + c += k8[8]; /* fall through */ + case 8: + b += k[1]; + a += k[0]; + break; + case 7: + b += ((uint32_t) k8[6]) << 16; /* fall through */ + case 6: + b += ((uint32_t) k8[5]) << 8; /* fall through */ + case 5: + b += k8[4]; /* fall through */ + case 4: + a += k[0]; + break; + case 3: + a += ((uint32_t) k8[2]) << 16; /* fall through */ + case 2: + a += ((uint32_t) k8[1]) << 8; /* fall through */ + case 1: + a += k8[0]; + break; + case 0: + return c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) { + a += k[0] + (((uint32_t) k[1]) << 16); + b += k[2] + (((uint32_t) k[3]) << 16); + c += k[4] + (((uint32_t) k[5]) << 16); + mix(a, b, c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) 
block */ + k8 = (const uint8_t *)k; + switch (length) { + case 12: + c += k[4] + (((uint32_t) k[5]) << 16); + b += k[2] + (((uint32_t) k[3]) << 16); + a += k[0] + (((uint32_t) k[1]) << 16); + break; + case 11: + c += ((uint32_t) k8[10]) << 16; /* fall through */ + case 10: + c += k[4]; + b += k[2] + (((uint32_t) k[3]) << 16); + a += k[0] + (((uint32_t) k[1]) << 16); + break; + case 9: + c += k8[8]; /* fall through */ + case 8: + b += k[2] + (((uint32_t) k[3]) << 16); + a += k[0] + (((uint32_t) k[1]) << 16); + break; + case 7: + b += ((uint32_t) k8[6]) << 16; /* fall through */ + case 6: + b += k[2]; + a += k[0] + (((uint32_t) k[1]) << 16); + break; + case 5: + b += k8[4]; /* fall through */ + case 4: + a += k[0] + (((uint32_t) k[1]) << 16); + break; + case 3: + a += ((uint32_t) k8[2]) << 16; /* fall through */ + case 2: + a += k[0]; + break; + case 1: + a += k8[0]; + break; + case 0: + return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) { + a += k[0]; + a += ((uint32_t) k[1]) << 8; + a += ((uint32_t) k[2]) << 16; + a += ((uint32_t) k[3]) << 24; + b += k[4]; + b += ((uint32_t) k[5]) << 8; + b += ((uint32_t) k[6]) << 16; + b += ((uint32_t) k[7]) << 24; + c += k[8]; + c += ((uint32_t) k[9]) << 8; + c += ((uint32_t) k[10]) << 16; + c += ((uint32_t) k[11]) << 24; + mix(a, b, c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch (length) { /* all the case statements fall through */ + case 12: + c += ((uint32_t) k[11]) << 24; + case 11: + c += ((uint32_t) k[10]) << 16; + case 10: + c += ((uint32_t) k[9]) << 8; + case 9: + c += k[8]; + case 8: + b += ((uint32_t) k[7]) << 24; + case 7: + b += ((uint32_t) k[6]) << 16; + case 6: + b += ((uint32_t) k[5]) << 8; + case 5: + b += k[4]; + case 4: + a += 
((uint32_t) k[3]) << 24; + case 3: + a += ((uint32_t) k[2]) << 16; + case 2: + a += ((uint32_t) k[1]) << 8; + case 1: + a += k[0]; + break; + case 0: + return c; + } + } + + final(a, b, c); + return c; +} + +unsigned int rbtdb_jenkins_lookup3(TDB_DATA * key) +{ + return hashlittle(key->dptr, key->dsize); +} +#endif /* !HAVE_TDB_JENKINS_HASH */ diff --git a/ext/tdb/murmur1.c b/ext/tdb/murmur1.c new file mode 100644 index 0000000..1880cc6 --- /dev/null +++ b/ext/tdb/murmur1.c @@ -0,0 +1,151 @@ +#include "rbtdb.h" +#include <assert.h> + +/* + * https://sites.google.com/site/murmurhash/ + * + * Public Domain hash functions by Austin Appleby. + * + * Trivially adapted for use with Ruby TDB by Eric Wong. + */ + +/* + * 'm' and 'r' are mixing constants generated offline. + * They're not really 'magic', they just happen to work well. + */ +static const unsigned int m = 0xc6a4a793; +static const int r = 16; +static const unsigned int seed; + +unsigned int rbtdb_murmur1(TDB_DATA * key) +{ + const unsigned char *data = key->dptr; + int len = (int)key->dsize; + /* Initialize the hash to a 'random' value */ + unsigned int h = seed ^ (len * m); + + while (len >= 4) { + h += *(const unsigned int *)data; + h *= m; + h ^= h >> r; + + data += 4; + len -= 4; + } + + /* Handle the last few bytes of the input array */ + switch (len) { + case 3: + h += data[2] << 16; + case 2: + h += data[1] << 8; + case 1: + h += data[0]; + h *= m; + h ^= h >> r; + }; + + /* + * Do a few final mixes of the hash to ensure the last few + * bytes are well-incorporated. 
+	 */
+	h *= m;
+	h ^= h >> 10;
+	h *= m;
+	h ^= h >> 17;
+
+	return h;
+}
+
+/*
+ * adapted from MurmurHashAligned
+ *
+ * Hashes the key->dsize bytes at key->dptr and returns the 32-bit result.
+ * When the key does not start on a 4-byte boundary (and is at least 4
+ * bytes long), the leading bytes are loaded one at a time and every
+ * 32-bit load in the main loop is made from an aligned address.
+ */
+unsigned int rbtdb_murmur1_aligned(TDB_DATA * key)
+{
+	const unsigned char *data = key->dptr;
+	int len = (int)key->dsize;
+	unsigned int h = seed ^ (len * m);
+	/* size_t, not int: an int only overlays half of a 64-bit pointer */
+	union { const unsigned char *byte; size_t integer; } cast = { data };
+	int align = (int)(cast.integer & 3);
+
+	/* logical AND: a bitwise "&" with (len >= 4) is 0 when align == 2 */
+	if (align && (len >= 4)) {
+		/* Pre-load the temp registers */
+		unsigned int t = 0, d = 0;
+		int sl, sr, pack;
+
+		switch (align) {
+		case 1: t |= data[2] << 16; /* fall through */
+		case 2: t |= data[1] << 8; /* fall through */
+		case 3: t |= data[0];
+		}
+
+		t <<= (8 * align);
+
+		data += 4 - align;
+		len -= 4 - align;
+
+		sl = 8 * (4 - align);
+		sr = 8 * align;
+
+		/* check the advanced pointer, not the address we started at */
+		cast.byte = data;
+		assert((cast.integer & 3) == 0);
+
+		/* Mix */
+		while (len >= 4) {
+			d = *(const unsigned int *)data;
+			t = (t >> sr) | (d << sl);
+			h += t;
+			h *= m;
+			h ^= h >> r;
+			t = d;
+
+			data += 4;
+			len -= 4;
+		}
+
+		/* Handle leftover data in temp registers */
+		pack = len < align ? len : align;
+		d = 0;
+
+		switch (pack) {
+		case 3:
+			d |= data[2] << 16; /* fall through */
+		case 2:
+			d |= data[1] << 8; /* fall through */
+		case 1:
+			d |= data[0]; /* fall through */
+		case 0:
+			h += (t >> sr) | (d << sl);
+			h *= m;
+			h ^= h >> r;
+		}
+
+		data += pack;
+		len -= pack;
+	} else {
+		/* already aligned, or too short for any 32-bit load */
+		while (len >= 4) {
+			h += *(const unsigned int *)data;
+			h *= m;
+			h ^= h >> r;
+
+			data += 4;
+			len -= 4;
+		}
+	}
+
+	/* Handle tail bytes */
+	switch (len) {
+	case 3:
+		h += data[2] << 16; /* fall through */
+	case 2:
+		h += data[1] << 8; /* fall through */
+	case 1:
+		h += data[0];
+		h *= m;
+		h ^= h >> r;
+	};
+
+	h *= m;
+	h ^= h >> 10;
+	h *= m;
+	h ^= h >> 17;
+
+	return h;
+}
diff --git a/ext/tdb/murmur2.c b/ext/tdb/murmur2.c
new file mode 100644
index 0000000..6b6f8a6
--- /dev/null
+++ b/ext/tdb/murmur2.c
@@ -0,0 +1,290 @@
+#include "rbtdb.h"
+/*
+ * https://sites.google.com/site/murmurhash/
+ *
+ * Public Domain hash functions by Austin Appleby.
+ *
+ * Trivially adapted for use with Ruby TDB by Eric Wong.
+ */
+
+/*
+ * 'm' and 'r' are mixing constants generated offline.
+ * They're not really 'magic', they just happen to work well. + */ + +static const unsigned int m = 0x5bd1e995; +static const int r = 24; +static const unsigned int seed; + +unsigned int rbtdb_murmur2(TDB_DATA * key) +{ + const unsigned char *data = key->dptr; + int len = (int)key->dsize; + /* Initialize the hash to a 'random' value */ + unsigned int h = seed ^ len; + + while (len >= 4) { + unsigned int k = *(const unsigned int *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + /* Handle the last few bytes of the input array */ + switch (len) { + case 3: + h ^= data[2] << 16; + case 2: + h ^= data[1] << 8; + case 1: + h ^= data[0]; + h *= m; + }; + + /* + * Do a few final mixes of the hash to ensure the last few + * bytes are well-incorporated. + */ + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +/* + * This is a variant of MurmurHash2 modified to use the Merkle-Damgard + * construction. Bulk speed should be identical to Murmur2, small-key speed + * will be 10%-20% slower due to the added overhead at the end of the hash. + * + * This variant fixes a minor issue where null keys were more likely to + * collide with each other than expected, and also makes the algorithm + * more amenable to incremental implementations. All other caveats from + * MurmurHash2 still apply. 
+ */ + +#define mmix(h,k) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0) + +unsigned int rbtdb_murmur2a(TDB_DATA * key) +{ + const unsigned char *data = key->dptr; + int len = (int)key->dsize; + unsigned int l = (unsigned int)len; + unsigned int h = seed; + unsigned int t = 0; + + while (len >= 4) { + unsigned int k = *(const unsigned int *)data; + + mmix(h, k); + + data += 4; + len -= 4; + } + + switch (len) { + case 3: + t ^= data[2] << 16; + case 2: + t ^= data[1] << 8; + case 1: + t ^= data[0]; + }; + + mmix(h, t); + mmix(h, l); + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +/* + * Same algorithm as MurmurHash2, but only does aligned reads - should be safer + * on certain platforms + * + * Performance will be lower than MurmurHash2 + */ + +#define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0) + +unsigned int rbtdb_murmur2_aligned(TDB_DATA * key) +{ + const unsigned char *data = key->dptr; + int len = (int)key->dsize; + unsigned int h = seed ^ len; + union { const unsigned char *byte; int integer; } cast = { data }; + int align = cast.integer & 3; + + if (align && (len >= 4)) { + /* Pre-load the temp registers */ + unsigned int t = 0, d = 0; + int sl, sr; + + switch (align) { + case 1: + t |= data[2] << 16; + case 2: + t |= data[1] << 8; + case 3: + t |= data[0]; + } + + t <<= (8 * align); + + data += 4 - align; + len -= 4 - align; + + sl = 8 * (4 - align); + sr = 8 * align; + + /* Mix */ + while (len >= 4) { + unsigned int k; + + d = *(const unsigned int *)data; + t = (t >> sr) | (d << sl); + + k = t; + + MIX(h, k, m); + + t = d; + + data += 4; + len -= 4; + } + + /* Handle leftover data in temp registers */ + d = 0; + if (len >= align) { + unsigned int k; + + switch (align) { + case 3: + d |= data[2] << 16; + case 2: + d |= data[1] << 8; + case 1: + d |= data[0]; + } + + k = (t >> sr) | (d << sl); + MIX(h, k, m); + + data += align; + len -= align; + + /* Handle tail bytes */ + switch (len) { + case 3: 
+ h ^= data[2] << 16; + case 2: + h ^= data[1] << 8; + case 1: + h ^= data[0]; + h *= m; + }; + } else { + switch (len) { + case 3: + d |= data[2] << 16; + case 2: + d |= data[1] << 8; + case 1: + d |= data[0]; + case 0: + h ^= (t >> sr) | (d << sl); + h *= m; + } + } + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } else { + while (len >= 4) { + unsigned int k = *(const unsigned int *)data; + + MIX(h, k, m); + + data += 4; + len -= 4; + } + + /* Handle tail bytes */ + switch (len) { + case 3: + h ^= data[2] << 16; + case 2: + h ^= data[1] << 8; + case 1: + h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } +} + +/* + * Same as MurmurHash2, but endian- and alignment-neutral. + * Half the speed though, alas. + */ +unsigned int rbtdb_murmur2_neutral(TDB_DATA * key) +{ + const unsigned char *data = key->dptr; + int len = (int)key->dsize; + unsigned int h = seed ^ len; + + while (len >= 4) { + unsigned int k; + + k = data[0]; + k |= data[1] << 8; + k |= data[2] << 16; + k |= data[3] << 24; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + switch (len) { + case 3: + h ^= data[2] << 16; + case 2: + h ^= data[1] << 8; + case 1: + h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} diff --git a/ext/tdb/rbtdb.h b/ext/tdb/rbtdb.h new file mode 100644 index 0000000..2ddbcd3 --- /dev/null +++ b/ext/tdb/rbtdb.h @@ -0,0 +1,22 @@ +#ifndef RBTDB_H +#define RBTDB_H +#include <ruby.h> +#include <tdb.h> + +unsigned int rbtdb_murmur1(TDB_DATA *key); +unsigned int rbtdb_murmur1_aligned(TDB_DATA *key); +unsigned int rbtdb_murmur2(TDB_DATA *key); +unsigned int rbtdb_murmur2a(TDB_DATA *key); +unsigned int rbtdb_murmur2_neutral(TDB_DATA *key); +unsigned int rbtdb_murmur2_aligned(TDB_DATA *key); +unsigned int rbtdb_fnv1a(TDB_DATA *key); +unsigned int rbtdb_djb2(TDB_DATA *key); +unsigned int rbtdb_djb3(TDB_DATA *key); +#ifdef HAVE_TDB_JENKINS_HASH +# define 
rbtdb_jenkins_lookup3 tdb_jenkins_hash +#else +unsigned int rbtdb_jenkins_lookup3(TDB_DATA *key); +#endif +#define rbtdb_default 0 + +#endif /* RBTDB_H */ diff --git a/ext/tdb/tdb.c b/ext/tdb/tdb.c new file mode 100644 index 0000000..cfe7970 --- /dev/null +++ b/ext/tdb/tdb.c @@ -0,0 +1,679 @@ +#include "rbtdb.h" +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#ifdef HAVE_RUBY_ST_H +# include <ruby/st.h> +#else +# include <st.h> +#endif + +static VALUE cTDB, cERR; +static st_table *exc_hash; +static VALUE hashes; + +/* must be a macro to prevent GC from killing converted 'val's */ +#define TO_TDB_DATA(data,val) do { \ + StringValue(val); \ + (data).dptr = (unsigned char *)RSTRING_PTR(val); \ + (data).dsize = RSTRING_LEN(val); \ +} while (0) + +static void init_exc(enum TDB_ERROR ecode, const char *name) +{ + VALUE exc = rb_define_class_under(cERR, name, cERR); + st_insert(exc_hash, (st_data_t)ecode, (st_data_t)exc); +} + +static void init_errors(void) +{ + cERR = rb_define_class_under(cTDB, "ERR", rb_eStandardError); + exc_hash = st_init_numtable(); + + init_exc(TDB_ERR_CORRUPT, "CORRUPT"); + init_exc(TDB_ERR_IO, "IO"); + init_exc(TDB_ERR_LOCK, "LOCK"); + init_exc(TDB_ERR_OOM, "OOM"); + init_exc(TDB_ERR_EXISTS, "EXISTS"), + init_exc(TDB_ERR_NOLOCK, "NOLOCK"); + init_exc(TDB_ERR_LOCK_TIMEOUT, "LOCK_TIMEOUT"); + init_exc(TDB_ERR_EINVAL, "EINVAL"); + init_exc(TDB_ERR_NOEXIST, "NOEXIST"); + init_exc(TDB_ERR_RDONLY, "RDONLY"); +#ifdef HAVE_CONST_TDB_ERR_NESTING + init_exc(TDB_ERR_NESTING, "NESTING"); +#endif /* HAVE_CONST_TDB_ERR_NESTING */ +} + +static void my_raise(struct tdb_context *tdb) +{ + enum TDB_ERROR ecode = tdb_error(tdb); + const char *str = tdb_errorstr(tdb); + VALUE exc; + + switch (ecode) { + case TDB_SUCCESS: + rb_bug("attempted to raise with no error"); + case TDB_ERR_CORRUPT: + case TDB_ERR_IO: + case TDB_ERR_LOCK: + case TDB_ERR_OOM: + case TDB_ERR_EXISTS: + case TDB_ERR_NOLOCK: + case TDB_ERR_LOCK_TIMEOUT: + 
case TDB_ERR_EINVAL:
+	case TDB_ERR_NOEXIST:
+	case TDB_ERR_RDONLY:
+#ifdef HAVE_CONST_TDB_ERR_NESTING
+	case TDB_ERR_NESTING:
+#endif /* HAVE_CONST_TDB_ERR_NESTING */
+		if (!st_lookup(exc_hash, (st_data_t)ecode, (st_data_t *)&exc))
+			rb_bug("no-existent exception: %s\n", str);
+		break;
+	default:
+		/*
+		 * an error code this build has no class for: raise the base
+		 * TDB::ERR class rather than an uninitialized "exc"
+		 */
+		exc = cERR;
+	}
+	/* the message is data, never the format string */
+	rb_raise(exc, "%s", str);
+}
+
+/*
+ * Fills the "hashes" Hash: one Symbol key per hash function name, with
+ * the function's address stored as an Integer.  rbtdb_default is
+ * defined as 0 in rbtdb.h, so :default maps to 0.
+ */
+static void init_hashes(void)
+{
+#define HF(x) \
+rb_hash_aset(hashes,ID2SYM(rb_intern(#x)),ULONG2NUM((unsigned long)rbtdb_##x))
+	HF(murmur1);
+	HF(murmur1_aligned);
+	HF(murmur2);
+	HF(murmur2a);
+	HF(murmur2_neutral);
+	HF(murmur2_aligned);
+	HF(fnv1a);
+	HF(djb2);
+	HF(djb3);
+	HF(jenkins_lookup3);
+	HF(default);
+}
+
+#ifndef HAVE_RB_THREAD_BLOCKING_REGION
+/* (very) partial emulation of the 1.9 rb_thread_blocking_region under 1.8 */
+#  include <rubysig.h>
+typedef VALUE rb_blocking_function_t(void *);
+/* calls fn(data) between TRAP_BEG and TRAP_END and returns its result */
+static VALUE my_tbr(rb_blocking_function_t *fn, void *data)
+{
+	VALUE rv;
+
+	TRAP_BEG;
+	rv = fn(data);
+	TRAP_END;
+
+	return rv;
+}
+#else
+/* hands fn(data) to rb_thread_blocking_region and returns its result */
+static VALUE my_tbr(rb_blocking_function_t *fn, void *data)
+{
+	return rb_thread_blocking_region(fn, data, RUBY_UBF_IO, 0);
+}
+#endif /* HAVE_RB_THREAD_BLOCKING_REGION */
+
+/* free callback for Data_Wrap_Struct: closes the tdb if one is attached */
+static void gcfree(void *ptr)
+{
+	struct tdb_context *tdb = ptr;
+
+	if (tdb)
+		(void)tdb_close(tdb);
+}
+
+/* allocator: wraps a NULL pointer; init stores the real one later */
+static VALUE alloc(VALUE klass)
+{
+	return Data_Wrap_Struct(klass, NULL, gcfree, NULL);
+}
+
+/*
+ * Returns the tdb_context wrapped by self.  With a non-zero
+ * check_opened, a NULL pointer raises IOError ("closed database").
+ */
+static struct tdb_context *db(VALUE self, int check_opened)
+{
+	struct tdb_context *tdb;
+
+	Data_Get_Struct(self, struct tdb_context, tdb);
+
+	if (!tdb && check_opened)
+		rb_raise(rb_eIOError, "closed database");
+
+	return tdb;
+}
+
+/* the arguments of tdb_open_ex, bundled for a single void * parameter */
+struct open_args {
+	const char *name;
+	int hash_size;
+	int tdb_flags;
+	int open_flags;
+	mode_t mode;
+	struct tdb_logging_context *log_ctx;
+	tdb_hash_func hash_fn;
+};
+
+/* calls tdb_open_ex; returns the tdb_context pointer cast to VALUE */
+static VALUE nogvl_open(void *ptr)
+{
+	struct open_args *o = ptr;
+	struct tdb_context *tdb;
+
+	tdb = tdb_open_ex(o->name, o->hash_size, o->tdb_flags,
+			  o->open_flags, o->mode, o->log_ctx, o->hash_fn);
+
+	return (VALUE)tdb;
+}
+
+/*
+ * Resets *o to the defaults below, then overrides them from the
+ * optional opts Hash.  Recognized keys are :hash_size, :mode,
+ * :open_flags, :tdb_flags and :hash.  A :hash value that is not a key
+ * of "hashes" raises ArgumentError.
+ */
+static void set_args(struct open_args *o, VALUE opts)
+{
+	VALUE tmp;
+
+	o->name = NULL;
+	o->hash_size = 0; /* default */
+	o->tdb_flags = TDB_DEFAULT;
+	o->open_flags = O_RDWR | O_CREAT;
+	o->mode = 0666;
+	o->log_ctx = NULL;
+	o->hash_fn = NULL;
+
+	if (NIL_P(opts))
+		return;
+	Check_Type(opts, T_HASH);
+
+	tmp = rb_hash_aref(opts, ID2SYM(rb_intern("hash_size")));
+	if (!NIL_P(tmp))
+		o->hash_size = NUM2INT(tmp);
+
+	tmp = rb_hash_aref(opts, ID2SYM(rb_intern("mode")));
+	if (!NIL_P(tmp))
+		o->mode = NUM2UINT(tmp);
+
+	tmp = rb_hash_aref(opts, ID2SYM(rb_intern("open_flags")));
+	if (!NIL_P(tmp))
+		o->open_flags = NUM2INT(tmp);
+
+	tmp = rb_hash_aref(opts, ID2SYM(rb_intern("tdb_flags")));
+	if (!NIL_P(tmp))
+		o->tdb_flags = NUM2INT(tmp);
+
+	tmp = rb_hash_aref(opts, ID2SYM(rb_intern("hash")));
+	if (!NIL_P(tmp)) {
+		VALUE num = rb_hash_aref(hashes, tmp);
+
+		if (NIL_P(num)) {
+			tmp = rb_inspect(tmp);
+			rb_raise(rb_eArgError,
+				 "`%s' is not a valid hash function",
+				 StringValuePtr(tmp));
+		}
+
+		/* "hashes" stores each function's address as an Integer */
+		o->hash_fn = (tdb_hash_func)NUM2ULONG(num);
+	}
+}
+
+/*
+ * initialize(path, opts = nil)
+ *
+ * Opens the database and stores its tdb_context in self.  A nil path
+ * adds TDB_INTERNAL to the flags instead of naming a file.  Raises
+ * RuntimeError if self already holds a context, and a system error if
+ * tdb_open_ex fails.
+ */
+static VALUE init(int argc, VALUE *argv, VALUE self)
+{
+	struct tdb_context *tdb = db(self, 0);
+	VALUE path, opts;
+	struct open_args o;
+
+	if (tdb)
+		rb_raise(rb_eRuntimeError, "TDB already initialized");
+	rb_scan_args(argc, argv, "11", &path, &opts);
+	set_args(&o, opts);
+
+	if (NIL_P(path))
+		o.tdb_flags |= TDB_INTERNAL;
+	else
+		/* a C path: reject embedded NUL bytes up front */
+		o.name = StringValueCStr(path);
+
+	tdb = (struct tdb_context *)my_tbr(nogvl_open, &o);
+	if (!tdb) {
+		switch (errno) {
+		case ENOMEM:
+		case EMFILE:
+		case ENFILE:
+			/* a GC run may release memory or descriptors: retry once */
+			rb_gc();
+			tdb = (struct tdb_context *)my_tbr(nogvl_open, &o);
+		}
+		if (!tdb)
+			rb_sys_fail("tdb_open_ex");
+	}
+	DATA_PTR(self) = tdb;
+
+	return self;
+}
+
+/* tdb_close can do a lot, including cancel transactions and munmap */
+static VALUE nogvl_close(void *ptr)
+{
+	struct tdb_context *tdb = ptr;
+
+	return (VALUE)tdb_close(tdb);
+}
+
+static VALUE tdbclose(VALUE self)
+{
+	struct tdb_context *tdb = db(self, 1);
+
+
DATA_PTR(self) = NULL; + + if ((int)my_tbr(nogvl_close, tdb) == -1) + rb_sys_fail("tdb_close"); + + return Qnil; +} + +static VALUE closed(VALUE self) +{ + struct tdb_context *tdb = db(self, 0); + + return tdb ? Qfalse : Qtrue; +} + +#ifdef HAVE_RB_THREAD_CALL_WITH_GVL +/* missing prototype in ruby.h: */ +void *rb_thread_call_with_gvl(void *(*func)(void *), void *data); +#else +static void * my_rb_thread_call_with_gvl(void *(*func)(void *), void *data) +{ + return (*func)(data); +} +#define rb_thread_call_with_gvl my_rb_thread_call_with_gvl +#endif /* !HAVE_RB_THREAD_CALL_WITH_GVL */ + +/* + * We avoid the extra malloc/free pair enforced by tdb_fetch. We + * use tdb_parse_record to give us pointers to (hopefully) mmap-ed + * regions and create a String object directly off that region. + */ +struct fetch_parse_args { + struct tdb_context *tdb; + union { + TDB_DATA key; + TDB_DATA val; + } as; + VALUE value; +}; + +static VALUE str_new_tdb_data(TDB_DATA *val) +{ + return rb_str_new((const char *)val->dptr, val->dsize); +} + +static void *gvl_str_new(void *data) +{ + struct fetch_parse_args *f = data; + + f->value = str_new_tdb_data(&f->as.val); + + return NULL; +} + +static int fetch_parse(TDB_DATA key, TDB_DATA val, void *data) +{ + struct fetch_parse_args *f = data; + + f->as.val = val; + (void)rb_thread_call_with_gvl(gvl_str_new, data); + + return 0; +} + +static VALUE nogvl_parse_record(void *ptr) +{ + struct fetch_parse_args *f = ptr; + + if (tdb_parse_record(f->tdb, f->as.key, fetch_parse, ptr) == -1) + return Qnil; + + return f->value; +} + +static VALUE fetch(VALUE self, VALUE key) +{ + struct fetch_parse_args f; + + f.tdb = db(self, 1); + TO_TDB_DATA(f.as.key, key); + f.value = Qnil; + + return my_tbr(nogvl_parse_record, &f); +} + +struct store_args { + struct tdb_context *tdb; + TDB_DATA key; + TDB_DATA val; + int flag; +}; + +static VALUE nogvl_store(void *ptr) +{ + struct store_args *s = ptr; + + return (VALUE)tdb_store(s->tdb, s->key, s->val, s->flag); 
+} + +static VALUE rbtdb_store(VALUE self, VALUE key, VALUE val, int flag, int soft) +{ + struct store_args s; + + s.tdb = db(self, 1); + TO_TDB_DATA(s.key, key); + TO_TDB_DATA(s.val, val); + s.flag = flag; + + if ((int)my_tbr(nogvl_store, &s) == -1) { + if (soft) { + int ecode = tdb_error(s.tdb); + + if ((flag == TDB_INSERT) && (ecode == TDB_ERR_EXISTS)) + return Qnil; + if ((flag == TDB_MODIFY) && (ecode == TDB_ERR_NOEXIST)) + return Qnil; + } + my_raise(s.tdb); + } + + return val; +} + +static VALUE store(VALUE self, VALUE key, VALUE val) +{ + return rbtdb_store(self, key, val, 0, 0); +} + +static VALUE insert_bang(VALUE self, VALUE key, VALUE val) +{ + return rbtdb_store(self, key, val, TDB_INSERT, 0); +} + +static VALUE insert(VALUE self, VALUE key, VALUE val) +{ + return rbtdb_store(self, key, val, TDB_INSERT, 1); +} + +static VALUE modify_bang(VALUE self, VALUE key, VALUE val) +{ + return rbtdb_store(self, key, val, TDB_MODIFY, 0); +} + +static VALUE modify(VALUE self, VALUE key, VALUE val) +{ + return rbtdb_store(self, key, val, TDB_MODIFY, 1); +} + +struct exists_args { + struct tdb_context *tdb; + TDB_DATA key; +}; + +static VALUE nogvl_exists(void *ptr) +{ + struct exists_args *e = ptr; + + return tdb_exists(e->tdb, e->key) == 0 ? 
Qfalse : Qtrue; +} + +static VALUE has_key(VALUE self, VALUE key) +{ + struct exists_args e; + + e.tdb = db(self, 1); + TO_TDB_DATA(e.key, key); + + return my_tbr(nogvl_exists, &e); +} + +struct traverse_args { + struct tdb_context *tdb; + TDB_DATA key; + TDB_DATA val; + int state; +}; + +static VALUE protected_yield(VALUE val) +{ + VALUE *kv = (VALUE *)val; + + return rb_yield_values(2, kv[0], kv[1]); +} + +static void *my_yield(void *data) +{ + struct traverse_args *t = data; + VALUE kv[2]; + + kv[0] = str_new_tdb_data(&t->key); + kv[1] = str_new_tdb_data(&t->val); + + rb_protect(protected_yield, (VALUE)kv, &t->state); + + return NULL; +} + +static int +traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val, void *data) +{ + struct traverse_args *t = data; + + t->key = key; + t->val = val; + (void)rb_thread_call_with_gvl(my_yield, t); + + return t->state; +} + +static VALUE nogvl_traverse(void *ptr) +{ + struct traverse_args *t = ptr; + + (void)tdb_traverse(t->tdb, traverse_fn, t); + + return Qfalse; +} + +static VALUE each(VALUE self) +{ + struct traverse_args t; + + t.tdb = db(self, 1); + t.state = 0; + + my_tbr(nogvl_traverse, &t); + if (t.state) + rb_jump_tag(t.state); + return self; +} + +struct delete_args { + struct tdb_context *tdb; + TDB_DATA key; +}; + +static VALUE nogvl_delete(void *ptr) +{ + struct delete_args *d = ptr; + + return tdb_delete(d->tdb, d->key) == 0 ? Qtrue : Qfalse; +} + +static VALUE nuke(VALUE self, VALUE key) +{ + struct delete_args d; + + d.tdb = db(self, 1); + TO_TDB_DATA(d.key, key); + + return my_tbr(nogvl_delete, &d); +} + +static VALUE delete(VALUE self, VALUE key) +{ + VALUE rc = fetch(self, key); + + if (! 
NIL_P(rc)) + if (nuke(self, key) == Qfalse) + return Qnil; + return rc; +} + +static VALUE lockall(VALUE self) +{ + struct tdb_context *tdb = db(self, 1); + if ((int)my_tbr((rb_blocking_function_t *)tdb_lockall, tdb)) + my_raise(tdb); + + return Qtrue; +} + +static VALUE trylockall(VALUE self) +{ + struct tdb_context *tdb = db(self, 1); + void *fn = tdb_lockall_nonblock; + + if ((int)my_tbr((rb_blocking_function_t *)fn, tdb)) { + if (tdb_error(tdb) == TDB_ERR_LOCK) + return Qfalse; + my_raise(tdb); + } + return Qtrue; +} + +static VALUE unlockall(VALUE self) +{ + struct tdb_context *tdb = db(self, 1); + if ((int)my_tbr((rb_blocking_function_t *)tdb_unlockall, tdb)) + my_raise(tdb); + return Qtrue; +} + +static VALUE lockall_read(VALUE self) +{ + struct tdb_context *tdb = db(self, 1); + if ((int)my_tbr((rb_blocking_function_t *)tdb_lockall_read, tdb)) + my_raise(tdb); + return Qtrue; +} + +static VALUE trylockall_read(VALUE self) +{ + struct tdb_context *tdb = db(self, 1); + void *fn = tdb_lockall_read_nonblock; + if ((int)my_tbr((rb_blocking_function_t *)fn, tdb)) { + if (tdb_error(tdb) == TDB_ERR_LOCK) + return Qfalse; + my_raise(tdb); + } + return Qtrue; +} + +static VALUE unlockall_read(VALUE self) +{ + struct tdb_context *tdb = db(self, 1); + if ((int)my_tbr((rb_blocking_function_t *)tdb_unlockall_read, tdb)) + my_raise(tdb); + return Qtrue; +} + +static VALUE lockall_mark(VALUE self) +{ + struct tdb_context *tdb = db(self, 1); + if ((int)my_tbr((rb_blocking_function_t *)tdb_lockall_mark, tdb)) + my_raise(tdb); + return Qtrue; +} + +static VALUE lockall_unmark(VALUE self) +{ + struct tdb_context *tdb = db(self, 1); + if ((int)my_tbr((rb_blocking_function_t *)tdb_lockall_unmark, tdb)) + my_raise(tdb); + return Qtrue; +} + +void Init_tdb_ext(void) +{ + cTDB = rb_define_class("TDB", rb_cObject); + + hashes = rb_hash_new(); + rb_define_const(cTDB, "HASHES", hashes); + + rb_define_alloc_func(cTDB, alloc); + rb_include_module(cTDB, rb_mEnumerable); + + 
rb_define_method(cTDB, "initialize", init, -1); + rb_define_method(cTDB, "close", tdbclose, 0); + rb_define_method(cTDB, "closed?", closed, 0); + + rb_define_method(cTDB, "fetch", fetch, 1); + rb_define_method(cTDB, "[]", fetch, 1); + rb_define_method(cTDB, "store", store, 2); + rb_define_method(cTDB, "[]=", store, 2); + rb_define_method(cTDB, "insert!", insert_bang, 2); + rb_define_method(cTDB, "modify!", modify_bang, 2); + rb_define_method(cTDB, "insert", insert, 2); + rb_define_method(cTDB, "modify", modify, 2); + + rb_define_method(cTDB, "key?", has_key, 1); + rb_define_method(cTDB, "has_key?", has_key, 1); + rb_define_method(cTDB, "include?", has_key, 1); + rb_define_method(cTDB, "member?", has_key, 1); + rb_define_method(cTDB, "each", each, 0); + rb_define_method(cTDB, "nuke!", nuke, 1); + rb_define_method(cTDB, "delete", delete, 1); + + rb_define_method(cTDB, "lockall", lockall, 0); + rb_define_method(cTDB, "trylockall", trylockall, 0); + rb_define_method(cTDB, "unlockall", unlockall, 0); + rb_define_method(cTDB, "lockall_read", lockall_read, 0); + rb_define_method(cTDB, "trylockall_read", trylockall_read, 0); + rb_define_method(cTDB, "unlockall_read", unlockall_read, 0); + rb_define_method(cTDB, "lockall_mark", lockall_mark, 0); + rb_define_method(cTDB, "lockall_unmark", lockall_unmark, 0); + + init_errors(); + init_hashes(); + +#define tdb_CONST(x) rb_define_const(cTDB, #x, UINT2NUM(TDB_##x)) + + /* just a readability place holder */ + tdb_CONST(DEFAULT); + + /* clear database if we are the only one with it open */ + tdb_CONST(CLEAR_IF_FIRST); + + /* don't store on disk, use in-memory database */ + tdb_CONST(INTERNAL); + + /* don't do any locking */ + tdb_CONST(NOLOCK); + + /* don't use mmap */ + tdb_CONST(NOMMAP); + + /* convert endian (internal use) */ + tdb_CONST(CONVERT); + + /* header is big-endian (internal use) */ + tdb_CONST(BIGENDIAN); + + /* don't use synchronous transactions */ + tdb_CONST(NOSYNC); + + /* maintain a sequence number */ + 
tdb_CONST(SEQNUM); + + /* Activate the per-hashchain freelist, default 5 */ + tdb_CONST(VOLATILE); + +#ifdef TDB_ALLOW_NESTING + /* Allow transactions to nest */ + tdb_CONST(ALLOW_NESTING); +#endif + +#ifdef TDB_DISALLOW_NESTING + /* Disallow transactions to nest */ + tdb_CONST(DISALLOW_NESTING); +#endif + +#ifdef TDB_INCOMPATIBLE_HASH + /* Better hashing: can't be opened by tdb < 1.2.6. */ + tdb_CONST(INCOMPATIBLE_HASH); +#endif +} diff --git a/lib/tdb.rb b/lib/tdb.rb new file mode 100644 index 0000000..2a75193 --- /dev/null +++ b/lib/tdb.rb @@ -0,0 +1,2 @@ +# -*- encoding: binary -*- +require 'tdb_ext' diff --git a/setup.rb b/setup.rb new file mode 100644 index 0000000..5eb5006 --- /dev/null +++ b/setup.rb @@ -0,0 +1,1586 @@ +# -*- encoding: binary -*- +# +# setup.rb +# +# Copyright (c) 2000-2005 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# + +unless Enumerable.method_defined?(:map) # Ruby 1.4.6 + module Enumerable + alias map collect + end +end + +unless File.respond_to?(:read) # Ruby 1.6 + def File.read(fname) + open(fname) {|f| + return f.read + } + end +end + +unless Errno.const_defined?(:ENOTEMPTY) # Windows? + module Errno + class ENOTEMPTY + # We do not raise this exception, implementation is not needed. + end + end +end + +def File.binread(fname) + open(fname, 'rb') {|f| + return f.read + } +end + +# for corrupted Windows' stat(2) +def File.dir?(path) + File.directory?((path[-1,1] == '/') ? path : path + '/') +end + + +class ConfigTable + + include Enumerable + + def initialize(rbconfig) + @rbconfig = rbconfig + @items = [] + @table = {} + # options + @install_prefix = nil + @config_opt = nil + @verbose = true + @no_harm = false + end + + attr_accessor :install_prefix + attr_accessor :config_opt + + attr_writer :verbose + + def verbose? + @verbose + end + + attr_writer :no_harm + + def no_harm? 
# Unregister the configuration item called +name+ and return it.
# A missing item is reported by #lookup (setup error).
def remove(name)
  item = lookup(name)
  @items.delete_if {|i| i.name == name }
  # The block parameter must not be called +name+: it would shadow the
  # method argument, turn the test into "key == item.name" and therefore
  # drop every regular entry of the table instead of the requested one.
  # Comparing the item name also removes alias keys that point to the item.
  @table.delete_if {|_key, i| i.name == name }
  item
end
+ newpath_p = ((major >= 2) or + ((major == 1) and + ((minor >= 5) or + ((minor == 4) and (teeny >= 4))))) + + if c['rubylibdir'] + # V > 1.6.3 + libruby = "#{c['prefix']}/lib/ruby" + librubyver = c['rubylibdir'] + librubyverarch = c['archdir'] + siteruby = c['sitedir'] + siterubyver = c['sitelibdir'] + siterubyverarch = c['sitearchdir'] + elsif newpath_p + # 1.4.4 <= V <= 1.6.3 + libruby = "#{c['prefix']}/lib/ruby" + librubyver = "#{c['prefix']}/lib/ruby/#{version}" + librubyverarch = "#{c['prefix']}/lib/ruby/#{version}/#{c['arch']}" + siteruby = c['sitedir'] + siterubyver = "$siteruby/#{version}" + siterubyverarch = "$siterubyver/#{c['arch']}" + else + # V < 1.4.4 + libruby = "#{c['prefix']}/lib/ruby" + librubyver = "#{c['prefix']}/lib/ruby/#{version}" + librubyverarch = "#{c['prefix']}/lib/ruby/#{version}/#{c['arch']}" + siteruby = "#{c['prefix']}/lib/ruby/#{version}/site_ruby" + siterubyver = siteruby + siterubyverarch = "$siterubyver/#{c['arch']}" + end + parameterize = lambda {|path| + path.sub(/\A#{Regexp.quote(c['prefix'])}/, '$prefix') + } + + if arg = c['configure_args'].split.detect {|arg| /--with-make-prog=/ =~ arg } + makeprog = arg.sub(/'/, '').split(/=/, 2)[1] + else + makeprog = 'make' + end + + [ + ExecItem.new('installdirs', 'std/site/home', + 'std: install under libruby; site: install under site_ruby; home: install under $HOME')\ + {|val, table| + case val + when 'std' + table['rbdir'] = '$librubyver' + table['sodir'] = '$librubyverarch' + when 'site' + table['rbdir'] = '$siterubyver' + table['sodir'] = '$siterubyverarch' + when 'home' + setup_rb_error '$HOME was not set' unless ENV['HOME'] + table['prefix'] = ENV['HOME'] + table['rbdir'] = '$libdir/ruby' + table['sodir'] = '$libdir/ruby' + end + }, + PathItem.new('prefix', 'path', c['prefix'], + 'path prefix of target environment'), + PathItem.new('bindir', 'path', parameterize.call(c['bindir']), + 'the directory for commands'), + PathItem.new('libdir', 'path', parameterize.call(c['libdir']), + 
'the directory for libraries'), + PathItem.new('datadir', 'path', parameterize.call(c['datadir']), + 'the directory for shared data'), + PathItem.new('mandir', 'path', parameterize.call(c['mandir']), + 'the directory for man pages'), + PathItem.new('sysconfdir', 'path', parameterize.call(c['sysconfdir']), + 'the directory for system configuration files'), + PathItem.new('localstatedir', 'path', parameterize.call(c['localstatedir']), + 'the directory for local state data'), + PathItem.new('libruby', 'path', libruby, + 'the directory for ruby libraries'), + PathItem.new('librubyver', 'path', librubyver, + 'the directory for standard ruby libraries'), + PathItem.new('librubyverarch', 'path', librubyverarch, + 'the directory for standard ruby extensions'), + PathItem.new('siteruby', 'path', siteruby, + 'the directory for version-independent aux ruby libraries'), + PathItem.new('siterubyver', 'path', siterubyver, + 'the directory for aux ruby libraries'), + PathItem.new('siterubyverarch', 'path', siterubyverarch, + 'the directory for aux ruby binaries'), + PathItem.new('rbdir', 'path', '$siterubyver', + 'the directory for ruby scripts'), + PathItem.new('sodir', 'path', '$siterubyverarch', + 'the directory for ruby extentions'), + PathItem.new('rubypath', 'path', rubypath, + 'the path to set to #! line'), + ProgramItem.new('rubyprog', 'name', rubypath, + 'the ruby program using for installation'), + ProgramItem.new('makeprog', 'name', makeprog, + 'the make program to compile ruby extentions'), + SelectItem.new('shebang', 'all/ruby/never', 'ruby', + 'shebang line (#!) 
# Alternative option spellings mapped to the canonical configuration item
# name.  #fixup registers every alias in the lookup table.
# (The 'bin-dir' entry used to be listed twice, which triggers Ruby's
# duplicated-key warning.)
ALIASES = {
  'std-ruby'         => 'librubyver',
  'stdruby'          => 'librubyver',
  'rubylibdir'       => 'librubyver',
  'archdir'          => 'librubyverarch',
  'site-ruby-common' => 'siteruby',     # For backward compatibility
  'site-ruby'        => 'siterubyver',  # For backward compatibility
  'bin-dir'          => 'bindir',
  'rb-dir'           => 'rbdir',
  'so-dir'           => 'sodir',
  'data-dir'         => 'datadir',
  'ruby-path'        => 'rubypath',
  'ruby-prog'        => 'rubyprog',
  'ruby'             => 'rubyprog',
  'make-prog'        => 'makeprog',
  'make'             => 'makeprog'
}
# Normalize the argument of a boolean option to the string 'yes' or 'no'.
#
# A missing argument (nil) counts as 'yes'.  y/yes/t/true and n/no/f/false
# are accepted case-insensitively; any other value is a setup error.
def check(val)
  return 'yes' unless val
  case val
  when /\Ay(es)?\z/i, /\At(rue)?\z/i then 'yes'
  # "(alse)" was missing its "?", so a bare "f" was rejected although the
  # other one-letter forms (y, t, n) were accepted.
  when /\An(o)?\z/i, /\Af(alse)?\z/i then 'no'
  else
    setup_rb_error "config: --#{@name} accepts only yes/no for argument"
  end
end
# Validate +val+ against the allowed selections and run the item's action
# block with the normalized (stripped, lower-cased) value and +table+.
#
# The option regexp built in #fixup makes "=value" optional, so an option
# given without a value arrives here as nil.  That is reported as a setup
# error, with the same wording Item#check uses, instead of failing with
# NoMethodError on nil.
def evaluate(val, table)
  setup_rb_error "config: --#{@name} requires argument" unless val
  v = val.strip.downcase
  unless @ok.include?(v)
    setup_rb_error "invalid option --#{@name}=#{val} (use #{@template})"
  end
  @action.call v, table
end
# Create +dirname+ together with any missing parent directories.
# When +prefix+ is given the directory is created below it.  The action is
# echoed to stderr in verbose mode and skipped entirely in no-harm mode.
def mkdir_p(dirname, prefix = nil)
  target = prefix ? prefix + File.expand_path(dirname) : dirname
  $stderr.puts "mkdir -p #{target}" if verbose?
  return if no_harm?

  # Does not check '/', it's too abnormal.
  parts = File.expand_path(target).split(%r<(?=/)>)
  if /\A[a-z]:\z/i =~ parts[0]
    # keep a drive letter glued to the first path component
    drive = parts.shift
    parts[0] = drive + parts[0]
  end
  built = ''
  parts.each do |part|
    built += part
    Dir.mkdir built unless File.dir?(built)
  end
end
# Names (not paths) of the regular files directly inside +dir+.
def files_of(dir)
  Dir.entries(dir).select do |entry|
    File.file?("#{dir}/#{entry}")
  end
end
# Helper methods mixed into the objects that evaluate hook scripts.
# The including class has to provide #srcdir_root, #objdir_root and #relpath.
module HookScriptAPI

  # Look up +key+ in @config.
  def get_config(key)
    @config[key]
  end

  alias_method :config, :get_config

  # obsolete: use metaconfig to change configuration
  def set_config(key, val)
    @config[key] = val
  end

  #
  # srcdir/objdir (works only in the package directory)
  #

  # Source directory that corresponds to the current relative path.
  def curr_srcdir
    "#{srcdir_root}/#{relpath}"
  end

  # Object directory that corresponds to the current relative path.
  def curr_objdir
    "#{objdir_root}/#{relpath}"
  end

  # Path of +path+ inside the current source directory.
  def srcfile(path)
    "#{curr_srcdir}/#{path}"
  end

  def srcexist?(path)
    File.exist? srcfile(path)
  end

  def srcdirectory?(path)
    File.dir? srcfile(path)
  end

  def srcfile?(path)
    File.file? srcfile(path)
  end

  # Entry names of +path+ (relative to the current source directory),
  # without '.' and '..'.
  def srcentries(path = '.')
    Dir.entries("#{curr_srcdir}/#{path}") - %w(. ..)
  end

  # Entries of +path+ that are regular files.
  def srcfiles(path = '.')
    srcentries(path).select do |fname|
      File.file?(File.join(curr_srcdir, path, fname))
    end
  end

  # Entries of +path+ that are directories.
  def srcdirectories(path = '.')
    srcentries(path).select do |fname|
      File.dir?(File.join(curr_srcdir, path, fname))
    end
  end

end
# Load the build configuration and return its CONFIG hash.
#
# With "--rbconfig=PATH" on the command line that file is loaded (and the
# option is removed from ARGV); otherwise the running ruby's own rbconfig
# is required.
#
# The table used to be read from ::Config unconditionally, but current
# Ruby only provides RbConfig, so that raised NameError.  ::Config is
# still preferred when it exists (old Ruby, or an old-style rbconfig file
# loaded through --rbconfig), RbConfig is used otherwise.
def self.load_rbconfig
  if opt = ARGV.detect {|a| /\A--rbconfig=/ =~ a }
    ARGV.delete(opt)
    load File.expand_path(opt.split(/=/, 2)[1])
    # keep a later "require 'rbconfig'" from overriding the loaded file
    $".push 'rbconfig.rb'
  else
    require 'rbconfig'
  end
  defined?(::Config::CONFIG) ? ::Config::CONFIG : ::RbConfig::CONFIG
end
+ end + + # + # Option Parsing + # + + def parsearg_global + while arg = ARGV.shift + case arg + when /\A\w+\z/ + setup_rb_error "invalid task: #{arg}" unless valid_task?(arg) + return arg + when '-q', '--quiet' + @config.verbose = false + when '--verbose' + @config.verbose = true + when '--help' + print_usage $stdout + exit 0 + when '--version' + puts "#{File.basename($0)} version #{Version}" + exit 0 + when '--copyright' + puts Copyright + exit 0 + else + setup_rb_error "unknown global option '#{arg}'" + end + end + nil + end + + def valid_task?(t) + valid_task_re() =~ t + end + + def valid_task_re + @valid_task_re ||= /\A(?:#{TASKS.map {|task,desc| task }.join('|')})\z/ + end + + def parsearg_no_options + unless ARGV.empty? + task = caller(0).first.slice(%r<`parsearg_(\w+)'>, 1) + setup_rb_error "#{task}: unknown options: #{ARGV.join(' ')}" + end + end + + alias parsearg_show parsearg_no_options + alias parsearg_setup parsearg_no_options + alias parsearg_test parsearg_no_options + alias parsearg_clean parsearg_no_options + alias parsearg_distclean parsearg_no_options + + def parsearg_config + evalopt = [] + set = [] + @config.config_opt = [] + while i = ARGV.shift + if /\A--?\z/ =~ i + @config.config_opt = ARGV.dup + break + end + name, value = *@config.parse_opt(i) + if @config.value_config?(name) + @config[name] = value + else + evalopt.push [name, value] + end + set.push name + end + evalopt.each do |name, value| + @config.lookup(name).evaluate value, @config + end + # Check if configuration is valid + set.each do |n| + @config[n] if @config.value_config?(n) + end + end + + def parsearg_install + @config.no_harm = false + @config.install_prefix = '' + while a = ARGV.shift + case a + when '--no-harm' + @config.no_harm = true + when /\A--prefix=/ + path = a.split(/=/, 2)[1] + path = File.expand_path(path) unless path[0,1] == '/' + @config.install_prefix = path + else + setup_rb_error "install: unknown option #{a}" + end + end + end + + def print_usage(out) + 
out.puts 'Typical Installation Procedure:' + out.puts " $ ruby #{File.basename $0} config" + out.puts " $ ruby #{File.basename $0} setup" + out.puts " # ruby #{File.basename $0} install (may require root privilege)" + out.puts + out.puts 'Detailed Usage:' + out.puts " ruby #{File.basename $0} <global option>" + out.puts " ruby #{File.basename $0} [<global options>] <task> [<task options>]" + + fmt = " %-24s %s\n" + out.puts + out.puts 'Global options:' + out.printf fmt, '-q,--quiet', 'suppress message outputs' + out.printf fmt, ' --verbose', 'output messages verbosely' + out.printf fmt, ' --help', 'print this message' + out.printf fmt, ' --version', 'print version and quit' + out.printf fmt, ' --copyright', 'print copyright and quit' + out.puts + out.puts 'Tasks:' + TASKS.each do |name, desc| + out.printf fmt, name, desc + end + + fmt = " %-24s %s [%s]\n" + out.puts + out.puts 'Options for CONFIG or ALL:' + @config.each do |item| + out.printf fmt, item.help_opt, item.description, item.help_default + end + out.printf fmt, '--rbconfig=path', 'rbconfig.rb to load',"running ruby's" + out.puts + out.puts 'Options for INSTALL:' + out.printf fmt, '--no-harm', 'only display what to do if given', 'off' + out.printf fmt, '--prefix=path', 'install path prefix', '' + out.puts + end + + # + # Task Handlers + # + + def exec_config + @installer.exec_config + @config.save # must be final + end + + def exec_setup + @installer.exec_setup + end + + def exec_install + @installer.exec_install + end + + def exec_test + @installer.exec_test + end + + def exec_show + @config.each do |i| + printf "%-20s %s\n", i.name, i.value if i.value? 
+ end + end + + def exec_clean + @installer.exec_clean + end + + def exec_distclean + @installer.exec_distclean + end + +end # class ToplevelInstaller + + +class ToplevelInstallerMulti < ToplevelInstaller + + include FileOperations + + def initialize(ardir_root, config) + super + @packages = directories_of("#{@ardir}/packages") + raise 'no package exists' if @packages.empty? + @root_installer = Installer.new(@config, @ardir, File.expand_path('.')) + end + + def run_metaconfigs + @config.load_script "#{@ardir}/metaconfig", self + @packages.each do |name| + @config.load_script "#{@ardir}/packages/#{name}/metaconfig" + end + end + + attr_reader :packages + + def packages=(list) + raise 'package list is empty' if list.empty? + list.each do |name| + raise "directory packages/#{name} does not exist"\ + unless File.dir?("#{@ardir}/packages/#{name}") + end + @packages = list + end + + def init_installers + @installers = {} + @packages.each do |pack| + @installers[pack] = Installer.new(@config, + "#{@ardir}/packages/#{pack}", + "packages/#{pack}") + end + with = extract_selection(config('with')) + without = extract_selection(config('without')) + @selected = @installers.keys.select {|name| + (with.empty? 
# Yield the installer of every selected package, with the working
# directory switched to that package's object directory (packages/NAME,
# created on demand).
#
# The starting directory is remembered and restored in an ensure clause.
# The previous relative "../.." return was skipped whenever the block
# raised and went to the wrong place if packages/NAME was a symlink.
def each_selected_installers
  Dir.mkdir 'packages' unless File.dir?('packages')
  @selected.each do |pack|
    $stderr.puts "Processing the package `#{pack}' ..." if verbose?
    Dir.mkdir "packages/#{pack}" unless File.dir?("packages/#{pack}")
    topdir = Dir.pwd
    Dir.chdir "packages/#{pack}"
    begin
      yield @installers[pack]
    ensure
      Dir.chdir topdir
    end
  end
end
# Run the given block with verbose output switched off and put the
# previous verbosity back afterwards, even when the block raises.
# Returns the value of the block.
def verbose_off
  previous = @config.verbose?
  @config.verbose = false
  yield
ensure
  @config.verbose = previous
end
# The "#!" line of a script: the interpreter command plus its arguments.
class Shebang
  # Read the first line of the file at +path+ and parse it.
  # Returns nil when the file is empty or does not start with "#!".
  #
  # The file is opened in binary mode, as update_shebang_line does when it
  # rewrites the script.  In text mode a first line holding bytes that are
  # invalid in the default external encoding made the regexp match raise
  # ArgumentError.
  def Shebang.load(path)
    line = nil
    File.open(path, 'rb') {|f|
      line = f.gets
    }
    return nil unless /\A#!/ =~ line
    parse(line)
  end

  # Build a Shebang from the text of a "#!" line; the first word is the
  # command, the remaining words are its arguments.
  def Shebang.parse(line)
    cmd, *args = *line.strip.sub(/\A\#!/, '').split(' ')
    new(cmd, args)
  end

  def initialize(cmd, args = [])
    @cmd = cmd
    @args = args
  end

  attr_reader :cmd
  attr_reader :args

  # The line as it is written back to a script, without a newline.
  def to_s
    "#! #{@cmd}" + (@args.empty? ? '' : " #{@args.join(' ')}")
  end
end
# File names that are hook scripts rather than installable files
# (pre-TASK, post-TASK and their ".rb" variants).  #targetfiles subtracts
# this list from the files found in a directory.
#
# "distclean" is included because exec_distclean traverses the tree with
# that task name and so runs pre-/post-distclean hooks in each directory;
# without it those hook files counted as ordinary target files.
def hookfiles
  %w( pre-%s post-%s pre-%s.rb post-%s.rb ).map {|fmt|
    %w( config setup install clean distclean ).map {|t| sprintf(fmt, t) }
  }.flatten
end
# Run +task+ over every file-type directory listed in FILETYPES, framed
# by the pre-/post-TASK hooks.  The 'ext' directory is left out when the
# 'without-ext' option is 'yes'.
def exec_task_traverse(task)
  run_hook "pre-#{task}"
  FILETYPES.each do |kind|
    if kind == 'ext' && config('without-ext') == 'yes'
      $stderr.puts 'skipping ext/* by user option' if verbose?
    else
      traverse task, kind, "#{task}_dir_#{kind}"
    end
  end
  run_hook "post-#{task}"
end
# Gem specification for tdb.
# Requires VERSION in the environment and the generated .manifest file,
# plus README and .document, in the current directory.
ENV["VERSION"] or abort "VERSION= must be specified"

# String#chomp! returns nil when there is nothing to remove, so mapping
# with it put nil into the list for a final line without a newline.
manifest = File.readlines('.manifest').map { |x| x.chomp }
# first README line with its leading "= name - " heading markup removed
summary = File.readlines("README")[0].gsub(/\A=\s+\S+[^\w]+/, '').strip
# second paragraph of the README
description = File.read("README").split(/\n\n/)[1].strip

Gem::Specification.new do |s|
  s.name = %q{tdb}
  s.version = ENV["VERSION"]

  s.homepage = 'http://bogomips.org/ruby-tdb/'
  s.authors = ["Ruby tdb hackers"]
  s.date = Time.now.utc.strftime('%Y-%m-%d')
  s.description = description
  s.email = %q{ruby.tdb@librelist.org}

  # Every .document entry contributes itself when it is a file, or all
  # manifest entries below it when it is a directory; entries that do not
  # exist are dropped.
  s.extra_rdoc_files = File.readlines('.document').map do |x|
    x = x.chomp
    if File.directory?(x)
      # the name is escaped so it only ever matches literally
      manifest.grep(%r{\A#{Regexp.escape(x)}/})
    elsif File.file?(x)
      x
    else
      nil
    end
  end.flatten.compact

  s.files = manifest
  s.rdoc_options = [ "-t", summary ]
  s.require_paths = %w(lib ext)
  s.rubyforge_project = %q{tdb}
  s.summary = summary
  s.test_files = Dir['test/test_*.rb']
  s.extensions = %w(ext/tdb/extconf.rb)

  # s.license = %w(LGPL) # disabled for compatibility with older RubyGems
end
+ end + + def test_each_bigger + @tdb = TDB.new(nil) + @tdb["a"] = "A" + @tdb["b"] = "B" + @tdb["c"] = "C" + + tmp = [] + rc = @tdb.each { |k,v| tmp << [k, v ] } + assert_equal 3, tmp.size + assert_equal @tdb.object_id, rc.object_id + + tmp = [] + assert_raises(EOFError) { + @tdb.each { |k,v| + tmp << [ k, v ] + raise EOFError, "FOO" + } + } + assert_equal 1, tmp.size + + tmp = [] + rc = catch(:zzz) { + @tdb.each { |k,v| + tmp << [ k, v ] + throw(:zzz, "FOO") + } + } + assert_equal rc, "FOO" + assert_equal 1, tmp.size + end + + def test_memory + assert_nothing_raised do + @tdb = TDB.new(nil) + end + assert ! @tdb.closed? + assert_nil @tdb.close + assert @tdb.closed? + assert_raises(IOError) { @tdb.close } + end + + def test_delete + @tdb = TDB.new(nil) + @tdb["hello"] = "X" + assert_equal "X", @tdb.delete("hello") + assert_nil @tdb["hello"] + assert_nil @tdb.fetch("hello") + assert_nil @tdb.delete("hello") + @tdb["hello"] = "world" + assert_equal "world", @tdb.delete("hello") + assert_nil @tdb.delete("hello") + end + + def test_nuke! + @tdb = TDB.new(nil) + assert_equal false, @tdb.nuke!("hello") + @tdb["hello"] = "world" + assert_equal true, @tdb.nuke!("hello") + assert ! @tdb.include?("hello") + assert_equal false, @tdb.nuke!("hello") + end + + def test_exists? 
+ @tdb = TDB.new(nil) + assert_equal false, @tdb.key?("hello") + assert_equal false, @tdb.include?("hello") + @tdb["hello"] = "world" + assert_equal true, @tdb.key?("hello") + end + + def test_store_fetch_mem + @tdb = TDB.new(nil) + assert_nothing_raised { @tdb["hello"] = "world" } + assert_equal "world", @tdb["hello"] + @tdb.store("hello", "Z") + assert_equal "Z", @tdb["hello"] + assert_equal "Z", @tdb.fetch("hello") + end + + def test_store_modify_mem + @tdb = TDB.new(nil) + assert_nothing_raised { @tdb["hello"] = "world" } + assert_equal "world", @tdb["hello"] + assert_equal "Z", @tdb.modify("hello", "Z") + assert_equal "Z", @tdb["hello"] + + assert_nil @tdb.modify("none", "Z") + assert_raises(TDB::ERR::NOEXIST) { @tdb.modify!("none", "Z") } + end + + def test_store_insert_mem + @tdb = TDB.new(nil) + assert_equal "world", @tdb.insert("hello", "world") + assert_equal "world", @tdb["hello"] + assert_nil @tdb.insert("hello", "Z") + assert_raises(TDB::ERR::EXISTS) { @tdb.insert!("hello", "Z") } + assert_equal "world", @tdb["hello"] + end + + def test_gc + assert_nothing_raised do + 100000.times { TDB.new(nil) } + 100000.times { TDB.new(Tempfile.new('tdb').path) } + end + end if ENV["TEST_GC"] + + def test_new_with_hash_size + assert_nothing_raised { TDB.new(nil, :hash_size => 6) } + assert_raises(TypeError) { TDB.new(nil, :hash_size => "6") } + end + + def test_const + assert_equal 0, TDB::DEFAULT + assert_equal 1, TDB::CLEAR_IF_FIRST + end + + def test_new_with_open_flags + @tmp = Tempfile.new('tdb_excl') + assert_raises(Errno::EEXIST) { + TDB.new(@tmp.path, :open_flags => IO::EXCL|IO::CREAT|IO::RDWR) + } + File.unlink(@tmp.path) + assert_nothing_raised { + @tdb = TDB.new(@tmp.path, :open_flags => IO::EXCL|IO::CREAT|IO::RDWR) + } + end + + def test_open_with_tdb_flags + assert_nothing_raised do + @tdb = TDB.new("/non/existent/file", :tdb_flags => TDB::INTERNAL) + end + end + + def test_alternate_hashes + results = {} + expect = TDB::HASHES.to_a.map { |k,v| [ 
k.to_s, v.to_s ] }.sort + %w(default jenkins_lookup3 djb2 djb3 fnv1a + murmur1 murmur1_aligned murmur2 murmur2a murmur2_aligned).each do |h| + assert_nothing_raised do + tdb = TDB.new(nil, :hash => h.to_sym) + TDB::HASHES.each do |k,v| + tdb[k.to_s] = v.to_s + end + assert_equal expect, tdb.to_a.sort + assert_nil tdb.close + end + assert_raises(ArgumentError) do + TDB.new(nil, :hash => h) + end + end + end + + def test_lock_unlock_all + @tmp = Tempfile.new('tdb') + File.unlink(@tmp.path) + @tdb = TDB.new(@tmp.path) + assert_equal true, @tdb.lockall + assert_equal true, @tdb.unlockall + assert_raises(TDB::ERR::LOCK) { @tdb.unlockall } + end + + def test_read_locks + @tmp = Tempfile.new('tdb') + File.unlink(@tmp.path) + @tdb = TDB.new(@tmp.path) + assert_equal true, @tdb.lockall_read + assert_equal true, @tdb.unlockall_read + assert_raises(TDB::ERR::LOCK) { @tdb.unlockall_read } + assert_equal true, @tdb.trylockall_read + assert_equal true, @tdb.unlockall_read + assert_raises(TDB::ERR::LOCK) { @tdb.unlockall_read } + end + + def test_mark_locks + @tmp = Tempfile.new('tdb') + File.unlink(@tmp.path) + @tdb = TDB.new(@tmp.path) + assert_equal true, @tdb.lockall_mark + assert_equal true, @tdb.lockall_unmark + assert_raises(TDB::ERR::LOCK) { @tdb.lockall_unmark } + end + + def test_trylockall + @tmp = Tempfile.new('tdb') + File.unlink(@tmp.path) + @tdb = TDB.new(@tmp.path) + ard, awr = IO.pipe + brd, bwr = IO.pipe + pid = fork do + @tdb.close + ard.close + bwr.close + tdb = TDB.new(@tmp.path) + assert_equal true, tdb.lockall + awr.close + brd.read + end + awr.close + brd.close + assert_equal "", ard.read + assert_equal false, @tdb.trylockall + bwr.close + assert Process.waitpid2(pid)[1].success? + assert_equal true, @tdb.trylockall + end +end |