unicorn Ruby/Rack server user+dev discussion/patches/pulls/bugs/help
 help / Atom feed
* [RFC] deduplicate strings VM-wide in Ruby 2.5+
@ 2018-12-06 23:44 Eric Wong
  2018-12-07  4:56 ` Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2018-12-06 23:44 UTC (permalink / raw)
  To: unicorn-public

String#-@ deduplicates strings starting with Ruby 2.5.0.
Hash#[]= deduplicates strings starting with Ruby 2.6.0-rc1.

This allows us to save a small amount of memory by sharing
objects with other parts of the stack (e.g. Rack).
---
   RFC because I've only lightly tested this, and only with Ruby 2.6.0-rc1.
   Will get around to more testing later (delayed by more hardware
   problems, trying new SATA cables...)

 ext/unicorn_http/common_field_optimization.h | 26 ++++++++++++++++---
 ext/unicorn_http/extconf.rb                  | 27 ++++++++++++++++++++
 test/unit/test_http_parser.rb                | 16 ++++++++++++
 3 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/ext/unicorn_http/common_field_optimization.h b/ext/unicorn_http/common_field_optimization.h
index 251e734..4b9f062 100644
--- a/ext/unicorn_http/common_field_optimization.h
+++ b/ext/unicorn_http/common_field_optimization.h
@@ -58,6 +58,23 @@ static struct common_field common_http_fields[] = {
 
 #define HTTP_PREFIX "HTTP_"
 #define HTTP_PREFIX_LEN (sizeof(HTTP_PREFIX) - 1)
+static ID id_uminus;
+
+/* this dedupes under Ruby 2.5+ (December 2017) */
+static VALUE str_dd_freeze(VALUE str)
+{
+  if (STR_UMINUS_DEDUPE)
+    return rb_funcall(str, id_uminus, 0);
+
+  /* freeze, since it speeds up older MRI slightly */
+  OBJ_FREEZE(str);
+  return str;
+}
+
+static VALUE str_new_dd_freeze(const char *ptr, long len)
+{
+  return str_dd_freeze(rb_str_new(ptr, len));
+}
 
 /* this function is not performance-critical, called only at load time */
 static void init_common_fields(VALUE mark_ary)
@@ -65,18 +82,19 @@ static void init_common_fields(VALUE mark_ary)
   int i;
   struct common_field *cf = common_http_fields;
   char tmp[64];
+
+  id_uminus = rb_intern("-@");
   memcpy(tmp, HTTP_PREFIX, HTTP_PREFIX_LEN);
 
   for(i = ARRAY_SIZE(common_http_fields); --i >= 0; cf++) {
     /* Rack doesn't like certain headers prefixed with "HTTP_" */
     if (!strcmp("CONTENT_LENGTH", cf->name) ||
         !strcmp("CONTENT_TYPE", cf->name)) {
-      cf->value = rb_str_new(cf->name, cf->len);
+      cf->value = str_new_dd_freeze(cf->name, cf->len);
     } else {
       memcpy(tmp + HTTP_PREFIX_LEN, cf->name, cf->len + 1);
-      cf->value = rb_str_new(tmp, HTTP_PREFIX_LEN + cf->len);
+      cf->value = str_new_dd_freeze(tmp, HTTP_PREFIX_LEN + cf->len);
     }
-    cf->value = rb_obj_freeze(cf->value);
     rb_ary_push(mark_ary, cf->value);
   }
 }
@@ -105,7 +123,7 @@ static VALUE uncommon_field(const char *field, size_t flen)
   memcpy(RSTRING_PTR(f) + HTTP_PREFIX_LEN, field, flen);
   assert(*(RSTRING_PTR(f) + RSTRING_LEN(f)) == '\0' &&
          "string didn't end with \\0"); /* paranoia */
-  return rb_obj_freeze(f);
+  return HASH_ASET_DEDUPE ? f : str_dd_freeze(f);
 }
 
 #endif /* common_field_optimization_h */
diff --git a/ext/unicorn_http/extconf.rb b/ext/unicorn_http/extconf.rb
index 2fc60fe..5b7a8ca 100644
--- a/ext/unicorn_http/extconf.rb
+++ b/ext/unicorn_http/extconf.rb
@@ -8,4 +8,31 @@
 have_func("rb_hash_clear", "ruby.h") # Ruby 2.0+
 have_func("gmtime_r", "time.h")
 
+message('checking if String#-@ (str_uminus) dedupes... ')
+begin
+  a = -(%w(t e s t).join)
+  b = -(%w(t e s t).join)
+  if a.equal?(b)
+    $CPPFLAGS += ' -DSTR_UMINUS_DEDUPE=1 '
+    message("yes\n")
+  else
+    $CPPFLAGS += ' -DSTR_UMINUS_DEDUPE=0 '
+    message("no, needs Ruby 2.5+\n")
+  end
+rescue NoMethodError
+  $CPPFLAGS += ' -DSTR_UMINUS_DEDUPE=0 '
+  message("no, String#-@ not available\n")
+end
+
+message('checking if Hash#[]= (rb_hash_aset) dedupes... ')
+h = {}
+h[%w(m k m f).join('')] = :foo
+if 'mkmf'.freeze.equal?(h.keys[0])
+  $CPPFLAGS += ' -DHASH_ASET_DEDUPE=1 '
+  message("yes\n")
+else
+  $CPPFLAGS += ' -DHASH_ASET_DEDUPE=0 '
+  message("no, needs Ruby 2.6+\n")
+end
+
 create_makefile("unicorn_http")
diff --git a/test/unit/test_http_parser.rb b/test/unit/test_http_parser.rb
index 31e6f71..697af44 100644
--- a/test/unit/test_http_parser.rb
+++ b/test/unit/test_http_parser.rb
@@ -865,4 +865,20 @@ def test_memsize
   rescue LoadError
     # not all Ruby implementations have objspace
   end
+
+  def test_dedupe
+    parser = HttpParser.new
+    # n.b. String#freeze optimization doesn't work under modern test-unit
+    exp = -'HTTP_HOST'
+    get = "GET / HTTP/1.1\r\nHost: example.com\r\nHavpbea-fhpxf: true\r\n\r\n"
+    assert parser.add_parse(get)
+    key = parser.env.keys.detect { |k| k == exp }
+    assert_same exp, key
+
+    if RUBY_VERSION.to_r >= 2.6 # 2.6.0-rc1+
+      exp = -'HTTP_HAVPBEA_FHPXF'
+      key = parser.env.keys.detect { |k| k == exp }
+      assert_same exp, key
+    end
+  end if RUBY_VERSION.to_r >= 2.5 && RUBY_ENGINE == 'ruby'
 end

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [RFC] deduplicate strings VM-wide in Ruby 2.5+
  2018-12-06 23:44 [RFC] deduplicate strings VM-wide in Ruby 2.5+ Eric Wong
@ 2018-12-07  4:56 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2018-12-07  4:56 UTC (permalink / raw)
  To: unicorn-public

Since 'mkmf' is already in the fstring table (because that string
exists in the source), I'll at least have to squash this on top for
Ruby <=2.5:

diff --git a/ext/unicorn_http/extconf.rb b/ext/unicorn_http/extconf.rb
index 5b7a8ca..d5f81fb 100644
--- a/ext/unicorn_http/extconf.rb
+++ b/ext/unicorn_http/extconf.rb
@@ -26,8 +26,11 @@
 
 message('checking if Hash#[]= (rb_hash_aset) dedupes... ')
 h = {}
-h[%w(m k m f).join('')] = :foo
-if 'mkmf'.freeze.equal?(h.keys[0])
+x = {}
+r = rand.to_s
+h[%W(#{r}).join('')] = :foo
+x[%W(#{r}).join('')] = :foo
+if x.keys[0].equal?(h.keys[0])
   $CPPFLAGS += ' -DHASH_ASET_DEDUPE=1 '
   message("yes\n")
 else

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-06 23:44 [RFC] deduplicate strings VM-wide in Ruby 2.5+ Eric Wong
2018-12-07  4:56 ` Eric Wong

unicorn Ruby/Rack server user+dev discussion/patches/pulls/bugs/help

Archives are clonable:
	git clone --mirror https://bogomips.org/unicorn-public
	git clone --mirror http://ou63pmih66umazou.onion/unicorn-public

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.lang.ruby.unicorn
	nntp://ou63pmih66umazou.onion/inbox.comp.lang.ruby.unicorn

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox