about summary refs log tree commit
diff options
context:
space:
mode:
authorEric Wong <normalperson@yhbt.net>2010-04-22 12:19:57 -0700
committerEric Wong <normalperson@yhbt.net>2010-04-22 12:19:57 -0700
commit7b4e67b990e0b92b0843223f3f6ab2269cca668d (patch)
tree2aa0cbc8b9c2a65aa92f1601102df3549f6d3ac4
parent8e09f77a7aee616096060d321798732221e69c09 (diff)
downloadrpatricia-7b4e67b990e0b92b0843223f3f6ab2269cca668d.tar.gz
rely on garbage collector to manage user-supplied node data
This prevents segfaults from double free() calls when using
Patricia#destroy on node data or the same object twice.
Patricia#destroy and Patricia#clear are now no-ops for backwards
compatibility.

One (hopefully welcome) side effect of this change is that
arbitrary Ruby objects may be attached to nodes, not just
strings.  String objects will continue to alway be duplicated
when returned to the user for backwards compatibility, but
arbitrary Ruby objects may be attached to each node.  This
feature can now be used to track node usage data directly in the
Patricia object without the need for a separate lookup table.

The only downside of this change is that the GC mark phase
takes a tiny bit longer due to the presence of more Ruby
objects.

There are also additional tests for the new behavior, including
an expensive test to force GC pressure and ensure there are no
memory leaks.
-rw-r--r--ext/rpatricia/rpatricia.c77
-rw-r--r--test/test_gc.rb39
-rw-r--r--test/test_match_data.rb46
3 files changed, 146 insertions, 16 deletions
diff --git a/ext/rpatricia/rpatricia.c b/ext/rpatricia/rpatricia.c
index ec8c6a3..0fc1002 100644
--- a/ext/rpatricia/rpatricia.c
+++ b/ext/rpatricia/rpatricia.c
@@ -12,20 +12,28 @@ static VALUE cPatricia;
 
 static void dummy(void) {}
 
+/*
+ * this method only exists for backwards compatibility, we now rely
+ * on the GC to do all the dirty work of freeing node data for us
+ */
 static VALUE
 p_destroy (VALUE self)
 {
-  patricia_tree_t *tree;
-  Data_Get_Struct(self, patricia_tree_t, tree);
-  Destroy_Patricia(tree, free);
   return Qtrue;
 }
 
+static void
+p_node_mark (void *ptr)
+{
+  patricia_node_t *node = ptr;
+
+  rb_gc_mark((VALUE)node->data);
+}
+
 static VALUE
 p_add (int argc, VALUE *argv, VALUE self)
 {
-  int str_len;
-  char *user_data;
+  VALUE user_data;
   patricia_tree_t *tree;
   patricia_node_t *node;
   prefix_t *prefix;
@@ -39,15 +47,18 @@ p_add (int argc, VALUE *argv, VALUE self)
   Deref_Prefix(prefix);
 
   if (argc == 2) {
-    user_data = STR2CSTR(argv[1]);
-    str_len = strlen(user_data);
-    node->data = (char *) malloc((str_len + 1) * sizeof(char));
-    sprintf((char *)node->data, user_data);
+    user_data = argv[1];
+
+    /* for backwards compatibility, we always dup and return new strings */
+    if (TYPE(user_data) == T_STRING)
+      user_data = rb_obj_dup(user_data);
   } else {
-    node->data = (char *) malloc(sizeof(char));
-    sprintf((char *)node->data, "");
+    user_data = rb_str_new(NULL, 0);
   }
-  return Data_Wrap_Struct(cPatricia, 0, 0, node);
+  PATRICIA_DATA_SET(node, user_data);
+
+  /* node will be freed when parent is freed */
+  return Data_Wrap_Struct(cPatricia, p_node_mark, 0, node);
 }
 
 static VALUE
@@ -85,7 +96,7 @@ p_match (VALUE self, VALUE r_key)
   Deref_Prefix (prefix);
 
   if (node)
-    return Data_Wrap_Struct(cPatricia, 0, 0, node);
+    return Data_Wrap_Struct(cPatricia, p_node_mark, 0, node);
   else
     return Qfalse;
 
@@ -104,7 +115,7 @@ p_match_exact (VALUE self, VALUE r_key)
   Deref_Prefix (prefix);
 
   if (node)
-    return Data_Wrap_Struct(cPatricia, 0, 0, node);
+    return Data_Wrap_Struct(cPatricia, p_node_mark, 0, node);
   else
     return Qfalse;
 }
@@ -140,9 +151,17 @@ p_print_nodes (VALUE self)
 static VALUE
 p_data (VALUE self)
 {
+  VALUE user_data;
   patricia_node_t *node;
   Data_Get_Struct(self, patricia_node_t, node);
-  return rb_str_new2((char *)node->data);
+
+  user_data = (VALUE)node->data;
+
+  /* for backwards compatibility, we always dup and return new strings */
+  if (TYPE(user_data) == T_STRING)
+    user_data = rb_obj_dup(user_data);
+
+  return user_data;
 }
 
 static VALUE
@@ -173,12 +192,38 @@ p_prefixlen (VALUE self)
   return INT2NUM(node->prefix->bitlen);
 }
 
+/* called during GC for each node->data attached to a Patricia tree */
+static void
+p_tree_mark_each(prefix_t *prefix, void *data)
+{
+  VALUE user_data = (VALUE)data;
+
+  rb_gc_mark(user_data);
+}
+
+static void
+p_tree_mark (void *ptr)
+{
+  patricia_tree_t *tree = ptr;
+
+  patricia_process(tree, p_tree_mark_each);
+}
+
+static void
+p_tree_free (void *ptr)
+{
+  patricia_tree_t *tree = ptr;
+
+  /* no need to explicitly free each node->data, GC will do it for us */
+  Destroy_Patricia(tree, NULL);
+}
+
 static VALUE
 p_new (VALUE self)
 {
   patricia_tree_t *tree;
   tree = New_Patricia(32); /* assuming only IPv4 */
-  return Data_Wrap_Struct(cPatricia, 0, 0, tree);
+  return Data_Wrap_Struct(cPatricia, p_tree_mark, p_tree_free, tree);
 }
 
 void
diff --git a/test/test_gc.rb b/test/test_gc.rb
new file mode 100644
index 0000000..d567696
--- /dev/null
+++ b/test/test_gc.rb
@@ -0,0 +1,39 @@
+require 'test/unit'
+require 'rpatricia'
+
+# this takes a while, watch memory usage not grow in top(1) or similar
+
+class TestGc < Test::Unit::TestCase
+
+  def setup
+    @arrays = Patricia.new
+    @arrays.add('10.0.0.0/8', [])
+    @arrays.add('127.0.0.0/24', [])
+
+    @strings = Patricia.new
+    @strings.add('10.0.0.0/8', "big lan")
+    @strings.add('127.0.0.0/24', "localhost")
+  end
+
+  def test_gc
+    assert_nothing_raised do
+      10_000_000.times do
+        t = Patricia.new
+        t.add('10.0.0.0/8', {})
+        t.add('127.0.0.0/24', "home sweet home")
+      end
+    end
+
+    # ensure what we created originally didn't get GC-ed'
+    100.times do
+      assert_equal [], @arrays.match_best('127.0.0.1').data
+      assert_equal "localhost", @strings.match_best('127.0.0.1').data
+    end
+  end
+
+  def test_destroy
+    assert @strings.destroy
+    assert @strings.destroy
+  end
+
+end
diff --git a/test/test_match_data.rb b/test/test_match_data.rb
new file mode 100644
index 0000000..e85a015
--- /dev/null
+++ b/test/test_match_data.rb
@@ -0,0 +1,46 @@
+require 'test/unit'
+require 'rpatricia'
+
+class TestMatchData < Test::Unit::TestCase
+
+  def test_match_data_non_string
+    ary = []
+    t = Patricia.new
+    t.add('10.0.0.0/8', ary)
+
+    z = t.match_best '10.1.1.1'
+    assert_equal ary.object_id, z.data.object_id
+    z.data << '10.1.1.1'
+
+    t.match_best('10.2.2.2').data.include?('10.1.1.1')
+  end
+
+  # backwards compatibility, we always return a new string
+  def test_match_data_string
+    data = "hello"
+    t = Patricia.new
+    t.add('10.0.0.0/8', data)
+
+    z = t.match_best '10.1.1.1'
+    assert_equal "hello", z.data
+    assert(data.object_id != z.data.object_id)
+
+    y = t.match_best '10.1.1.1'
+    assert_equal "hello", y.data
+    assert(y.data.object_id != z.data.object_id)
+  end
+
+  def test_match_data_nothing
+    t = Patricia.new
+    t.add('10.0.0.0/8')
+
+    z = t.match_best '10.1.1.1'
+    assert_equal "", z.data
+    z_object_id = z.data
+
+    y = t.match_best '10.1.1.1'
+    assert_equal "", y.data
+    assert(y.data.object_id != z_object_id)
+  end
+
+end