From 7b4e67b990e0b92b0843223f3f6ab2269cca668d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 22 Apr 2010 12:19:57 -0700 Subject: rely on garbage collector to manage user-supplied node data This prevents segfaults from double free() calls when using Patricia#destroy on node data or the same object twice. Patricia#destroy and Patricia#clear are now no-ops for backwards compatibility. One (hopefully welcome) side effect of this change is that arbitrary Ruby objects may be attached to nodes, not just strings. String objects will continue to alway be duplicated when returned to the user for backwards compatibility, but arbitrary Ruby objects may be attached to each node. This feature can now be used to track node usage data directly in the Patricia object without the need for a separate lookup table. The only downside of this change is that the GC mark phase takes a tiny bit longer due to the presence of more Ruby objects. There are also additional tests for the new behavior, including an expensive test to force GC pressure and ensure there are no memory leaks. --- ext/rpatricia/rpatricia.c | 77 +++++++++++++++++++++++++++++++++++++---------- test/test_gc.rb | 39 ++++++++++++++++++++++++ test/test_match_data.rb | 46 ++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 16 deletions(-) create mode 100644 test/test_gc.rb create mode 100644 test/test_match_data.rb diff --git a/ext/rpatricia/rpatricia.c b/ext/rpatricia/rpatricia.c index ec8c6a3..0fc1002 100644 --- a/ext/rpatricia/rpatricia.c +++ b/ext/rpatricia/rpatricia.c @@ -12,20 +12,28 @@ static VALUE cPatricia; static void dummy(void) {} +/* + * this method only exists for backwards compatibility, we now rely + * on the GC to do all the dirty work of freeing node data for us + */ static VALUE p_destroy (VALUE self) { - patricia_tree_t *tree; - Data_Get_Struct(self, patricia_tree_t, tree); - Destroy_Patricia(tree, free); return Qtrue; } +static void +p_node_mark (void *ptr) +{ + patricia_node_t *node = ptr; + + rb_gc_mark((VALUE)node->data); +} + static VALUE p_add (int argc, VALUE *argv, VALUE self) { - int str_len; - char *user_data; + VALUE user_data; patricia_tree_t *tree; patricia_node_t *node; prefix_t *prefix; @@ -39,15 +47,18 @@ p_add (int argc, VALUE *argv, VALUE self) Deref_Prefix(prefix); if (argc == 2) { - user_data = STR2CSTR(argv[1]); - str_len = strlen(user_data); - node->data = (char *) malloc((str_len + 1) * sizeof(char)); - sprintf((char *)node->data, user_data); + user_data = argv[1]; + + /* for backwards compatibility, we always dup and return new strings */ + if (TYPE(user_data) == T_STRING) + user_data = rb_obj_dup(user_data); } else { - node->data = (char *) malloc(sizeof(char)); - sprintf((char *)node->data, ""); + user_data = rb_str_new(NULL, 0); } - return Data_Wrap_Struct(cPatricia, 0, 0, node); + PATRICIA_DATA_SET(node, user_data); + + /* node will be freed when parent is freed */ + return Data_Wrap_Struct(cPatricia, p_node_mark, 0, node); } static VALUE @@ -85,7 +96,7 @@ p_match (VALUE self, VALUE r_key) Deref_Prefix (prefix); if (node) - return Data_Wrap_Struct(cPatricia, 0, 0, node); + return Data_Wrap_Struct(cPatricia, p_node_mark, 0, node); else return Qfalse; @@ -104,7 +115,7 @@ p_match_exact (VALUE self, VALUE r_key) Deref_Prefix (prefix); if (node) - return Data_Wrap_Struct(cPatricia, 0, 0, node); + return Data_Wrap_Struct(cPatricia, p_node_mark, 0, node); else return Qfalse; } @@ -140,9 +151,17 @@ p_print_nodes (VALUE self) static VALUE p_data (VALUE self) { + VALUE user_data; patricia_node_t *node; Data_Get_Struct(self, patricia_node_t, node); - return rb_str_new2((char *)node->data); + + user_data = (VALUE)node->data; + + /* for backwards compatibility, we always dup and return new strings */ + if (TYPE(user_data) == T_STRING) + user_data = rb_obj_dup(user_data); + + return user_data; } static VALUE @@ -173,12 +192,38 @@ p_prefixlen (VALUE self) return INT2NUM(node->prefix->bitlen); } +/* called during GC for each node->data attached to a Patricia tree */ +static void +p_tree_mark_each(prefix_t *prefix, void *data) +{ + VALUE user_data = (VALUE)data; + + rb_gc_mark(user_data); +} + +static void +p_tree_mark (void *ptr) +{ + patricia_tree_t *tree = ptr; + + patricia_process(tree, p_tree_mark_each); +} + +static void +p_tree_free (void *ptr) +{ + patricia_tree_t *tree = ptr; + + /* no need to explicitly free each node->data, GC will do it for us */ + Destroy_Patricia(tree, NULL); +} + static VALUE p_new (VALUE self) { patricia_tree_t *tree; tree = New_Patricia(32); /* assuming only IPv4 */ - return Data_Wrap_Struct(cPatricia, 0, 0, tree); + return Data_Wrap_Struct(cPatricia, p_tree_mark, p_tree_free, tree); } void diff --git a/test/test_gc.rb b/test/test_gc.rb new file mode 100644 index 0000000..d567696 --- /dev/null +++ b/test/test_gc.rb @@ -0,0 +1,39 @@ +require 'test/unit' +require 'rpatricia' + +# this takes a while, watch memory usage not grow in top(1) or similar + +class TestGc < Test::Unit::TestCase + + def setup + @arrays = Patricia.new + @arrays.add('10.0.0.0/8', []) + @arrays.add('127.0.0.0/24', []) + + @strings = Patricia.new + @strings.add('10.0.0.0/8', "big lan") + @strings.add('127.0.0.0/24', "localhost") + end + + def test_gc + assert_nothing_raised do + 10_000_000.times do + t = Patricia.new + t.add('10.0.0.0/8', {}) + t.add('127.0.0.0/24', "home sweet home") + end + end + + # ensure what we created originally didn't get GC-ed' + 100.times do + assert_equal [], @arrays.match_best('127.0.0.1').data + assert_equal "localhost", @strings.match_best('127.0.0.1').data + end + end + + def test_destroy + assert @strings.destroy + assert @strings.destroy + end + +end diff --git a/test/test_match_data.rb b/test/test_match_data.rb new file mode 100644 index 0000000..e85a015 --- /dev/null +++ b/test/test_match_data.rb @@ -0,0 +1,46 @@ +require 'test/unit' +require 'rpatricia' + +class TestMatchData < Test::Unit::TestCase + + def test_match_data_non_string + ary = [] + t = Patricia.new + t.add('10.0.0.0/8', ary) + + z = t.match_best '10.1.1.1' + assert_equal ary.object_id, z.data.object_id + z.data << '10.1.1.1' + + t.match_best('10.2.2.2').data.include?('10.1.1.1') + end + + # backwards compatibility, we always return a new string + def test_match_data_string + data = "hello" + t = Patricia.new + t.add('10.0.0.0/8', data) + + z = t.match_best '10.1.1.1' + assert_equal "hello", z.data + assert(data.object_id != z.data.object_id) + + y = t.match_best '10.1.1.1' + assert_equal "hello", y.data + assert(y.data.object_id != z.data.object_id) + end + + def test_match_data_nothing + t = Patricia.new + t.add('10.0.0.0/8') + + z = t.match_best '10.1.1.1' + assert_equal "", z.data + z_object_id = z.data + + y = t.match_best '10.1.1.1' + assert_equal "", y.data + assert(y.data.object_id != z_object_id) + end + +end -- cgit v1.2.3-24-ge0c7