Skip to content

Commit c48f273

Browse files
tonyseek authored and StephenSorriaux committed
fix(recipe): No more memory leak once TreeCache was closed (#524)
fix(recipe): Fix memory leak of the TreeCache recipe. Fix the memory leak in the idle handlers and in a closed TreeCache. Add new memory tests for the TreeCache recipe that use objgraph, and other tests for the various handlers used with TreeCache. Let TreeCache start in a safe way. The docs now suggest closing an unused TreeCache.
1 parent 03340fb commit c48f273

File tree

7 files changed

+171
-15
lines changed

7 files changed

+171
-15
lines changed

kazoo/handlers/eventlet.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ def _process_completion_queue(self):
107107
except Exception:
108108
LOG.warning("Exception in worker completion queue greenlet",
109109
exc_info=True)
110+
finally:
111+
del cb # release before possible idle
110112

111113
def _process_callback_queue(self):
112114
while True:
@@ -119,6 +121,8 @@ def _process_callback_queue(self):
119121
except Exception:
120122
LOG.warning("Exception in worker callback queue greenlet",
121123
exc_info=True)
124+
finally:
125+
del cb # release before possible idle
122126

123127
def start(self):
124128
if not self._started:

kazoo/handlers/gevent.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,17 @@ def greenlet_worker():
6969
while True:
7070
try:
7171
func = queue.get()
72-
if func is _STOP:
73-
break
74-
func()
72+
try:
73+
if func is _STOP:
74+
break
75+
func()
76+
except Exception as exc:
77+
log.warning("Exception in worker greenlet")
78+
log.exception(exc)
79+
finally:
80+
del func # release before possible idle
7581
except self.queue_empty:
7682
continue
77-
except Exception as exc:
78-
log.warning("Exception in worker greenlet")
79-
log.exception(exc)
8083
return gevent.spawn(greenlet_worker)
8184

8285
def start(self):

kazoo/handlers/threading.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def _thread_worker(): # pragma: nocover
126126
log.exception("Exception in worker queue thread")
127127
finally:
128128
queue.task_done()
129+
del func # release before possible idle
129130
except self.queue_empty:
130131
continue
131132
t = self.spawn(_thread_worker)

kazoo/recipe/cache.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import os
2020

2121
from kazoo.exceptions import NoNodeError, KazooException
22+
from kazoo.protocol.paths import _prefix_root
2223
from kazoo.protocol.states import KazooState, EventType
2324

2425

@@ -57,6 +58,12 @@ def start(self):
5758
After a cache started, all changes of subtree will be synchronized
5859
from the ZooKeeper server. Events will be fired for those activity.
5960
61+
Don't forget to call :meth:`close` if a tree was started and you don't
62+
need it anymore, or you will leak the memory of cached nodes, even if
63+
you have released all references to the :class:`TreeCache` instance.
64+
Because there are so many callbacks that have been registered to the
65+
Kazoo client.
66+
6067
See also :meth:`~TreeCache.listen`.
6168
6269
.. note::
@@ -75,7 +82,10 @@ def start(self):
7582
self._client.ensure_path(self._root._path)
7683

7784
if self._client.connected:
78-
self._root.on_created()
85+
# The on_created and other on_* methods must not be invoked outside
86+
# the background task. This is the key to keep concurrency safe
87+
# without lock.
88+
self._in_background(self._root.on_created)
7989

8090
def close(self):
8191
"""Closes the cache.
@@ -95,6 +105,10 @@ def close(self):
95105
self._task_queue.put(self._STOP)
96106
self._client.remove_listener(self._session_watcher)
97107
with handle_exception(self._error_listeners):
108+
# We must invoke on_deleted outside background queue because:
109+
# 1. The background task has been stopped.
110+
# 2. The on_deleted on closed tree does not communicate with
111+
# ZooKeeper actually.
98112
self._root.on_deleted()
99113

100114
def listen(self, listener):
@@ -185,6 +199,9 @@ def _do_background(self):
185199
func, args, kwargs = cb
186200
func(*args, **kwargs)
187201

202+
# release before possible idle
203+
del cb, func, args, kwargs
204+
188205
def _session_watcher(self, state):
189206
if state == KazooState.SUSPENDED:
190207
self._publish_event(TreeEvent.CONNECTION_SUSPENDED)
@@ -241,6 +258,7 @@ def on_deleted(self):
241258
old_child.on_deleted()
242259

243260
if self._tree._state == self._tree.STATE_CLOSED:
261+
self._reset_watchers()
244262
return
245263

246264
old_state, self._state = self._state, self.STATE_DEAD
@@ -253,10 +271,18 @@ def on_deleted(self):
253271
child = self._path[len(self._parent._path) + 1:]
254272
if self._parent._children.get(child) is self:
255273
del self._parent._children[child]
274+
self._reset_watchers()
256275

257276
def _publish_event(self, *args, **kwargs):
258277
return self._tree._publish_event(*args, **kwargs)
259278

279+
def _reset_watchers(self):
280+
client = self._tree._client
281+
for _watchers in (client._data_watchers, client._child_watchers):
282+
_path = _prefix_root(client.chroot, self._path)
283+
_watcher = _watchers.get(_path, set())
284+
_watcher.discard(self._process_watch)
285+
260286
def _refresh(self):
261287
self._refresh_data()
262288
self._refresh_children()
@@ -391,10 +417,11 @@ def handle_exception(listeners):
391417
yield
392418
except Exception as e:
393419
logger.debug('processing error: %r', e)
394-
for listener in listeners:
395-
try:
396-
listener(e)
397-
except: # pragma: no cover
398-
logger.exception('Exception handling exception') # oops
420+
if listeners:
421+
for listener in listeners:
422+
try:
423+
listener(e)
424+
except BaseException: # pragma: no cover
425+
logger.exception('Exception handling exception') # oops
399426
else:
400427
logger.exception('No listener to process %r', e)

kazoo/tests/test_cache.py

Lines changed: 122 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,44 @@
1+
import gc
2+
import importlib
13
import uuid
24

35
from mock import patch, call, Mock
46
from nose.tools import eq_, ok_, assert_not_equal, raises
7+
from objgraph import count as count_refs_by_type
58

6-
from kazoo.testing import KazooTestCase
9+
from kazoo.testing import KazooTestHarness
710
from kazoo.exceptions import KazooException
811
from kazoo.recipe.cache import TreeCache, TreeNode, TreeEvent
912

1013

11-
class KazooTreeCacheTests(KazooTestCase):
14+
class KazooAdaptiveHandlerTestCase(KazooTestHarness):
15+
HANDLERS = (
16+
('kazoo.handlers.gevent', 'SequentialGeventHandler'),
17+
('kazoo.handlers.eventlet', 'SequentialEventletHandler'),
18+
('kazoo.handlers.threading', 'SequentialThreadingHandler'),
19+
)
1220

21+
def setUp(self):
22+
self.handler = self.choose_an_installed_handler()
23+
self.setup_zookeeper(handler=self.handler)
24+
25+
def tearDown(self):
26+
self.handler = None
27+
self.teardown_zookeeper()
28+
29+
def choose_an_installed_handler(self):
30+
for handler_module, handler_class in self.HANDLERS:
31+
try:
32+
mod = importlib.import_module(handler_module)
33+
cls = getattr(mod, handler_class)
34+
except ImportError:
35+
continue
36+
else:
37+
return cls()
38+
raise ImportError('No available handler')
39+
40+
41+
class KazooTreeCacheTests(KazooAdaptiveHandlerTestCase):
1342
def setUp(self):
1443
super(KazooTreeCacheTests, self).setUp()
1544
self._event_queue = self.client.handler.queue_impl()
@@ -18,12 +47,15 @@ def setUp(self):
1847
self.cache = None
1948

2049
def tearDown(self):
21-
super(KazooTreeCacheTests, self).tearDown()
2250
if not self._error_queue.empty():
2351
try:
2452
raise self._error_queue.get()
2553
except FakeException:
2654
pass
55+
if self.cache is not None:
56+
self.cache.close()
57+
self.cache = None
58+
super(KazooTreeCacheTests, self).tearDown()
2759

2860
def make_cache(self):
2961
if self.cache is None:
@@ -51,6 +83,29 @@ def spy_client(self, method_name):
5183
method = getattr(self.client, method_name)
5284
return patch.object(self.client, method_name, wraps=method)
5385

86+
def _wait_gc(self):
87+
# trigger switching on some coroutine handlers
88+
self.client.handler.sleep_func(0.1)
89+
90+
completion_queue = getattr(self.handler, 'completion_queue', None)
91+
if completion_queue is not None:
92+
while not self.client.handler.completion_queue.empty():
93+
self.client.handler.sleep_func(0.1)
94+
95+
for gen in range(3):
96+
gc.collect(gen)
97+
98+
def count_tree_node(self):
99+
# inspect GC and count tree nodes for checking memory leak
100+
for retry in range(10):
101+
result = set()
102+
for _ in range(5):
103+
self._wait_gc()
104+
result.add(count_refs_by_type('TreeNode'))
105+
if len(result) == 1:
106+
return list(result)[0]
107+
raise RuntimeError('could not count refs exactly')
108+
54109
def test_start(self):
55110
self.make_cache()
56111
self.wait_cache(since=TreeEvent.INITIALIZED)
@@ -74,12 +129,29 @@ def test_start_closed(self):
74129
self.cache.start()
75130

76131
def test_close(self):
132+
eq_(self.count_tree_node(), 0)
133+
77134
self.make_cache()
78135
self.wait_cache(since=TreeEvent.INITIALIZED)
79136
self.client.create(self.path + '/foo/bar/baz', makepath=True)
80137
for _ in range(3):
81138
self.wait_cache(TreeEvent.NODE_ADDED)
82139

140+
# setup stub watchers which are outside of tree cache
141+
stub_data_watcher = Mock(spec=lambda event: None)
142+
stub_child_watcher = Mock(spec=lambda event: None)
143+
self.client.get(self.path + '/foo', stub_data_watcher)
144+
self.client.get_children(self.path + '/foo', stub_child_watcher)
145+
146+
# watchers inside tree cache should be here
147+
root_path = self.client.chroot + self.path
148+
eq_(len(self.client._data_watchers[root_path + '/foo']), 2)
149+
eq_(len(self.client._data_watchers[root_path + '/foo/bar']), 1)
150+
eq_(len(self.client._data_watchers[root_path + '/foo/bar/baz']), 1)
151+
eq_(len(self.client._child_watchers[root_path + '/foo']), 2)
152+
eq_(len(self.client._child_watchers[root_path + '/foo/bar']), 1)
153+
eq_(len(self.client._child_watchers[root_path + '/foo/bar/baz']), 1)
154+
83155
self.cache.close()
84156

85157
# nothing should be published since tree closed
@@ -93,6 +165,53 @@ def test_close(self):
93165
# node state should not be changed
94166
assert_not_equal(self.cache._root._state, TreeNode.STATE_DEAD)
95167

168+
# watchers should be reset
169+
eq_(len(self.client._data_watchers[root_path + '/foo']), 1)
170+
eq_(len(self.client._data_watchers[root_path + '/foo/bar']), 0)
171+
eq_(len(self.client._data_watchers[root_path + '/foo/bar/baz']), 0)
172+
eq_(len(self.client._child_watchers[root_path + '/foo']), 1)
173+
eq_(len(self.client._child_watchers[root_path + '/foo/bar']), 0)
174+
eq_(len(self.client._child_watchers[root_path + '/foo/bar/baz']), 0)
175+
176+
# outside watchers should not be deleted
177+
eq_(list(self.client._data_watchers[root_path + '/foo'])[0],
178+
stub_data_watcher)
179+
eq_(list(self.client._child_watchers[root_path + '/foo'])[0],
180+
stub_child_watcher)
181+
182+
# should not be any leaked memory (tree node) here
183+
self.cache = None
184+
eq_(self.count_tree_node(), 0)
185+
186+
def test_delete_operation(self):
187+
self.make_cache()
188+
self.wait_cache(since=TreeEvent.INITIALIZED)
189+
190+
eq_(self.count_tree_node(), 1)
191+
192+
self.client.create(self.path + '/foo/bar/baz', makepath=True)
193+
for _ in range(3):
194+
self.wait_cache(TreeEvent.NODE_ADDED)
195+
196+
self.client.delete(self.path + '/foo', recursive=True)
197+
for _ in range(3):
198+
self.wait_cache(TreeEvent.NODE_REMOVED)
199+
200+
# tree should be empty
201+
eq_(self.cache._root._children, {})
202+
203+
# watchers should be reset
204+
root_path = self.client.chroot + self.path
205+
eq_(self.client._data_watchers[root_path + '/foo'], set())
206+
eq_(self.client._data_watchers[root_path + '/foo/bar'], set())
207+
eq_(self.client._data_watchers[root_path + '/foo/bar/baz'], set())
208+
eq_(self.client._child_watchers[root_path + '/foo'], set())
209+
eq_(self.client._child_watchers[root_path + '/foo/bar'], set())
210+
eq_(self.client._child_watchers[root_path + '/foo/bar/baz'], set())
211+
212+
# should not be any leaked memory (tree node) here
213+
eq_(self.count_tree_node(), 1)
214+
96215
def test_children_operation(self):
97216
self.make_cache()
98217
self.wait_cache(since=TreeEvent.INITIALIZED)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ mock==1.0.1
33
nose==1.3.3
44
pure-sasl==0.5.1
55
flake8==2.3.0
6+
objgraph==3.4.0

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
'nose',
2626
'flake8',
2727
'pure-sasl',
28+
'objgraph',
2829
]
2930

3031
if not (PYTHON3 or PYPY):

0 commit comments

Comments
 (0)