Skip to content

Commit dbc54d2

Browse files
authored
[cherry pick] fix DataLoader memory leak (#34301)
* fix DataLoader memory leak. test=develop * fix unittest abort. test=develop * enable test_dataloader_dataset & skip GPU tensor break. test=develop
1 parent 5e3ae97 commit dbc54d2

File tree

2 files changed

+7
-22
lines changed

2 files changed

+7
-22
lines changed

python/paddle/fluid/dataloader/dataloader_iter.py

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,6 @@ def __init__(self, loader):
125125

126126
self._init_thread()
127127

128-
# if the user exits the Python program while the dataloader is still
129-
# iterating, resources may not be released safely, so we
130-
# add the __del__ function to CleanupFuncRegistrar
131-
# to make sure __del__ is always called when program
132-
# exit, so resources are released safely
133-
CleanupFuncRegistrar.register(self.__del__)
134-
135128
def _init_thread(self):
136129
self._var_names = [v.name for v in self._feed_list]
137130
self._shapes = [v.shape for v in self._feed_list]
@@ -229,9 +222,7 @@ def __next__(self):
229222
def _shutdown_thread(self):
230223
if self._thread:
231224
self._thread_done_event.set()
232-
if self._thread is not threading.current_thread():
233-
self._thread.join()
234-
self._thread = None
225+
self._thread = None
235226

236227
# python2 compatibility
237228
def next(self):
@@ -287,17 +278,6 @@ def __init__(self, loader):
287278
self._init_thread()
288279
self._shutdown = False
289280

290-
# if user exit python program when dataloader is still
291-
# iterating, resource may no release safely, so we
292-
# add _shutdown_on_exit function to to CleanupFuncRegistrar
293-
# to make sure _try_shutdown_all is always called when program
294-
# exit for resoure releasing safely
295-
# worker join may hang for in _try_shutdown_all call in atexit
296-
# for main process is in atexit state in some OS, so we add
297-
# timeout=1 for shutdown function call in atexit, for shutdown
298-
# function call in __del__, we keep it as it is
299-
CleanupFuncRegistrar.register(self._shutdown_on_exit)
300-
301281
def _init_workers(self):
302282
# multiprocess worker and index queue lists are initially empty
303283
self._workers = []

python/paddle/fluid/tests/unittests/test_dataloader_dataset.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,12 @@ def test_single_process(self):
6666
for image, label in loader:
6767
self.assertTrue(image.place.is_gpu_place())
6868
self.assertTrue(label.place.is_cuda_pinned_place())
69-
break
69+
# FIXME(dkp): when input tensor is in GPU place and
70+
# iteration breaks midway, it seems the GPU
71+
# tensor put into blocking_queue cannot be safely
72+
# released and may cause ABRT/SEGV, this should
73+
# be fixed
74+
# break
7075

7176
def test_multi_process(self):
7277
# DataLoader in multi-process mode is currently not supported on macOS and Windows

0 commit comments

Comments
 (0)