Skip to content

Commit 9d89b4d

Browse files
committed
CU-8699qg2tg: Hopefully fix multiprocessing stalling by removing the … (#36)
* CU-8699qg2tg: Hopefully fix multiprocessing stalling by removing the dunder del method from the usage monitor for multiprocessing
* CU-8699qg2tg: Fix minor oversight in changes
* CU-8699qg2tg: Hopefully fix usage monitoring with no dunder del method for good
1 parent 26980a7 commit 9d89b4d

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

medcat-v2/medcat/cat.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from datetime import date
66
from concurrent.futures import ProcessPoolExecutor, as_completed, Future
77
import itertools
8+
from contextlib import contextmanager
89

910
import shutil
1011
import logging
@@ -30,7 +31,7 @@
3031
from medcat.utils.defaults import avoid_legacy_conversion
3132
from medcat.utils.defaults import doing_legacy_conversion_message
3233
from medcat.utils.defaults import LegacyConversionDisabledError
33-
from medcat.utils.usage_monitoring import UsageMonitor
34+
from medcat.utils.usage_monitoring import UsageMonitor, _NoDelUM
3435
from medcat.utils.import_utils import MissingDependenciesError
3536

3637

@@ -357,6 +358,28 @@ def get_entities_multi_texts(
357358
yield text_index, result
358359
return
359360

361+
with self._no_usage_monitor_exit_flushing():
362+
yield from self._multiprocess(n_process, batch_iter)
363+
364+
@contextmanager
365+
def _no_usage_monitor_exit_flushing(self):
366+
# NOTE: the `UsageMonitor.__del__` method can cause
367+
# multiprocessing to stall while it waits for it to be
368+
# called. So here we remove the method.
369+
# However, due to the object being pickled for multiprocessing
370+
# purposes, the class's `__del__` method will be used anyway.
371+
# So we need to trick it into using a different class.
372+
original_cls = self.usage_monitor.__class__
373+
self.usage_monitor.__class__ = _NoDelUM
374+
try:
375+
yield
376+
finally:
377+
self.usage_monitor.__class__ = original_cls
378+
379+
def _multiprocess(
380+
self, n_process: int,
381+
batch_iter: Iterator[list[tuple[str, str, bool]]]
382+
) -> Iterator[tuple[str, Union[dict, Entities, OnlyCUIEntities]]]:
360383
external_processes = n_process - 1
361384
with ProcessPoolExecutor(max_workers=external_processes) as executor:
362385
yield from self._mp_one_batch_per_process(

medcat-v2/medcat/utils/usage_monitoring.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ def __del__(self):
121121
raise
122122

123123

124+
class _NoDelUM(UsageMonitor):
125+
126+
def __del__(self):
127+
pass
128+
129+
124130
# NOTE: at termination time, open is not available
125131
# while this isn't great, there's nothing we can do
126132
def _says_open_not_available(err: NameError) -> bool:

0 commit comments

Comments
 (0)