File tree: 2 files changed, +21 -14 lines changed
Original file line number | Diff line number | Diff line change
99import itertools
1010import logging
1111import multiprocessing
12- import os
1312import pickle
1413import sqlite3
1514import tempfile
@@ -175,7 +174,6 @@ def partition(
175174 clusters = self .cluster (pair_scores , threshold )
176175 clusters = self ._add_singletons (data , clusters )
177176 clusters = list (clusters )
178- _cleanup_scores (pair_scores )
179177 return clusters
180178
181179 def _add_singletons (self , data : Data , clusters : Clusters ) -> Clusters :
@@ -514,7 +512,6 @@ def join(
514512 links = pair_scores [pair_scores ["score" ] > threshold ]
515513
516514 links = list (links )
517- _cleanup_scores (pair_scores )
518515 return links
519516
520517 def one_to_one (self , scores : Scores , threshold : float = 0.0 ) -> Links :
@@ -1468,14 +1465,3 @@ def flatten_training(
14681465 y .extend ([encoded_y ] * len (pairs ))
14691466
14701467 return examples , numpy .array (y )
1471-
1472-
1473- def _cleanup_scores (arr : Scores ) -> None :
1474- try :
1475- mmap_file = arr .filename # type: ignore
1476- except AttributeError :
1477- pass
1478- else :
1479- del arr
1480- if mmap_file :
1481- os .remove (mmap_file )
Original file line number | Diff line number | Diff line change
1010import os
1111import queue
1212import tempfile
13+ import weakref
1314from typing import TYPE_CHECKING , overload
1415
1516import numpy
@@ -176,9 +177,29 @@ def scoreDuplicates(
176177 else :
177178 scored_pairs = numpy .array ([], dtype = dtype )
178179
180+ # Monkeypatch in these extra methods and attributes.
181+ # See https://docs.python.org/3/library/weakref.html#comparing-finalizers-with-del-methods
182+ scored_pairs .remove = weakref .finalize (scored_pairs , _cleanup_scores , scored_pairs ) # type: ignore[attr-defined]
183+ scored_pairs .removed = property (_is_removed ) # type: ignore[attr-defined]
184+
179185 return scored_pairs
180186
181187
188+ def _cleanup_scores (arr : Scores ) -> None :
189+ try :
190+ mmap_file = arr .filename # type: ignore
191+ except AttributeError :
192+ pass
193+ else :
194+ del arr
195+ if mmap_file :
196+ os .remove (mmap_file )
197+
198+
199+ def _is_removed (self ):
200+ return not self .remove .alive
201+
202+
182203def fillQueue (
183204 queue : _Queue , iterable : Iterable [Any ], stop_signals : int , chunk_size : int = 20000
184205) -> None :
You can’t perform that action at this time.
0 commit comments