File tree Expand file tree Collapse file tree 2 files changed +20
-14
lines changed
Expand file tree Collapse file tree 2 files changed +20
-14
lines changed Original file line number Diff line number Diff line change 99import itertools
1010import logging
1111import multiprocessing
12- import os
1312import pickle
1413import sqlite3
1514import tempfile
@@ -175,7 +174,6 @@ def partition(
175174 clusters = self .cluster (pair_scores , threshold )
176175 clusters = self ._add_singletons (data , clusters )
177176 clusters = list (clusters )
178- _cleanup_scores (pair_scores )
179177 return clusters
180178
181179 def _add_singletons (self , data : Data , clusters : Clusters ) -> Clusters :
@@ -514,7 +512,6 @@ def join(
514512 links = pair_scores [pair_scores ["score" ] > threshold ]
515513
516514 links = list (links )
517- _cleanup_scores (pair_scores )
518515 return links
519516
520517 def one_to_one (self , scores : Scores , threshold : float = 0.0 ) -> Links :
@@ -1468,14 +1465,3 @@ def flatten_training(
14681465 y .extend ([encoded_y ] * len (pairs ))
14691466
14701467 return examples , numpy .array (y )
1471-
1472-
def _cleanup_scores(arr: "Scores") -> None:
    """Delete the on-disk file that backs a score array, if there is one.

    Args:
        arr: Either an array-like object exposing a ``filename`` attribute
            (as ``numpy.memmap`` does), or the path of the backing file
            itself (``str``, ``bytes`` or ``os.PathLike``). Objects without
            a ``filename`` attribute, or whose ``filename`` is falsy, are
            left alone.

    Raises:
        OSError: If the file exists but cannot be removed (for example a
            permission error). A file that is already gone is not an error,
            so calling this more than once is safe.

    Note:
        When this is registered through ``weakref.finalize``, pass the
        backing path rather than the array: a finalizer whose arguments
        reference the tracked object keeps that object alive forever.
    """
    import os  # local import: the block does not rely on a module-level one

    if isinstance(arr, (str, bytes, os.PathLike)):
        mmap_file = arr
    else:
        # In-memory arrays have no ``filename``; there is nothing to delete.
        mmap_file = getattr(arr, "filename", None)

    # Drop this function's own reference before touching the file. Other
    # references held by callers are unaffected.
    del arr

    if not mmap_file:
        return

    try:
        os.remove(mmap_file)
    except FileNotFoundError:
        # Already cleaned up (e.g. by an earlier call or a removed temp
        # directory) -- cleanup is meant to be idempotent.
        pass
Original file line number Diff line number Diff line change 1111import queue
1212import tempfile
1313from typing import TYPE_CHECKING , overload
14+ import weakref
1415
1516import numpy
1617
@@ -176,9 +177,28 @@ def scoreDuplicates(
176177 else :
177178 scored_pairs = numpy .array ([], dtype = dtype )
178179
180+ # See https://docs.python.org/3/library/weakref.html#comparing-finalizers-with-del-methods
181+ scored_pairs .remove = weakref .finalize (scored_pairs , _cleanup_scores , scored_pairs )
182+ scored_pairs .removed = property (_is_removed )
183+
179184 return scored_pairs
180185
181186
def _cleanup_scores(arr: "Scores") -> None:
    """Delete the on-disk file that backs a score array, if there is one.

    Args:
        arr: Either an array-like object exposing a ``filename`` attribute
            (as ``numpy.memmap`` does), or the path of the backing file
            itself (``str``, ``bytes`` or ``os.PathLike``). Objects without
            a ``filename`` attribute, or whose ``filename`` is falsy, are
            left alone.

    Raises:
        OSError: If the file exists but cannot be removed (for example a
            permission error). A file that is already gone is not an error,
            so calling this more than once is safe.

    Note:
        When this is registered through ``weakref.finalize``, pass the
        backing path rather than the array: a finalizer whose arguments
        reference the tracked object keeps that object alive forever.
    """
    import os  # local import: the block does not rely on a module-level one

    if isinstance(arr, (str, bytes, os.PathLike)):
        mmap_file = arr
    else:
        # In-memory arrays have no ``filename``; there is nothing to delete.
        mmap_file = getattr(arr, "filename", None)

    # Drop this function's own reference before touching the file. Other
    # references held by callers are unaffected.
    del arr

    if not mmap_file:
        return

    try:
        os.remove(mmap_file)
    except FileNotFoundError:
        # Already cleaned up (e.g. by an earlier call or a removed temp
        # directory) -- cleanup is meant to be idempotent.
        pass
196+
197+
def _is_removed(self) -> bool:
    """Report whether the cleanup finalizer stored on ``self.remove`` has run.

    ``self.remove`` must expose an ``alive`` flag (as ``weakref.finalize``
    objects do). The result is ``True`` once that flag is falsy.
    """
    finalizer = self.remove
    if finalizer.alive:
        return False
    return True
200+
201+
182202def fillQueue (
183203 queue : _Queue , iterable : Iterable [Any ], stop_signals : int , chunk_size : int = 20000
184204) -> None :
You can’t perform that action at this time.
0 commit comments