Skip to content

Commit 689cfc5

Browse files
committed
feat: Auto cleanup scores with weakref.finalize
It wasn't stated anywhere in the docs that you have to clean up the scores array (which is possibly backed by a tempfile), so people might not have been doing it. Also, I thought this was better than the alternatives from https://stackoverflow.com/questions/865115/how-do-i-correctly-clean-up-a-python-object
1 parent bf028e9 commit 689cfc5

File tree

2 files changed

+20
-14
lines changed

2 files changed

+20
-14
lines changed

dedupe/api.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import itertools
1010
import logging
1111
import multiprocessing
12-
import os
1312
import pickle
1413
import sqlite3
1514
import tempfile
@@ -175,7 +174,6 @@ def partition(
175174
clusters = self.cluster(pair_scores, threshold)
176175
clusters = self._add_singletons(data, clusters)
177176
clusters = list(clusters)
178-
_cleanup_scores(pair_scores)
179177
return clusters
180178

181179
def _add_singletons(self, data: Data, clusters: Clusters) -> Clusters:
@@ -514,7 +512,6 @@ def join(
514512
links = pair_scores[pair_scores["score"] > threshold]
515513

516514
links = list(links)
517-
_cleanup_scores(pair_scores)
518515
return links
519516

520517
def one_to_one(self, scores: Scores, threshold: float = 0.0) -> Links:
@@ -1468,14 +1465,3 @@ def flatten_training(
14681465
y.extend([encoded_y] * len(pairs))
14691466

14701467
return examples, numpy.array(y)
1471-
1472-
1473-
def _cleanup_scores(arr: Scores) -> None:
1474-
try:
1475-
mmap_file = arr.filename # type: ignore
1476-
except AttributeError:
1477-
pass
1478-
else:
1479-
del arr
1480-
if mmap_file:
1481-
os.remove(mmap_file)

dedupe/core.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import queue
1212
import tempfile
1313
from typing import TYPE_CHECKING, overload
14+
import weakref
1415

1516
import numpy
1617

@@ -176,9 +177,28 @@ def scoreDuplicates(
176177
else:
177178
scored_pairs = numpy.array([], dtype=dtype)
178179

180+
# See https://docs.python.org/3/library/weakref.html#comparing-finalizers-with-del-methods
181+
scored_pairs.remove = weakref.finalize(scored_pairs, _cleanup_scores, scored_pairs)
182+
scored_pairs.removed = property(_is_removed)
183+
179184
return scored_pairs
180185

181186

187+
def _cleanup_scores(arr: Scores) -> None:
188+
try:
189+
mmap_file = arr.filename # type: ignore
190+
except AttributeError:
191+
pass
192+
else:
193+
del arr
194+
if mmap_file:
195+
os.remove(mmap_file)
196+
197+
198+
def _is_removed(self):
199+
return not self.remove.alive
200+
201+
182202
def fillQueue(
183203
queue: _Queue, iterable: Iterable[Any], stop_signals: int, chunk_size: int = 20000
184204
) -> None:

0 commit comments

Comments
 (0)