Skip to content

Commit a61dab1

Browse files
committed
Fix and improve score() docstrings
1 parent 689cfc5 commit a61dab1

File tree

1 file changed

+22
-5
lines changed

1 file changed

+22
-5
lines changed

dedupe/api.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,27 @@ class IntegralMatching(Matching):
9797

9898
def score(self, pairs: RecordPairs) -> Scores:
9999
"""
100-
Scores pairs of records. Returns pairs of tuples of records id and
101-
associated probabilities that the pair of records are match
100+
Scores pairs of records. Returns a numpy structured array of scores.
102101
103102
Args:
104-
pairs: Iterator of pairs of records
105-
103+
pairs: Iterator of pairs of records, such as from the output of :func:`pairs`
104+
105+
Returns:
106+
A numpy
107+
`structured array <https://docs.scipy.org/doc/numpy/user/basics.rec.html>`_
108+
with a dtype of `[('pairs', id_type, 2), ('score', 'f4')]`
109+
where id_type is either a str or int,
110+
and score is a 32-bit float in the range (0, 1].
111+
The 'pairs' column contains pairs of ids of
112+
the records compared and the 'score' column contains
113+
the similarity score for that pair of records.
114+
115+
This array will be a numpy.array when self.num_cores is 1,
116+
and a numpy.memmap when self.num_cores is greater than 1.
117+
This memmap will automatically clean itself up, so you don't
118+
have to worry about it.
119+
120+
For each pair, the smaller id will be first.
106121
"""
107122
try:
108123
matches = core.scoreDuplicates(
@@ -802,6 +817,8 @@ def score(self, blocks: Blocks) -> Generator[Scores, None, None]:
802817
Args:
803818
blocks: Iterator of blocks of records
804819
820+
Yields:
821+
Structured numpy arrays. See :meth:`dedupe.Dedupe.score` for more info.
805822
"""
806823

807824
matches = core.scoreGazette(
@@ -943,7 +960,7 @@ def __init__(
943960
Args:
944961
settings_file: A file object containing settings
945962
info produced from the
946-
:func:`~dedupe.api.ActiveMatching.write_settings` method.
963+
:meth:`dedupe.Dedupe.write_settings` method.
947964
948965
num_cores: The number of cpus to use for parallel
949966
processing, defaults to the number of cpus

0 commit comments

Comments
 (0)