Skip to content

Commit 460d291

Browse files
committed
improve performance and memory usage of editops
1 parent f6e19dd commit 460d291

File tree

7 files changed

+39
-8
lines changed

7 files changed

+39
-8
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.8.3
1+
1.9.0

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
author = 'Max Bachmann'
2323

2424
# The full version, including alpha/beta/rc tags
25-
release = '1.8.3'
25+
release = '1.9.0'
2626

2727

2828
# -- General configuration ---------------------------------------------------

extern/rapidfuzz-cpp

src/rapidfuzz/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
"""
44
__author__ = "Max Bachmann"
55
__license__ = "MIT"
6-
__version__ = "1.8.3"
6+
__version__ = "1.9.0"
77

88
from rapidfuzz import process, fuzz, utils, levenshtein, string_metric

tests/test_hypothesis.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,32 @@ def extract_scorer(s1, s2, scorer, processor=None, **kwargs):
109109
def extract_iter_scorer(s1, s2, scorer, processor=None, **kwargs):
    """
    Run process.extract_iter for s1 against the single choice s2 and return
    the element at index 1 of the first result (presumably the score --
    confirm against the process.extract_iter documentation).
    """
    # The iterator is fully consumed into a list before indexing.
    results = list(
        process.extract_iter(s1, [s2], processor=processor, scorer=scorer, **kwargs)
    )
    first_result = results[0]
    return first_result[1]
111111

112+
def apply_editops(s1, s2, ops):
    """
    Apply a sequence of edit operations to s1 and return the resulting string.

    Each op is indexed as op[0] = operation name ('delete', 'insert' or
    'replace'), op[1] = position in s1, op[2] = position in s2. Ops must be
    ordered by ascending s1 position. Characters of s1 that are not touched
    by any op are copied unchanged. Ops with any other name only cause the
    untouched characters before op[1] to be copied.

    Raises IndexError when an op position is out of order or out of range
    for s1, or when op[2] is out of range for s2.
    """
    # Collect the pieces and join once instead of growing a string with +=,
    # which is quadratic in the worst case.
    pieces = []
    s1_pos = 0
    s1_len = len(s1)

    for op in ops:
        name = op[0]
        src_pos = op[1]

        # Fail loudly on positions that go backwards or past the end of s1
        # rather than silently producing a wrong result.
        if src_pos < s1_pos or src_pos > s1_len:
            raise IndexError(
                'editop source position %r is out of order or out of range' % (src_pos,)
            )

        # Copy the unchanged run of s1 that precedes this op.
        pieces.append(s1[s1_pos:src_pos])
        s1_pos = src_pos

        if name == 'delete':
            s1_pos += 1
        elif name == 'insert':
            pieces.append(s2[op[2]])
        elif name == 'replace':
            pieces.append(s2[op[2]])
            s1_pos += 1

    # A delete/replace positioned at len(s1) would have consumed a character
    # that does not exist.
    if s1_pos > s1_len:
        raise IndexError('editops consume more characters than s1 contains')

    # Copy whatever remains of s1 after the last op.
    pieces.append(s1[s1_pos:])
    return ''.join(pieces)
137+
112138

113139
HYPOTHESIS_ALPHABET = ascii_letters + digits + punctuation
114140

@@ -137,12 +163,13 @@ def extract_iter_scorer(s1, s2, scorer, processor=None, **kwargs):
137163
]
138164

139165
@given(s1=st.text(), s2=st.text())
140-
@settings(max_examples=50, deadline=1000)
166+
@settings(max_examples=100, deadline=None)
141167
def test_levenshtein_editops(s1, s2):
142168
"""
143169
test levenshtein_editops. Checks that applying the returned edit operations to s1 reproduces s2.
144170
"""
145-
string_metric.levenshtein_editops(s1, s2)
171+
ops = string_metric.levenshtein_editops(s1, s2)
172+
assert apply_editops(s1, s2, ops) == s2
146173

147174
@given(s1=st.text(max_size=64), s2=st.text())
148175
@settings(max_examples=50, deadline=1000)
@@ -322,4 +349,4 @@ def test_cdist(queries, choices):
322349

323350
reference_matrix = cdist_distance(queries, queries, scorer=string_metric.levenshtein)
324351
matrix = process.cdist(queries, queries, scorer=string_metric.levenshtein)
325-
assert (matrix == reference_matrix).all()
352+
assert (matrix == reference_matrix).all()

tests/test_process.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def testWithScorer(self):
143143
self.assertEqual(best[0], choices_mapping[1])
144144
best = process.extract(query, choices_mapping, scorer=fuzz.QRatio)[0]
145145
self.assertEqual(best[0], choices_mapping[1])
146-
146+
147147
def testWithCutoff(self):
148148
choices = [
149149
"new york mets vs chicago cubs",

tests/test_string_metric.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ def test_levenshtein_editops():
5959
"""
6060
assert string_metric.levenshtein_editops("0", "") == [("delete", 0, 0)]
6161
assert string_metric.levenshtein_editops("", "0") == [("insert", 0, 0)]
62+
63+
assert string_metric.levenshtein_editops("00", "0") == [("delete", 1, 1)]
64+
assert string_metric.levenshtein_editops("0", "00") == [("insert", 1, 1)]
65+
6266
assert string_metric.levenshtein_editops("qabxcd", "abycdf") == [
6367
("delete", 0, 0), ("replace", 3, 2), ("insert", 6, 5)
6468
]

0 commit comments

Comments
 (0)