Skip to content

Commit 0bd05e9

Browse files
authored
Merge pull request #44 from life4/fix-jaro
fix jaro
2 parents c9bec68 + f0e5e12 commit 0bd05e9

File tree

2 files changed: 10 additions and 4 deletions.

tests/edit_based.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ def test_common(self):
         self.assertAlmostEqual(self.alg.similarity('DWAYNE', 'DUANE'), 0.822222222)
         self.assertAlmostEqual(self.alg.similarity('DIXON', 'DICKSONX'), 0.76666666)

+        # https://github.com/life4/textdistance/issues/41
+        self.assertAlmostEqual(
+            self.alg.similarity('Sint-Pietersplein 6, 9000 Gent', 'Test 10, 1010 Brussel'),
+            0.5182539682539683,
+        )
+

 class JaroWinklerTest(unittest.TestCase):
     alg = textdistance.JaroWinkler(external=False)

textdistance/algorithms/edit_based.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ def __call__(self, s1, s2, prefix_weight=0.1):
         # looking only within search range, count & flag matched pairs
         common_chars = 0
         for i, s1_ch in enumerate(s1):
-            low = i - search_range if i > search_range else 0
-            hi = i + search_range if i + search_range < s2_len else s2_len - 1
+            low = max(0, i - search_range)
+            hi = min(i + search_range, s2_len - 1)
             for j in range(low, hi + 1):
                 if not s2_flags[j] and s2[j] == s1_ch:
                     s1_flags[i] = s2_flags[j] = True
@@ -284,7 +284,7 @@ def __call__(self, s1, s2, prefix_weight=0.1):
                         break
                 if s1[i] != s2[j]:
                     trans_count += 1
-        trans_count /= 2
+        trans_count //= 2

         # adjust for similarities in nonmatched characters
         common_chars = float(common_chars)
@@ -312,7 +312,7 @@ def __call__(self, s1, s2, prefix_weight=0.1):
         # agreed characters must be > half of remaining characters
         if not self.long_tolerance or min_len <= 4:
             return weight
-        if common_chars < i or 2 * common_chars < min_len + i:
+        if common_chars <= i + 1 or 2 * common_chars < min_len + i:
             return weight
         tmp = float(common_chars - i - 1) / (s1_len + s2_len - i * 2 + 2)
         weight += (1.0 - weight) * tmp

0 commit comments

Comments
 (0)