Skip to content

Commit b4702c4

Browse files
committed
Merge branch 'master' into ci-3.10
2 parents 99a59d3 + 64c121e commit b4702c4

File tree

9 files changed

+83
-26
lines changed

9 files changed

+83
-26
lines changed

.drone.star

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@ def main(ctx):
77
steps=[
88
dict(
99
name="install task",
10-
image="alpine:latest",
10+
image="debian:latest",
1111
commands=[
12-
"apk add --no-cache wget",
12+
"apt update",
13+
"apt install -y wget",
1314
"wget https://taskfile.dev/install.sh",
1415
"sh install.sh -- latest",
1516
"rm install.sh",
@@ -43,7 +44,8 @@ def step(env, python):
4344
COVERAGE_FILE=".coverage.{}.{}".format(env, python),
4445
),
4546
commands=[
46-
"apt install curl git gcc libc-dev",
47+
"apt update",
48+
"apt install -y curl git gcc libc-dev build-essential",
4749
"./bin/task PYTHON_BIN=python3 VENVS=/opt/py{python}/ -f {env}:run".format(
4850
python=python,
4951
env=env,

Taskfile.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@ vars:
1010
ISORT_ENV: "{{.VENVS}}isort"
1111
TWINE_ENV: "{{.VENVS}}twine"
1212

13-
TESTS_PATH: tests/
14-
1513
tasks:
1614
venv:create:
1715
status:
@@ -21,7 +19,7 @@ tasks:
2119
- "{{.ENV}}/bin/python3 -m pip install -U pip setuptools wheel"
2220
pip:install:
2321
sources:
24-
- pyproject.toml
22+
- setup.py
2523
- "{{.ENV}}/bin/activate"
2624
deps:
2725
- task: venv:create
@@ -74,7 +72,7 @@ tasks:
7472
ENV: "{{.PYTEST_PURE_ENV}}"
7573
EXTRA: test
7674
cmds:
77-
- "{{.PYTEST_PURE_ENV}}/bin/pytest -m 'not external' {{.ARGS}} {{.TESTS_PATH}}"
75+
- "{{.PYTEST_PURE_ENV}}/bin/pytest -m 'not external' {{.CLI_ARGS}}"
7876

7977
pytest-external:run:
8078
deps:
@@ -83,7 +81,7 @@ tasks:
8381
ENV: "{{.PYTEST_EXT_ENV}}"
8482
EXTRA: test,benchmark
8583
cmds:
86-
- "{{.PYTEST_EXT_ENV}}/bin/pytest {{.ARGS}} {{.TESTS_PATH}}"
84+
- "{{.PYTEST_EXT_ENV}}/bin/pytest {{.CLI_ARGS}}"
8785

8886
isort:run:
8987
sources:

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ license_file = LICENSE
44

55
[flake8]
66
max-line-length=120
7-
ignore=P101,P103,E241
7+
ignore=P101,P103,E241,B023
88
exclude=
99
.tox,
1010
.pytest_cache

setup.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
'numpy', # for SmithWaterman and other
1313
'python-Levenshtein', # for Jaro and Levenshtein
1414
'pyxDamerauLevenshtein', # for DamerauLevenshtein
15+
'rapidfuzz>=2.6.0', # for Jaro, Levenshtein and other
1516
],
1617

1718
# needed for benchmarking, optimization and testing
@@ -22,6 +23,7 @@
2223
'numpy',
2324
'python-Levenshtein',
2425
'pyxDamerauLevenshtein',
26+
'rapidfuzz>=2.6.0',
2527
# slow
2628
'distance',
2729
'pylev',
@@ -38,22 +40,27 @@
3840

3941
# for algos, from fastest to slowest, only faster than textdistance:
4042
'DamerauLevenshtein': [
43+
'rapidfuzz>=2.6.0', # any iterators of hashable elements
4144
'jellyfish', # only for text
4245
'pyxDamerauLevenshtein', # for any iterators
4346
],
4447
'Hamming': [
4548
'python-Levenshtein', # only same length and strings
49+
'rapidfuzz>=2.6.0', # only same length, any iterators of hashable elements
4650
'jellyfish', # only strings, any length
4751
'distance', # only same length, any iterators
4852
'abydos', # any iterators
4953
],
5054
'Jaro': [
55+
'rapidfuzz>=2.6.0', # any iterators of hashable elements
5156
'python-Levenshtein', # only text
5257
],
5358
'JaroWinkler': [
59+
'rapidfuzz>=2.6.0', # any iterators of hashable elements
5460
'jellyfish', # only text
5561
],
5662
'Levenshtein': [
63+
'rapidfuzz>=2.6.0', # any iterators of hashable elements
5764
'python-Levenshtein', # only text
5865
# yeah, other libs slower than textdistance
5966
],
@@ -79,7 +86,7 @@
7986

8087
setup(
8188
name='textdistance',
82-
version='4.2.2',
89+
version='4.4.0',
8390

8491
author='orsinium',
8592
author_email='gram@orsinium.dev',

tests/test_external.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,15 @@
1212

1313
libraries = prototype.clone()
1414

15+
# numpy throws a bunch of warnings about abydos using `np.int` instead of `int`.
16+
ABYDOS_WARNINGS = (
17+
'ignore:`np.int` is a deprecated alias',
18+
'ignore:`np.float` is a deprecated alias',
19+
'ignore:Using or importing the ABCs',
20+
)
21+
1522

23+
@pytest.mark.filterwarnings(*ABYDOS_WARNINGS)
1624
@pytest.mark.external
1725
@pytest.mark.parametrize('alg', libraries.get_algorithms())
1826
@hypothesis.settings(deadline=None)
@@ -37,34 +45,40 @@ def test_compare(left, right, alg):
3745
assert isclose(int_result, ext_result), str(lib)
3846

3947

48+
@pytest.mark.filterwarnings(*ABYDOS_WARNINGS)
4049
@pytest.mark.external
4150
@pytest.mark.parametrize('alg', libraries.get_algorithms())
4251
@hypothesis.given(
4352
left=hypothesis.strategies.text(min_size=1),
4453
right=hypothesis.strategies.text(min_size=1),
4554
)
46-
def test_qval(left, right, alg):
55+
@pytest.mark.parametrize('qval', (None, 1, 2, 3))
56+
def test_qval(left, right, alg, qval):
4757
for lib in libraries.get_libs(alg):
4858
conditions = lib.conditions or {}
4959
internal_func = getattr(textdistance, alg)(external=False, **conditions)
5060
external_func = lib.get_function()
5161
# algorithm doesn't support q-grams
5262
if not hasattr(internal_func, 'qval'):
5363
continue
54-
for qval in (None, 1, 2, 3):
55-
internal_func.qval = qval
56-
# if qval unsopporting already set for lib
57-
s1, s2 = internal_func._get_sequences(left, right)
58-
if not lib.check_conditions(internal_func, s1, s2):
59-
continue
60-
61-
# test
62-
int_result = internal_func(left, right)
63-
s1, s2 = lib.prepare(s1, s2)
64-
ext_result = external_func(s1, s2)
65-
assert isclose(int_result, ext_result), str(lib)
6664

65+
internal_func.qval = qval
66+
# skip if the lib's conditions are not met with this qval set
67+
s1, s2 = internal_func._get_sequences(left, right)
68+
if not lib.check_conditions(internal_func, s1, s2):
69+
continue
70+
quick_answer = internal_func.quick_answer(s1, s2)
71+
if quick_answer is not None:
72+
continue
6773

74+
# test
75+
int_result = internal_func(left, right)
76+
s1, s2 = lib.prepare(s1, s2)
77+
ext_result = external_func(s1, s2)
78+
assert isclose(int_result, ext_result), f'{lib}({repr(s1)}, {repr(s2)})'
79+
80+
81+
@pytest.mark.filterwarnings(*ABYDOS_WARNINGS)
6882
@pytest.mark.external
6983
@pytest.mark.parametrize('alg', libraries.get_algorithms())
7084
@hypothesis.given(
@@ -79,10 +93,13 @@ def test_list_of_numbers(left, right, alg):
7993
if external_func is None:
8094
raise RuntimeError('cannot import {}'.format(str(lib)))
8195

96+
quick_answer = internal_func.quick_answer(left, right)
97+
if quick_answer is not None:
98+
continue
8299
if not lib.check_conditions(internal_func, left, right):
83100
continue
84101

85102
int_result = internal_func(left, right)
86103
s1, s2 = lib.prepare(left, right)
87104
ext_result = external_func(s1, s2)
88-
assert isclose(int_result, ext_result), str(lib)
105+
assert isclose(int_result, ext_result), f'{lib}({repr(s1)}, {repr(s2)})'

textdistance/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
# main package info
88
__title__ = 'TextDistance'
9-
__version__ = '4.2.2'
9+
__version__ = '4.4.0'
1010
__author__ = 'Gram (@orsinium)'
1111
__license__ = 'MIT'
1212

textdistance/algorithms/edit_based.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class Hamming(_Base):
3232
3333
https://en.wikipedia.org/wiki/Hamming_distance
3434
"""
35+
3536
def __init__(self, qval=1, test_func=None, truncate=False, external=True):
3637
self.qval = qval
3738
self.test_func = test_func or self._ident
@@ -62,6 +63,7 @@ class Levenshtein(_Base):
6263
https://en.wikipedia.org/wiki/Levenshtein_distance
6364
TODO: https://gist.github.com/kylebgorman/1081951/9b38b7743a3cb5167ab2c6608ac8eea7fc629dca
6465
"""
66+
6567
def __init__(self, qval=1, test_func=None, external=True):
6668
self.qval = qval
6769
self.test_func = test_func or self._ident
@@ -130,6 +132,7 @@ class DamerauLevenshtein(_Base):
130132
131133
https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
132134
"""
135+
133136
def __init__(self, qval=1, test_func=None, external=True):
134137
self.qval = qval
135138
self.test_func = test_func or self._ident
@@ -229,6 +232,7 @@ class JaroWinkler(_BaseSimilarity):
229232
https://github.com/Yomguithereal/talisman/blob/master/src/metrics/jaro.js
230233
https://github.com/Yomguithereal/talisman/blob/master/src/metrics/jaro-winkler.js
231234
"""
235+
232236
def __init__(self, long_tolerance=False, winklerize=True, qval=1, external=True):
233237
self.qval = qval
234238
self.long_tolerance = long_tolerance
@@ -302,7 +306,7 @@ def __call__(self, s1, s2, prefix_weight=0.1):
302306
# adjust for up to first 4 chars in common
303307
j = min(min_len, 4)
304308
i = 0
305-
while i < j and s1[i] == s2[i] and s1[i]:
309+
while i < j and s1[i] == s2[i]:
306310
i += 1
307311
if i:
308312
weight += i * prefix_weight * (1.0 - weight)
@@ -422,6 +426,7 @@ class SmithWaterman(_BaseSimilarity):
422426
https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
423427
https://github.com/Yomguithereal/talisman/blob/master/src/metrics/smith-waterman.js
424428
"""
429+
425430
def __init__(self, gap_cost=1.0, sim_func=None, qval=1, external=True):
426431
self.qval = qval
427432
self.gap_cost = gap_cost
@@ -464,6 +469,7 @@ class Gotoh(NeedlemanWunsch):
464469
penalties:
465470
https://www.cs.umd.edu/class/spring2003/cmsc838t/papers/gotoh1982.pdf
466471
"""
472+
467473
def __init__(self, gap_open=1, gap_ext=0.4, sim_func=None, qval=1, external=True):
468474
self.qval = qval
469475
self.gap_open = gap_open
@@ -687,6 +693,7 @@ class MLIPNS(_BaseSimilarity):
687693
http://www.sial.iias.spb.su/files/386-386-1-PB.pdf
688694
https://github.com/Yomguithereal/talisman/blob/master/src/metrics/mlipns.js
689695
"""
696+
690697
def __init__(self, threshold=0.25, maxmismatches=2, qval=1, external=True):
691698
self.qval = qval
692699
self.threshold = threshold

textdistance/libraries.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
{
22
"DamerauLevenshtein": [
3+
[
4+
"rapidfuzz.distance.DamerauLevenshtein",
5+
"distance"
6+
],
37
[
48
"jellyfish",
59
"damerau_levenshtein_distance"
@@ -18,6 +22,10 @@
1822
"Levenshtein",
1923
"hamming"
2024
],
25+
[
26+
"rapidfuzz.distance.hamming",
27+
"distance"
28+
],
2129
[
2230
"jellyfish",
2331
"hamming_distance"
@@ -32,6 +40,10 @@
3240
]
3341
],
3442
"Jaro": [
43+
[
44+
"rapidfuzz.distance.Jaro",
45+
"similarity"
46+
],
3547
[
3648
"Levenshtein",
3749
"jaro"
@@ -46,12 +58,20 @@
4658
]
4759
],
4860
"JaroWinkler": [
61+
[
62+
"rapidfuzz.distance.JaroWinkler",
63+
"similarity"
64+
],
4965
[
5066
"jellyfish",
5167
"jaro_winkler_similarity"
5268
]
5369
],
5470
"Levenshtein": [
71+
[
72+
"rapidfuzz.distance.Levenshtein",
73+
"distance"
74+
],
5575
[
5676
"Levenshtein",
5777
"distance"

textdistance/libraries.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,18 +154,23 @@ class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
154154
prototype.register('DamerauLevenshtein', LibraryBase('abydos.distance', 'DamerauLevenshtein'))
155155
prototype.register('DamerauLevenshtein', LibraryBase('pyxdameraulevenshtein', 'damerau_levenshtein_distance'))
156156
prototype.register('DamerauLevenshtein', TextLibrary('jellyfish', 'damerau_levenshtein_distance'))
157+
prototype.register('DamerauLevenshtein', LibraryBase('rapidfuzz.distance.DamerauLevenshtein', 'distance'))
157158

158159
prototype.register('Hamming', LibraryBase('abydos.distance', 'Hamming'))
159160
prototype.register('Hamming', SameLengthLibrary('distance', 'hamming'))
160161
prototype.register('Hamming', SameLengthTextLibrary('Levenshtein', 'hamming'))
161162
prototype.register('Hamming', TextLibrary('jellyfish', 'hamming_distance'))
163+
prototype.register('Hamming', SameLengthLibrary('rapidfuzz.distance.Hamming', 'distance'))
162164

163165
prototype.register('Jaro', TextLibrary('jellyfish', 'jaro_similarity'))
166+
prototype.register('Jaro', LibraryBase('rapidfuzz.distance.Jaro', 'similarity'))
164167
# prototype.register('Jaro', TextLibrary('Levenshtein', 'jaro'))
165168
# prototype.register('Jaro', TextLibrary('py_stringmatching.similarity_measure.jaro', 'jaro'))
166169

167170
# prototype.register('JaroWinkler', LibraryBase('py_stringmatching.similarity_measure.jaro_winkler', 'jaro_winkler'))
168171
prototype.register('JaroWinkler', TextLibrary('jellyfish', 'jaro_winkler_similarity', conditions=dict(winklerize=True)))
172+
prototype.register('JaroWinkler', LibraryBase('rapidfuzz.distance.JaroWinkler', 'similarity',
173+
conditions=dict(winklerize=True)))
169174
# https://github.com/life4/textdistance/issues/39
170175
# prototype.register('JaroWinkler', TextLibrary('Levenshtein', 'jaro_winkler', conditions=dict(winklerize=True)))
171176

@@ -174,4 +179,5 @@ class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
174179
prototype.register('Levenshtein', LibraryBase('pylev', 'levenshtein'))
175180
prototype.register('Levenshtein', TextLibrary('jellyfish', 'levenshtein_distance'))
176181
prototype.register('Levenshtein', TextLibrary('Levenshtein', 'distance'))
182+
prototype.register('Levenshtein', LibraryBase('rapidfuzz.distance.Levenshtein', 'distance'))
177183
# prototype.register('Levenshtein', TextLibrary('py_stringmatching.similarity_measure.levenshtein', 'levenshtein'))

0 commit comments

Comments
 (0)