Skip to content

Commit c6eebb7

Browse files
committed
fix incorrect ref counting
1 parent 74e41ad commit c6eebb7

File tree

8 files changed

+1149
-1130
lines changed

8 files changed

+1149
-1130
lines changed

.github/workflows/pythonbuild.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
- name: Run Unit Tests
4444
run: |
4545
pip install .
46-
pip install pytest hypothesis
46+
pip install pytest hypothesis pandas
4747
pytest
4848
4949

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.1.1
1+
1.1.2

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ def build_extensions(self):
3838
elif ct == 'msvc':
3939
opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version())
4040
for ext in self.extensions:
41-
ext.extra_compile_args = opts
42-
ext.extra_link_args = link_opts
41+
ext.extra_compile_args += opts
42+
ext.extra_link_args += link_opts
4343
build_ext.build_extensions(self)
4444

4545
setup(

src/cpp_process.cpp

Lines changed: 1124 additions & 1121 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cpp_process.pyx

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ from cpython.list cimport PyList_New
1111
from cpython.list cimport PyList_SET_ITEM
1212
from cpython.object cimport PyObject
1313
from cpython.ref cimport Py_INCREF
14+
from cpython.ref cimport Py_DECREF
1415

1516
import heapq
1617

@@ -358,6 +359,10 @@ cdef inline extract_dict(scorer_context context, choices, processor, size_t limi
358359
score = context.scorer(context.context, choice, score_cutoff)
359360

360361
if score >= score_cutoff:
362+
# especially the key object might be created on the fly by e.g. pandas.DataFrame
363+
# so we need to ensure Python does not deallocate it
364+
Py_INCREF(choice)
365+
Py_INCREF(choice_key)
361366
results.push_back(DictMatchElem(score, i, <PyObject*>choice, <PyObject*>choice_key))
362367
index += 1
363368

@@ -379,10 +384,15 @@ cdef inline extract_dict(scorer_context context, choices, processor, size_t limi
379384
# https://stackoverflow.com/questions/43553763/cythonize-list-of-all-splits-of-a-string/43557675#43557675
380385
PyList_SET_ITEM(result_list, i,
381386
<object>Py_BuildValue("OdO",
382-
<PyObject*>choices[<object>results[i].key],
387+
<PyObject*>results[i].choice,
383388
results[i].score,
384389
<PyObject*>results[i].key))
385390

391+
# release the references to choice and key that were acquired while collecting results
392+
for i in range(results.size()):
393+
Py_DECREF(<object>results[i].choice)
394+
Py_DECREF(<object>results[i].key)
395+
386396
return result_list
387397

388398

@@ -393,7 +403,7 @@ cdef inline extract_list(scorer_context context, choices, processor, size_t limi
393403
# TODO: a smaller vector would possibly be good to reduce memory usage
394404
cdef vector[ListMatchElem] results
395405
results.reserve(<size_t>len(choices))
396-
cdef object result_list
406+
cdef list result_list
397407

398408
if processor is not None:
399409
for choice in choices:
@@ -751,4 +761,3 @@ def extract_iter(query, choices, scorer=fuzz.WRatio, processor=utils.default_pro
751761
if py_score >= score_cutoff:
752762
yield(choice, py_score, index)
753763
index += 1
754-

src/rapidfuzz-cpp

Submodule rapidfuzz-cpp updated from ea6f17d to 91f20cd

src/rapidfuzz/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
"""
44
__author__ = "Max Bachmann"
55
__license__ = "MIT"
6-
__version__ = "1.1.1"
6+
__version__ = "1.1.2"
77

88
from rapidfuzz import process, fuzz, utils, levenshtein, string_metric

tests/test_process.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66

77
from rapidfuzz import process, fuzz, utils
8+
import pandas as pd
89

910
class ProcessTest(unittest.TestCase):
1011
def setUp(self):
@@ -187,6 +188,12 @@ def testNullStrings(self):
187188
best = process.extractOne(query, choices)
188189
self.assertEqual(best[0], choices[1])
189190

191+
def testIssue81(self):
192+
# this mostly tests whether this segfaults due to incorrect ref counting
193+
choices = pd.Series(['test color brightness', 'test lemon', 'test lavender'], index=[67478, 67479, 67480])
194+
matches = process.extract("test", choices)
195+
assert matches == [('test color brightness', 90.0, 67478), ('test lemon', 90.0, 67479), ('test lavender', 90.0, 67480)]
196+
190197

191198
def custom_scorer(s1, s2, processor=None, score_cutoff=0):
192199
return fuzz.ratio(s1, s2, processor=processor, score_cutoff=score_cutoff)

0 commit comments

Comments
 (0)