Skip to content

Commit b1f4b55

Browse files
committed
fix more issues with new rapidfuzz-cpp
1 parent 322adfc commit b1f4b55

File tree

8 files changed

+16
-7
lines changed

8 files changed

+16
-7
lines changed

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Performance
1515
Changed
1616
~~~~~~~
1717
* upgrade to ``Cython==3.0.7``
18+
* cdist for many metrics now returns a matrix of ``uint32`` instead of ``int32`` by default
1819

1920
[3.5.2] - 2023-11-02
2021
^^^^^^^^^^^^^^^^^^^^

src/rapidfuzz/_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,22 +20,23 @@
2020
class ScorerFlag(IntFlag):
2121
RESULT_F64 = 1 << 5
2222
RESULT_I64 = 1 << 6
23+
RESULT_SIZE_T = 1 << 7
2324
SYMMETRIC = 1 << 11
2425

2526

2627
def _get_scorer_flags_distance(**_kwargs: Any) -> dict[str, Any]:
2728
return {
2829
"optimal_score": 0,
2930
"worst_score": 2**63 - 1,
30-
"flags": ScorerFlag.RESULT_I64 | ScorerFlag.SYMMETRIC,
31+
"flags": ScorerFlag.RESULT_SIZE_T | ScorerFlag.SYMMETRIC,
3132
}
3233

3334

3435
def _get_scorer_flags_similarity(**_kwargs: Any) -> dict[str, Any]:
3536
return {
3637
"optimal_score": 2**63 - 1,
3738
"worst_score": 0,
38-
"flags": ScorerFlag.RESULT_I64 | ScorerFlag.SYMMETRIC,
39+
"flags": ScorerFlag.RESULT_SIZE_T | ScorerFlag.SYMMETRIC,
3940
}
4041

4142

src/rapidfuzz/distance/metrics_py.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@
196196

197197

198198
def _get_scorer_flags_levenshtein_distance(weights: tuple[int, int, int] | None = (1, 1, 1)) -> dict[str, Any]:
199-
flags = ScorerFlag.RESULT_I64
199+
flags = ScorerFlag.RESULT_SIZE_T
200200
if weights is None or weights[0] == weights[1]:
201201
flags |= ScorerFlag.SYMMETRIC
202202

@@ -208,7 +208,7 @@ def _get_scorer_flags_levenshtein_distance(weights: tuple[int, int, int] | None
208208

209209

210210
def _get_scorer_flags_levenshtein_similarity(weights: tuple[int, int, int] | None = (1, 1, 1)) -> dict[str, Any]:
211-
flags = ScorerFlag.RESULT_I64
211+
flags = ScorerFlag.RESULT_SIZE_T
212212
if weights is None or weights[0] == weights[1]:
213213
flags |= ScorerFlag.SYMMETRIC
214214

src/rapidfuzz/process_cpp.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ struct ExtractComp {
7474
if (m_scorer_flags->flags & RF_SCORER_FLAG_RESULT_F64) {
7575
return is_first(a, b, m_scorer_flags->optimal_score.f64, m_scorer_flags->worst_score.f64);
7676
}
77+
if (m_scorer_flags->flags & RF_SCORER_FLAG_RESULT_SIZE_T) {
78+
return is_first(a, b, m_scorer_flags->optimal_score.sizet, m_scorer_flags->worst_score.sizet);
79+
}
7780
else {
7881
return is_first(a, b, m_scorer_flags->optimal_score.i64, m_scorer_flags->worst_score.i64);
7982
}

src/rapidfuzz/process_cpp_impl.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,6 +1727,8 @@ cdef inline MatrixType dtype_to_type_num_py(dtype, scorer, dict scorer_kwargs) e
17271727
flags = params["get_scorer_flags"](**scorer_kwargs)
17281728
if <int>flags["flags"] & RF_SCORER_FLAG_RESULT_I64:
17291729
return MatrixType.INT32
1730+
if <int>flags["flags"] & RF_SCORER_FLAG_RESULT_SIZE_T:
1731+
return MatrixType.UINT32
17301732
return MatrixType.FLOAT32
17311733

17321734
return MatrixType.FLOAT32

src/rapidfuzz/process_py.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,8 @@ def _dtype_to_type_num(
527527
flags = params["get_scorer_flags"](**scorer_kwargs)
528528
if flags["flags"] & ScorerFlag.RESULT_I64:
529529
return np.int32
530+
if flags["flags"] & ScorerFlag.RESULT_SIZE_T:
531+
return np.uint32
530532
return np.float32
531533

532534
return np.float32

src/rapidfuzz/rapidfuzz.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ typedef bool (*RF_ScorerFuncInit)(RF_ScorerFunc* self, const RF_Kwargs* kwargs,
156156
#define RF_SCORER_FLAG_RESULT_I64 ((uint32_t)1 << 6)
157157

158158
/* scorer returns result as size_t */
159-
#define RF_SCORER_FLAG_RESULT_SIZE_T ((uint32_t)1 << 6)
159+
#define RF_SCORER_FLAG_RESULT_SIZE_T ((uint32_t)1 << 7)
160160

161161
/* scorer is symmetric: scorer(a, b) == scorer(b, a) */
162162
#define RF_SCORER_FLAG_SYMMETRIC ((uint32_t)1 << 11)

tests/test_process.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ def generate_choices():
510510

511511
def test_cdist_pure_python_dtype():
512512
np = pytest.importorskip("numpy")
513-
assert process.cdist(["test"], ["test"], scorer=Levenshtein_py.distance).dtype == np.int32
514-
assert process.cdist(["test"], ["test"], scorer=Levenshtein_py.similarity).dtype == np.int32
513+
assert process.cdist(["test"], ["test"], scorer=Levenshtein_py.distance).dtype == np.uint32
514+
assert process.cdist(["test"], ["test"], scorer=Levenshtein_py.similarity).dtype == np.uint32
515515
assert process.cdist(["test"], ["test"], scorer=Levenshtein_py.normalized_distance).dtype == np.float32
516516
assert process.cdist(["test"], ["test"], scorer=Levenshtein_py.normalized_similarity).dtype == np.float32

0 commit comments

Comments
 (0)