Skip to content

Commit 426fbb2

Browse files
authored
implement process.extractOne in C++ (#53)
* start to simplify complexion * start implementation * add extractOne to C++ * fix a couple of bugs in the implementation * start addressing performance issues
1 parent eee513f commit 426fbb2

File tree

15 files changed

+825
-237
lines changed

15 files changed

+825
-237
lines changed

.github/workflows/pythonbuild.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ on:
99

1010
jobs:
1111
test_python:
12-
name: run linting, tests and benchmarks for the python module
13-
runs-on: ubuntu-latest
12+
name: linting and tests on Python ${{ matrix.python-version }}
13+
runs-on: ubuntu-18.04
1414
strategy:
1515
matrix:
16-
python-version: [2.7, 3.5, 3.6, 3.7, 3.8]
16+
python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9]
1717

1818
steps:
1919
- uses: actions/checkout@v2
@@ -41,7 +41,7 @@ jobs:
4141
- name: Run Unit Tests
4242
run: |
4343
pip install .
44-
pip install pytest
44+
pip install pytest hypothesis
4545
pytest
4646
4747

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,7 @@ site/
1515

1616
# benchmark results
1717
bench_results/
18+
19+
# Hypothesis results
20+
.hypothesis/
21+

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.12.5
1+
0.13.0

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class BuildExt(build_ext):
1717
"""A custom build extension for adding compiler-specific options."""
1818
c_opts = {
1919
'msvc': ['/EHsc', '/O2', '/std:c++14'],
20-
'unix': ['-O3', '-std=c++14'],
20+
'unix': ['-O3', '-std=c++14', '-Wextra', '-Wall'],
2121
}
2222
l_opts = {
2323
'msvc': [],

src/py2_utils.hpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,14 @@ bool valid_str(PyObject* str, const char* name)
2121
Py_InitModule3(#name, methods, doc); \
2222
}
2323

24+
using python_string =
25+
mpark::variant<std::basic_string<uint8_t>, std::basic_string<Py_UNICODE>,
26+
rapidfuzz::basic_string_view<uint8_t>, rapidfuzz::basic_string_view<Py_UNICODE>>;
27+
2428
using python_string_view =
2529
mpark::variant<rapidfuzz::basic_string_view<uint8_t>, rapidfuzz::basic_string_view<Py_UNICODE>>;
2630

27-
python_string_view decode_python_string(PyObject* py_str)
31+
python_string decode_python_string(PyObject* py_str)
2832
{
2933
if (PyObject_TypeCheck(py_str, &PyString_Type)) {
3034
Py_ssize_t len = PyString_GET_SIZE(py_str);
@@ -38,12 +42,27 @@ python_string_view decode_python_string(PyObject* py_str)
3842
}
3943
}
4044

41-
PyObject* encode_python_string(std::basic_string<uint8_t> str)
45+
python_string_view decode_python_string_view(PyObject* py_str)
46+
{
47+
if (PyObject_TypeCheck(py_str, &PyString_Type)) {
48+
Py_ssize_t len = PyString_GET_SIZE(py_str);
49+
uint8_t* str = reinterpret_cast<uint8_t*>(PyString_AS_STRING(py_str));
50+
return rapidfuzz::basic_string_view<uint8_t>(str, len);
51+
}
52+
else {
53+
Py_ssize_t len = PyUnicode_GET_SIZE(py_str);
54+
Py_UNICODE* str = PyUnicode_AS_UNICODE(py_str);
55+
return rapidfuzz::basic_string_view<Py_UNICODE>(str, len);
56+
}
57+
}
58+
59+
60+
PyObject* encode_python_string(rapidfuzz::basic_string_view<uint8_t> str)
4261
{
4362
return PyString_FromStringAndSize(reinterpret_cast<const char*>(str.data()), str.size());
4463
}
4564

46-
PyObject* encode_python_string(std::basic_string<Py_UNICODE> str)
65+
PyObject* encode_python_string(rapidfuzz::basic_string_view<Py_UNICODE> str)
4766
{
4867
return PyUnicode_FromUnicode(str.data(), str.size());
4968
}

src/py3_utils.hpp

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,16 @@
66
#include "details/types.hpp"
77
#include <variant/variant.hpp>
88

9-
// PEP 623 deprecates legacy strings and therefore
10-
// deprecates e.g. PyUnicode_READY in Python 3.10
11-
#if PY_VERSION_HEX < 0x030A0000
12-
#define PY_BELOW_3_10
13-
#endif
14-
159
bool valid_str(PyObject* str, const char* name)
1610
{
1711
if (!PyUnicode_Check(str)) {
1812
PyErr_Format(PyExc_TypeError, "%s must be a String or None", name);
1913
return false;
2014
}
2115

22-
#ifdef PY_BELOW_3_10
16+
// PEP 623 deprecates legacy strings and therefore
17+
// deprecates e.g. PyUnicode_READY in Python 3.10
18+
#if PY_VERSION_HEX < PYTHON_VERSION(3,10,0)
2319
if (PyUnicode_READY(str)) {
2420
return false;
2521
}
@@ -36,11 +32,31 @@ bool valid_str(PyObject* str, const char* name)
3632
return PyModule_Create(&moduledef); \
3733
}
3834

35+
using python_string =
36+
mpark::variant<std::basic_string<uint8_t>, std::basic_string<uint16_t>, std::basic_string<uint32_t>,
37+
rapidfuzz::basic_string_view<uint8_t>, rapidfuzz::basic_string_view<uint16_t>,
38+
rapidfuzz::basic_string_view<uint32_t>>;
39+
3940
using python_string_view =
4041
mpark::variant<rapidfuzz::basic_string_view<uint8_t>, rapidfuzz::basic_string_view<uint16_t>,
4142
rapidfuzz::basic_string_view<uint32_t>>;
4243

43-
python_string_view decode_python_string(PyObject* py_str)
44+
python_string decode_python_string(PyObject* py_str)
45+
{
46+
Py_ssize_t len = PyUnicode_GET_LENGTH(py_str);
47+
void* str = PyUnicode_DATA(py_str);
48+
49+
switch (PyUnicode_KIND(py_str)) {
50+
case PyUnicode_1BYTE_KIND:
51+
return rapidfuzz::basic_string_view<uint8_t>(static_cast<uint8_t*>(str), len);
52+
case PyUnicode_2BYTE_KIND:
53+
return rapidfuzz::basic_string_view<uint16_t>(static_cast<uint16_t*>(str), len);
54+
default:
55+
return rapidfuzz::basic_string_view<uint32_t>(static_cast<uint32_t*>(str), len);
56+
}
57+
}
58+
59+
python_string_view decode_python_string_view(PyObject* py_str)
4460
{
4561
Py_ssize_t len = PyUnicode_GET_LENGTH(py_str);
4662
void* str = PyUnicode_DATA(py_str);
@@ -55,17 +71,17 @@ python_string_view decode_python_string(PyObject* py_str)
5571
}
5672
}
5773

58-
PyObject* encode_python_string(std::basic_string<uint8_t> str)
74+
PyObject* encode_python_string(rapidfuzz::basic_string_view<uint8_t> str)
5975
{
6076
return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, str.data(), str.size());
6177
}
6278

63-
PyObject* encode_python_string(std::basic_string<uint16_t> str)
79+
PyObject* encode_python_string(rapidfuzz::basic_string_view<uint16_t> str)
6480
{
6581
return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, str.data(), str.size());
6682
}
6783

68-
PyObject* encode_python_string(std::basic_string<uint32_t> str)
84+
PyObject* encode_python_string(rapidfuzz::basic_string_view<uint32_t> str)
6985
{
7086
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, str.data(), str.size());
7187
}

0 commit comments

Comments
 (0)