
Commit 1749f9b

Merge pull request #307 from tkhshtsh0917/feat/306-pyo3-27
update pyo3 to v0.27 for py314, py314t support
2 parents: 54e85e8 + 94802b1

17 files changed (+56, -55 lines)


.github/workflows/python-upload-test.yml

Lines changed: 11 additions & 6 deletions
@@ -105,14 +105,19 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest, macOS-latest]
-        target: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"]
+        target: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"]
         include:
           - os: "ubuntu-latest"
             target: "sdist"
             python-version: "3.13"
+          - os: "ubuntu-latest"
+            target: "sdist"
+            python-version: "3.14"
         exclude:
           - os: "windows-latest"
             target: "3.13t"
+          - os: "windows-latest"
+            target: "3.14t"

     runs-on: ${{ matrix.os }}
     steps:
@@ -136,17 +141,17 @@ jobs:
         # this must be after sudachipy install
         run: python -m pip install sudachidict_core
       - name: Install dependencies (test pretokenizer)
-        # tokenizers for py3.13t is not provided yet
-        if: ${{ matrix.target != '3.13t' }}
+        # tokenizers for py3.13t, py3.14, py3.14t are not provided yet
+        if: ${{ matrix.target != '3.13t' && matrix.target != '3.14' && matrix.target != '3.14t' }}
         run: python -m pip install tokenizers

       - name: Run test
-        if: ${{ matrix.target != '3.13t' }}
+        if: ${{ matrix.target != '3.13t' && matrix.target != '3.14' && matrix.target != '3.14t' }}
         working-directory: ./python
         run: python -m unittest
       - name: Run test (skip pretokenizer test)
-        # tokenizers for py3.13t is not provided yet
-        if: ${{ matrix.target == '3.13t' }}
+        # tokenizers for py3.13t, py3.14, py3.14t are not provided yet
+        if: ${{ matrix.target == '3.13t' || matrix.target == '3.14' || matrix.target == '3.14t' }}
         working-directory: ./python
         run: ls tests/test_*.py | grep -v pretokenizer | xargs -I{} python -m unittest {}
       - name: Check that binary works (C mode)

Cargo.lock

Lines changed: 12 additions & 14 deletions
Some generated files are not rendered by default.

python/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ name = "sudachipy"
 crate-type = ["cdylib"]

 [dependencies]
-pyo3 = { version = "0.23", features = ["extension-module"] }
+pyo3 = { version = "0.27", features = ["extension-module"] }
 scopeguard = "1" # Apache 2.0/MIT
 thread_local = "1.1" # Apache 2.0/MIT

python/py_src/sudachipy/sudachipy.pyi

Lines changed: 1 addition & 1 deletion
@@ -252,7 +252,7 @@ class Morpheme:
         Returns sub-morphemes in the provided split mode.

         :param mode: mode of new split.
-        :param out: write results to this MorhpemeList instead of creating new one.
+        :param out: write results to this MorphemeList instead of creating new one.
             See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for
             more information on output parameters.
             Returned MorphemeList will be invalidated if this MorphemeList is used as an output parameter.

python/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 requires = ["setuptools", "wheel", "setuptools-rust"]

 [tool.cibuildwheel]
-build = "cp39-* cp310-* cp311-* cp312-* cp313-* cp313t-*"
+build = "cp39-* cp310-* cp311-* cp312-* cp313-* cp313t-* cp314-* cp314t-*"
 skip = "*t-win* *-win32 *-musllinux_*"
 enable = ["cpython-freethreading"]

python/src/build.rs

Lines changed: 4 additions & 6 deletions
@@ -58,7 +58,7 @@ fn create_file(p: &Path) -> std::io::Result<File> {
 ///
 /// :param matrix: Path to the matrix file.
 /// :param lex: List of paths to lexicon files.
-/// :param output: Path to output built dictionray.
+/// :param output: Path to output built dictionary.
 /// :param description: A description text to embed in the dictionary.
 /// :return: A build report, list of (part, size, time).
 ///
@@ -107,7 +107,7 @@ fn build_system_dic<'py>(
 ///
 /// :param system: Path to the system dictionary.
 /// :param lex: List of paths to lexicon files.
-/// :param output: Path to output built dictionray.
+/// :param output: Path to output built dictionary.
 /// :param description: A description text to embed in the dictionary.
 /// :return: A build report, list of (part, size, time).
 ///
@@ -168,7 +168,7 @@ fn resolve_as_pypathstr<'py>(
     data: &Bound<'py, PyAny>,
 ) -> PyResult<Option<Bound<'py, PyString>>> {
     let binding = py.import("pathlib")?.getattr("Path")?;
-    let path = binding.downcast::<PyType>()?;
+    let path = binding.cast::<PyType>()?;
     if data.is_instance(path)? {
         Ok(Some(data.call_method0("resolve")?.str()?))
     } else if data.is_instance_of::<PyString>() {
@@ -186,9 +186,7 @@ fn as_data_source<'py>(
         Some(pystr) => Ok(DataSource::File(Path::new(pystr.to_str()?))),
         None => {
             if original_obj.is_instance_of::<PyBytes>() {
-                Ok(DataSource::Data(
-                    original_obj.downcast::<PyBytes>()?.as_bytes(),
-                ))
+                Ok(DataSource::Data(original_obj.cast::<PyBytes>()?.as_bytes()))
             } else {
                 errors::wrap(Err(format!(
                     "data source should be only Path, bytes or str, was {}: {}",

python/src/dictionary.rs

Lines changed: 5 additions & 5 deletions
@@ -47,7 +47,7 @@ pub(crate) struct PyDicData {
     pub(crate) pos: Vec<Py<PyTuple>>,
     /// Compute default string representation for a morpheme using vtable dispatch.
     /// None by default (if outputting surface as it is)
-    /// This is default per-dictionary value, can be overriden when creating tokenizers and pre-tokenizers
+    /// This is default per-dictionary value, can be overridden when creating tokenizers and pre-tokenizers
     pub(crate) projection: PyProjector,
 }

@@ -430,7 +430,7 @@ impl PyDictionary {
     ///
     /// :type pos_id: int
     #[pyo3(text_signature = "(self, /, pos_id: int) -> tuple[str, str, str, str, str, str] | None")]
-    fn pos_of<'py>(&'py self, py: Python<'py>, pos_id: usize) -> Option<&Bound<'py, PyTuple>> {
+    fn pos_of<'py>(&'py self, py: Python<'py>, pos_id: usize) -> Option<&'py Bound<'py, PyTuple>> {
         let dic = self.dictionary.as_ref().unwrap();
         dic.pos.get(pos_id).map(|x| x.bind(py))
     }
@@ -516,21 +516,21 @@ fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {

 pub(crate) fn read_default_config(py: Python) -> PyResult<ConfigBuilder> {
     let path = py.import("sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?;
-    let path = path.downcast::<PyString>()?.to_str()?;
+    let path = path.cast::<PyString>()?.to_str()?;
     let path = PathBuf::from(path);
     errors::wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path)
 }

 pub(crate) fn get_default_resource_dir(py: Python) -> PyResult<PathBuf> {
     let path = py.import("sudachipy")?.getattr("_DEFAULT_RESOURCEDIR")?;
-    let path = path.downcast::<PyString>()?.to_str()?;
+    let path = path.cast::<PyString>()?.to_str()?;
     Ok(PathBuf::from(path))
 }

 fn find_dict_path(py: Python, dict_type: &str) -> PyResult<PathBuf> {
     let pyfunc = py.import("sudachipy")?.getattr("_find_dict_path")?;
     let path = pyfunc.call1((dict_type,))?;
-    let path = path.downcast::<PyString>()?.to_str()?;
+    let path = path.cast::<PyString>()?.to_str()?;
     Ok(PathBuf::from(path))
 }
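Besides the cast renames, pos_of now spells out the 'py lifetime on the reference it returns. A minimal sketch of that pattern (CachedTuples is a hypothetical type, not part of this repository): a struct owning GIL-independent Py handles that hands out references bound to the current Python token via Py::bind.

use pyo3::prelude::*;
use pyo3::types::PyTuple;

/// Hypothetical container owning GIL-independent tuple handles.
struct CachedTuples {
    items: Vec<Py<PyTuple>>,
}

impl CachedTuples {
    /// The explicit 'py on the returned reference mirrors the updated PyDictionary::pos_of signature.
    fn get<'py>(&'py self, py: Python<'py>, idx: usize) -> Option<&'py Bound<'py, PyTuple>> {
        self.items.get(idx).map(|x| x.bind(py))
    }
}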

python/src/morpheme.rs

Lines changed: 5 additions & 5 deletions
@@ -335,7 +335,7 @@ impl PyMorpheme {

     /// Returns the dictionary form.
     #[pyo3(text_signature = "(self, /) -> str")]
-    fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
+    fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyString>> {
         Ok(self
             .morph(py)
             .get_word_info()
@@ -345,7 +345,7 @@ impl PyMorpheme {

     /// Returns the normalized form.
     #[pyo3(text_signature = "(self, /) -> str")]
-    fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
+    fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyString>> {
         Ok(self
             .morph(py)
             .get_word_info()
@@ -355,7 +355,7 @@ impl PyMorpheme {

     /// Returns the reading form.
     #[pyo3(text_signature = "(self, /) -> str")]
-    fn reading_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
+    fn reading_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyString>> {
         Ok(self
             .morph(py)
             .get_word_info()
@@ -366,7 +366,7 @@ impl PyMorpheme {
     /// Returns sub-morphemes in the provided split mode.
     ///
     /// :param mode: mode of new split.
-    /// :param out: write results to this MorhpemeList instead of creating new one.
+    /// :param out: write results to this MorphemeList instead of creating new one.
     ///     See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for
     ///     more information on output parameters.
     ///     Returned MorphemeList will be invalidated if this MorphemeList is used as an output parameter.
@@ -444,7 +444,7 @@ impl PyMorpheme {

     /// Returns the list of synonym group ids.
     #[pyo3(text_signature = "(self, /) -> List[int]")]
-    fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyList>> {
+    fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyList>> {
         let mref = self.morph(py);
         let ids = mref.get_word_info().synonym_group_ids();
         PyList::new(py, ids)
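All of the PyMorpheme accessors above follow one pattern: the 'py lifetime is now written out on the returned Bound, and PyList::new already yields a PyResult in recent pyo3, so it can be the tail expression. A small sketch of both signatures on a made-up pyclass (Sample is illustrative, not part of SudachiPy):

use pyo3::prelude::*;
use pyo3::types::{PyList, PyString};

/// Hypothetical pyclass used only to illustrate the updated signatures.
#[pyclass]
struct Sample {
    surface: String,
    group_ids: Vec<u32>,
}

#[pymethods]
impl Sample {
    #[pyo3(text_signature = "(self, /) -> str")]
    fn surface_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyString>> {
        Ok(PyString::new(py, &self.surface))
    }

    #[pyo3(text_signature = "(self, /) -> List[int]")]
    fn synonym_groups<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyList>> {
        // PyList::new is fallible and returns PyResult, matching the declared return type.
        PyList::new(py, self.group_ids.iter().copied())
    }
}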

python/src/pos_matcher.rs

Lines changed: 3 additions & 3 deletions
@@ -53,7 +53,7 @@ impl PyPosMatcher {
     fn create_from_fn(dic: &Arc<PyDicData>, func: &Bound<PyAny>) -> PyResult<Self> {
         let mut data = Vec::new();
         for (pos_id, pos) in dic.pos.iter().enumerate() {
-            if func.call1((pos,))?.downcast::<PyBool>()?.is_true() {
+            if func.call1((pos,))?.cast::<PyBool>()?.is_true() {
                 data.push(pos_id as u16);
             }
         }
@@ -67,7 +67,7 @@ impl PyPosMatcher {
         let mut result = Vec::new();
         for item in data {
             let item = item?;
-            let item = item.downcast::<PyTuple>()?;
+            let item = item.cast::<PyTuple>()?;
             Self::match_pos_elements(&mut result, dic.as_ref(), item)?;
         }
         Ok(Self {
@@ -232,7 +232,7 @@ impl PyPosIter {
         slf
     }

-    fn __next__<'py>(&'py mut self, py: Python<'py>) -> Option<&Bound<'py, PyTuple>> {
+    fn __next__<'py>(&'py mut self, py: Python<'py>) -> Option<&'py Bound<'py, PyTuple>> {
         let idx = self.index;
         self.index += 1;
         if idx >= self.data.len() {
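The predicate branch shows the same rename applied to the result of calling back into Python. A small sketch of that calling pattern (keep_item is an illustrative helper, not from this commit):

use pyo3::prelude::*;
use pyo3::types::PyBool;

/// Hypothetical filter: call a Python predicate with one argument and read its bool result.
fn keep_item(func: &Bound<'_, PyAny>, item: &Bound<'_, PyAny>) -> PyResult<bool> {
    // call1 passes a tuple of arguments; cast checks that the returned object really is a bool.
    Ok(func.call1((item,))?.cast::<PyBool>()?.is_true())
}

As in create_from_fn, a predicate returning something other than an actual bool surfaces as a cast error instead of being silently truth-tested.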

python/src/pretokenizer.rs

Lines changed: 4 additions & 4 deletions
@@ -19,7 +19,7 @@ use std::sync::Arc;

 use pyo3::intern;
 use pyo3::prelude::*;
-use pyo3::sync::GILOnceCell;
+use pyo3::sync::PyOnceLock;
 use pyo3::types::{PyList, PySlice, PyType};
 use thread_local::ThreadLocal;

@@ -138,7 +138,7 @@ impl PyPretokenizer {
         let pystr = string.str()?;
         let input_data = pystr.to_str()?;
         // tokenization itself should work without GIL, we have thread-local tokenizers here
-        py.allow_threads(|| self.tokenizer_cell().borrow_mut().tokenize(input_data))?;
+        py.detach(|| self.tokenizer_cell().borrow_mut().tokenize(input_data))?;
         // then prepare results with GIL
         self.tokenizer_cell().borrow_mut().collect_results(py)?;
         let cell = self.tokenizer_cell().borrow();
@@ -191,10 +191,10 @@ fn make_result_for_projection<'py>(
 ) -> PyResult<Bound<'py, PyList>> {
     let result = PyList::empty(py);
     let nstring = {
-        static NORMALIZED_STRING: GILOnceCell<Py<PyType>> = GILOnceCell::new();
+        static NORMALIZED_STRING: PyOnceLock<Py<PyType>> = PyOnceLock::new();
         NORMALIZED_STRING.get_or_try_init(py, || -> PyResult<Py<PyType>> {
             let ns = py.import("tokenizers")?.getattr("NormalizedString")?;
-            let tpe = ns.downcast::<PyType>()?;
+            let tpe = ns.cast::<PyType>()?;
             Ok(tpe.clone().unbind())
         })?
     };
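Two more 0.27 renames appear in this file: GILOnceCell becomes PyOnceLock and Python::allow_threads becomes Python::detach. A rough sketch of both, assuming the tokenizers package is importable at runtime (the helper names below are illustrative, not from this commit):

use pyo3::prelude::*;
use pyo3::sync::PyOnceLock;
use pyo3::types::PyType;

/// Lazily resolve and cache the tokenizers.NormalizedString class.
fn normalized_string_type<'py>(py: Python<'py>) -> PyResult<&'py Bound<'py, PyType>> {
    static NORMALIZED_STRING: PyOnceLock<Py<PyType>> = PyOnceLock::new();
    let tpe = NORMALIZED_STRING.get_or_try_init(py, || -> PyResult<Py<PyType>> {
        let ns = py.import("tokenizers")?.getattr("NormalizedString")?;
        Ok(ns.cast::<PyType>()?.clone().unbind())
    })?;
    Ok(tpe.bind(py))
}

/// Run a CPU-bound closure with the interpreter released; detach replaces allow_threads.
fn heavy_work(py: Python<'_>, input: &str) -> usize {
    py.detach(|| input.chars().count())
}

The new names also read more naturally on the free-threaded (3.13t/3.14t) builds this commit targets, where "releasing the GIL" no longer describes what actually happens.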
