
Commit 1749f9b

Merge pull request #307 from tkhshtsh0917/feat/306-pyo3-27
update pyo3 to v0.27 for py314, py314t support
2 parents: 54e85e8 + 94802b1

17 files changed (+56, -55 lines)


.github/workflows/python-upload-test.yml

Lines changed: 11 additions & 6 deletions
@@ -105,14 +105,19 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest, macOS-latest]
-        target: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"]
+        target: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"]
         include:
           - os: "ubuntu-latest"
             target: "sdist"
             python-version: "3.13"
+          - os: "ubuntu-latest"
+            target: "sdist"
+            python-version: "3.14"
         exclude:
           - os: "windows-latest"
             target: "3.13t"
+          - os: "windows-latest"
+            target: "3.14t"

     runs-on: ${{ matrix.os }}
     steps:
@@ -136,17 +141,17 @@ jobs:
         # this must be after sudachipy install
         run: python -m pip install sudachidict_core
       - name: Install dependencies (test pretokenizer)
-        # tokenizers for py3.13t is not provided yet
-        if: ${{ matrix.target != '3.13t' }}
+        # tokenizers for py3.13t, py3.14, py3.14t are not provided yet
+        if: ${{ matrix.target != '3.13t' && matrix.target != '3.14' && matrix.target != '3.14t' }}
         run: python -m pip install tokenizers

       - name: Run test
-        if: ${{ matrix.target != '3.13t' }}
+        if: ${{ matrix.target != '3.13t' && matrix.target != '3.14' && matrix.target != '3.14t' }}
         working-directory: ./python
         run: python -m unittest
       - name: Run test (skip pretokenizer test)
-        # tokenizers for py3.13t is not provided yet
-        if: ${{ matrix.target == '3.13t' }}
+        # tokenizers for py3.13t, py3.14, py3.14t are not provided yet
+        if: ${{ matrix.target == '3.13t' || matrix.target == '3.14' || matrix.target == '3.14t' }}
         working-directory: ./python
         run: ls tests/test_*.py | grep -v pretokenizer | xargs -I{} python -m unittest {}
       - name: Check that binary works (C mode)

Cargo.lock

Lines changed: 12 additions & 14 deletions
Some generated files are not rendered by default.

python/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ name = "sudachipy"
 crate-type = ["cdylib"]

 [dependencies]
-pyo3 = { version = "0.23", features = ["extension-module"] }
+pyo3 = { version = "0.27", features = ["extension-module"] }
 scopeguard = "1" # Apache 2.0/MIT
 thread_local = "1.1" # Apache 2.0/MIT

python/py_src/sudachipy/sudachipy.pyi

Lines changed: 1 addition & 1 deletion
@@ -252,7 +252,7 @@ class Morpheme:
         Returns sub-morphemes in the provided split mode.

         :param mode: mode of new split.
-        :param out: write results to this MorhpemeList instead of creating new one.
+        :param out: write results to this MorphemeList instead of creating new one.
             See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for
             more information on output parameters.
             Returned MorphemeList will be invalidated if this MorphemeList is used as an output parameter.

python/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 requires = ["setuptools", "wheel", "setuptools-rust"]

 [tool.cibuildwheel]
-build = "cp39-* cp310-* cp311-* cp312-* cp313-* cp313t-*"
+build = "cp39-* cp310-* cp311-* cp312-* cp313-* cp313t-* cp314-* cp314t-*"
 skip = "*t-win* *-win32 *-musllinux_*"
 enable = ["cpython-freethreading"]

python/src/build.rs

Lines changed: 4 additions & 6 deletions
@@ -58,7 +58,7 @@ fn create_file(p: &Path) -> std::io::Result<File> {
 ///
 /// :param matrix: Path to the matrix file.
 /// :param lex: List of paths to lexicon files.
-/// :param output: Path to output built dictionray.
+/// :param output: Path to output built dictionary.
 /// :param description: A description text to embed in the dictionary.
 /// :return: A build report, list of (part, size, time).
 ///
@@ -107,7 +107,7 @@ fn build_system_dic<'py>(
 ///
 /// :param system: Path to the system dictionary.
 /// :param lex: List of paths to lexicon files.
-/// :param output: Path to output built dictionray.
+/// :param output: Path to output built dictionary.
 /// :param description: A description text to embed in the dictionary.
 /// :return: A build report, list of (part, size, time).
 ///
@@ -168,7 +168,7 @@ fn resolve_as_pypathstr<'py>(
     data: &Bound<'py, PyAny>,
 ) -> PyResult<Option<Bound<'py, PyString>>> {
     let binding = py.import("pathlib")?.getattr("Path")?;
-    let path = binding.downcast::<PyType>()?;
+    let path = binding.cast::<PyType>()?;
     if data.is_instance(path)? {
         Ok(Some(data.call_method0("resolve")?.str()?))
     } else if data.is_instance_of::<PyString>() {
@@ -186,9 +186,7 @@ fn as_data_source<'py>(
         Some(pystr) => Ok(DataSource::File(Path::new(pystr.to_str()?))),
         None => {
             if original_obj.is_instance_of::<PyBytes>() {
-                Ok(DataSource::Data(
-                    original_obj.downcast::<PyBytes>()?.as_bytes(),
-                ))
+                Ok(DataSource::Data(original_obj.cast::<PyBytes>()?.as_bytes()))
             } else {
                 errors::wrap(Err(format!(
                     "data source should be only Path, bytes or str, was {}: {}",

python/src/dictionary.rs

Lines changed: 5 additions & 5 deletions
@@ -47,7 +47,7 @@ pub(crate) struct PyDicData {
     pub(crate) pos: Vec<Py<PyTuple>>,
     /// Compute default string representation for a morpheme using vtable dispatch.
     /// None by default (if outputting surface as it is)
-    /// This is default per-dictionary value, can be overriden when creating tokenizers and pre-tokenizers
+    /// This is default per-dictionary value, can be overridden when creating tokenizers and pre-tokenizers
     pub(crate) projection: PyProjector,
 }

@@ -430,7 +430,7 @@ impl PyDictionary {
     ///
     /// :type pos_id: int
     #[pyo3(text_signature = "(self, /, pos_id: int) -> tuple[str, str, str, str, str, str] | None")]
-    fn pos_of<'py>(&'py self, py: Python<'py>, pos_id: usize) -> Option<&Bound<'py, PyTuple>> {
+    fn pos_of<'py>(&'py self, py: Python<'py>, pos_id: usize) -> Option<&'py Bound<'py, PyTuple>> {
         let dic = self.dictionary.as_ref().unwrap();
         dic.pos.get(pos_id).map(|x| x.bind(py))
     }
@@ -516,21 +516,21 @@ fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {

 pub(crate) fn read_default_config(py: Python) -> PyResult<ConfigBuilder> {
     let path = py.import("sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?;
-    let path = path.downcast::<PyString>()?.to_str()?;
+    let path = path.cast::<PyString>()?.to_str()?;
     let path = PathBuf::from(path);
     errors::wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path)
 }

 pub(crate) fn get_default_resource_dir(py: Python) -> PyResult<PathBuf> {
     let path = py.import("sudachipy")?.getattr("_DEFAULT_RESOURCEDIR")?;
-    let path = path.downcast::<PyString>()?.to_str()?;
+    let path = path.cast::<PyString>()?.to_str()?;
     Ok(PathBuf::from(path))
 }

 fn find_dict_path(py: Python, dict_type: &str) -> PyResult<PathBuf> {
     let pyfunc = py.import("sudachipy")?.getattr("_find_dict_path")?;
     let path = pyfunc.call1((dict_type,))?;
-    let path = path.downcast::<PyString>()?.to_str()?;
+    let path = path.cast::<PyString>()?.to_str()?;
     Ok(PathBuf::from(path))
 }
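Besides the cast renames, pos_of now spells out the 'py lifetime on the reference it returns. A minimal sketch of that pattern (CachedTuples is a hypothetical type, not part of this repository): a struct owning GIL-independent Py handles that hands out references bound to the current Python token via Py::bind.

use pyo3::prelude::*;
use pyo3::types::PyTuple;

/// Hypothetical container owning GIL-independent tuple handles.
struct CachedTuples {
    items: Vec<Py<PyTuple>>,
}

impl CachedTuples {
    /// The explicit 'py on the returned reference mirrors the updated PyDictionary::pos_of signature.
    fn get<'py>(&'py self, py: Python<'py>, idx: usize) -> Option<&'py Bound<'py, PyTuple>> {
        self.items.get(idx).map(|x| x.bind(py))
    }
}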

python/src/morpheme.rs

Lines changed: 5 additions & 5 deletions
@@ -335,7 +335,7 @@ impl PyMorpheme {

     /// Returns the dictionary form.
     #[pyo3(text_signature = "(self, /) -> str")]
-    fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
+    fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyString>> {
         Ok(self
             .morph(py)
             .get_word_info()
@@ -345,7 +345,7 @@ impl PyMorpheme {

     /// Returns the normalized form.
     #[pyo3(text_signature = "(self, /) -> str")]
-    fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
+    fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyString>> {
         Ok(self
             .morph(py)
             .get_word_info()
@@ -355,7 +355,7 @@ impl PyMorpheme {

     /// Returns the reading form.
     #[pyo3(text_signature = "(self, /) -> str")]
-    fn reading_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
+    fn reading_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyString>> {
         Ok(self
             .morph(py)
             .get_word_info()
@@ -366,7 +366,7 @@ impl PyMorpheme {
     /// Returns sub-morphemes in the provided split mode.
     ///
     /// :param mode: mode of new split.
-    /// :param out: write results to this MorhpemeList instead of creating new one.
+    /// :param out: write results to this MorphemeList instead of creating new one.
     ///     See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for
     ///     more information on output parameters.
     ///     Returned MorphemeList will be invalidated if this MorphemeList is used as an output parameter.
@@ -444,7 +444,7 @@ impl PyMorpheme {

     /// Returns the list of synonym group ids.
     #[pyo3(text_signature = "(self, /) -> List[int]")]
-    fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyList>> {
+    fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyList>> {
         let mref = self.morph(py);
         let ids = mref.get_word_info().synonym_group_ids();
         PyList::new(py, ids)
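All of the PyMorpheme accessors above follow one pattern: the 'py lifetime is now written out on the returned Bound, and PyList::new already yields a PyResult in recent pyo3, so it can be the tail expression. A small sketch of both signatures on a made-up pyclass (Sample is illustrative, not part of SudachiPy):

use pyo3::prelude::*;
use pyo3::types::{PyList, PyString};

/// Hypothetical pyclass used only to illustrate the updated signatures.
#[pyclass]
struct Sample {
    surface: String,
    group_ids: Vec<u32>,
}

#[pymethods]
impl Sample {
    #[pyo3(text_signature = "(self, /) -> str")]
    fn surface_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyString>> {
        Ok(PyString::new(py, &self.surface))
    }

    #[pyo3(text_signature = "(self, /) -> List[int]")]
    fn synonym_groups<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyList>> {
        // PyList::new is fallible and returns PyResult, matching the declared return type.
        PyList::new(py, self.group_ids.iter().copied())
    }
}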

python/src/pos_matcher.rs

Lines changed: 3 additions & 3 deletions
@@ -53,7 +53,7 @@ impl PyPosMatcher {
     fn create_from_fn(dic: &Arc<PyDicData>, func: &Bound<PyAny>) -> PyResult<Self> {
         let mut data = Vec::new();
         for (pos_id, pos) in dic.pos.iter().enumerate() {
-            if func.call1((pos,))?.downcast::<PyBool>()?.is_true() {
+            if func.call1((pos,))?.cast::<PyBool>()?.is_true() {
                 data.push(pos_id as u16);
             }
         }
@@ -67,7 +67,7 @@ impl PyPosMatcher {
         let mut result = Vec::new();
         for item in data {
             let item = item?;
-            let item = item.downcast::<PyTuple>()?;
+            let item = item.cast::<PyTuple>()?;
             Self::match_pos_elements(&mut result, dic.as_ref(), item)?;
         }
         Ok(Self {
@@ -232,7 +232,7 @@ impl PyPosIter {
         slf
     }

-    fn __next__<'py>(&'py mut self, py: Python<'py>) -> Option<&Bound<'py, PyTuple>> {
+    fn __next__<'py>(&'py mut self, py: Python<'py>) -> Option<&'py Bound<'py, PyTuple>> {
         let idx = self.index;
         self.index += 1;
         if idx >= self.data.len() {
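The predicate branch shows the same rename applied to the result of calling back into Python. A small sketch of that calling pattern (keep_item is an illustrative helper, not from this commit):

use pyo3::prelude::*;
use pyo3::types::PyBool;

/// Hypothetical filter: call a Python predicate with one argument and read its bool result.
fn keep_item(func: &Bound<'_, PyAny>, item: &Bound<'_, PyAny>) -> PyResult<bool> {
    // call1 passes a tuple of arguments; cast checks that the returned object really is a bool.
    Ok(func.call1((item,))?.cast::<PyBool>()?.is_true())
}

As in create_from_fn, a predicate returning something other than an actual bool surfaces as a cast error instead of being silently truth-tested.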

python/src/pretokenizer.rs

Lines changed: 4 additions & 4 deletions
@@ -19,7 +19,7 @@ use std::sync::Arc;

 use pyo3::intern;
 use pyo3::prelude::*;
-use pyo3::sync::GILOnceCell;
+use pyo3::sync::PyOnceLock;
 use pyo3::types::{PyList, PySlice, PyType};
 use thread_local::ThreadLocal;

@@ -138,7 +138,7 @@ impl PyPretokenizer {
         let pystr = string.str()?;
         let input_data = pystr.to_str()?;
         // tokenization itself should work without GIL, we have thread-local tokenizers here
-        py.allow_threads(|| self.tokenizer_cell().borrow_mut().tokenize(input_data))?;
+        py.detach(|| self.tokenizer_cell().borrow_mut().tokenize(input_data))?;
         // then prepare results with GIL
         self.tokenizer_cell().borrow_mut().collect_results(py)?;
         let cell = self.tokenizer_cell().borrow();
@@ -191,10 +191,10 @@ fn make_result_for_projection<'py>(
 ) -> PyResult<Bound<'py, PyList>> {
     let result = PyList::empty(py);
     let nstring = {
-        static NORMALIZED_STRING: GILOnceCell<Py<PyType>> = GILOnceCell::new();
+        static NORMALIZED_STRING: PyOnceLock<Py<PyType>> = PyOnceLock::new();
         NORMALIZED_STRING.get_or_try_init(py, || -> PyResult<Py<PyType>> {
             let ns = py.import("tokenizers")?.getattr("NormalizedString")?;
-            let tpe = ns.downcast::<PyType>()?;
+            let tpe = ns.cast::<PyType>()?;
             Ok(tpe.clone().unbind())
         })?
     };
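Two more 0.27 renames appear in this file: GILOnceCell becomes PyOnceLock and Python::allow_threads becomes Python::detach. A rough sketch of both, assuming the tokenizers package is importable at runtime (the helper names below are illustrative, not from this commit):

use pyo3::prelude::*;
use pyo3::sync::PyOnceLock;
use pyo3::types::PyType;

/// Lazily resolve and cache the tokenizers.NormalizedString class.
fn normalized_string_type<'py>(py: Python<'py>) -> PyResult<&'py Bound<'py, PyType>> {
    static NORMALIZED_STRING: PyOnceLock<Py<PyType>> = PyOnceLock::new();
    let tpe = NORMALIZED_STRING.get_or_try_init(py, || -> PyResult<Py<PyType>> {
        let ns = py.import("tokenizers")?.getattr("NormalizedString")?;
        Ok(ns.cast::<PyType>()?.clone().unbind())
    })?;
    Ok(tpe.bind(py))
}

/// Run a CPU-bound closure with the interpreter released; detach replaces allow_threads.
fn heavy_work(py: Python<'_>, input: &str) -> usize {
    py.detach(|| input.chars().count())
}

The new names also read more naturally on the free-threaded (3.13t/3.14t) builds this commit targets, where "releasing the GIL" no longer describes what actually happens.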
