diff --git a/newsfragments/5199.added.md b/newsfragments/5199.added.md
new file mode 100644
index 00000000000..2da08ebfa39
--- /dev/null
+++ b/newsfragments/5199.added.md
@@ -0,0 +1,2 @@
+- Add `PyString::from_fmt`
+- Add `py_format!` macro
diff --git a/pyo3-benches/Cargo.toml b/pyo3-benches/Cargo.toml
index 1611244082c..c2c9d67f8fd 100644
--- a/pyo3-benches/Cargo.toml
+++ b/pyo3-benches/Cargo.toml
@@ -87,4 +87,8 @@ harness = false
 name = "bench_bigint"
 harness = false
 
+[[bench]]
+name = "bench_pystring_from_fmt"
+harness = false
+
 [workspace]
diff --git a/pyo3-benches/benches/bench_pystring_from_fmt.rs b/pyo3-benches/benches/bench_pystring_from_fmt.rs
new file mode 100644
index 00000000000..877c5c34310
--- /dev/null
+++ b/pyo3-benches/benches/bench_pystring_from_fmt.rs
@@ -0,0 +1,33 @@
+use codspeed_criterion_compat::{criterion_group, criterion_main, Bencher, Criterion};
+use pyo3::{py_format, Python};
+use std::hint::black_box;
+
+/// Benchmarks `py_format!` with a single runtime string argument.
+fn format_simple(b: &mut Bencher<'_>) {
+    Python::attach(|py| {
+        b.iter(|| {
+            // `black_box` keeps the argument opaque: a literal argument can be folded into the
+            // format string at compile time, which would route this through the interned path.
+            py_format!(py, "Hello {}!", black_box("world")).unwrap()
+        });
+    });
+}
+
+/// Benchmarks `py_format!` with a `Debug`-formatted tuple.
+fn format_complex(b: &mut Bencher<'_>) {
+    Python::attach(|py| {
+        b.iter(|| {
+            let value = (black_box(42), black_box("foo"), [0; 0]);
+            py_format!(py, "This is some complex value: {value:?}").unwrap()
+        });
+    });
+}
+
+/// Registers both formatting benchmarks with Criterion.
+fn criterion_benchmark(c: &mut Criterion) {
+    c.bench_function("format_simple", format_simple);
+    c.bench_function("format_complex", format_complex);
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/pyo3-ffi/src/cpython/unicodeobject.rs b/pyo3-ffi/src/cpython/unicodeobject.rs
index cf0ef54484e..ae1a9074883 100644
--- a/pyo3-ffi/src/cpython/unicodeobject.rs
+++ b/pyo3-ffi/src/cpython/unicodeobject.rs
@@ -686,6 +686,26 @@ extern "C" {
     // skipped PyUnicode_GetMax
 }
 
+#[cfg(Py_3_14)]
+opaque_struct!(pub PyUnicodeWriter);
+
+extern "C" {
+    #[cfg(Py_3_14)]
+    pub fn PyUnicodeWriter_Create(length: Py_ssize_t) -> *mut PyUnicodeWriter;
+    #[cfg(Py_3_14)]
+    pub fn PyUnicodeWriter_Finish(writer: *mut PyUnicodeWriter) -> *mut PyObject;
+    #[cfg(Py_3_14)]
+    pub fn PyUnicodeWriter_Discard(writer: *mut PyUnicodeWriter);
+    #[cfg(Py_3_14)]
+    pub fn PyUnicodeWriter_WriteChar(writer: *mut PyUnicodeWriter, ch: Py_UCS4) -> c_int;
+    #[cfg(Py_3_14)]
+    pub fn PyUnicodeWriter_WriteUTF8(
+        writer: *mut PyUnicodeWriter,
+        str: *const c_char,
+        size: Py_ssize_t,
+    ) -> c_int;
+}
+
 // skipped _PyUnicodeWriter
 // skipped _PyUnicodeWriter_Init
 // skipped _PyUnicodeWriter_Prepare
diff --git a/src/fmt.rs b/src/fmt.rs
new file mode 100644
index 00000000000..2b5554182f1
--- /dev/null
+++ b/src/fmt.rs
@@ -0,0 +1,235 @@
+#[cfg(any(doc, all(Py_3_14, not(Py_LIMITED_API))))]
+use crate::{types::PyString, Python};
+#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+use {
+    crate::ffi::{
+        PyUnicodeWriter_Create, PyUnicodeWriter_Discard, PyUnicodeWriter_Finish,
+        PyUnicodeWriter_WriteChar, PyUnicodeWriter_WriteUTF8,
+    },
+    crate::ffi_ptr_ext::FfiPtrExt,
+    crate::impl_::callback::WrappingCastTo,
+    crate::py_result_ext::PyResultExt,
+    crate::IntoPyObject,
+    crate::{ffi, Bound, PyErr, PyResult},
+    std::fmt,
+    std::mem::ManuallyDrop,
+    std::ptr::NonNull,
+};
+
+/// This macro is analogous to Rust's [`format!`] macro, but returns a [`PyString`] instead of a [`String`].
+///
+/// # Arguments
+///
+/// The arguments are exactly like [`format!`], but with `py` (a [`Python`] token) as the first argument.
+///
+/// # Interning Advantage
+///
+/// If the format string is a static string and all arguments are constant at compile time,
+/// this macro will intern the string in Python, offering better performance and memory usage
+/// compared to [`PyString::from_fmt`].
+///
+/// # Examples
+///
+/// ```rust
+/// # use pyo3::{py_format, Python, types::PyString, Bound};
+/// Python::attach(|py| {
+///     let py_string: Bound<'_, PyString> = py_format!(py, "{} {}", "hello", "world").unwrap();
+///     assert_eq!(py_string.to_string(), "hello world");
+/// });
+/// ```
+#[macro_export]
+macro_rules! py_format {
+    ($py: expr, $($arg:tt)*) => {{
+        // Bind the token once so that `$py` is evaluated a single time, whichever branch runs.
+        let py: $crate::Python<'_> = $py;
+        if let Some(static_string) = format_args!($($arg)*).as_str() {
+            static INTERNED: $crate::sync::PyOnceLock<$crate::Py<$crate::types::PyString>> = $crate::sync::PyOnceLock::new();
+            Ok(
+                INTERNED
+                .get_or_init(py, || $crate::types::PyString::intern(py, static_string).unbind())
+                .bind(py)
+                .to_owned()
+            )
+        } else {
+            $crate::types::PyString::from_fmt(py, format_args!($($arg)*))
+        }
+    }}
+}
+
+#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+/// The `PyUnicodeWriter` is a utility for efficiently constructing Python strings.
+pub(crate) struct PyUnicodeWriter<'py> {
+    python: Python<'py>,
+    /// Handle returned by `PyUnicodeWriter_Create`; passed to `Finish` or `Discard` exactly once.
+    writer: NonNull<ffi::PyUnicodeWriter>,
+    /// Python error fetched after the most recent failed write, if any.
+    last_error: Option<PyErr>,
+}
+
+#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+impl<'py> PyUnicodeWriter<'py> {
+    /// Creates a new `PyUnicodeWriter`.
+    pub fn new(py: Python<'py>) -> PyResult<Self> {
+        Self::with_capacity(py, 0)
+    }
+
+    /// Creates a new `PyUnicodeWriter` with the specified initial capacity.
+    #[inline]
+    pub fn with_capacity(py: Python<'py>, capacity: usize) -> PyResult<Self> {
+        match NonNull::new(unsafe { PyUnicodeWriter_Create(capacity.wrapping_cast()) }) {
+            Some(ptr) => Ok(PyUnicodeWriter {
+                python: py,
+                writer: ptr,
+                last_error: None,
+            }),
+            // A null handle signals failure; surface the pending Python error to the caller.
+            None => Err(PyErr::fetch(py)),
+        }
+    }
+
+    /// Consumes the `PyUnicodeWriter` and returns a `Bound<PyString>` containing the constructed string.
+    #[inline]
+    pub fn into_py_string(mut self) -> PyResult<Bound<'py, PyString>> {
+        let py = self.python;
+        if let Some(error) = self.take_error() {
+            Err(error)
+        } else {
+            unsafe {
+                // `ManuallyDrop` skips `Drop`, so the handle given to `Finish` is not also discarded.
+                PyUnicodeWriter_Finish(ManuallyDrop::new(self).as_ptr())
+                    .assume_owned_or_err(py)
+                    .cast_into_unchecked()
+            }
+        }
+    }
+
+    /// When fmt::Write returned an error, this function can be used to retrieve the last error that occurred.
+    #[inline]
+    pub fn take_error(&mut self) -> Option<PyErr> {
+        self.last_error.take()
+    }
+
+    #[inline]
+    fn as_ptr(&self) -> *mut ffi::PyUnicodeWriter {
+        self.writer.as_ptr()
+    }
+
+    #[inline]
+    fn set_error(&mut self) {
+        self.last_error = Some(PyErr::fetch(self.python));
+    }
+}
+
+#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+impl fmt::Write for PyUnicodeWriter<'_> {
+    #[inline]
+    fn write_str(&mut self, s: &str) -> fmt::Result {
+        let result = unsafe {
+            PyUnicodeWriter_WriteUTF8(self.as_ptr(), s.as_ptr().cast(), s.len() as isize)
+        };
+        if result < 0 {
+            self.set_error();
+            Err(fmt::Error)
+        } else {
+            Ok(())
+        }
+    }
+
+    #[inline]
+    fn write_char(&mut self, c: char) -> fmt::Result {
+        let result = unsafe { PyUnicodeWriter_WriteChar(self.as_ptr(), c.into()) };
+        if result < 0 {
+            self.set_error();
+            Err(fmt::Error)
+        } else {
+            Ok(())
+        }
+    }
+}
+
+#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+impl Drop for PyUnicodeWriter<'_> {
+    #[inline]
+    fn drop(&mut self) {
+        unsafe {
+            PyUnicodeWriter_Discard(self.as_ptr());
+        }
+    }
+}
+
+#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+impl<'py> IntoPyObject<'py> for PyUnicodeWriter<'py> {
+    type Target = PyString;
+    type Output = Bound<'py, Self::Target>;
+    type Error = PyErr;
+
+    #[inline]
+    fn into_pyobject(self, _py: Python<'py>) -> PyResult<Bound<'py, PyString>> {
+        self.into_py_string()
+    }
+}
+
+#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+impl<'py> TryInto<Bound<'py, PyString>> for PyUnicodeWriter<'py> {
+    type Error = PyErr;
+
+    #[inline]
+    fn try_into(self) -> PyResult<Bound<'py, PyString>> {
+        self.into_py_string()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    #[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+    use super::*;
+    use crate::types::PyStringMethods;
+    use crate::{IntoPyObject, Python};
+
+    #[test]
+    #[allow(clippy::write_literal)]
+    #[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+    fn unicode_writer_test() {
+        use std::fmt::Write;
+        Python::attach(|py| {
+            let mut writer = PyUnicodeWriter::new(py).unwrap();
+            write!(writer, "Hello {}!", "world").unwrap();
+            writer.write_char('😎').unwrap();
+            let result = writer.into_py_string().unwrap();
+            assert_eq!(result.to_string(), "Hello world!😎");
+        });
+    }
+
+    #[test]
+    #[allow(clippy::write_literal)]
+    #[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+    fn unicode_writer_with_capacity() {
+        use std::fmt::Write;
+        Python::attach(|py| {
+            let mut writer = PyUnicodeWriter::with_capacity(py, 10).unwrap();
+            write!(writer, "Hello {}!", "world").unwrap();
+            writer.write_char('😎').unwrap();
+            let result = writer.into_py_string().unwrap();
+            assert_eq!(result.to_string(), "Hello world!😎");
+        });
+    }
+
+    #[test]
+    fn test_pystring_from_fmt() {
+        Python::attach(|py| {
+            let py_string = py_format!(py, "Hello {}!", "world").unwrap();
+            assert_eq!(py_string.to_cow().unwrap(), "Hello world!");
+        });
+    }
+
+    #[test]
+    fn test_complex_format() {
+        Python::attach(|py| {
+            let complex_value = (42, "foo", [0; 0]).into_pyobject(py).unwrap();
+            let py_string = py_format!(py, "This is some complex value: {complex_value}").unwrap();
+            let actual = py_string.to_cow().unwrap();
+            let expected = "This is some complex value: (42, 'foo', [])";
+            assert_eq!(actual, expected);
+        });
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 41307496b75..2d8e0b3059f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -418,6 +418,7 @@ pub mod coroutine;
 mod err;
 pub mod exceptions;
 pub mod ffi;
+pub(crate) mod fmt;
 mod instance;
 mod interpreter_lifecycle;
 pub mod marker;
diff --git a/src/types/string.rs b/src/types/string.rs
index 0f69d18e0be..a2010d3f434 100644
--- a/src/types/string.rs
+++ b/src/types/string.rs
@@ -8,7 +8,7 @@ use crate::types::PyBytes;
 use crate::{ffi, Bound, Py, PyAny, PyResult, Python};
 use std::borrow::Cow;
 use std::ffi::CStr;
-use std::str;
+use std::{fmt, str};
 
 /// Represents raw data backing a Python `str`.
 ///
@@ -230,6 +230,47 @@ impl PyString {
                 .cast_into_unchecked()
         }
     }
+
+    /// Creates a Python string using a format string.
+    ///
+    /// This function is similar to [`format!`], but it returns a Python string object instead of a Rust string.
+    ///
+    /// # Errors
+    ///
+    /// On Python 3.14+ without the limited API, returns the Python exception raised while
+    /// creating, writing to, or finishing the underlying unicode writer.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a formatting trait implementation reports an error that did not originate
+    /// from the writer.
+    #[inline]
+    pub fn from_fmt<'py>(
+        py: Python<'py>,
+        args: fmt::Arguments<'_>,
+    ) -> PyResult<Bound<'py, PyString>> {
+        // A format string without runtime arguments needs no formatting machinery.
+        if let Some(static_string) = args.as_str() {
+            return Ok(PyString::new(py, static_string));
+        }
+
+        #[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
+        {
+            use crate::fmt::PyUnicodeWriter;
+            use std::fmt::Write as _;
+
+            let mut writer = PyUnicodeWriter::new(py)?;
+            writer
+                .write_fmt(args)
+                .map_err(|_| writer.take_error().expect("expected error"))?;
+            writer.into_py_string()
+        }
+
+        #[cfg(any(not(Py_3_14), Py_LIMITED_API))]
+        {
+            Ok(PyString::new(py, &format!("{args}")))
+        }
+    }
 }
 
 /// Implementation of functionality for [`PyString`].