Skip to content

Commit df0bf99

Browse files
Add PyString::from_fmt using new PyUnicodeWriter
Squashed changes: intern static strings in `py_format!`; add `'py` lifetime to `PyUnicodeWriter`; add benchmark; bench using new macro; bench on Python 3.14; don't measure `PyString` dealloc; remove `PyUnicodeWriter` compat shim; add `TryInto` impl; make `PyUnicodeWriter` private; add changelog; fix abi3; add more tests; update docs; fix docs imports.
1 parent c969dd0 commit df0bf99

File tree

7 files changed

+312
-1
lines changed

7 files changed

+312
-1
lines changed

newsfragments/5199.added.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
- Add `PyString::from_fmt`
2+
- Add `py_format!` macro

pyo3-benches/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,8 @@ harness = false
8787
name = "bench_bigint"
8888
harness = false
8989

90+
[[bench]]
91+
name = "bench_pystring_from_fmt"
92+
harness = false
93+
9094
[workspace]
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
use codspeed_criterion_compat::{criterion_group, criterion_main, Bencher, Criterion};
2+
use pyo3::{py_format, Python};
3+
use std::hint::black_box;
4+
5+
fn format_simple(b: &mut Bencher<'_>) {
6+
Python::attach(|py| {
7+
b.iter(|| {
8+
py_format!(py, "Hello {}!", "world").unwrap()
9+
});
10+
});
11+
}
12+
13+
fn format_complex(b: &mut Bencher<'_>) {
14+
Python::attach(|py| {
15+
b.iter(|| {
16+
let value = (black_box(42), black_box("foo"), [0; 0]);
17+
py_format!(py, "This is some complex value: {value:?}").unwrap()
18+
});
19+
});
20+
}
21+
22+
/// Registers both `py_format!` benchmarks with the given `Criterion` instance.
fn criterion_benchmark(c: &mut Criterion) {
    c.bench_function("format_simple", format_simple)
        .bench_function("format_complex", format_complex);
}
26+
27+
criterion_group!(benches, criterion_benchmark);
28+
criterion_main!(benches);

pyo3-ffi/src/cpython/unicodeobject.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,26 @@ extern "C" {
686686
// skipped PyUnicode_GetMax
687687
}
688688

689+
// Bindings for the `PyUnicodeWriter` C API; everything here is only declared
// when the `Py_3_14` cfg is set. See the CPython C API documentation for the
// exact contract of each function.
#[cfg(Py_3_14)]
opaque_struct!(pub PyUnicodeWriter);

#[cfg(Py_3_14)]
extern "C" {
    /// Binding for `PyUnicodeWriter_Create`; yields a raw writer pointer.
    pub fn PyUnicodeWriter_Create(length: Py_ssize_t) -> *mut PyUnicodeWriter;

    /// Binding for `PyUnicodeWriter_Finish`; yields a raw object pointer.
    pub fn PyUnicodeWriter_Finish(writer: *mut PyUnicodeWriter) -> *mut PyObject;

    /// Binding for `PyUnicodeWriter_Discard`.
    pub fn PyUnicodeWriter_Discard(writer: *mut PyUnicodeWriter);

    /// Binding for `PyUnicodeWriter_WriteChar`; yields a C `int` status.
    pub fn PyUnicodeWriter_WriteChar(writer: *mut PyUnicodeWriter, ch: Py_UCS4) -> c_int;

    /// Binding for `PyUnicodeWriter_WriteUTF8`; `str` points at the bytes and
    /// `size` is their length. Yields a C `int` status.
    pub fn PyUnicodeWriter_WriteUTF8(
        writer: *mut PyUnicodeWriter,
        str: *const c_char,
        size: Py_ssize_t,
    ) -> c_int;
}
708+
689709
// skipped _PyUnicodeWriter
690710
// skipped _PyUnicodeWriter_Init
691711
// skipped _PyUnicodeWriter_Prepare

src/fmt.rs

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
#[cfg(all(doc, not(all(Py_3_14, not(Py_LIMITED_API)))))]
use crate::{types::PyString, Python};
#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
use {
    crate::ffi::{
        PyUnicodeWriter_Create, PyUnicodeWriter_Discard, PyUnicodeWriter_Finish,
        PyUnicodeWriter_WriteChar, PyUnicodeWriter_WriteUTF8,
    },
    crate::ffi_ptr_ext::FfiPtrExt,
    crate::impl_::callback::WrappingCastTo,
    crate::py_result_ext::PyResultExt,
    crate::types::PyString,
    crate::IntoPyObject,
    crate::Python,
    crate::{ffi, Bound, PyErr, PyResult},
    std::fmt,
    std::mem::ManuallyDrop,
    std::ptr::NonNull,
};
18+
19+
/// This macro is analogous to Rust's [`format!`] macro, but returns a [`PyString`] instead of a [`String`].
///
/// # Arguments
///
/// The arguments are exactly like [`format!`], but with `py` (a [`Python`] token) as the first
/// argument. The `py` expression and the formatting arguments are each evaluated once.
///
/// # Interning Advantage
///
/// If the format string is a static string and all arguments are constant at compile time,
/// this macro will intern the string in Python, offering better performance and memory usage
/// compared to [`PyString::from_fmt`].
///
/// # Examples
///
/// ```rust
/// # use pyo3::{py_format, Python, types::PyString, Bound};
/// Python::attach(|py| {
///     let py_string: Bound<'_, PyString> = py_format!(py, "{} {}", "hello", "world").unwrap();
///     assert_eq!(py_string.to_string(), "hello world");
/// });
/// ```
#[macro_export]
macro_rules! py_format {
    ($py: expr, $($arg:tt)*) => {{
        // Bind the token once so the `$py` expression is not expanded (and evaluated) repeatedly.
        let py: $crate::Python<'_> = $py;
        // `match` keeps the temporaries created by `format_args!` alive for the whole arm,
        // which lets the same `Arguments` value serve both the static check and the fallback
        // without evaluating the formatting arguments a second time.
        match format_args!($($arg)*) {
            args => {
                if let Some(static_string) = args.as_str() {
                    // One cache per macro call site; `static_string` is `'static` here.
                    static INTERNED: $crate::sync::PyOnceLock<$crate::Py<$crate::types::PyString>> = $crate::sync::PyOnceLock::new();
                    Ok(
                        INTERNED
                            .get_or_init(py, || $crate::types::PyString::intern(py, static_string).unbind())
                            .bind(py)
                            .to_owned()
                    )
                } else {
                    $crate::types::PyString::from_fmt(py, args)
                }
            }
        }
    }}
}
54+
55+
#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
/// Incremental builder for Python strings, wrapping a raw `ffi::PyUnicodeWriter`.
pub(crate) struct PyUnicodeWriter<'py> {
    /// Token captured at construction; used to fetch errors and bind the result.
    python: Python<'py>,
    /// Non-null pointer obtained from `PyUnicodeWriter_Create`.
    writer: NonNull<ffi::PyUnicodeWriter>,
    /// Python error recorded by the most recent failed write, if any.
    last_error: Option<PyErr>,
}
62+
63+
#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
impl<'py> PyUnicodeWriter<'py> {
    /// Builds a writer with an initial capacity of zero.
    pub fn new(py: Python<'py>) -> PyResult<Self> {
        Self::with_capacity(py, 0)
    }

    /// Builds a writer, passing `capacity` on to `PyUnicodeWriter_Create`.
    ///
    /// Returns the pending Python error when the C API hands back a null pointer.
    #[inline]
    pub fn with_capacity(py: Python<'py>, capacity: usize) -> PyResult<Self> {
        // SAFETY: plain FFI call; `py` shows that we are attached to the interpreter.
        let raw = unsafe { PyUnicodeWriter_Create(capacity.wrapping_cast()) };
        NonNull::new(raw)
            .map(|writer| PyUnicodeWriter {
                python: py,
                writer,
                last_error: None,
            })
            .ok_or_else(|| PyErr::fetch(py))
    }

    /// Finishes the writer and hands back the resulting `Bound<PyString>`.
    ///
    /// If a previous write recorded an error, that error is returned instead and the
    /// writer is dropped (and thereby discarded).
    #[inline]
    pub fn into_py_string(mut self) -> PyResult<Bound<'py, PyString>> {
        let py = self.python;
        match self.take_error() {
            Some(pending) => Err(pending),
            None => {
                // `PyUnicodeWriter_Finish` takes over the writer, so `Drop` must not run.
                let this = ManuallyDrop::new(self);
                // SAFETY: `this.writer` came from `PyUnicodeWriter_Create` and is handed
                // to the C API exactly once because the destructor is suppressed above.
                unsafe {
                    PyUnicodeWriter_Finish(this.as_ptr())
                        .assume_owned_or_err(py)
                        .cast_into_unchecked()
                }
            }
        }
    }

    /// Removes and returns the error stored by the last failed `fmt::Write` call, if any.
    #[inline]
    pub fn take_error(&mut self) -> Option<PyErr> {
        self.last_error.take()
    }

    /// Raw pointer to the underlying writer.
    #[inline]
    fn as_ptr(&self) -> *mut ffi::PyUnicodeWriter {
        self.writer.as_ptr()
    }

    /// Fetches the current Python error and stores it for a later `take_error`.
    #[inline]
    fn set_error(&mut self) {
        let fetched = PyErr::fetch(self.python);
        self.last_error = Some(fetched);
    }
}
114+
115+
#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
impl fmt::Write for PyUnicodeWriter<'_> {
    /// Appends `s` via `PyUnicodeWriter_WriteUTF8`.
    ///
    /// On a negative status the Python error is stored (see `take_error`) and
    /// `fmt::Error` is returned.
    #[inline]
    fn write_str(&mut self, s: &str) -> fmt::Result {
        // SAFETY: the pointer/length pair describes the bytes of `s`, and the writer
        // pointer is the one owned by `self`.
        let status = unsafe {
            PyUnicodeWriter_WriteUTF8(self.as_ptr(), s.as_ptr().cast(), s.len() as isize)
        };
        if status >= 0 {
            return Ok(());
        }
        self.set_error();
        Err(fmt::Error)
    }

    /// Appends a single character via `PyUnicodeWriter_WriteChar`.
    ///
    /// Error handling mirrors `write_str`.
    #[inline]
    fn write_char(&mut self, c: char) -> fmt::Result {
        // SAFETY: the writer pointer is the one owned by `self`.
        let status = unsafe { PyUnicodeWriter_WriteChar(self.as_ptr(), c.into()) };
        if status >= 0 {
            return Ok(());
        }
        self.set_error();
        Err(fmt::Error)
    }
}
141+
142+
#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
impl Drop for PyUnicodeWriter<'_> {
    /// Discards the underlying writer.
    ///
    /// `into_py_string` wraps `self` in `ManuallyDrop` before finishing, so this only
    /// runs for writers that were never finished.
    #[inline]
    fn drop(&mut self) {
        let raw = self.as_ptr();
        // SAFETY: `raw` is the pointer owned by `self`, which has not been finished.
        unsafe { PyUnicodeWriter_Discard(raw) };
    }
}
151+
152+
#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
impl<'py> IntoPyObject<'py> for PyUnicodeWriter<'py> {
    type Target = PyString;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Finishes the writer; the `_py` argument is unused because the writer already
    /// carries its own token.
    #[inline]
    fn into_pyobject(self, _py: Python<'py>) -> Result<Self::Output, Self::Error> {
        PyUnicodeWriter::into_py_string(self)
    }
}
163+
164+
// Implemented as `TryFrom` rather than `TryInto`, as the standard library docs
// recommend; the blanket impl in `std` still provides
// `TryInto<Bound<'py, PyString>> for PyUnicodeWriter<'py>`, so existing
// `writer.try_into()` calls keep working.
#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
impl<'py> TryFrom<PyUnicodeWriter<'py>> for Bound<'py, PyString> {
    type Error = PyErr;

    /// Finishes `writer` and returns the resulting string, or the error it recorded.
    #[inline]
    fn try_from(writer: PyUnicodeWriter<'py>) -> PyResult<Self> {
        writer.into_py_string()
    }
}
173+
174+
#[cfg(test)]
mod tests {
    #[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
    use super::*;
    use crate::types::PyStringMethods;
    use crate::{IntoPyObject, Python};

    /// Writes through `fmt::Write` into a default-capacity writer and checks the result.
    #[test]
    // The literal argument to `write!` is intentional: it exercises argument formatting.
    #[allow(clippy::write_literal)]
    #[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
    fn unicode_writer_test() {
        use std::fmt::Write;
        Python::attach(|py| {
            let mut writer = PyUnicodeWriter::new(py).unwrap();
            write!(writer, "Hello {}!", "world").unwrap();
            writer.write_char('😎').unwrap();
            let result = writer.into_py_string().unwrap();
            assert_eq!(result.to_string(), "Hello world!😎");
        });
    }

    /// Same as `unicode_writer_test`, but starting from an explicit capacity.
    #[test]
    // The literal argument to `write!` is intentional: it exercises argument formatting.
    #[allow(clippy::write_literal)]
    #[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
    fn unicode_writer_with_capacity() {
        use std::fmt::Write;
        Python::attach(|py| {
            let mut writer = PyUnicodeWriter::with_capacity(py, 10).unwrap();
            write!(writer, "Hello {}!", "world").unwrap();
            writer.write_char('😎').unwrap();
            let result = writer.into_py_string().unwrap();
            assert_eq!(result.to_string(), "Hello world!😎");
        });
    }

    /// `py_format!` must produce the formatted text, not merely succeed.
    #[test]
    fn test_pystring_from_fmt() {
        Python::attach(|py| {
            let py_string = py_format!(py, "Hello {}!", "world").unwrap();
            assert_eq!(py_string.to_cow().unwrap(), "Hello world!");
        });
    }

    /// Formats a Python object through its `Display` implementation.
    #[test]
    fn test_complex_format() {
        Python::attach(|py| {
            let complex_value = (42, "foo", [0; 0]).into_pyobject(py).unwrap();
            let py_string = py_format!(py, "This is some complex value: {complex_value}").unwrap();
            let actual = py_string.to_cow().unwrap();
            let expected = "This is some complex value: (42, 'foo', [])";
            assert_eq!(actual, expected);
        });
    }
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,7 @@ pub mod coroutine;
418418
mod err;
419419
pub mod exceptions;
420420
pub mod ffi;
421+
pub(crate) mod fmt;
421422
mod instance;
422423
mod interpreter_lifecycle;
423424
pub mod marker;

src/types/string.rs

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::types::PyBytes;
88
use crate::{ffi, Bound, Py, PyAny, PyResult, Python};
99
use std::borrow::Cow;
1010
use std::ffi::CStr;
11-
use std::str;
11+
use std::{fmt, str};
1212

1313
/// Represents raw data backing a Python `str`.
1414
///
@@ -230,6 +230,36 @@ impl PyString {
230230
.cast_into_unchecked()
231231
}
232232
}
233+
234+
/// Creates a Python string using a format string.
235+
///
236+
/// This function is similar to [`format!`], but it returns a Python string object instead of a Rust string.
237+
#[inline]
238+
pub fn from_fmt<'py>(
239+
py: Python<'py>,
240+
args: fmt::Arguments<'_>,
241+
) -> PyResult<Bound<'py, PyString>> {
242+
if let Some(static_string) = args.as_str() {
243+
return Ok(PyString::new(py, static_string));
244+
};
245+
246+
#[cfg(all(Py_3_14, not(Py_LIMITED_API)))]
247+
{
248+
use crate::fmt::PyUnicodeWriter;
249+
use std::fmt::Write as _;
250+
251+
let mut writer = PyUnicodeWriter::new(py)?;
252+
writer
253+
.write_fmt(args)
254+
.map_err(|_| writer.take_error().expect("expected error"))?;
255+
writer.into_py_string()
256+
}
257+
258+
#[cfg(any(not(Py_3_14), Py_LIMITED_API))]
259+
{
260+
Ok(PyString::new(py, &format!("{args}")))
261+
}
262+
}
233263
}
234264

235265
/// Implementation of functionality for [`PyString`].

0 commit comments

Comments
 (0)