Skip to content

Commit ce5f28e

Browse files
committed
yaml12: lazy handler registry lookups
1 parent e83e5c4 commit ce5f28e

File tree

1 file changed

+90
-68
lines changed

1 file changed

+90
-68
lines changed

src/lib.rs

Lines changed: 90 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use pyo3::IntoPyObjectExt;
1111
use saphyr::{Mapping, Scalar, Tag, Yaml, YamlEmitter};
1212
use saphyr_parser::{Parser, ScalarStyle};
1313
use std::borrow::Cow;
14-
use std::cell::UnsafeCell;
14+
use std::cell::{RefCell, UnsafeCell};
1515
use std::collections::HashMap;
1616
use std::fs;
1717
use std::io::{self, Write};
@@ -59,10 +59,9 @@ fn pathlike_to_pathbuf(obj: &Bound<'_, PyAny>) -> Result<Option<PathBuf>> {
5959
}
6060
}
6161

62-
type HandlerMap = HashMap<String, Py<PyAny>>;
63-
6462
/// Holds the user-supplied tag handlers consulted while converting tagged YAML nodes.
///
/// This hunk shows two representations: the `-` line is the former eagerly built
/// map, the `+` lines are the fields that replace it.
struct HandlerRegistry {
// Former representation: normalized tag string -> handler, filled at construction.
65-
map: HandlerMap,
// Unbound reference to the Python handlers dict; it is queried on demand by
// `lookup_handler` rather than copied up front.
63+
dict: Py<PyDict>,
// Per-tag memo of lookup results. A stored `None` records that no handler was
// found for that tag. `RefCell` lets `get_for_tag` update it through `&self`.
64+
cache: RefCell<HashMap<String, Option<Py<PyAny>>>>,
6665
}
6766

6867
impl HandlerRegistry {
@@ -83,32 +82,98 @@ impl HandlerRegistry {
8382
return Ok(None);
8483
}
8584

86-
let mut handlers_map: HandlerMap = HashMap::with_capacity(dict.len());
87-
for (key_obj, value_obj) in dict.iter() {
88-
let key_str = key_obj.cast::<PyString>().map_err(|_| {
89-
PyTypeError::new_err("handler keys must be strings or subclasses of str")
90-
})?;
91-
let key_text = key_str.to_str()?;
92-
let key = normalize_handler_tag_string(key_text)?;
93-
if !value_obj.is_callable() {
94-
return Err(PyTypeError::new_err(format!(
95-
"handler `{}` must be callable",
96-
key_text
97-
)));
98-
}
99-
handlers_map.insert(key, value_obj.unbind());
100-
}
101-
102-
Ok(Some(Self { map: handlers_map }))
85+
Ok(Some(Self {
86+
dict: dict.clone().unbind(),
87+
cache: RefCell::new(HashMap::new()),
88+
}))
10389
}
10490

105-
fn get_for_tag(&self, tag: &str) -> Option<&Py<PyAny>> {
106-
self.map.get(tag)
91+
/// Returns the handler to use for `tag`, or `Ok(None)` when no handler resolves.
///
/// Results are memoized in `self.cache` keyed by the exact `tag` string, and misses
/// are memoized too (as `None`). An error from `lookup_handler` is propagated by `?`
/// before anything is inserted, so failures are not cached.
///
/// The returned handle is a fresh reference produced with `clone_ref`; the cached
/// one stays in the map.
fn get_for_tag(&self, py: Python<'_>, tag: &str) -> Result<Option<Py<PyAny>>> {
// Cache hit (including a cached miss): return without touching the Python dict.
// The `Ref` created by `borrow()` is a temporary of this `if let` statement, so it
// has been released by the time `borrow_mut()` runs below.
92+
if let Some(cached) = self.cache.borrow().get(tag) {
93+
return Ok(cached.as_ref().map(|handler| handler.clone_ref(py)));
94+
}
95+
96+
let dict = self.dict.bind(py);
97+
let resolved = self.lookup_handler(dict, tag)?;
// Clone the reference for the caller first; `resolved` itself is moved into the cache.
98+
let result = resolved.as_ref().map(|handler| handler.clone_ref(py));
99+
self.cache.borrow_mut().insert(tag.to_owned(), resolved);
100+
Ok(result)
107101
}
108102

109103
/// Calls `handler` with `arg` as its single positional argument and returns the
/// call's result; any error from the call is returned to the caller unchanged.
///
/// `self` is not read here.
fn apply(&self, py: Python<'_>, handler: &Py<PyAny>, arg: Py<PyAny>) -> Result<Py<PyAny>> {
110104
handler.call1(py, (arg,))
111105
}
106+
107+
/// Resolves `tag` against the handlers `dict` by trying a fixed sequence of
/// candidate keys and returning the first one present.
///
/// Candidate order:
/// 1. `tag` exactly as given.
/// 2. If `tag` has the form `!<inner>` with a non-empty `inner`: the result of
///    `normalize_simple_tag_name_for_api(inner)`, when that differs from `tag`.
/// 3. If `tag` starts with `!` and the remainder passes `is_simple_local_tag_name`:
///    the remainder without the `!`, then `!<remainder>`, then `!<tag>`.
/// 4. If `tag` does not start with `!`: `!<tag>`, then, for tags that begin with
///    `tag:yaml.org,2002:` and have a non-empty suffix, the shorthand `!!suffix`.
///
/// Every hit is passed through `validate_callable`. A non-callable value under a
/// matching key is an error; the search does not continue to later candidates.
/// Returns `Ok(None)` when no candidate key is present. `self` is not read here.
fn lookup_handler(&self, dict: &Bound<'_, PyDict>, tag: &str) -> Result<Option<Py<PyAny>>> {
// Candidate 1: the tag verbatim.
108+
if let Some(handler) = dict.get_item(tag)? {
109+
Self::validate_callable(tag, &handler)?;
110+
return Ok(Some(handler.unbind()));
111+
}
112+
// Candidate 2: verbatim form `!<inner>` -> normalized `inner`.
113+
if let Some(inner) = tag
114+
.strip_prefix("!<")
115+
.and_then(|rest| rest.strip_suffix('>'))
116+
.filter(|inner| !inner.is_empty())
117+
{
118+
let normalized = normalize_simple_tag_name_for_api(inner);
// Skip the lookup when normalization yields `tag` again; that key was already tried.
119+
if normalized.as_ref() != tag {
120+
if let Some(handler) = dict.get_item(normalized.as_ref())? {
121+
Self::validate_callable(normalized.as_ref(), &handler)?;
122+
return Ok(Some(handler.unbind()));
123+
}
124+
}
125+
}
126+
// Candidates 3: `!`-prefixed tags whose remainder is a simple local name.
// A `!`-prefixed tag with a non-simple remainder gets no further candidates.
127+
if let Some(local) = tag.strip_prefix('!') {
128+
if is_simple_local_tag_name(local) {
// Bare name without the leading `!`.
129+
if let Some(handler) = dict.get_item(local)? {
130+
Self::validate_callable(local, &handler)?;
131+
return Ok(Some(handler.unbind()));
132+
}
133+
// Bare name wrapped as `!<name>`.
134+
let wrapped_local = format!("!<{local}>");
135+
if let Some(handler) = dict.get_item(wrapped_local.as_str())? {
136+
Self::validate_callable(wrapped_local.as_str(), &handler)?;
137+
return Ok(Some(handler.unbind()));
138+
}
139+
// Whole tag (including its `!`) wrapped as `!<!name>`.
140+
let wrapped_tag = format!("!<{tag}>");
141+
if let Some(handler) = dict.get_item(wrapped_tag.as_str())? {
142+
Self::validate_callable(wrapped_tag.as_str(), &handler)?;
143+
return Ok(Some(handler.unbind()));
144+
}
145+
}
// Candidates 4: tags that do not start with `!`.
146+
} else {
147+
let wrapped_tag = format!("!<{tag}>");
148+
if let Some(handler) = dict.get_item(wrapped_tag.as_str())? {
149+
Self::validate_callable(wrapped_tag.as_str(), &handler)?;
150+
return Ok(Some(handler.unbind()));
151+
}
152+
// Core-schema tags may also be registered under the `!!name` shorthand.
153+
const CORE_PREFIX: &str = "tag:yaml.org,2002:";
154+
if let Some(core_suffix) = tag.strip_prefix(CORE_PREFIX) {
155+
if !core_suffix.is_empty() {
156+
let shorthand = format!("!!{core_suffix}");
157+
if let Some(handler) = dict.get_item(shorthand.as_str())? {
158+
Self::validate_callable(shorthand.as_str(), &handler)?;
159+
return Ok(Some(handler.unbind()));
160+
}
161+
}
162+
}
163+
}
164+
165+
Ok(None)
166+
}
167+
168+
/// Checks that the value stored under `key` can be called.
///
/// Returns `Ok(())` when `handler.is_callable()` is true; otherwise returns a
/// `PyTypeError` whose message names `key`.
fn validate_callable(key: &str, handler: &Bound<'_, PyAny>) -> Result<()> {
169+
if handler.is_callable() {
170+
Ok(())
171+
} else {
172+
Err(PyTypeError::new_err(format!(
173+
"handler `{key}` must be callable"
174+
)))
175+
}
176+
}
112177
}
113178

114179
fn builtin_types(py: Python<'_>) -> Result<&BuiltinTypes> {
@@ -139,49 +204,6 @@ fn handler_registry_from_arg(
139204
}
140205
}
141206

142-
/// Validates a handler key and converts it to its normalized tag string.
///
/// Every line of this function carries a `-` gutter marker in this diff.
///
/// Rejected with `PyTypeError`:
/// - a key that is empty after trimming,
/// - a key with leading or trailing whitespace,
/// - a key containing any whitespace character,
/// - `!!` with nothing after it, and `!<>` with nothing between the brackets.
///
/// Accepted forms:
/// - `!!rest` becomes `tag:yaml.org,2002:rest`,
/// - `!<uri>` is unwrapped to `uri` and then passed through
///   `normalize_simple_tag_name_for_api`,
/// - anything else is passed through `normalize_simple_tag_name_for_api` as is.
fn normalize_handler_tag_string(name: &str) -> Result<String> {
143-
let trimmed = name.trim();
144-
if trimmed.is_empty() {
145-
return Err(PyTypeError::new_err(
146-
"handler keys must be non-empty strings",
147-
));
148-
}
// From here on `trimmed == name` is required, so the later checks see the full key.
149-
if trimmed != name {
150-
return Err(PyTypeError::new_err(
151-
"handler keys must not contain leading/trailing whitespace",
152-
));
153-
}
154-
if trimmed.chars().any(|c| c.is_whitespace()) {
155-
return Err(PyTypeError::new_err(
156-
"handler keys must not contain whitespace",
157-
));
158-
}
159-
160-
// Accept shorthand forms and normalize to the tag strings produced by `render_tag`.
161-
if let Some(rest) = trimmed.strip_prefix("!!") {
162-
if rest.is_empty() {
163-
return Err(PyTypeError::new_err(
164-
"`handlers` keys must be valid YAML tag strings",
165-
));
166-
}
// The `!!` shorthand returns here and does not go through `normalize_simple_tag_name_for_api`.
167-
return Ok(format!("tag:yaml.org,2002:{rest}"));
168-
}
169-
// Unwrap the verbatim form `!<uri>`; any other key is used unchanged.
170-
let normalized = if let Some(uri) = trimmed.strip_prefix("!<").and_then(|s| s.strip_suffix('>'))
171-
{
172-
if uri.is_empty() {
173-
return Err(PyTypeError::new_err(
174-
"`handlers` keys must be valid YAML tag strings",
175-
));
176-
}
177-
uri
178-
} else {
179-
trimmed
180-
};
181-
182-
Ok(normalize_simple_tag_name_for_api(normalized).into_owned())
183-
}
184-
185207
fn is_simple_local_tag_name(name: &str) -> bool {
186208
if name.is_empty() {
187209
return false;
@@ -1164,11 +1186,11 @@ fn convert_tagged(
11641186
let public_tag = normalize_simple_tag_name_for_api(rendered);
11651187

11661188
if let Some(registry) = handlers {
1167-
if let Some(handler) = registry.get_for_tag(public_tag.as_ref()) {
1189+
if let Some(handler) = registry.get_for_tag(py, public_tag.as_ref())? {
11681190
// Convert inner node in value mode to avoid pre-wrapping keys; the tag logic below
11691191
// handles hashability and tag preservation.
11701192
let value = yaml_to_py(py, node, false, handlers)?;
1171-
let handled = registry.apply(py, handler, value)?;
1193+
let handled = registry.apply(py, &handler, value)?;
11721194
if is_key && handler_result_needs_wrap(py, handled.bind(py))? {
11731195
return make_yaml_node(py, handled, None);
11741196
}

0 commit comments

Comments
 (0)