Skip to content

Commit 8cad3bb

Browse files
committed
Tag handling: simplify core checks, normalize tags, add binary/timestamp tests
1 parent 4595942 commit 8cad3bb

File tree

2 files changed

+108
-54
lines changed

2 files changed

+108
-54
lines changed

src/lib.rs

Lines changed: 64 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -435,10 +435,7 @@ fn resolve_representation(node: &mut Yaml, _simplify: bool) {
435435

436436
let parsed = match tag {
437437
Some(tag) => {
438-
let is_core_schema = tag.is_yaml_core_schema();
439-
let normalized_suffix = normalized_suffix(tag.suffix.as_str());
440-
441-
if is_core_schema {
438+
if tag.is_yaml_core_schema() {
442439
match tag.suffix.as_str() {
443440
"str" => Yaml::value_from_cow_and_metadata(value, style, Some(&tag)),
444441
"null" => {
@@ -466,8 +463,6 @@ fn resolve_representation(node: &mut Yaml, _simplify: bool) {
466463
}
467464
}
468465
}
469-
} else if is_effective_core_null(&tag, is_core_schema, normalized_suffix) {
470-
Yaml::Value(Scalar::Null)
471466
} else {
472467
Yaml::Tagged(tag, Box::new(Yaml::Value(Scalar::String(value))))
473468
}
@@ -586,17 +581,11 @@ fn convert_tagged(
586581
}
587582
}
588583

589-
let is_core_schema = tag.is_yaml_core_schema();
590-
let normalized_suffix = normalized_suffix(tag.suffix.as_str());
591-
592-
if is_effective_core_null(tag, is_core_schema, normalized_suffix) {
593-
return Ok(py.None());
594-
}
595-
596584
let value = yaml_to_py(py, node, is_key, handlers)?;
597585

598-
if is_core_schema {
599-
return match tag.suffix.as_str() {
586+
let normalized_suffix = normalized_suffix(tag.suffix.as_str());
587+
if is_core_tag(tag) {
588+
return match normalized_suffix {
600589
"str" | "null" | "bool" | "int" | "float" | "seq" | "map" => Ok(value),
601590
"timestamp" | "set" | "omap" | "pairs" | "binary" => {
602591
let rendered = render_tag(tag);
@@ -633,25 +622,24 @@ fn is_core_null_tag(tag: &Tag) -> bool {
633622
}
634623

635624
fn is_core_scalar_tag(tag: &Tag) -> bool {
636-
tag.is_yaml_core_schema()
637-
&& matches!(
638-
tag.suffix.as_str(),
639-
"str" | "null" | "bool" | "int" | "float" | "seq" | "map"
640-
)
625+
if !is_core_tag(tag) {
626+
return false;
627+
}
628+
matches!(
629+
normalized_suffix(tag.suffix.as_str()),
630+
"str" | "null" | "bool" | "int" | "float" | "seq" | "map"
631+
)
641632
}
642633

643634
fn normalized_suffix(suffix: &str) -> &str {
644635
let suffix = suffix.trim_start_matches('!');
645636
suffix.strip_prefix("tag:yaml.org,2002:").unwrap_or(suffix)
646637
}
647638

648-
fn is_effective_core_null(tag: &Tag, is_core_schema: bool, normalized_suffix: &str) -> bool {
649-
(is_core_schema && tag.suffix.as_str() == "null")
650-
|| (tag.handle.as_str() == "!!" && tag.suffix.as_str() == "null")
651-
|| (tag.handle.is_empty()
652-
&& normalized_suffix == "null"
653-
&& (tag.suffix.as_str().starts_with('!')
654-
|| tag.suffix.as_str().starts_with("tag:yaml.org,2002:")))
639+
fn is_core_tag(tag: &Tag) -> bool {
640+
tag.is_yaml_core_schema()
641+
|| (tag.handle.as_str() == "!" && tag.suffix.as_str().starts_with('!'))
642+
|| (tag.handle.is_empty() && tag.suffix.as_str().starts_with("tag:yaml.org,2002:"))
655643
}
656644

657645
fn render_tag(tag: &Tag) -> String {
@@ -790,38 +778,66 @@ fn py_to_yaml(py: Python<'_>, obj: &Bound<'_, PyAny>, is_key: bool) -> Result<Ya
790778
}
791779

792780
fn parse_tag_string(tag: &str) -> Result<Tag> {
793-
const YAML_CORE_HANDLE: &str = "tag:yaml.org,2002:";
794-
795781
let trimmed = tag.trim();
796782
if trimmed.is_empty() {
797783
return Err(PyValueError::new_err("tag must not be empty"));
798784
}
799785

800-
let Some((mut handle, mut suffix)) = split_tag_name(trimmed) else {
801-
return Err(PyValueError::new_err(format!(
802-
"invalid YAML tag `{trimmed}`"
803-
)));
804-
};
786+
let invalid_tag_error = || PyValueError::new_err(format!("invalid YAML tag `{trimmed}`"));
805787

806-
if handle == "!!" {
807-
handle = YAML_CORE_HANDLE;
808-
} else if handle.is_empty() && suffix.starts_with(YAML_CORE_HANDLE) {
809-
suffix = &suffix[YAML_CORE_HANDLE.len()..];
810-
handle = YAML_CORE_HANDLE;
788+
if !trimmed.contains('!') && !trimmed.contains(':') {
789+
return Err(invalid_tag_error());
811790
}
812791

813-
// saphyr cannot emit a bare tag represented as handle="" / suffix="!".
814-
// Normalize to handle="!" / suffix="" so round-tripping `!` works.
815-
let (handle, suffix) = if handle.is_empty() && suffix == "!" {
816-
("!", "")
792+
let tag = if trimmed == "!" {
793+
Tag {
794+
handle: String::new(),
795+
suffix: "!".to_string(),
796+
}
797+
} else if let Some(rest) = trimmed.strip_prefix("!!") {
798+
if rest.is_empty() {
799+
return Err(invalid_tag_error());
800+
}
801+
let mut suffix = String::with_capacity(rest.len() + 1);
802+
suffix.push('!');
803+
suffix.push_str(rest);
804+
Tag {
805+
handle: "!".to_string(),
806+
suffix,
807+
}
808+
} else if let Some(rest) = trimmed.strip_prefix('!') {
809+
if rest.is_empty() {
810+
return Err(invalid_tag_error());
811+
}
812+
Tag {
813+
handle: "!".to_string(),
814+
suffix: rest.to_string(),
815+
}
816+
} else if let Some((handle, suffix)) = trimmed.rsplit_once('!') {
817+
if suffix.is_empty() {
818+
return Err(invalid_tag_error());
819+
}
820+
Tag {
821+
handle: handle.to_string(),
822+
suffix: suffix.to_string(),
823+
}
817824
} else {
818-
(handle, suffix)
825+
Tag {
826+
handle: String::new(),
827+
suffix: trimmed.to_string(),
828+
}
819829
};
820830

821-
Ok(Tag {
822-
handle: handle.to_string(),
823-
suffix: suffix.to_string(),
824-
})
831+
// saphyr cannot emit a bare tag represented as handle="" / suffix="!".
832+
// Normalize to handle="!" / suffix="" so round-tripping `!` works.
833+
if tag.handle.is_empty() && tag.suffix.as_str() == "!" {
834+
Ok(Tag {
835+
handle: "!".to_string(),
836+
suffix: String::new(),
837+
})
838+
} else {
839+
Ok(tag)
840+
}
825841
}
826842

827843
fn is_tagged(py: Python<'_>, obj: &Bound<'_, PyAny>) -> Result<bool> {

tests_py/test_format_parse.py

Lines changed: 44 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import base64
34
import math
45
import textwrap
56
from typing import Callable
@@ -8,6 +9,7 @@
89

910
import yaml12
1011
from yaml12 import Tagged
12+
import base64
1113

1214

1315
def test_format_yaml_round_trip_nested_structures():
@@ -43,6 +45,20 @@ def test_format_yaml_preserves_tagged_values():
4345
assert reparsed.value["seq"].tag == "!seq"
4446

4547

48+
def test_format_yaml_preserves_binary_tags():
49+
payload = base64.b64encode(b"hello world").decode("ascii")
50+
tagged = Tagged(payload, "!!binary")
51+
52+
out = yaml12.format_yaml(tagged)
53+
54+
assert out.startswith("!!binary ")
55+
56+
reparsed = yaml12.parse_yaml(out)
57+
assert isinstance(reparsed, Tagged)
58+
assert reparsed.tag == "tag:yaml.org,2002:binary"
59+
assert reparsed.value == payload
60+
61+
4662
def test_format_yaml_ignores_core_schema_handles():
4763
obj = Tagged(
4864
{
@@ -292,6 +308,24 @@ def test_parse_yaml_preserves_custom_tags():
292308
assert nested["values"].value == [1, 2]
293309

294310

311+
def test_parse_yaml_preserves_timestamp_tags():
312+
yaml = textwrap.dedent(
313+
"""\
314+
- !!timestamp 2025-01-01
315+
- !!timestamp 2025-01-01 21:59:43.10 -5
316+
"""
317+
)
318+
parsed = yaml12.parse_yaml(yaml)
319+
320+
assert isinstance(parsed, list)
321+
assert len(parsed) == 2
322+
expected_values = ["2025-01-01", "2025-01-01 21:59:43.10 -5"]
323+
for item, expected in zip(parsed, expected_values):
324+
assert isinstance(item, Tagged)
325+
assert item.tag == "tag:yaml.org,2002:timestamp"
326+
assert item.value == expected
327+
328+
295329
def test_parse_yaml_applies_handlers_to_tagged_nodes():
296330
handlers: dict[str, Callable[[object], object]] = {
297331
"!expr": lambda value: eval(str(value)),
@@ -374,18 +408,22 @@ def test_parse_yaml_validates_handlers_argument():
374408
def test_parse_yaml_resolves_canonical_null_tags():
375409
canonical_cases = [
376410
"!!null ~",
377-
"!<!!null> ~",
378411
"!<tag:yaml.org,2002:null> ~",
379-
"!<!null> ~",
380412
]
381413
for yaml in canonical_cases:
382414
parsed = yaml12.parse_yaml(yaml)
383415
assert parsed is None
384416

385-
custom = yaml12.parse_yaml("!null ~")
386-
assert isinstance(custom, Tagged)
387-
assert custom.tag == "!null"
388-
assert custom.value == "~"
417+
informative_cases = {
418+
"!<!!null> ~": "!!null",
419+
"!<!null> ~": "!null",
420+
"!null ~": "!null",
421+
}
422+
for yaml, expected_tag in informative_cases.items():
423+
parsed = yaml12.parse_yaml(yaml)
424+
assert isinstance(parsed, Tagged)
425+
assert parsed.tag == expected_tag
426+
assert parsed.value == "~"
389427

390428

391429
def test_parse_yaml_errors_on_invalid_canonical_tags():

0 commit comments

Comments
 (0)