Skip to content

Commit 9a0fc16

Browse files
committed
parse yaml markdown and interpret tags
1 parent 3faab11 commit 9a0fc16

File tree

7 files changed

+489
-10
lines changed

7 files changed

+489
-10
lines changed

crates/quarto-markdown-pandoc/src/pandoc/meta.rs

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
*/
55

66
use crate::pandoc::block::Blocks;
7-
use crate::pandoc::inline::Inlines;
7+
use crate::pandoc::inline::{Inline, Inlines, Span, Str};
8+
use crate::pandoc::location::empty_source_info;
89
use crate::readers;
910
use crate::{pandoc::RawBlock, utils::output::VerboseOutput};
1011
use hashlink::LinkedHashMap;
12+
use std::collections::HashMap;
1113
use std::{io, mem};
1214
use yaml_rust2::parser::{Event, MarkedEventReceiver, Parser};
1315

@@ -82,7 +84,29 @@ impl YamlEventHandler {
8284
}
8385
}
8486

85-
fn parse_scalar(&self, s: &str) -> MetaValue {
87+
fn parse_scalar(&self, s: &str, tag: Option<yaml_rust2::parser::Tag>) -> MetaValue {
88+
// Check if this scalar has a YAML tag (like !path, !glob, !str)
89+
if let Some(t) = tag {
90+
// Tagged strings bypass markdown parsing - wrap in Span immediately
91+
let mut attributes = HashMap::new();
92+
attributes.insert("tag".to_string(), t.suffix.clone());
93+
94+
let span = Span {
95+
attr: (
96+
String::new(),
97+
vec!["yaml-tagged-string".to_string()],
98+
attributes,
99+
),
100+
content: vec![Inline::Str(Str {
101+
text: s.to_string(),
102+
source_info: empty_source_info(),
103+
})],
104+
source_info: empty_source_info(),
105+
};
106+
return MetaValue::MetaInlines(vec![Inline::Span(span)]);
107+
}
108+
109+
// Untagged scalars: parse as booleans or strings (will be parsed as markdown later)
86110
if s == "true" {
87111
MetaValue::MetaBool(true)
88112
} else if s == "false" {
@@ -116,12 +140,12 @@ impl MarkedEventReceiver for YamlEventHandler {
116140
self.push_value(MetaValue::MetaList(list));
117141
}
118142
}
119-
Event::Scalar(s, ..) => match self.stack.last_mut() {
143+
Event::Scalar(s, _style, _anchor, tag) => match self.stack.last_mut() {
120144
Some(ContextFrame::Map(_, key_slot @ None)) => {
121145
*key_slot = Some(s.to_string());
122146
}
123147
Some(ContextFrame::Map(_, Some(_))) | Some(ContextFrame::List(_)) => {
124-
let value = self.parse_scalar(&s);
148+
let value = self.parse_scalar(&s, tag);
125149
self.push_value(value);
126150
}
127151
_ => {}
@@ -187,10 +211,22 @@ pub fn parse_metadata_strings(meta: MetaValue, outer_metadata: &mut Meta) -> Met
187211
}
188212
MetaValue::MetaBlocks(pandoc.blocks)
189213
}
190-
_ => panic!(
191-
"(unimplemented syntax error, this is a bug!) Failed to parse metadata string as markdown: {}",
192-
s
193-
),
214+
Err(_) => {
215+
// Markdown parse failed - wrap in Span with class "yaml-markdown-syntax-error"
216+
let span = Span {
217+
attr: (
218+
String::new(),
219+
vec!["yaml-markdown-syntax-error".to_string()],
220+
HashMap::new(),
221+
),
222+
content: vec![Inline::Str(Str {
223+
text: s.clone(),
224+
source_info: empty_source_info(),
225+
})],
226+
source_info: empty_source_info(),
227+
};
228+
MetaValue::MetaInlines(vec![Inline::Span(span)])
229+
}
194230
}
195231
}
196232
MetaValue::MetaList(list) => {

crates/quarto-markdown-pandoc/src/pandoc/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,5 @@ pub use crate::pandoc::table::{
3737
};
3838

3939
pub use crate::pandoc::ast_context::ASTContext;
40-
pub use crate::pandoc::meta::{Meta, MetaValue, rawblock_to_meta};
40+
pub use crate::pandoc::meta::{Meta, MetaValue, parse_metadata_strings, rawblock_to_meta};
4141
pub use crate::pandoc::treesitter::treesitter_to_pandoc;

crates/quarto-markdown-pandoc/tests/test_meta.rs

Lines changed: 184 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@
33
* Copyright (c) 2025 Posit, PBC
44
*/
55

6+
use hashlink::LinkedHashMap;
67
use quarto_markdown_pandoc::pandoc::location::{Location, Range, SourceInfo};
7-
use quarto_markdown_pandoc::pandoc::{MetaValue, RawBlock, rawblock_to_meta};
8+
use quarto_markdown_pandoc::pandoc::{
9+
Inline, MetaValue, RawBlock, parse_metadata_strings, rawblock_to_meta,
10+
};
811
use std::fs;
912

1013
#[test]
@@ -54,3 +57,183 @@ fn test_metadata_parsing() {
5457
Some(MetaValue::MetaList(_))
5558
));
5659
}
60+
61+
#[test]
fn test_yaml_tagged_strings() {
    // Verifies that scalars carrying a YAML tag (!path, !glob, !str) bypass
    // markdown parsing and come back as a single Span carrying the class
    // "yaml-tagged-string" and a "tag" attribute, while untagged scalars are
    // still parsed as markdown.

    /// Asserts that `value` is `MetaInlines` holding exactly one `Span` with the
    /// "yaml-tagged-string" class, a "tag" attribute equal to `tag`, and a
    /// leading `Str` whose text is `text`. Every shape mismatch panics, so a
    /// structural regression can never make the test pass silently.
    fn assert_tagged_string(value: &MetaValue, key: &str, tag: &str, text: &str) {
        let inlines = match value {
            MetaValue::MetaInlines(inlines) => inlines,
            _ => panic!("Expected MetaInlines for {key}"),
        };
        assert_eq!(inlines.len(), 1, "Expected exactly one inline for {key}");

        let span = match &inlines[0] {
            Inline::Span(span) => span,
            other => panic!("Expected Span inline for {key}, got: {other:?}"),
        };
        assert!(
            span.attr.1.iter().any(|class| class == "yaml-tagged-string"),
            "{key} span is missing the yaml-tagged-string class"
        );
        assert_eq!(
            span.attr.2.get("tag").map(String::as_str),
            Some(tag),
            "unexpected tag attribute for {key}"
        );

        match span.content.first() {
            Some(Inline::Str(s)) => assert_eq!(s.text, text, "unexpected text for {key}"),
            Some(other) => panic!("Expected Str inline inside Span for {key}, got: {other:?}"),
            None => panic!("Expected Str inline inside Span for {key}, but the Span is empty"),
        }
    }

    let content = fs::read_to_string("tests/yaml-tagged-strings.qmd").unwrap();

    // The source range is irrelevant for this test, so a zeroed range is used.
    let block = RawBlock {
        format: "quarto_minus_metadata".to_string(),
        text: content,
        source_info: SourceInfo::with_range(Range {
            start: Location {
                offset: 0,
                row: 0,
                column: 0,
            },
            end: Location {
                offset: 0,
                row: 0,
                column: 0,
            },
        }),
    };

    let mut meta = rawblock_to_meta(block);
    let mut outer_meta = LinkedHashMap::new();

    // Parse metadata strings
    for (k, v) in meta.drain() {
        let parsed = parse_metadata_strings(v, &mut outer_meta);
        outer_meta.insert(k, parsed);
    }

    // All three tagged entries must be wrapped in a tagged Span, unparsed.
    let plain_path = outer_meta.get("plain_path").expect("plain_path not found");
    assert_tagged_string(plain_path, "plain_path", "path", "images/neovim-*.png");

    let glob_pattern = outer_meta
        .get("glob_pattern")
        .expect("glob_pattern not found");
    assert_tagged_string(glob_pattern, "glob_pattern", "glob", "posts/*/index.qmd");

    let literal_string = outer_meta
        .get("literal_string")
        .expect("literal_string not found");
    assert_tagged_string(literal_string, "literal_string", "str", "_foo_.py");

    // Check regular_markdown - should have parsed markdown (Emph element)
    let regular_markdown = outer_meta
        .get("regular_markdown")
        .expect("regular_markdown not found");
    match regular_markdown {
        MetaValue::MetaInlines(inlines) => {
            // Should contain Emph for *emphasis*
            let has_emph = inlines
                .iter()
                .any(|inline| matches!(inline, Inline::Emph(_)));
            assert!(
                has_emph,
                "regular_markdown should have Emph element from *emphasis*"
            );
        }
        _ => panic!("Expected MetaInlines for regular_markdown"),
    }
}
155+
156+
#[test]
fn test_yaml_markdown_parse_failure() {
    // Verifies that untagged strings which fail markdown parsing are wrapped in
    // a Span with class "yaml-markdown-syntax-error" instead of panicking, and
    // that strings which do parse are still converted to markdown inlines.

    /// Asserts that `value` is `MetaInlines` whose first inline is a `Span`
    /// with the "yaml-markdown-syntax-error" class wrapping a `Str` equal to
    /// `text`. Every shape mismatch panics, so the check cannot be skipped
    /// silently.
    fn assert_syntax_error_span(value: &MetaValue, key: &str, text: &str) {
        let inlines = match value {
            MetaValue::MetaInlines(inlines) => inlines,
            _ => panic!("Expected MetaInlines for {key}"),
        };

        let span = match inlines.first() {
            Some(Inline::Span(span)) => span,
            Some(other) => panic!("Expected Span inline for failed parse of {key}, got: {other:?}"),
            None => panic!("Expected Span inline for failed parse of {key}, got no inlines"),
        };
        assert!(
            span.attr
                .1
                .iter()
                .any(|class| class == "yaml-markdown-syntax-error"),
            "{key} span is missing the yaml-markdown-syntax-error class"
        );

        match span.content.first() {
            Some(Inline::Str(s)) => assert_eq!(s.text, text, "unexpected text for {key}"),
            Some(other) => panic!("Expected Str inline inside Span for {key}, got: {other:?}"),
            None => panic!("Expected Str inline inside Span for {key}, but the Span is empty"),
        }
    }

    let content = fs::read_to_string("tests/yaml-markdown-parse-failure.qmd").unwrap();

    // The source range is irrelevant for this test, so a zeroed range is used.
    let block = RawBlock {
        format: "quarto_minus_metadata".to_string(),
        text: content,
        source_info: SourceInfo::with_range(Range {
            start: Location {
                offset: 0,
                row: 0,
                column: 0,
            },
            end: Location {
                offset: 0,
                row: 0,
                column: 0,
            },
        }),
    };

    let mut meta = rawblock_to_meta(block);
    let mut outer_meta = LinkedHashMap::new();

    // Parse metadata strings - this should not panic
    for (k, v) in meta.drain() {
        let parsed = parse_metadata_strings(v, &mut outer_meta);
        outer_meta.insert(k, parsed);
    }

    // Check untagged_path - should be wrapped in error span
    let untagged_path = outer_meta
        .get("untagged_path")
        .expect("untagged_path not found");
    assert_syntax_error_span(untagged_path, "untagged_path", "posts/*/index.qmd");

    // Check another_glob - should also be wrapped in error span
    let another_glob = outer_meta
        .get("another_glob")
        .expect("another_glob not found");
    assert_syntax_error_span(another_glob, "another_glob", "images/*.png");

    // Check underscore_file - this one should successfully parse as markdown with Emph
    let underscore_file = outer_meta
        .get("underscore_file")
        .expect("underscore_file not found");
    match underscore_file {
        MetaValue::MetaInlines(inlines) => {
            // _foo_ should become Emph element
            let has_emph = inlines
                .iter()
                .any(|inline| matches!(inline, Inline::Emph(_)));
            assert!(
                has_emph,
                "underscore_file should have Emph element from _foo_"
            );
        }
        // Without this arm the Emph check would be skipped and the test would pass vacuously.
        _ => panic!("Expected MetaInlines for underscore_file"),
    }
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
title: Test Markdown Parse Failure Fallback
3+
untagged_path: posts/*/index.qmd
4+
another_glob: images/*.png
5+
underscore_file: _foo_.py
6+
---
7+
8+
Test document for graceful handling of markdown parse failures in untagged strings.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
title: Test YAML Tagged Strings
3+
plain_path: !path images/neovim-*.png
4+
glob_pattern: !glob posts/*/index.qmd
5+
literal_string: !str _foo_.py
6+
regular_markdown: This has *emphasis*
7+
---
8+
9+
Test document for YAML tag support.

docs/syntax/index.qmd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ The features documented here are currently under development. The syntax and beh
1313
- [Definition Lists](definition-lists.qmd) - Create definition lists using an embedded markdown DSL
1414
- [Editorial Marks](editorial-marks.qmd) - Annotate text with highlights, insertions, deletions, and comments
1515
- [Footnotes](footnotes.qmd) - Add footnotes with inline or fenced block syntax
16+
- [YAML Metadata](yaml-metadata.qmd) - Control markdown parsing in metadata with YAML tags

0 commit comments

Comments
 (0)