Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 44 additions & 8 deletions crates/quarto-markdown-pandoc/src/pandoc/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
*/

use crate::pandoc::block::Blocks;
use crate::pandoc::inline::Inlines;
use crate::pandoc::inline::{Inline, Inlines, Span, Str};
use crate::pandoc::location::empty_source_info;
use crate::readers;
use crate::{pandoc::RawBlock, utils::output::VerboseOutput};
use hashlink::LinkedHashMap;
use std::collections::HashMap;
use std::{io, mem};
use yaml_rust2::parser::{Event, MarkedEventReceiver, Parser};

Expand Down Expand Up @@ -82,7 +84,29 @@ impl YamlEventHandler {
}
}

fn parse_scalar(&self, s: &str) -> MetaValue {
fn parse_scalar(&self, s: &str, tag: Option<yaml_rust2::parser::Tag>) -> MetaValue {
// Check if this scalar has a YAML tag (like !path, !glob, !str)
if let Some(t) = tag {
// Tagged strings bypass markdown parsing - wrap in Span immediately
let mut attributes = HashMap::new();
attributes.insert("tag".to_string(), t.suffix.clone());

let span = Span {
attr: (
String::new(),
vec!["yaml-tagged-string".to_string()],
attributes,
),
content: vec![Inline::Str(Str {
text: s.to_string(),
source_info: empty_source_info(),
})],
source_info: empty_source_info(),
};
return MetaValue::MetaInlines(vec![Inline::Span(span)]);
}

// Untagged scalars: parse as booleans or strings (will be parsed as markdown later)
if s == "true" {
MetaValue::MetaBool(true)
} else if s == "false" {
Expand Down Expand Up @@ -116,12 +140,12 @@ impl MarkedEventReceiver for YamlEventHandler {
self.push_value(MetaValue::MetaList(list));
}
}
Event::Scalar(s, ..) => match self.stack.last_mut() {
Event::Scalar(s, _style, _anchor, tag) => match self.stack.last_mut() {
Some(ContextFrame::Map(_, key_slot @ None)) => {
*key_slot = Some(s.to_string());
}
Some(ContextFrame::Map(_, Some(_))) | Some(ContextFrame::List(_)) => {
let value = self.parse_scalar(&s);
let value = self.parse_scalar(&s, tag);
self.push_value(value);
}
_ => {}
Expand Down Expand Up @@ -187,10 +211,22 @@ pub fn parse_metadata_strings(meta: MetaValue, outer_metadata: &mut Meta) -> Met
}
MetaValue::MetaBlocks(pandoc.blocks)
}
_ => panic!(
"(unimplemented syntax error, this is a bug!) Failed to parse metadata string as markdown: {}",
s
),
Err(_) => {
// Markdown parse failed - wrap in Span with class "yaml-markdown-syntax-error"
let span = Span {
attr: (
String::new(),
vec!["yaml-markdown-syntax-error".to_string()],
HashMap::new(),
),
content: vec![Inline::Str(Str {
text: s.clone(),
source_info: empty_source_info(),
})],
source_info: empty_source_info(),
};
MetaValue::MetaInlines(vec![Inline::Span(span)])
}
}
}
MetaValue::MetaList(list) => {
Expand Down
2 changes: 1 addition & 1 deletion crates/quarto-markdown-pandoc/src/pandoc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,5 @@ pub use crate::pandoc::table::{
};

pub use crate::pandoc::ast_context::ASTContext;
pub use crate::pandoc::meta::{Meta, MetaValue, rawblock_to_meta};
pub use crate::pandoc::meta::{Meta, MetaValue, parse_metadata_strings, rawblock_to_meta};
pub use crate::pandoc::treesitter::treesitter_to_pandoc;
185 changes: 184 additions & 1 deletion crates/quarto-markdown-pandoc/tests/test_meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
* Copyright (c) 2025 Posit, PBC
*/

use hashlink::LinkedHashMap;
use quarto_markdown_pandoc::pandoc::location::{Location, Range, SourceInfo};
use quarto_markdown_pandoc::pandoc::{MetaValue, RawBlock, rawblock_to_meta};
use quarto_markdown_pandoc::pandoc::{
Inline, MetaValue, RawBlock, parse_metadata_strings, rawblock_to_meta,
};
use std::fs;

#[test]
Expand Down Expand Up @@ -54,3 +57,183 @@ fn test_metadata_parsing() {
Some(MetaValue::MetaList(_))
));
}

/// Verifies that YAML-tagged scalars (`!path`, `!glob`, `!str`) bypass markdown
/// parsing and surface as a single `Span` carrying the `yaml-tagged-string`
/// class and a `tag` attribute, while untagged scalars are still parsed as
/// markdown.
///
/// Every structural expectation is enforced: a value of the wrong shape fails
/// the test instead of silently skipping the assertions nested inside it.
#[test]
fn test_yaml_tagged_strings() {
    let content = fs::read_to_string("tests/yaml-tagged-strings.qmd").unwrap();

    // The fixture is handed over as a raw metadata block; the source range is a
    // zero-width placeholder.
    let block = RawBlock {
        format: "quarto_minus_metadata".to_string(),
        text: content,
        source_info: SourceInfo::with_range(Range {
            start: Location {
                offset: 0,
                row: 0,
                column: 0,
            },
            end: Location {
                offset: 0,
                row: 0,
                column: 0,
            },
        }),
    };

    let mut meta = rawblock_to_meta(block);
    let mut outer_meta = LinkedHashMap::new();

    // Run the string-parsing pass over every top-level metadata entry.
    for (k, v) in meta.drain() {
        let parsed = parse_metadata_strings(v, &mut outer_meta);
        outer_meta.insert(k, parsed);
    }

    // Asserts that `key` holds exactly one Span tagged with `expected_tag`
    // whose sole child is a Str containing `expected_text`.
    let assert_tagged = |key: &str, expected_tag: &str, expected_text: &str| {
        let value = outer_meta
            .get(key)
            .unwrap_or_else(|| panic!("{} not found", key));
        let MetaValue::MetaInlines(inlines) = value else {
            panic!("Expected MetaInlines for {}", key);
        };
        assert_eq!(
            inlines.len(),
            1,
            "Expected exactly one inline for {}",
            key
        );
        let Inline::Span(span) = &inlines[0] else {
            panic!("Expected Span inline for {}, got: {:?}", key, inlines[0]);
        };
        assert!(
            span.attr.1.contains(&"yaml-tagged-string".to_string()),
            "{} should carry the yaml-tagged-string class",
            key
        );
        assert_eq!(
            span.attr.2.get("tag").map(String::as_str),
            Some(expected_tag),
            "unexpected tag attribute for {}",
            key
        );
        assert_eq!(
            span.content.len(),
            1,
            "Expected exactly one inline inside the Span for {}",
            key
        );
        let Inline::Str(s) = &span.content[0] else {
            panic!("Expected Str inline inside Span for {}", key);
        };
        assert_eq!(s.text, expected_text);
    };

    assert_tagged("plain_path", "path", "images/neovim-*.png");
    assert_tagged("glob_pattern", "glob", "posts/*/index.qmd");
    assert_tagged("literal_string", "str", "_foo_.py");

    // Check regular_markdown - untagged, so it should have been parsed as
    // markdown and contain an Emph element for *emphasis*.
    let regular_markdown = outer_meta
        .get("regular_markdown")
        .expect("regular_markdown not found");
    let MetaValue::MetaInlines(inlines) = regular_markdown else {
        panic!("Expected MetaInlines for regular_markdown");
    };
    let has_emph = inlines
        .iter()
        .any(|inline| matches!(inline, Inline::Emph(_)));
    assert!(
        has_emph,
        "regular_markdown should have Emph element from *emphasis*"
    );
}

/// Verifies that untagged metadata strings which fail markdown parsing are
/// wrapped in a `Span` with class `yaml-markdown-syntax-error` (instead of
/// panicking), and that strings which do parse still yield markdown inlines.
///
/// Every structural expectation is enforced: a value of the wrong shape fails
/// the test instead of silently skipping the assertions nested inside it.
#[test]
fn test_yaml_markdown_parse_failure() {
    let content = fs::read_to_string("tests/yaml-markdown-parse-failure.qmd").unwrap();

    // The fixture is handed over as a raw metadata block; the source range is a
    // zero-width placeholder.
    let block = RawBlock {
        format: "quarto_minus_metadata".to_string(),
        text: content,
        source_info: SourceInfo::with_range(Range {
            start: Location {
                offset: 0,
                row: 0,
                column: 0,
            },
            end: Location {
                offset: 0,
                row: 0,
                column: 0,
            },
        }),
    };

    let mut meta = rawblock_to_meta(block);
    let mut outer_meta = LinkedHashMap::new();

    // Parse metadata strings - this should not panic
    for (k, v) in meta.drain() {
        let parsed = parse_metadata_strings(v, &mut outer_meta);
        outer_meta.insert(k, parsed);
    }

    // Asserts that `key` holds a leading error Span whose first child is a Str
    // preserving the original, unparsed text.
    let assert_syntax_error = |key: &str, expected_text: &str| {
        let value = outer_meta
            .get(key)
            .unwrap_or_else(|| panic!("{} not found", key));
        let MetaValue::MetaInlines(inlines) = value else {
            panic!("Expected MetaInlines for {}", key);
        };
        let Some(Inline::Span(span)) = inlines.first() else {
            panic!("Expected Span inline for failed parse of {}", key);
        };
        assert!(
            span.attr
                .1
                .contains(&"yaml-markdown-syntax-error".to_string()),
            "{} should carry the yaml-markdown-syntax-error class",
            key
        );
        let Some(Inline::Str(s)) = span.content.first() else {
            panic!("Expected Str inline inside Span for {}", key);
        };
        assert_eq!(s.text, expected_text);
    };

    assert_syntax_error("untagged_path", "posts/*/index.qmd");
    assert_syntax_error("another_glob", "images/*.png");

    // Check underscore_file - this one should successfully parse as markdown,
    // with _foo_ becoming an Emph element.
    let underscore_file = outer_meta
        .get("underscore_file")
        .expect("underscore_file not found");
    let MetaValue::MetaInlines(inlines) = underscore_file else {
        panic!("Expected MetaInlines for underscore_file");
    };
    let has_emph = inlines
        .iter()
        .any(|inline| matches!(inline, Inline::Emph(_)));
    assert!(
        has_emph,
        "underscore_file should have Emph element from _foo_"
    );
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
title: Test Markdown Parse Failure Fallback
untagged_path: posts/*/index.qmd
another_glob: images/*.png
underscore_file: _foo_.py
---

Test document for graceful handling of markdown parse failures in untagged strings.
9 changes: 9 additions & 0 deletions crates/quarto-markdown-pandoc/tests/yaml-tagged-strings.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
title: Test YAML Tagged Strings
plain_path: !path images/neovim-*.png
glob_pattern: !glob posts/*/index.qmd
literal_string: !str _foo_.py
regular_markdown: This has *emphasis*
---

Test document for YAML tag support.
1 change: 1 addition & 0 deletions docs/syntax/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ The features documented here are currently under development. The syntax and beh
- [Definition Lists](definition-lists.qmd) - Create definition lists using an embedded markdown DSL
- [Editorial Marks](editorial-marks.qmd) - Annotate text with highlights, insertions, deletions, and comments
- [Footnotes](footnotes.qmd) - Add footnotes with inline or fenced block syntax
- [YAML Metadata](yaml-metadata.qmd) - Control markdown parsing in metadata with YAML tags
Loading