diff --git a/Cargo.lock b/Cargo.lock index e6207fb..51a2a65 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -394,6 +394,7 @@ dependencies = [ "colored", "glob", "include_dir", + "quarto-error-reporting", "quarto-markdown-pandoc", "regex", "serde", diff --git a/crates/qmd-syntax-helper/Cargo.toml b/crates/qmd-syntax-helper/Cargo.toml index 9b4f98c..da5b9ae 100644 --- a/crates/qmd-syntax-helper/Cargo.toml +++ b/crates/qmd-syntax-helper/Cargo.toml @@ -23,6 +23,7 @@ anyhow = "1.0" regex = "1.10" colored = "2.1" quarto-markdown-pandoc.workspace = true +quarto-error-reporting.workspace = true include_dir = "0.7" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" diff --git a/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs b/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs index 0b1f33f..be2b034 100644 --- a/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs +++ b/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs @@ -1,28 +1,11 @@ use anyhow::{Context, Result}; use colored::Colorize; -use serde::{Deserialize, Serialize}; use std::fs; use std::path::Path; use crate::rule::{CheckResult, ConvertResult, Rule}; use crate::utils::file_io::{read_file, write_file}; -#[derive(Debug, Serialize, Deserialize)] -struct ErrorLocation { - row: usize, - column: usize, - byte_offset: usize, - size: usize, -} - -#[derive(Debug, Serialize, Deserialize)] -struct ParseError { - filename: String, - title: String, - message: String, - location: ErrorLocation, -} - pub struct DivWhitespaceConverter {} impl DivWhitespaceConverter { @@ -30,8 +13,8 @@ impl DivWhitespaceConverter { Ok(Self {}) } - /// Parse a file and get error locations as JSON - fn get_parse_errors(&self, file_path: &Path) -> Result> { + /// Parse a file and get diagnostic messages + fn get_parse_errors(&self, file_path: &Path) -> Result> { let content = fs::read_to_string(file_path) .with_context(|| format!("Failed to read file: {}", file_path.display()))?; @@ -44,43 +27,19 @@ impl DivWhitespaceConverter { false, // not loose mode &filename, &mut sink, - Some( - quarto_markdown_pandoc::readers::qmd_error_messages::produce_json_error_messages - as fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - ), ); match result { Ok(_) => Ok(Vec::new()), // No errors - Err(error_messages) => { - // Parse the JSON error output - // The error messages come as a single JSON array string - if error_messages.is_empty() { - return Ok(Vec::new()); - } - - let json_str = error_messages.join(""); - - // Try to parse as JSON array - match serde_json::from_str::>(&json_str) { - Ok(errors) => Ok(errors), - Err(_) => { - // If parsing fails, the messages are likely plain text warnings/debug messages - // rather than actual syntax errors. These don't indicate div whitespace issues, - // so we can safely ignore them for this specific rule. 
- Ok(Vec::new()) - } - } + Err(diagnostics) => { + // Return diagnostic messages directly + Ok(diagnostics) } } } /// Find div fence errors that need whitespace fixes - fn find_div_whitespace_errors(&self, content: &str, errors: &[ParseError]) -> Vec { + fn find_div_whitespace_errors(&self, content: &str, errors: &[quarto_error_reporting::DiagnosticMessage]) -> Vec { let mut fix_positions = Vec::new(); let lines: Vec<&str> = content.lines().collect(); @@ -93,12 +52,18 @@ impl DivWhitespaceConverter { continue; } + // Extract row from location (if available) + // SourceInfo uses 0-indexed rows, div_whitespace uses them too + let error_row = error.location.as_ref() + .map(|loc| loc.range.start.row) + .unwrap_or(0); + // The error might be on the line itself or the line before (for div fences) // Check both the current line and the previous line - let lines_to_check = if error.location.row > 0 { - vec![error.location.row - 1, error.location.row] + let lines_to_check = if error_row > 0 { + vec![error_row - 1, error_row] } else { - vec![error.location.row] + vec![error_row] }; for &line_idx in &lines_to_check { diff --git a/crates/qmd-syntax-helper/src/diagnostics/parse_check.rs b/crates/qmd-syntax-helper/src/diagnostics/parse_check.rs index 9dc25a8..406497e 100644 --- a/crates/qmd-syntax-helper/src/diagnostics/parse_check.rs +++ b/crates/qmd-syntax-helper/src/diagnostics/parse_check.rs @@ -24,14 +24,6 @@ impl ParseChecker { false, &filename, &mut sink, - Some( - quarto_markdown_pandoc::readers::qmd_error_messages::produce_json_error_messages - as fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - ), ); Ok(result.is_ok()) diff --git a/crates/quarto-error-reporting/src/builder.rs b/crates/quarto-error-reporting/src/builder.rs index 59d7240..b76888d 100644 --- a/crates/quarto-error-reporting/src/builder.rs +++ b/crates/quarto-error-reporting/src/builder.rs @@ -256,6 +256,35 @@ impl DiagnosticMessageBuilder { self.details.push(DetailItem { kind: DetailKind::Error, content: detail.into(), + location: None, + }); + self + } + + /// Add an error detail with a source location. + /// + /// This allows adding contextual information that points to specific locations + /// in the source code, creating rich multi-location error messages. + /// + /// # Example + /// + /// ```ignore + /// use quarto_error_reporting::DiagnosticMessageBuilder; + /// + /// let error = DiagnosticMessageBuilder::error("Mismatched brackets") + /// .add_detail_at("Opening bracket here", opening_location) + /// .add_detail_at("But no closing bracket found", end_location) + /// .build(); + /// ``` + pub fn add_detail_at( + mut self, + detail: impl Into, + location: quarto_source_map::SourceInfo, + ) -> Self { + self.details.push(DetailItem { + kind: DetailKind::Error, + content: detail.into(), + location: Some(location), }); self } @@ -278,6 +307,21 @@ impl DiagnosticMessageBuilder { self.details.push(DetailItem { kind: DetailKind::Info, content: info.into(), + location: None, + }); + self + } + + /// Add an info detail with a source location. 
+ pub fn add_info_at( + mut self, + info: impl Into, + location: quarto_source_map::SourceInfo, + ) -> Self { + self.details.push(DetailItem { + kind: DetailKind::Info, + content: info.into(), + location: Some(location), }); self } @@ -297,6 +341,21 @@ impl DiagnosticMessageBuilder { self.details.push(DetailItem { kind: DetailKind::Note, content: note.into(), + location: None, + }); + self + } + + /// Add a note detail with a source location. + pub fn add_note_at( + mut self, + note: impl Into, + location: quarto_source_map::SourceInfo, + ) -> Self { + self.details.push(DetailItem { + kind: DetailKind::Note, + content: note.into(), + location: Some(location), }); self } diff --git a/crates/quarto-error-reporting/src/diagnostic.rs b/crates/quarto-error-reporting/src/diagnostic.rs index 08cf24f..5608f71 100644 --- a/crates/quarto-error-reporting/src/diagnostic.rs +++ b/crates/quarto-error-reporting/src/diagnostic.rs @@ -83,14 +83,18 @@ impl From<&str> for MessageContent { /// /// Following tidyverse guidelines, details provide specific information about /// the error (what went wrong, where, with what values). -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct DetailItem { /// The kind of detail (error, info, note) pub kind: DetailKind, /// The content of the detail pub content: MessageContent, - // Future: Optional source span for details that point to specific code locations - // pub span: Option, + /// Optional source location for this detail + /// + /// When present, this identifies where in the source code this detail applies. + /// This allows error messages to highlight multiple related locations. + #[serde(skip_serializing_if = "Option::is_none")] + pub location: Option, } /// A diagnostic message following tidyverse-style structure. 
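// --- Illustrative sketch (commentary, not part of this diff) ---------------
// How the location-aware builder methods introduced above might be combined
// into one multi-location diagnostic. `open_loc` and `close_loc` are
// hypothetical `quarto_source_map::SourceInfo` values and the error code is
// made up; only the method names and signatures come from this change.
//
// let diagnostic = DiagnosticMessageBuilder::error("Unclosed div fence")
//     .with_code("Q-1-999")                          // hypothetical code
//     .with_location(open_loc.clone())
//     .problem("This div fence is never closed")
//     .add_detail_at("Opening fence here", open_loc)
//     .add_info_at("Expected a closing fence before this point", close_loc)
//     .add_hint("Add a closing `:::` fence")
//     .build();
// ----------------------------------------------------------------------------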
@@ -290,66 +294,117 @@ impl DiagnosticMessage { let mut result = String::new(); - // Title line with kind - let kind_str = match self.kind { - DiagnosticKind::Error => "Error", - DiagnosticKind::Warning => "Warning", - DiagnosticKind::Info => "Info", - DiagnosticKind::Note => "Note", + // Check if we have any location info that could be displayed with ariadne + // This includes the main diagnostic location OR any detail with a location + let has_any_location = self.location.is_some() + || self.details.iter().any(|d| d.location.is_some()); + + // If we have location info and source context, render ariadne source display + let has_ariadne = if has_any_location && ctx.is_some() { + // Use main location if available, otherwise use first detail location + let location = self.location.as_ref() + .or_else(|| self.details.iter().find_map(|d| d.location.as_ref())); + + if let Some(loc) = location { + if let Some(ariadne_output) = self.render_ariadne_source_context(loc, ctx.unwrap()) { + result.push_str(&ariadne_output); + true + } else { + false + } + } else { + false + } + } else { + false }; - if let Some(code) = &self.code { - write!(result, "{} [{}]: {}", kind_str, code, self.title).unwrap(); - } else { - write!(result, "{}: {}", kind_str, self.title).unwrap(); - } + // If we don't have ariadne output, show full tidyverse-style content + // If we do have ariadne, only show details without locations and hints + // (ariadne already shows: title, code, problem, and details with locations) + if !has_ariadne { + // No ariadne - show everything in tidyverse style + + // Title with kind prefix and error code (e.g., "Error [Q-1-1]: Invalid input") + let kind_str = match self.kind { + DiagnosticKind::Error => "Error", + DiagnosticKind::Warning => "Warning", + DiagnosticKind::Info => "Info", + DiagnosticKind::Note => "Note", + }; + if let Some(code) = &self.code { + write!(result, "{} [{}]: {}\n", kind_str, code, self.title).unwrap(); + } else { + write!(result, "{}: {}\n", kind_str, self.title).unwrap(); + } - // Add location if present - if let Some(loc) = &self.location { - if let Some(ctx) = ctx { - // Try to map to original source - if let Some(mapped) = loc.map_offset(loc.range.start.offset, ctx) { - if let Some(file) = ctx.get_file(mapped.file_id) { - write!( - result, - " at {}:{}:{}", - file.path, - mapped.location.row + 1, // Display as 1-based - mapped.location.column + 1 - ) - .unwrap(); + // Show location info if available (but no ariadne rendering) + if let Some(loc) = &self.location { + // Try to map with context if available + if let Some(ctx) = ctx { + if let Some(mapped) = loc.map_offset(loc.range.start.offset, ctx) { + if let Some(file) = ctx.get_file(mapped.file_id) { + write!( + result, + " at {}:{}:{}\n", + file.path, + mapped.location.row + 1, + mapped.location.column + 1 + ) + .unwrap(); + } } + } else { + // No context: show immediate location (1-indexed for display) + write!( + result, + " at {}:{}\n", + loc.range.start.row + 1, + loc.range.start.column + 1 + ) + .unwrap(); } - } else { - // No context, show immediate location - write!( - result, - " at {}:{}", - loc.range.start.row + 1, - loc.range.start.column + 1 - ) - .unwrap(); } - } - // Problem statement - if let Some(problem) = &self.problem { - write!(result, "\n{}", problem.as_str()).unwrap(); - } + // Problem statement (optional additional context) + if let Some(problem) = &self.problem { + write!(result, "{}\n", problem.as_str()).unwrap(); + } - // Details with appropriate bullets - for detail in &self.details { - 
let bullet = match detail.kind { - DetailKind::Error => "✖", - DetailKind::Info => "ℹ", - DetailKind::Note => "•", - }; - write!(result, "\n{} {}", bullet, detail.content.as_str()).unwrap(); - } + // All details with appropriate bullets + for detail in &self.details { + let bullet = match detail.kind { + DetailKind::Error => "✖", + DetailKind::Info => "ℹ", + DetailKind::Note => "•", + }; + write!(result, "{} {}\n", bullet, detail.content.as_str()).unwrap(); + } + + // All hints + for hint in &self.hints { + write!(result, "? {}\n", hint.as_str()).unwrap(); + } + } else { + // Have ariadne - only show details without locations and hints + // (ariadne shows title, code, problem, and located details) + + // Details without locations (ariadne can't show these) + for detail in &self.details { + if detail.location.is_none() { + let bullet = match detail.kind { + DetailKind::Error => "✖", + DetailKind::Info => "ℹ", + DetailKind::Note => "•", + }; + write!(result, "{} {}\n", bullet, detail.content.as_str()).unwrap(); + } + } - // Hints - for hint in &self.hints { - write!(result, "\n? {}", hint.as_str()).unwrap(); + // All hints (ariadne doesn't show hints) + for hint in &self.hints { + write!(result, "? {}\n", hint.as_str()).unwrap(); + } } result @@ -413,10 +468,14 @@ impl DiagnosticMessage { DetailKind::Info => "info", DetailKind::Note => "note", }; - json!({ + let mut detail_obj = json!({ "kind": detail_kind, "content": d.content.to_json() - }) + }); + if let Some(location) = &d.location { + detail_obj["location"] = json!(location); + } + detail_obj }) .collect(); obj["details"] = json!(details); @@ -433,6 +492,127 @@ impl DiagnosticMessage { obj } + + /// Extract the original file_id from a SourceInfo by traversing the mapping chain + fn extract_file_id(source_info: &quarto_source_map::SourceInfo) -> Option { + match &source_info.mapping { + quarto_source_map::SourceMapping::Original { file_id } => Some(*file_id), + quarto_source_map::SourceMapping::Substring { parent, .. } => Self::extract_file_id(parent), + quarto_source_map::SourceMapping::Transformed { parent, .. } => Self::extract_file_id(parent), + quarto_source_map::SourceMapping::Concat { pieces } => { + // For concatenated sources, use the first piece's file_id + pieces.first().and_then(|p| Self::extract_file_id(&p.source_info)) + } + } + } + + /// Render source context using ariadne (private helper for to_text). + /// + /// This produces the visual source code snippet with highlighting. + /// The tidyverse-style problem/details/hints are added separately by to_text(). 
+ fn render_ariadne_source_context( + &self, + main_location: &quarto_source_map::SourceInfo, + ctx: &quarto_source_map::SourceContext, + ) -> Option { + use ariadne::{Color, Label, Report, ReportKind, Source}; + + // Extract file_id from the source mapping by traversing the chain + let file_id = Self::extract_file_id(main_location)?; + + let file = ctx.get_file(file_id)?; + + // Get file content: use stored content for ephemeral files, or read from disk + let content = match &file.content { + Some(c) => c.clone(), // Ephemeral file: use stored content + None => { + // Disk-backed file: read from disk + std::fs::read_to_string(&file.path) + .unwrap_or_else(|e| panic!("Failed to read file '{}': {}", file.path, e)) + } + }; + + // Map the location offsets back to original file positions + let start_mapped = main_location.map_offset(main_location.range.start.offset, ctx)?; + let end_mapped = main_location.map_offset(main_location.range.end.offset, ctx)?; + + // Determine report kind and color + let (report_kind, main_color) = match self.kind { + DiagnosticKind::Error => (ReportKind::Error, Color::Red), + DiagnosticKind::Warning => (ReportKind::Warning, Color::Yellow), + DiagnosticKind::Info => (ReportKind::Advice, Color::Cyan), + DiagnosticKind::Note => (ReportKind::Advice, Color::Blue), + }; + + // Build the report using the mapped offset for proper line:column display + let mut report = Report::build( + report_kind, + file.path.clone(), + start_mapped.location.offset, + ); + + // Add title with error code + if let Some(code) = &self.code { + report = report.with_message(format!("[{}] {}", code, self.title)); + } else { + report = report.with_message(&self.title); + } + + // Add main location label using mapped offsets + let main_span = start_mapped.location.offset..end_mapped.location.offset; + let main_message = if let Some(problem) = &self.problem { + problem.as_str() + } else { + &self.title + }; + + report = report.with_label( + Label::new((file.path.clone(), main_span)) + .with_message(main_message) + .with_color(main_color), + ); + + // Add detail locations as additional labels (only those with locations) + for detail in &self.details { + if let Some(detail_loc) = &detail.location { + // Extract file_id from detail location + let detail_file_id = match Self::extract_file_id(detail_loc) { + Some(fid) => fid, + None => continue, // Skip if we can't extract file_id + }; + + if detail_file_id == file_id { + // Map detail offsets to original file positions + if let (Some(detail_start), Some(detail_end)) = ( + detail_loc.map_offset(detail_loc.range.start.offset, ctx), + detail_loc.map_offset(detail_loc.range.end.offset, ctx), + ) { + let detail_span = detail_start.location.offset..detail_end.location.offset; + let detail_color = match detail.kind { + DetailKind::Error => Color::Red, + DetailKind::Info => Color::Cyan, + DetailKind::Note => Color::Blue, + }; + + report = report.with_label( + Label::new((file.path.clone(), detail_span)) + .with_message(detail.content.as_str()) + .with_color(detail_color), + ); + } + } + } + } + + // Render to string + let report = report.finish(); + let mut output = Vec::new(); + report + .write((file.path.clone(), Source::from(content.as_str())), &mut output) + .ok()?; + + String::from_utf8(output).ok() + } } #[cfg(test)] @@ -503,13 +683,13 @@ mod tests { #[test] fn test_to_text_simple_error() { let msg = DiagnosticMessage::error("Something went wrong"); - assert_eq!(msg.to_text(None), "Error: Something went wrong"); + assert_eq!(msg.to_text(None), "Error: 
Something went wrong\n"); } #[test] fn test_to_text_with_code() { let msg = DiagnosticMessage::error("Something went wrong").with_code("Q-1-1"); - assert_eq!(msg.to_text(None), "Error [Q-1-1]: Something went wrong"); + assert_eq!(msg.to_text(None), "Error [Q-1-1]: Something went wrong\n"); } #[test] diff --git a/crates/quarto-markdown-pandoc/fuzz/fuzz_targets/hello_fuzz.rs b/crates/quarto-markdown-pandoc/fuzz/fuzz_targets/hello_fuzz.rs index 76250e8..545bfca 100644 --- a/crates/quarto-markdown-pandoc/fuzz/fuzz_targets/hello_fuzz.rs +++ b/crates/quarto-markdown-pandoc/fuzz/fuzz_targets/hello_fuzz.rs @@ -7,16 +7,9 @@ #[macro_use] extern crate libfuzzer_sys; use quarto_markdown_pandoc::readers; -use quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver; fuzz_target!(|data: &[u8]| { if let Ok(s) = std::str::from_utf8(data) { - let _ = crate::readers::qmd::read( - s.as_bytes(), - false, - "", - &mut std::io::sink(), - None:: Vec>, - ); + let _ = crate::readers::qmd::read(s.as_bytes(), false, "", &mut std::io::sink()); } }); diff --git a/crates/quarto-markdown-pandoc/src/main.rs b/crates/quarto-markdown-pandoc/src/main.rs index 9bf4f60..f60e9c7 100644 --- a/crates/quarto-markdown-pandoc/src/main.rs +++ b/crates/quarto-markdown-pandoc/src/main.rs @@ -113,35 +113,42 @@ fn main() { let (pandoc, context) = match args.from.as_str() { "markdown" | "qmd" => { - let error_formatter = if args.json_errors { - Some( - readers::qmd_error_messages::produce_json_error_messages - as fn( - &[u8], - &utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - ) - } else { - None - }; - let result = readers::qmd::read( input.as_bytes(), args.loose, input_filename, &mut output_stream, - error_formatter, ); match result { - Ok(p) => p, - Err(error_messages) => { + Ok((pandoc, context, warnings)) => { + // Output warnings to stderr + if args.json_errors { + // JSON format + for warning in warnings { + eprintln!("{}", warning.to_json()); + } + } else { + // Text format (default) - pass source_context for Ariadne rendering + for warning in warnings { + eprintln!("{}", warning.to_text(Some(&context.source_context))); + } + } + (pandoc, context) + } + Err(diagnostics) => { + // Output errors if args.json_errors { // For JSON errors, print to stdout as a JSON array - println!("{}", error_messages.join("")); + for diagnostic in diagnostics { + println!("{}", diagnostic.to_json()); + } } else { - for msg in error_messages { - eprintln!("{}", msg); + // Build a minimal source context for Ariadne rendering + let mut source_context = quarto_source_map::SourceContext::new(); + source_context.add_file(input_filename.to_string(), Some(input.clone())); + + for diagnostic in diagnostics { + eprintln!("{}", diagnostic.to_text(Some(&source_context))); } } std::process::exit(1); diff --git a/crates/quarto-markdown-pandoc/src/pandoc/meta.rs b/crates/quarto-markdown-pandoc/src/pandoc/meta.rs index 68f8d59..1c2a94c 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/meta.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/meta.rs @@ -104,6 +104,30 @@ impl MetaValueWithSourceInfo { } } + /// Check if this MetaValue represents a string with a specific value + /// + /// This handles both: + /// - MetaString { value, .. } where value == expected + /// - MetaInlines { content, .. } where content is a single Str with text == expected + /// + /// This is needed because after k-90/k-95, YAML strings are parsed as markdown + /// and become MetaInlines containing a single Str node. 
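// --- Illustrative sketch (commentary, not part of this diff) ---------------
// Expected behaviour of `is_string_value`, defined just below, based on its
// doc comment and on the `_scope == "lexical"` check later in this diff;
// `entry` stands in for a real `MetaMapEntry`.
//
// let is_lexical = entry.key == "_scope" && entry.value.is_string_value("lexical");
// // true for MetaString { value: "lexical", .. }
// // true for MetaInlines whose single inline is Str { text: "lexical", .. }
// // false for anything else
// ----------------------------------------------------------------------------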
+ pub fn is_string_value(&self, expected: &str) -> bool { + match self { + MetaValueWithSourceInfo::MetaString { value, .. } => value == expected, + MetaValueWithSourceInfo::MetaInlines { content, .. } => { + // Check if it's a single Str inline with the expected text + if content.len() == 1 { + if let crate::pandoc::Inline::Str(str_node) = &content[0] { + return str_node.text == expected; + } + } + false + } + _ => false, + } + } + /// Convert to old Meta format (loses source info) pub fn to_meta_value(&self) -> MetaValue { match self { @@ -203,6 +227,113 @@ pub fn meta_value_from_legacy(value: MetaValue) -> MetaValueWithSourceInfo { } } +/// Parse a YAML string value as markdown +/// +/// - If tag_source_info is Some: This is a !md tagged value, ERROR on parse failure +/// - If tag_source_info is None: This is an untagged value, WARN on parse failure +/// +/// On success: Returns MetaInlines or MetaBlocks +/// On failure with !md: Returns error (will need to panic or collect diagnostic) +/// On failure untagged: Returns MetaInlines with yaml-markdown-syntax-error Span + warning +fn parse_yaml_string_as_markdown( + value: &str, + source_info: &quarto_source_map::SourceInfo, + _context: &crate::pandoc::ast_context::ASTContext, + tag_source_info: Option, + diagnostics: &mut crate::utils::diagnostic_collector::DiagnosticCollector, +) -> MetaValueWithSourceInfo { + use quarto_error_reporting::DiagnosticMessageBuilder; + + let mut output_stream = VerboseOutput::Sink(io::sink()); + let result = readers::qmd::read(value.as_bytes(), false, "", &mut output_stream); + + match result { + Ok((mut pandoc, _, warnings)) => { + // Propagate warnings from recursive parse + for warning in warnings { + diagnostics.add(warning); + } + // Parse succeeded - return as MetaInlines or MetaBlocks + if pandoc.blocks.len() == 1 { + if let crate::pandoc::Block::Paragraph(p) = &mut pandoc.blocks[0] { + return MetaValueWithSourceInfo::MetaInlines { + content: mem::take(&mut p.content), + source_info: source_info.clone(), + }; + } + } + MetaValueWithSourceInfo::MetaBlocks { + content: pandoc.blocks, + source_info: source_info.clone(), + } + } + Err(_parse_errors) => { + if let Some(_tag_loc) = tag_source_info { + // !md tag: ERROR on parse failure + let diagnostic = + DiagnosticMessageBuilder::error("Failed to parse !md tagged value") + .with_code("Q-1-100") + .with_location(source_info.clone()) + .problem("The `!md` tag requires valid markdown syntax") + .add_detail(format!("Could not parse: {}", value)) + .add_hint("Remove the `!md` tag or fix the markdown syntax") + .build(); + + // Collect diagnostic instead of printing + diagnostics.add(diagnostic); + + // For now, also return the error span so we can continue + // In the future, we might want to actually fail the parse + let span = Span { + attr: ( + String::new(), + vec!["yaml-markdown-syntax-error".to_string()], + HashMap::new(), + ), + content: vec![Inline::Str(Str { + text: value.to_string(), + source_info: quarto_source_map::SourceInfo::default(), + })], + source_info: quarto_source_map::SourceInfo::default(), + }; + MetaValueWithSourceInfo::MetaInlines { + content: vec![Inline::Span(span)], + source_info: source_info.clone(), + } + } else { + // Untagged: WARN on parse failure + let diagnostic = DiagnosticMessageBuilder::warning("Failed to parse metadata value as markdown") + .with_code("Q-1-101") + .with_location(source_info.clone()) + .problem(format!("Could not parse '{}' as markdown", value)) + .add_hint("Add the `!str` tag to treat this as a plain 
string, or fix the markdown syntax") + .build(); + + // Collect diagnostic instead of printing + diagnostics.add(diagnostic); + + // Return span with yaml-markdown-syntax-error class + let span = Span { + attr: ( + String::new(), + vec!["yaml-markdown-syntax-error".to_string()], + HashMap::new(), + ), + content: vec![Inline::Str(Str { + text: value.to_string(), + source_info: quarto_source_map::SourceInfo::default(), + })], + source_info: quarto_source_map::SourceInfo::default(), + }; + MetaValueWithSourceInfo::MetaInlines { + content: vec![Inline::Span(span)], + source_info: source_info.clone(), + } + } + } + } +} + /// Transform YamlWithSourceInfo to MetaValueWithSourceInfo /// /// This is the core transformation that: @@ -215,6 +346,7 @@ pub fn meta_value_from_legacy(value: MetaValue) -> MetaValueWithSourceInfo { pub fn yaml_to_meta_with_source_info( yaml: quarto_yaml::YamlWithSourceInfo, _context: &crate::pandoc::ast_context::ASTContext, + diagnostics: &mut crate::utils::diagnostic_collector::DiagnosticCollector, ) -> MetaValueWithSourceInfo { use yaml_rust2::Yaml; @@ -224,7 +356,7 @@ pub fn yaml_to_meta_with_source_info( let (items, source_info) = yaml.into_array().unwrap(); let meta_items = items .into_iter() - .map(|item| yaml_to_meta_with_source_info(item, _context)) + .map(|item| yaml_to_meta_with_source_info(item, _context, diagnostics)) .collect(); return MetaValueWithSourceInfo::MetaList { @@ -242,7 +374,7 @@ pub fn yaml_to_meta_with_source_info( entry.key.yaml.as_str().map(|key_str| MetaMapEntry { key: key_str.to_string(), key_source: entry.key_span, - value: yaml_to_meta_with_source_info(entry.value, _context), + value: yaml_to_meta_with_source_info(entry.value, _context, diagnostics), }) }) .collect(); @@ -263,35 +395,57 @@ pub fn yaml_to_meta_with_source_info( match yaml_value { Yaml::String(s) => { - // Check for YAML tags (e.g., !path, !glob, !str) - if let Some((tag_suffix, _tag_source_info)) = tag { - // Tagged string - bypass markdown parsing - // Wrap in Span with class "yaml-tagged-string" and tag attribute - let mut attributes = HashMap::new(); - attributes.insert("tag".to_string(), tag_suffix.clone()); - - let span = Span { - attr: ( - String::new(), - vec!["yaml-tagged-string".to_string()], - attributes, - ), - content: vec![Inline::Str(Str { - text: s.clone(), - source_info: source_info.clone(), - })], - source_info: quarto_source_map::SourceInfo::default(), - }; - MetaValueWithSourceInfo::MetaInlines { - content: vec![Inline::Span(span)], - source_info, // Overall node source + // Check for YAML tags (e.g., !path, !glob, !str, !md) + if let Some((tag_suffix, tag_source_info)) = tag { + match tag_suffix.as_str() { + "str" | "path" => { + // !str and !path: Emit plain Str without markdown parsing + // No wrapper span, just a plain Str node + MetaValueWithSourceInfo::MetaInlines { + content: vec![Inline::Str(Str { + text: s.clone(), + source_info: source_info.clone(), + })], + source_info, + } + } + "md" => { + // !md: Parse as markdown immediately, ERROR if fails + parse_yaml_string_as_markdown( + &s, + &source_info, + _context, + Some(tag_source_info), + diagnostics, + ) + } + _ => { + // Other tags (!glob, !expr, etc.): Keep current behavior + // Wrap in Span with class "yaml-tagged-string" and tag attribute + let mut attributes = HashMap::new(); + attributes.insert("tag".to_string(), tag_suffix.clone()); + + let span = Span { + attr: ( + String::new(), + vec!["yaml-tagged-string".to_string()], + attributes, + ), + content: vec![Inline::Str(Str { + text: 
s.clone(), + source_info: source_info.clone(), + })], + source_info: quarto_source_map::SourceInfo::default(), + }; + MetaValueWithSourceInfo::MetaInlines { + content: vec![Inline::Span(span)], + source_info, // Overall node source + } + } } } else { - // Untagged string - return as MetaString for later markdown parsing - MetaValueWithSourceInfo::MetaString { - value: s, - source_info, - } + // Untagged string: Parse as markdown immediately, WARN if fails + parse_yaml_string_as_markdown(&s, &source_info, _context, None, diagnostics) } } @@ -468,6 +622,7 @@ impl MarkedEventReceiver for YamlEventHandler { pub fn rawblock_to_meta_with_source_info( block: &RawBlock, context: &crate::pandoc::ast_context::ASTContext, + diagnostics: &mut crate::utils::diagnostic_collector::DiagnosticCollector, ) -> MetaValueWithSourceInfo { if block.format != "quarto_minus_metadata" { panic!( @@ -501,7 +656,7 @@ pub fn rawblock_to_meta_with_source_info( // Transform YamlWithSourceInfo to MetaValueWithSourceInfo // Pass by value since yaml is no longer needed - yaml_to_meta_with_source_info(yaml, context) + yaml_to_meta_with_source_info(yaml, context, diagnostics) } /// Legacy version: Convert RawBlock to Meta (old implementation) @@ -532,25 +687,19 @@ pub fn rawblock_to_meta(block: RawBlock) -> Meta { pub fn parse_metadata_strings_with_source_info( meta: MetaValueWithSourceInfo, outer_metadata: &mut Vec, + diagnostics: &mut crate::utils::diagnostic_collector::DiagnosticCollector, ) -> MetaValueWithSourceInfo { match meta { MetaValueWithSourceInfo::MetaString { value, source_info } => { let mut output_stream = VerboseOutput::Sink(io::sink()); - let result = readers::qmd::read( - value.as_bytes(), - false, - "", - &mut output_stream, - None::< - fn( - &[u8], - &crate::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ); + let result = + readers::qmd::read(value.as_bytes(), false, "", &mut output_stream); match result { - Ok((mut pandoc, _context)) => { + Ok((mut pandoc, _context, warnings)) => { + // Propagate warnings from recursive parse + for warning in warnings { + diagnostics.add(warning); + } // Merge parsed metadata, preserving full MetaMapEntry with key_source if let MetaValueWithSourceInfo::MetaMap { entries, .. 
} = pandoc.meta { for entry in entries { @@ -595,7 +744,9 @@ pub fn parse_metadata_strings_with_source_info( MetaValueWithSourceInfo::MetaList { items, source_info } => { let parsed_items = items .into_iter() - .map(|item| parse_metadata_strings_with_source_info(item, outer_metadata)) + .map(|item| { + parse_metadata_strings_with_source_info(item, outer_metadata, diagnostics) + }) .collect(); MetaValueWithSourceInfo::MetaList { items: parsed_items, @@ -611,7 +762,11 @@ pub fn parse_metadata_strings_with_source_info( .map(|entry| MetaMapEntry { key: entry.key, key_source: entry.key_source, - value: parse_metadata_strings_with_source_info(entry.value, outer_metadata), + value: parse_metadata_strings_with_source_info( + entry.value, + outer_metadata, + diagnostics, + ), }) .collect(); MetaValueWithSourceInfo::MetaMap { @@ -627,21 +782,10 @@ pub fn parse_metadata_strings(meta: MetaValue, outer_metadata: &mut Meta) -> Met match meta { MetaValue::MetaString(s) => { let mut output_stream = VerboseOutput::Sink(io::sink()); - let result = readers::qmd::read( - s.as_bytes(), - false, - "", - &mut output_stream, - None::< - fn( - &[u8], - &crate::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ); + let result = readers::qmd::read(s.as_bytes(), false, "", &mut output_stream); match result { - Ok((mut pandoc, _context)) => { + Ok((mut pandoc, _context, _warnings)) => { + // TODO: Handle warnings from recursive parse // pandoc.meta is now MetaValueWithSourceInfo, convert it to Meta if let MetaValueWithSourceInfo::MetaMap { entries, .. } = pandoc.meta { for entry in entries { diff --git a/crates/quarto-markdown-pandoc/src/readers/qmd.rs b/crates/quarto-markdown-pandoc/src/readers/qmd.rs index 42ab1fb..c86ab18 100644 --- a/crates/quarto-markdown-pandoc/src/readers/qmd.rs +++ b/crates/quarto-markdown-pandoc/src/readers/qmd.rs @@ -13,7 +13,7 @@ use crate::pandoc::block::MetaBlock; use crate::pandoc::meta::parse_metadata_strings_with_source_info; use crate::pandoc::rawblock_to_meta_with_source_info; use crate::pandoc::{self, Block, MetaValueWithSourceInfo}; -use crate::readers::qmd_error_messages::{produce_error_message, produce_error_message_json}; +use crate::readers::qmd_error_messages::{produce_diagnostic_messages, produce_error_message_json}; use crate::traversals; use crate::utils::diagnostic_collector::DiagnosticCollector; use std::io::Write; @@ -50,22 +50,20 @@ pub fn read_bad_qmd_for_error_message(input_bytes: &[u8]) -> Vec { return produce_error_message_json(&log_observer); } -pub fn read( +pub fn read( input_bytes: &[u8], _loose: bool, filename: &str, mut output_stream: &mut T, - error_formatter: Option, -) -> Result<(pandoc::Pandoc, ASTContext), Vec> -where - F: Fn( - &[u8], - &crate::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, -{ +) -> Result< + ( + pandoc::Pandoc, + ASTContext, + Vec, + ), + Vec, +> { let mut parser = MarkdownParser::default(); - let mut error_messages: Vec = Vec::new(); let mut log_observer = crate::utils::tree_sitter_log_observer::TreeSitterLogObserver::default(); parser @@ -82,18 +80,22 @@ where let mut input_bytes_with_newline = Vec::with_capacity(input_bytes.len() + 1); input_bytes_with_newline.extend_from_slice(input_bytes); input_bytes_with_newline.push(b'\n'); - return read( - &input_bytes_with_newline, - _loose, - filename, - output_stream, - error_formatter, - ); + return read(&input_bytes_with_newline, _loose, filename, output_stream); } let tree = parser .parse(&input_bytes, None) .expect("Failed to 
parse input"); + + // Create ASTContext early so we can use it for error diagnostics + let mut context = ASTContext::with_filename(filename.to_string()); + // Add the input content to the SourceContext for proper error rendering + let input_str = String::from_utf8_lossy(input_bytes).to_string(); + context.source_context = quarto_source_map::SourceContext::new(); + context + .source_context + .add_file(filename.to_string(), Some(input_str)); + log_observer.parses.iter().for_each(|parse| { writeln!(output_stream, "tree-sitter parse:").unwrap(); parse @@ -103,41 +105,40 @@ where writeln!(output_stream, "---").unwrap(); }); if log_observer.had_errors() { - if let Some(formatter) = error_formatter { - // Use the provided error formatter - return Err(formatter(input_bytes, &log_observer, filename)); - } else { - // Use the default ariadne formatter - return Err(produce_error_message(input_bytes, &log_observer, filename)); - } + // Produce structured DiagnosticMessage objects with proper source locations + let diagnostics = produce_diagnostic_messages( + input_bytes, + &log_observer, + filename, + &context.source_context, + ); + return Err(diagnostics); } let depth = crate::utils::concrete_tree_depth::concrete_tree_depth(&tree); // this is here mostly to prevent our fuzzer from blowing the stack // with a deeply nested document if depth > 100 { - error_messages.push(format!( + let diagnostic = quarto_error_reporting::generic_error!(format!( "The input document is too deeply nested (max depth: {} > 100).", depth )); - return Err(error_messages); + return Err(vec![diagnostic]); } let errors = parse_is_good(&tree); print_whole_tree(&mut tree.walk(), &mut output_stream); if !errors.is_empty() { let mut cursor = tree.walk(); + let mut diagnostics = Vec::new(); for error in errors { cursor.goto_id(error); - error_messages.push(errors::error_message(&mut cursor, &input_bytes)); + let error_msg = errors::error_message(&mut cursor, &input_bytes); + diagnostics.push(quarto_error_reporting::generic_error!(error_msg)); } - } - if !error_messages.is_empty() { - return Err(error_messages); + return Err(diagnostics); } - let context = ASTContext::with_filename(filename.to_string()); - // Create diagnostic collector and convert to Pandoc AST let mut error_collector = DiagnosticCollector::new(); let mut result = match pandoc::treesitter_to_pandoc( @@ -149,34 +150,14 @@ where ) { Ok(pandoc) => pandoc, Err(diagnostics) => { - // Convert diagnostics to strings based on format - if error_formatter.is_some() { - return Err(diagnostics - .iter() - .map(|d| d.to_json().to_string()) - .collect()); - } else { - return Err(diagnostics.iter().map(|d| d.to_text(None)).collect()); - } + // Return diagnostics directly + return Err(diagnostics); } }; - - // Output warnings to stderr in appropriate format - if error_formatter.is_some() { - // JSON format - let warnings = error_collector.to_json(); - for warning in warnings { - eprintln!("{}", warning); - } - } else { - // Text format (default) - let warnings = error_collector.to_text(); - for warning in warnings { - eprintln!("{}", warning); - } - } // Store complete MetaMapEntry objects to preserve key_source information let mut meta_from_parses: Vec = Vec::new(); + // Create a separate diagnostic collector for metadata parsing warnings + let mut meta_diagnostics = DiagnosticCollector::new(); result = { let mut filter = Filter::new().with_raw_block(|rb| { @@ -184,18 +165,15 @@ where return Unchanged(rb); } // Use new rawblock_to_meta_with_source_info - preserves source info! 
- let meta_with_source = rawblock_to_meta_with_source_info(&rb, &context); + let meta_with_source = + rawblock_to_meta_with_source_info(&rb, &context, &mut meta_diagnostics); // Check if this is lexical metadata let is_lexical = if let MetaValueWithSourceInfo::MetaMap { ref entries, .. } = meta_with_source { - entries.iter().any(|e| { - e.key == "_scope" - && matches!( - &e.value, - MetaValueWithSourceInfo::MetaString { value, .. } if value == "lexical" - ) - }) + entries + .iter() + .any(|e| e.key == "_scope" && e.value.is_string_value("lexical")) } else { false }; @@ -206,6 +184,7 @@ where let parsed_meta = parse_metadata_strings_with_source_info( meta_with_source, &mut inner_meta_from_parses, + &mut meta_diagnostics, ); // Merge inner metadata if needed @@ -236,8 +215,11 @@ where } else { // Document-level metadata - parse strings and merge into meta_from_parses let mut inner_meta = Vec::new(); - let parsed_meta = - parse_metadata_strings_with_source_info(meta_with_source, &mut inner_meta); + let parsed_meta = parse_metadata_strings_with_source_info( + meta_with_source, + &mut inner_meta, + &mut meta_diagnostics, + ); // Extract MetaMapEntry objects (preserving key_source) and store them if let MetaValueWithSourceInfo::MetaMap { entries, .. } = parsed_meta { @@ -263,5 +245,14 @@ where entries.push(entry); } } - Ok((result, context)) + + // Merge metadata diagnostics into main error_collector + for diagnostic in meta_diagnostics.into_diagnostics() { + error_collector.add(diagnostic); + } + + // Collect all warnings + let warnings = error_collector.into_diagnostics(); + + Ok((result, context, warnings)) } diff --git a/crates/quarto-markdown-pandoc/src/readers/qmd_error_messages.rs b/crates/quarto-markdown-pandoc/src/readers/qmd_error_messages.rs index f615abf..0f3cb6b 100644 --- a/crates/quarto-markdown-pandoc/src/readers/qmd_error_messages.rs +++ b/crates/quarto-markdown-pandoc/src/readers/qmd_error_messages.rs @@ -45,6 +45,43 @@ pub fn produce_error_message( return result; } +/// Produce structured DiagnosticMessage objects from parse errors +/// Uses the SourceContext to properly calculate source locations +pub fn produce_diagnostic_messages( + input_bytes: &[u8], + tree_sitter_log: &crate::utils::tree_sitter_log_observer::TreeSitterLogObserver, + filename: &str, + source_context: &quarto_source_map::SourceContext, +) -> Vec { + assert!(tree_sitter_log.had_errors()); + assert!(tree_sitter_log.parses.len() > 0); + + let mut result: Vec = vec![]; + let mut seen_errors: std::collections::HashSet<(usize, usize)> = + std::collections::HashSet::new(); + + for parse in &tree_sitter_log.parses { + for (_, process_log) in &parse.processes { + for state in process_log.error_states.iter() { + if seen_errors.contains(&(state.row, state.column)) { + continue; + } + seen_errors.insert((state.row, state.column)); + let diagnostic = error_diagnostic_from_parse_state( + input_bytes, + state, + &parse.consumed_tokens, + filename, + source_context, + ); + result.push(diagnostic); + } + } + } + + return result; +} + fn error_message_from_parse_state( input_bytes: &[u8], parse_state: &crate::utils::tree_sitter_log_observer::ProcessMessage, @@ -327,6 +364,136 @@ fn find_matching_token<'a>( .find(|token| token.lr_state == capture.lr_state && token.sym == capture.sym) } +/// Convert a parse state error into a structured DiagnosticMessage +fn error_diagnostic_from_parse_state( + input_bytes: &[u8], + parse_state: &crate::utils::tree_sitter_log_observer::ProcessMessage, + consumed_tokens: &[ConsumedToken], + 
_filename: &str, + _source_context: &quarto_source_map::SourceContext, +) -> quarto_error_reporting::DiagnosticMessage { + use quarto_error_reporting::DiagnosticMessageBuilder; + + // Look up the error entry from the table + let error_entry = crate::readers::qmd_error_message_table::lookup_error_entry(parse_state); + + // Convert input to string for offset calculation + let input_str = String::from_utf8_lossy(input_bytes); + + // Calculate byte offset and create proper locations using quarto-source-map utilities + let byte_offset = calculate_byte_offset(&input_str, parse_state.row, parse_state.column); + let span_end = byte_offset + parse_state.size.max(1); + + // Use quarto_source_map::utils::offset_to_location to properly calculate locations + let start_location = quarto_source_map::utils::offset_to_location(&input_str, byte_offset) + .unwrap_or(quarto_source_map::Location { + offset: byte_offset, + row: parse_state.row, + column: parse_state.column, + }); + let end_location = quarto_source_map::utils::offset_to_location(&input_str, span_end) + .unwrap_or(quarto_source_map::Location { + offset: span_end, + row: parse_state.row, + column: parse_state.column + parse_state.size.max(1), + }); + + // Create SourceInfo for the error location + let range = quarto_source_map::Range { + start: start_location, + end: end_location, + }; + let source_info = quarto_source_map::SourceInfo::original( + quarto_source_map::FileId(0), // File ID 0 (set up in ASTContext) + range, + ); + + if let Some(entry) = error_entry { + // Build diagnostic from error table entry + let mut builder = DiagnosticMessageBuilder::error(entry.error_info.title) + .with_location(source_info.clone()) + .problem(entry.error_info.message); + + // Add notes with their corresponding source locations + for note in entry.error_info.notes { + match note.note_type { + "simple" => { + // Find the capture that this note refers to + if let Some(capture) = + entry.error_info.captures.iter().find(|c| match note.label { + None => false, + Some(l) => c.label == l, + }) + { + // Find the consumed token that matches this capture + if let Some(token) = find_matching_token(consumed_tokens, capture) { + // Calculate the byte offset for this token + let token_byte_offset = + calculate_byte_offset(&input_str, token.row, token.column); + let token_span_end = token_byte_offset + token.size.max(1); + + // Use SourceInfo::substring to create a SourceInfo for this token + // This properly uses the quarto-source-map infrastructure + let token_source_info = quarto_source_map::SourceInfo::substring( + source_info.clone(), + token_byte_offset, + token_span_end, + ); + + // Add as info detail with location (will show as blue label in Ariadne) + builder = builder.add_info_at(note.message, token_source_info); + } + } + } + "label-range" => { + // Find the begin and end captures + let begin_capture = note.label_begin.and_then(|label| { + entry.error_info.captures.iter().find(|c| c.label == label) + }); + let end_capture = note.label_end.and_then(|label| { + entry.error_info.captures.iter().find(|c| c.label == label) + }); + + if let (Some(begin_cap), Some(end_cap)) = (begin_capture, end_capture) { + // Find the consumed tokens that match these captures + let begin_token = find_matching_token(consumed_tokens, begin_cap); + let end_token = find_matching_token(consumed_tokens, end_cap); + + if let (Some(begin_tok), Some(end_tok)) = (begin_token, end_token) { + // Calculate the span from the beginning of begin_token to the end of end_token + let begin_byte_offset = 
+ calculate_byte_offset(&input_str, begin_tok.row, begin_tok.column); + let end_byte_offset = + calculate_byte_offset(&input_str, end_tok.row, end_tok.column); + let range_span_end = end_byte_offset + end_tok.size.max(1); + + // Use SourceInfo::substring to create a SourceInfo for this range + // This properly uses the quarto-source-map infrastructure + let range_source_info = quarto_source_map::SourceInfo::substring( + source_info.clone(), + begin_byte_offset, + range_span_end, + ); + + // Add as info detail with location + builder = builder.add_info_at(note.message, range_source_info); + } + } + } + _ => {} + } + } + + builder.build() + } else { + // Fallback for errors not in the table + DiagnosticMessageBuilder::error("Parse error") + .with_location(source_info) + .problem("unexpected character or token here") + .build() + } +} + fn calculate_byte_offset(input: &str, row: usize, column: usize) -> usize { let mut current_row = 0; let mut current_col = 0; diff --git a/crates/quarto-markdown-pandoc/src/wasm_entry_points/mod.rs b/crates/quarto-markdown-pandoc/src/wasm_entry_points/mod.rs index eca9d4b..e6dd1e1 100644 --- a/crates/quarto-markdown-pandoc/src/wasm_entry_points/mod.rs +++ b/crates/quarto-markdown-pandoc/src/wasm_entry_points/mod.rs @@ -5,7 +5,6 @@ use crate::readers; use crate::utils::output::VerboseOutput; -use crate::utils::tree_sitter_log_observer::TreeSitterLogObserver; use std::io; fn pandoc_to_json( @@ -38,13 +37,16 @@ pub fn qmd_to_pandoc( Vec, > { let mut output = VerboseOutput::Sink(io::sink()); - readers::qmd::read( - input, - false, - "", - &mut output, - None:: Vec>, - ) + match readers::qmd::read(input, false, "", &mut output) { + Ok((pandoc, context, _warnings)) => { + // TODO: Decide how to handle warnings in WASM context + Ok((pandoc, context)) + } + Err(diagnostics) => { + // Convert diagnostics to strings for backward compatibility + Err(diagnostics.iter().map(|d| d.to_text(None)).collect()) + } + } } pub fn parse_qmd(input: &[u8]) -> String { diff --git a/crates/quarto-markdown-pandoc/tests/claude-examples/meta-error.qmd b/crates/quarto-markdown-pandoc/tests/claude-examples/meta-error.qmd new file mode 100644 index 0000000..46418ec --- /dev/null +++ b/crates/quarto-markdown-pandoc/tests/claude-examples/meta-error.qmd @@ -0,0 +1,7 @@ +--- +title: hello +resources: + - !md images/*.png +# - !str "images/*.png" +# - !md images/*.png +--- \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/claude-examples/meta-warning.qmd b/crates/quarto-markdown-pandoc/tests/claude-examples/meta-warning.qmd new file mode 100644 index 0000000..54c8208 --- /dev/null +++ b/crates/quarto-markdown-pandoc/tests/claude-examples/meta-warning.qmd @@ -0,0 +1,7 @@ +--- +title: hello +resources: + - images/*.png +# - !str "images/*.png" +# - !md images/*.png +--- \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/test.rs b/crates/quarto-markdown-pandoc/tests/test.rs index 52f63fe..68ce4c0 100644 --- a/crates/quarto-markdown-pandoc/tests/test.rs +++ b/crates/quarto-markdown-pandoc/tests/test.rs @@ -95,20 +95,8 @@ fn matches_pandoc_markdown_reader(input: &str) -> bool { let mut buf1 = Vec::new(); let mut buf2 = Vec::new(); - let (doc, context) = readers::qmd::read( - input.as_bytes(), - false, - "", - &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .unwrap(); + let (doc, context, _warnings) = + readers::qmd::read(input.as_bytes(), 
false, "", &mut std::io::sink()).unwrap(); writers::native::write(&doc, &mut buf1).unwrap(); let native_output = String::from_utf8(buf1).expect("Invalid UTF-8 in output"); writers::json::write(&doc, &context, &mut buf2).unwrap(); @@ -286,13 +274,13 @@ where input.push('\n'); // ensure the input ends with a newline } let mut output_stream = VerboseOutput::Sink(io::sink()); - let (pandoc, context) = readers::qmd::read( + let (pandoc, context, _warnings) = readers::qmd::read( input.as_bytes(), false, &path.to_string_lossy(), &mut output_stream, - None:: Vec>, - ).unwrap(); + ) + .unwrap(); writer(&pandoc, &context, &mut buffer).unwrap(); let output = String::from_utf8(buffer).expect("Invalid UTF-8 in output"); @@ -613,11 +601,10 @@ fn test_markdown_writer_smoke() { false, path.to_str().unwrap(), &mut std::io::sink(), - None:: Vec> ); match doc_result { - Ok((doc, _context)) => { + Ok((doc, _context, _warnings)) => { // Write it back out using the markdown writer let mut buf = Vec::new(); writers::qmd::write(&doc, &mut buf).expect("Failed to write markdown"); @@ -657,13 +644,13 @@ fn test_qmd_roundtrip_consistency() { let original_qmd = std::fs::read_to_string(&path).expect("Failed to read file"); // Step 1: QMD -> JSON - let (doc1, context1) = readers::qmd::read( + let (doc1, context1, _warnings) = readers::qmd::read( original_qmd.as_bytes(), false, path.to_str().unwrap(), &mut std::io::sink(), - None:: Vec> - ).expect("Failed to parse original QMD"); + ) + .expect("Failed to parse original QMD"); let mut json_buf = Vec::new(); writers::json::write(&doc1, &context1, &mut json_buf) @@ -679,13 +666,13 @@ fn test_qmd_roundtrip_consistency() { let regenerated_qmd = String::from_utf8(qmd_buf).expect("Invalid UTF-8 in QMD"); // Step 3: QMD -> JSON again - let (doc3, context3) = readers::qmd::read( + let (doc3, context3, _warnings) = readers::qmd::read( regenerated_qmd.as_bytes(), false, "", &mut std::io::sink(), - None:: Vec> - ).expect("Failed to parse regenerated QMD"); + ) + .expect("Failed to parse regenerated QMD"); // Compare JSON representations (without location fields) let mut json1_buf = Vec::new(); @@ -737,18 +724,11 @@ fn test_empty_blockquote_roundtrip() { let original_qmd = std::fs::read_to_string(test_file).expect("Failed to read file"); // Step 1: QMD -> JSON - let (doc1, context1) = readers::qmd::read( + let (doc1, context1, _warnings) = readers::qmd::read( original_qmd.as_bytes(), false, test_file, &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, ) .expect("Failed to parse original QMD"); @@ -765,18 +745,11 @@ fn test_empty_blockquote_roundtrip() { let regenerated_qmd = String::from_utf8(qmd_buf).expect("Invalid UTF-8 in QMD"); // Step 3: QMD -> JSON again - let (doc3, context3) = readers::qmd::read( + let (doc3, context3, _warnings) = readers::qmd::read( regenerated_qmd.as_bytes(), false, "", &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, ) .expect("Failed to parse regenerated QMD"); diff --git a/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs b/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs index 51acb18..73aed64 100644 --- a/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs +++ b/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs @@ -78,7 +78,7 @@ fn test_inline_source_locations() { let hello_str = &inlines[0]; 
assert_eq!(hello_str["t"], "Str"); assert_eq!(hello_str["c"], "hello"); - let (start_off, start_row, start_col, end_off, end_row, end_col, _type) = + let (start_off, _start_row, start_col, end_off, _end_row, end_col, _type) = resolve_source_ref(&hello_str["s"], pool); assert_eq!(start_col, 0); assert_eq!(start_off, 0); diff --git a/crates/quarto-markdown-pandoc/tests/test_json_errors.rs b/crates/quarto-markdown-pandoc/tests/test_json_errors.rs index 80fd0d8..71b8f77 100644 --- a/crates/quarto-markdown-pandoc/tests/test_json_errors.rs +++ b/crates/quarto-markdown-pandoc/tests/test_json_errors.rs @@ -1,46 +1,23 @@ use quarto_markdown_pandoc::readers; -use quarto_markdown_pandoc::utils; #[test] fn test_json_error_format() { // Create input with a malformed code block to trigger an error let input = "```{python\n"; - // Test with JSON errors enabled using the formatter closure - let json_formatter = readers::qmd_error_messages::produce_json_error_messages; - let result = readers::qmd::read( - input.as_bytes(), - false, - "test.md", - &mut std::io::sink(), - Some(json_formatter), - ); + // Test with new API + let result = readers::qmd::read(input.as_bytes(), false, "test.md", &mut std::io::sink()); assert!(result.is_err()); - let error_messages = result.unwrap_err(); - assert_eq!(error_messages.len(), 1); + let diagnostics = result.unwrap_err(); + assert!(diagnostics.len() > 0, "Should have at least one diagnostic"); - // Verify the error is valid JSON - let json_str = &error_messages[0]; - let parsed: serde_json::Value = serde_json::from_str(json_str).expect("Should be valid JSON"); + // Verify the first diagnostic can be serialized to JSON + let json_value = diagnostics[0].to_json(); - // Verify it's an array - assert!(parsed.is_array()); - let errors = parsed.as_array().unwrap(); - assert!(errors.len() > 0); - - // Verify the structure of the first error - let first_error = &errors[0]; - assert!(first_error.get("filename").is_some()); - assert!(first_error.get("title").is_some()); - assert!(first_error.get("message").is_some()); - assert!(first_error.get("location").is_some()); - - let location = first_error.get("location").unwrap(); - assert!(location.get("row").is_some()); - assert!(location.get("column").is_some()); - assert!(location.get("byte_offset").is_some()); - assert!(location.get("size").is_some()); + // Verify the structure - DiagnosticMessage has a different structure than the old format + assert!(json_value.get("kind").is_some()); + assert!(json_value.get("title").is_some()); } #[test] @@ -48,134 +25,30 @@ fn test_regular_error_format() { // Create input with a malformed code block to trigger an error let input = "```{python\n"; - // Test with JSON errors disabled (None for formatter) - let result = readers::qmd::read( - input.as_bytes(), - false, - "test.md", - &mut std::io::sink(), - None::< - fn(&[u8], &utils::tree_sitter_log_observer::TreeSitterLogObserver, &str) -> Vec, - >, - ); + // Test with new API + let result = readers::qmd::read(input.as_bytes(), false, "test.md", &mut std::io::sink()); assert!(result.is_err()); - let error_messages = result.unwrap_err(); + let diagnostics = result.unwrap_err(); - // Regular errors should be plain strings, not JSON - for msg in &error_messages { - // Verify it's NOT valid JSON (should be a formatted error message) - if msg.starts_with("[") || msg.starts_with("{") { - let parse_result: Result = serde_json::from_str(msg); - assert!( - parse_result.is_err(), - "Regular error messages should not be JSON" - ); - } + // Diagnostics can be 
formatted as text + for diag in &diagnostics { + let text = diag.to_text(None); + // Verify it's a non-empty formatted error message + assert!(!text.is_empty()); } } #[test] -fn test_label_range_note_type() { - // Create input that triggers a label-range error (error 003 from corpus) - let input = "[foo]{#id key=value .class}"; - - // Test with JSON errors enabled using the formatter closure - let json_formatter = readers::qmd_error_messages::produce_json_error_messages; - let result = readers::qmd::read( - input.as_bytes(), - false, - "test.md", - &mut std::io::sink(), - Some(json_formatter), - ); - - assert!(result.is_err()); - let error_messages = result.unwrap_err(); - assert_eq!(error_messages.len(), 1); - - // Verify the error is valid JSON - let json_str = &error_messages[0]; - let parsed: serde_json::Value = serde_json::from_str(json_str).expect("Should be valid JSON"); - - // Verify it's an array - assert!(parsed.is_array()); - let errors = parsed.as_array().unwrap(); - assert!(errors.len() > 0); - - // Find the error with label-range note type - let mut found_label_range = false; - for error in errors { - if let Some(notes) = error.get("notes") { - if let Some(notes_array) = notes.as_array() { - for note in notes_array { - if let Some(note_type) = note.get("noteType") { - if note_type.as_str() == Some("label-range") { - found_label_range = true; - - // Verify the label-range note has a "range" field instead of "location" - assert!( - note.get("range").is_some(), - "label-range note should have a 'range' field" - ); - assert!( - note.get("location").is_none(), - "label-range note should not have a 'location' field" - ); - - let range = note.get("range").unwrap(); - assert!( - range.get("start").is_some(), - "range should have a 'start' field" - ); - assert!( - range.get("end").is_some(), - "range should have an 'end' field" - ); - - let start = range.get("start").unwrap(); - let end = range.get("end").unwrap(); - - // Verify start and end have required fields - assert!(start.get("row").is_some()); - assert!(start.get("column").is_some()); - assert!(start.get("byte_offset").is_some()); - - assert!(end.get("row").is_some()); - assert!(end.get("column").is_some()); - assert!(end.get("byte_offset").is_some()); - - break; - } - } - } - } - } - } - - assert!( - found_label_range, - "Should find at least one label-range note in the error" - ); -} - -#[test] -fn test_missing_newline_warning_json_format() { - // This test verifies that the missing newline warning is formatted as JSON - // when --json-errors is used. Currently this test will fail because the - // warning is always output as plain text. - - // Create input without trailing newline +fn test_newline_warning() { + // Test file without trailing newline let input = "# Hello World"; - // We can't easily test the binary's stderr output from here, but we can - // document the expected behavior: when --json-errors is used, the warning - // should be output as: - // {"title":"Warning","message":"Adding missing newline to end of input"} - // - // Currently it outputs: - // (Warning) Adding missing newline to end of input. + let result = readers::qmd::read(input.as_bytes(), false, "test.md", &mut std::io::sink()); + + // Should succeed (the newline is added automatically) + assert!(result.is_ok()); - // This test just documents the issue. The actual fix will be in main.rs - // where the warning is emitted. 
+ // The newline warning is currently emitted in main.rs, not in the library + // This test just verifies that the parse succeeds } diff --git a/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs b/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs index be55280..8ace709 100644 --- a/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs +++ b/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs @@ -3,7 +3,6 @@ * Copyright (c) 2025 Posit, PBC */ -use hashlink::LinkedHashMap; use quarto_markdown_pandoc::pandoc::ast_context::ASTContext; use quarto_markdown_pandoc::pandoc::{Block, Inline, Pandoc, Paragraph, Str}; use quarto_markdown_pandoc::readers; diff --git a/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs b/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs index dcf680b..f63b3a0 100644 --- a/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs +++ b/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs @@ -61,20 +61,9 @@ fn test_metadata_source_tracking_002_qmd() { // Step 1: Read QMD to PandocAST let mut output_stream = quarto_markdown_pandoc::utils::output::VerboseOutput::Sink(std::io::sink()); - let (pandoc, context) = readers::qmd::read( - content.as_bytes(), - false, - test_file, - &mut output_stream, - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .expect("Failed to parse QMD"); + let (pandoc, context, _warnings) = + readers::qmd::read(content.as_bytes(), false, test_file, &mut output_stream) + .expect("Failed to parse QMD"); // Verify document-level metadata: title: metadata1 if let MetaValueWithSourceInfo::MetaMap { ref entries, .. } = pandoc.meta { @@ -166,20 +155,9 @@ title: Simple title description: This is a description ---"#; - let (pandoc, _context) = readers::qmd::read( - input.as_bytes(), - false, - "test.qmd", - &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .expect("Failed to parse"); + let (pandoc, _context, _warnings) = + readers::qmd::read(input.as_bytes(), false, "test.qmd", &mut std::io::sink()) + .expect("Failed to parse"); // Extract metadata let MetaValueWithSourceInfo::MetaMap { entries, .. 
} = pandoc.meta else { diff --git a/crates/quarto-markdown-pandoc/tests/test_nested_yaml_serialization.rs b/crates/quarto-markdown-pandoc/tests/test_nested_yaml_serialization.rs index 8834e94..58f31d8 100644 --- a/crates/quarto-markdown-pandoc/tests/test_nested_yaml_serialization.rs +++ b/crates/quarto-markdown-pandoc/tests/test_nested_yaml_serialization.rs @@ -38,18 +38,11 @@ fn test_yaml_serialization_size_scaling() { // Parse QMD to PandocAST let mut output_stream = quarto_markdown_pandoc::utils::output::VerboseOutput::Sink(std::io::sink()); - let (pandoc, context) = readers::qmd::read( + let (pandoc, context, _warnings) = readers::qmd::read( qmd_content.as_bytes(), false, "test.qmd", &mut output_stream, - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, ) .expect("Failed to parse QMD"); @@ -101,20 +94,9 @@ fn test_yaml_serialization_with_siblings() { // Parse and serialize let mut output_stream = quarto_markdown_pandoc::utils::output::VerboseOutput::Sink(std::io::sink()); - let (pandoc, context) = readers::qmd::read( - yaml.as_bytes(), - false, - "test.qmd", - &mut output_stream, - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .expect("Failed to parse QMD"); + let (pandoc, context, _warnings) = + readers::qmd::read(yaml.as_bytes(), false, "test.qmd", &mut output_stream) + .expect("Failed to parse QMD"); let mut json_output = Vec::new(); writers::json::write(&pandoc, &context, &mut json_output).expect("Failed to write JSON"); @@ -148,20 +130,9 @@ Some content. let mut output_stream = quarto_markdown_pandoc::utils::output::VerboseOutput::Sink(std::io::sink()); - let (pandoc, context) = readers::qmd::read( - yaml.as_bytes(), - false, - "test.qmd", - &mut output_stream, - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .expect("Failed to parse QMD"); + let (pandoc, context, _warnings) = + readers::qmd::read(yaml.as_bytes(), false, "test.qmd", &mut output_stream) + .expect("Failed to parse QMD"); let mut json_output = Vec::new(); writers::json::write(&pandoc, &context, &mut json_output).expect("Failed to write JSON"); @@ -236,18 +207,11 @@ fn test_binary_tree_serialization() { // Parse QMD to PandocAST let mut output_stream = quarto_markdown_pandoc::utils::output::VerboseOutput::Sink(std::io::sink()); - let (pandoc, context) = readers::qmd::read( + let (pandoc, context, _warnings) = readers::qmd::read( qmd_content.as_bytes(), false, "test.qmd", &mut output_stream, - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, ) .expect("Failed to parse QMD"); diff --git a/crates/quarto-markdown-pandoc/tests/test_ordered_list_formatting.rs b/crates/quarto-markdown-pandoc/tests/test_ordered_list_formatting.rs index eeaf5e4..b4564df 100644 --- a/crates/quarto-markdown-pandoc/tests/test_ordered_list_formatting.rs +++ b/crates/quarto-markdown-pandoc/tests/test_ordered_list_formatting.rs @@ -21,20 +21,8 @@ fn test_ordered_list_10plus_formatting() { 11. 
Eleventh item"#; // Parse the markdown - let (doc, _context) = readers::qmd::read( - input.as_bytes(), - false, - "", - &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .unwrap(); + let (doc, _context, _warnings) = + readers::qmd::read(input.as_bytes(), false, "", &mut std::io::sink()).unwrap(); // Write it back out let mut buf = Vec::new(); @@ -87,20 +75,8 @@ fn test_ordered_list_continuation_indentation() { with continuation"#; // Parse the markdown - let (doc, _context) = readers::qmd::read( - input.as_bytes(), - false, - "", - &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .unwrap(); + let (doc, _context, _warnings) = + readers::qmd::read(input.as_bytes(), false, "", &mut std::io::sink()).unwrap(); // Write it back out let mut buf = Vec::new(); diff --git a/crates/quarto-markdown-pandoc/tests/test_warnings.rs b/crates/quarto-markdown-pandoc/tests/test_warnings.rs index 3f22d0d..4455175 100644 --- a/crates/quarto-markdown-pandoc/tests/test_warnings.rs +++ b/crates/quarto-markdown-pandoc/tests/test_warnings.rs @@ -1,5 +1,4 @@ use quarto_markdown_pandoc::readers; -use quarto_markdown_pandoc::utils; #[test] fn test_caption_without_table_warning() { @@ -13,15 +12,7 @@ Some content "#; // Parse the document - let result = readers::qmd::read( - input.as_bytes(), - false, - "test.md", - &mut std::io::sink(), - None::< - fn(&[u8], &utils::tree_sitter_log_observer::TreeSitterLogObserver, &str) -> Vec, - >, - ); + let result = readers::qmd::read(input.as_bytes(), false, "test.md", &mut std::io::sink()); // Parsing should succeed (warnings are not errors) assert!( @@ -48,15 +39,7 @@ fn test_caption_with_table_no_warning() { "#; // Parse the document - let result = readers::qmd::read( - input.as_bytes(), - false, - "test.md", - &mut std::io::sink(), - None::< - fn(&[u8], &utils::tree_sitter_log_observer::TreeSitterLogObserver, &str) -> Vec, - >, - ); + let result = readers::qmd::read(input.as_bytes(), false, "test.md", &mut std::io::sink()); // Parsing should succeed and no warnings should be emitted assert!( @@ -64,7 +47,7 @@ fn test_caption_with_table_no_warning() { "Document with valid table caption should parse successfully" ); - let (pandoc, _context) = result.unwrap(); + let (pandoc, _context, _warnings) = result.unwrap(); // Verify we have a table in the output assert!( diff --git a/crates/quarto-markdown-pandoc/tests/test_yaml_tag_regression.rs b/crates/quarto-markdown-pandoc/tests/test_yaml_tag_regression.rs index 06d501a..787142c 100644 --- a/crates/quarto-markdown-pandoc/tests/test_yaml_tag_regression.rs +++ b/crates/quarto-markdown-pandoc/tests/test_yaml_tag_regression.rs @@ -12,6 +12,7 @@ use quarto_markdown_pandoc::pandoc::meta::{ rawblock_to_meta_with_source_info, }; use quarto_markdown_pandoc::pandoc::{Inline, RawBlock}; +use quarto_markdown_pandoc::utils::diagnostic_collector::DiagnosticCollector; #[test] fn test_yaml_tags_preserved_in_new_api() { @@ -42,10 +43,12 @@ regular: This has *emphasis* }; let context = ASTContext::default(); - let meta = rawblock_to_meta_with_source_info(&block, &context); + let mut diagnostics = DiagnosticCollector::new(); + let meta = rawblock_to_meta_with_source_info(&block, &context, &mut diagnostics); let mut outer_meta = Vec::new(); - let parsed_meta = parse_metadata_strings_with_source_info(meta, &mut outer_meta); + let 
parsed_meta = + parse_metadata_strings_with_source_info(meta, &mut outer_meta, &mut diagnostics); // Extract entries let entries = if let MetaValueWithSourceInfo::MetaMap { entries, .. } = parsed_meta { @@ -65,27 +68,14 @@ regular: This has *emphasis* } = &tagged_path_entry.value { assert_eq!(inlines.len(), 1, "Expected exactly one inline"); - if let Inline::Span(span) = &inlines[0] { - // Should have yaml-tagged-string class - assert!( - span.attr.1.contains(&"yaml-tagged-string".to_string()), - "Expected yaml-tagged-string class, found: {:?}", - span.attr.1 - ); - // Should have tag attribute - assert_eq!( - span.attr.2.get("tag"), - Some(&"path".to_string()), - "Expected tag=path attribute" - ); - // Extract the string content - if let Inline::Str(s) = &span.content[0] { - assert_eq!(s.text, "images/*.png"); - } else { - panic!("Expected Str inline inside Span"); - } + // !path tag should produce plain Str (no Span wrapper) + if let Inline::Str(s) = &inlines[0] { + assert_eq!(s.text, "images/*.png"); } else { - panic!("Expected Span inline, got: {:?}", inlines[0]); + panic!( + "Expected plain Str inline for !path tag, got: {:?}", + inlines[0] + ); } } else { panic!( diff --git a/crates/quarto-source-map/src/context.rs b/crates/quarto-source-map/src/context.rs index 5d0ded6..3c4100d 100644 --- a/crates/quarto-source-map/src/context.rs +++ b/crates/quarto-source-map/src/context.rs @@ -15,6 +15,11 @@ pub struct SourceContext { pub struct SourceFile { /// File path or identifier pub path: String, + /// File content (for ephemeral/in-memory files) + /// When Some, content is stored in memory (e.g., for stdin or test files) + /// When None, content should be read from disk using the path + #[serde(skip_serializing_if = "Option::is_none")] + pub content: Option<String>, /// File information for efficient location lookups (optional for serialization) #[serde(skip_serializing_if = "Option::is_none")] pub file_info: Option<FileInformation>, @@ -36,11 +41,32 @@ impl SourceContext { } /// Add a file to the context and return its ID + /// + /// - If content is Some: Creates an ephemeral (in-memory) file. Content is stored and used for ariadne rendering. + /// - If content is None: Creates a disk-backed file. Content will be read from disk when needed (path must exist). + /// + /// For ephemeral files, FileInformation is created immediately from the provided content. + /// For disk-backed files, FileInformation is created by reading from disk if the path exists.
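+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of the two modes (assumes a `SourceContext` value `ctx` and an
+    /// input `String` named `source` are already in scope):
+    ///
+    /// ```ignore
+    /// // Ephemeral file: content is stored in memory and used directly.
+    /// let stdin_id = ctx.add_file("<stdin>".to_string(), Some(source.clone()));
+    ///
+    /// // Disk-backed file: only the path is kept; content is read from disk when needed.
+    /// let doc_id = ctx.add_file("doc.qmd".to_string(), None);
+    /// ```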
pub fn add_file(&mut self, path: String, content: Option<String>) -> FileId { let id = FileId(self.files.len()); - let file_info = content.as_ref().map(|c| FileInformation::new(c)); + + // For ephemeral files (content provided), store it and create FileInformation + // For disk-backed files (no content), try to read from disk for FileInformation only + let (stored_content, content_for_info) = match content { + Some(c) => { + // Ephemeral file: store content and use it for FileInformation + (Some(c.clone()), Some(c)) + } + None => { + // Disk-backed file: don't store content, but try to read for FileInformation + (None, std::fs::read_to_string(&path).ok()) + } + }; + + let file_info = content_for_info.as_ref().map(|c| FileInformation::new(c)); self.files.push(SourceFile { path, + content: stored_content, file_info, metadata: FileMetadata { file_type: None }, }); @@ -52,7 +78,11 @@ self.files.get(id.0) } - /// Create a copy without file information (for serialization) + /// Create a copy without FileInformation (for serialization) + /// + /// Note: This preserves the content field for ephemeral files, as they need + /// content to be serialized for proper deserialization. Only FileInformation + /// is removed since it can be reconstructed from content. pub fn without_content(&self) -> Self { SourceContext { files: self @@ -60,6 +90,7 @@ .iter() .map(|f| SourceFile { path: f.path.clone(), + content: f.content.clone(), // Preserve content for ephemeral files file_info: None, metadata: f.metadata.clone(), }) diff --git a/crates/quarto-yaml/claude-notes/implementation-plan.md b/crates/quarto-yaml/claude-notes/implementation-plan.md index 2350bdc..984ba83 100644 --- a/crates/quarto-yaml/claude-notes/implementation-plan.md +++ b/crates/quarto-yaml/claude-notes/implementation-plan.md @@ -2,7 +2,7 @@ ## Overview -This crate implements `YamlWithSourceInfo`, a data structure that wraps `yaml-rust2::Yaml` with source location tracking. +This crate implements `YamlWithSourceInfo`, a data structure that wraps `yaml-rust2::Yaml` with source location tracking. This uses the **owned data approach** as decided in the design discussion (see `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/session-logs/2025-10-13-yaml-lifetime-vs-owned-discussion.md`). ## Architecture Decision: Owned Data @@ -157,4 +157,11 @@ impl MarkedEventReceiver for YamlBuilder { 1. **Config merging** - Merge multiple YamlWithSourceInfo objects 2. **Validation** - Schema validation with source positions 3. **Unified SourceInfo** - Replace with project-wide SourceInfo type -4. **Multi-document** - Support YAML streams +4. **YAML tags** - Support for !expr and custom tags +5. **Multi-document** - Support YAML streams + +## References + +- Design document: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/yaml-with-source-info-design.md` +- Session log: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/session-logs/2025-10-13-yaml-lifetime-vs-owned-discussion.md` +- rust-analyzer patterns: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/rust-analyzer-owned-data-patterns.md`
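A minimal sketch of a caller driving the new reader entry point, based on the calls exercised by the tests in this diff; the wrapper function `parse_or_report` and the filename `example.qmd` are illustrative names, and warning handling is elided since the tests only destructure the third tuple element as `_warnings`:

```rust
use quarto_markdown_pandoc::readers;

fn parse_or_report(input: &str) -> bool {
    // New signature: input bytes, loose-mode flag, filename for diagnostics,
    // and a verbose-output sink; the old error-formatter callback is gone.
    match readers::qmd::read(input.as_bytes(), false, "example.qmd", &mut std::io::sink()) {
        Ok((_pandoc, _context, _warnings)) => {
            // Success returns the document, its context, and any warnings
            // to the caller instead of printing them inside the reader.
            true
        }
        Err(diagnostics) => {
            // Failure returns structured diagnostics; each can be rendered
            // as text or serialized to JSON.
            for diag in &diagnostics {
                eprintln!("{}", diag.to_text(None));
                let _json = diag.to_json();
            }
            false
        }
    }
}
```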