diff --git a/crates/quarto-markdown-pandoc/src/pandoc/meta.rs b/crates/quarto-markdown-pandoc/src/pandoc/meta.rs index 1c2a94c..099f16f 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/meta.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/meta.rs @@ -635,8 +635,9 @@ pub fn rawblock_to_meta_with_source_info( let content = extract_between_delimiters(&block.text).unwrap(); // Calculate offsets within RawBlock.text - // The text is "---\n\n---", so content starts at index 4 - let yaml_start = block.text.find("---\n").unwrap() + 4; + // Find the actual position of the trimmed content in the original text + // extract_between_delimiters trims the content, so we need to find where it actually starts + let yaml_start = block.text.find(content).unwrap(); // block.source_info is already quarto_source_map::SourceInfo let parent = block.source_info.clone(); @@ -646,7 +647,7 @@ pub fn rawblock_to_meta_with_source_info( quarto_source_map::SourceInfo::substring(parent, yaml_start, yaml_start + content.len()); // Parse YAML with source tracking - let yaml = match quarto_yaml::parse_with_parent(content, yaml_parent) { + let yaml = match quarto_yaml::parse_with_parent(content, yaml_parent.clone()) { Ok(y) => y, Err(e) => panic!( "(unimplemented syntax error - this is a bug!) Failed to parse metadata block as YAML: {}", @@ -656,7 +657,19 @@ pub fn rawblock_to_meta_with_source_info( // Transform YamlWithSourceInfo to MetaValueWithSourceInfo // Pass by value since yaml is no longer needed - yaml_to_meta_with_source_info(yaml, context, diagnostics) + let mut result = yaml_to_meta_with_source_info(yaml, context, diagnostics); + + // For the top-level metadata, replace the source_info with yaml_parent + // to ensure it spans the entire YAML content, not just where the mapping starts + if let MetaValueWithSourceInfo::MetaMap { + ref mut source_info, + .. 
+ } = result + { + *source_info = yaml_parent; + } + + result } /// Legacy version: Convert RawBlock to Meta (old implementation) diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/document.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/document.rs index e03808c..345ed3a 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/document.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/document.rs @@ -25,12 +25,15 @@ pub fn process_document( PandocNativeIntermediate::IntermediateSection(section) => { blocks.extend(section); } - PandocNativeIntermediate::IntermediateMetadataString(text, _range) => { + PandocNativeIntermediate::IntermediateMetadataString(text, range) => { // for now we assume it's metadata and emit it as a rawblock blocks.push(Block::RawBlock(RawBlock { format: "quarto_minus_metadata".to_string(), text, - source_info: node_source_info_with_context(node, context), + source_info: quarto_source_map::SourceInfo::from_range( + context.current_file_id(), + range, + ), })); } _ => panic!("Expected Block or Section, got {:?}", child), diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/section.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/section.rs index ed69e2a..1e31f90 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/section.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/section.rs @@ -27,12 +27,15 @@ pub fn process_section( PandocNativeIntermediate::IntermediateSection(section) => { blocks.extend(section); } - PandocNativeIntermediate::IntermediateMetadataString(text, _range) => { + PandocNativeIntermediate::IntermediateMetadataString(text, range) => { // for now we assume it's metadata and emit it as a rawblock blocks.push(Block::RawBlock(RawBlock { format: "quarto_minus_metadata".to_string(), text, - source_info: node_source_info_with_context(section_node, context), + source_info: 
quarto_source_map::SourceInfo::from_range( + context.current_file_id(), + range, + ), })); } _ => panic!("Expected Block or Section, got {:?}", child), diff --git a/crates/quarto-markdown-pandoc/src/readers/qmd.rs b/crates/quarto-markdown-pandoc/src/readers/qmd.rs index c86ab18..79c6052 100644 --- a/crates/quarto-markdown-pandoc/src/readers/qmd.rs +++ b/crates/quarto-markdown-pandoc/src/readers/qmd.rs @@ -156,6 +156,8 @@ pub fn read( }; // Store complete MetaMapEntry objects to preserve key_source information let mut meta_from_parses: Vec = Vec::new(); + // Track the source_info of the metadata block (for simple case with single block) + let mut meta_source_info: Option = None; // Create a separate diagnostic collector for metadata parsing warnings let mut meta_diagnostics = DiagnosticCollector::new(); @@ -222,7 +224,15 @@ pub fn read( ); // Extract MetaMapEntry objects (preserving key_source) and store them - if let MetaValueWithSourceInfo::MetaMap { entries, .. } = parsed_meta { + if let MetaValueWithSourceInfo::MetaMap { + entries, + source_info, + } = parsed_meta + { + // Store the source_info (for simple case with single metadata block) + if meta_source_info.is_none() { + meta_source_info = Some(source_info); + } for entry in entries { meta_from_parses.push(entry); } @@ -240,10 +250,18 @@ pub fn read( // Merge meta_from_parses into result.meta // result.meta is MetaValueWithSourceInfo::MetaMap, so we need to append entries // Now meta_from_parses contains complete MetaMapEntry objects with key_source preserved - if let MetaValueWithSourceInfo::MetaMap { entries, .. 
} = &mut result.meta { + if let MetaValueWithSourceInfo::MetaMap { + entries, + source_info, + } = &mut result.meta + { for entry in meta_from_parses.into_iter() { entries.push(entry); } + // Update the overall metadata source_info if we captured one + if let Some(captured_source_info) = meta_source_info { + *source_info = captured_source_info; + } } // Merge metadata diagnostics into main error_collector diff --git a/crates/quarto-markdown-pandoc/tests/snapshots/json/002.qmd.snapshot b/crates/quarto-markdown-pandoc/tests/snapshots/json/002.qmd.snapshot index 3fd26c2..452eef1 100644 --- a/crates/quarto-markdown-pandoc/tests/snapshots/json/002.qmd.snapshot +++ b/crates/quarto-markdown-pandoc/tests/snapshots/json/002.qmd.snapshot @@ -1 +1 @@ -{"astContext":{"files":[{"line_breaks":[3,20,24,25,35,36,40,53,57,58,62],"name":"tests/snapshots/json/002.qmd","total_length":63}],"metaTopLevelKeySources":{"nested":14,"title":12},"sourceInfoPool":[{"d":0,"r":[0,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,9],"t":2},{"d":0,"r":[0,63],"t":0},{"d":3,"r":[4,20],"t":1},{"d":4,"r":[7,16],"t":1},{"d":0,"r":[0,4],"t":0},{"d":0,"r":[37,58],"t":0},{"d":7,"r":[4,16],"t":1},{"d":8,"r":[8,12],"t":1},{"d":0,"r":[26,63],"t":0},{"d":3,"r":[4,20],"t":1},{"d":11,"r":[0,5],"t":1},{"d":7,"r":[4,16],"t":1},{"d":13,"r":[0,6],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[]],"s":10,"t":"Div"}],"meta":{"nested":{"c":[{"c":"meta","s":6,"t":"Str"}],"s":9,"t":"MetaInlines"},"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]} \ No newline at end of file 
+{"astContext":{"files":[{"line_breaks":[3,20,24,25,35,36,40,53,57,58,62],"name":"tests/snapshots/json/002.qmd","total_length":63}],"metaTopLevelKeySources":{"nested":14,"title":12},"sourceInfoPool":[{"d":0,"r":[0,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,9],"t":2},{"d":0,"r":[0,25],"t":0},{"d":3,"r":[4,20],"t":1},{"d":4,"r":[7,16],"t":1},{"d":0,"r":[0,4],"t":0},{"d":0,"r":[37,58],"t":0},{"d":7,"r":[4,16],"t":1},{"d":8,"r":[8,12],"t":1},{"d":0,"r":[26,63],"t":0},{"d":3,"r":[4,20],"t":1},{"d":11,"r":[0,5],"t":1},{"d":7,"r":[4,16],"t":1},{"d":13,"r":[0,6],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[]],"s":10,"t":"Div"}],"meta":{"nested":{"c":[{"c":"meta","s":6,"t":"Str"}],"s":9,"t":"MetaInlines"},"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]} \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/snapshots/json/003.qmd.snapshot b/crates/quarto-markdown-pandoc/tests/snapshots/json/003.qmd.snapshot index 60aea7b..059ef22 100644 --- a/crates/quarto-markdown-pandoc/tests/snapshots/json/003.qmd.snapshot +++ b/crates/quarto-markdown-pandoc/tests/snapshots/json/003.qmd.snapshot @@ -1 +1 @@ 
-{"astContext":{"files":[{"line_breaks":[3,20,24,25,35,36,40,56,69,73,74,78],"name":"tests/snapshots/json/003.qmd","total_length":79}],"metaTopLevelKeySources":{"title":22},"sourceInfoPool":[{"d":0,"r":[0,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,9],"t":2},{"d":0,"r":[0,79],"t":0},{"d":3,"r":[4,20],"t":1},{"d":4,"r":[7,16],"t":1},{"d":0,"r":[37,74],"t":0},{"d":6,"r":[4,32],"t":1},{"d":7,"r":[0,6],"t":1},{"d":0,"r":[0,7],"t":0},{"d":6,"r":[4,32],"t":1},{"d":10,"r":[8,15],"t":1},{"d":6,"r":[4,32],"t":1},{"d":12,"r":[16,22],"t":1},{"d":0,"r":[0,4],"t":0},{"d":6,"r":[4,32],"t":1},{"d":15,"r":[24,28],"t":1},{"d":6,"r":[4,32],"t":1},{"d":17,"r":[6,28],"t":1},{"d":0,"r":[37,74],"t":0},{"d":0,"r":[26,79],"t":0},{"d":3,"r":[4,20],"t":1},{"d":21,"r":[0,5],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[{"c":{"c":[{"key":"_scope","key_source":8,"value":{"c":[{"c":"lexical","s":9,"t":"Str"}],"s":11,"t":"MetaInlines"}},{"key":"nested","key_source":13,"value":{"c":[{"c":"meta","s":14,"t":"Str"}],"s":16,"t":"MetaInlines"}}],"s":18,"t":"MetaMap"},"s":19,"t":"BlockMetadata"}]],"s":20,"t":"Div"}],"meta":{"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]} \ No newline at end of file 
+{"astContext":{"files":[{"line_breaks":[3,20,24,25,35,36,40,56,69,73,74,78],"name":"tests/snapshots/json/003.qmd","total_length":79}],"metaTopLevelKeySources":{"title":21},"sourceInfoPool":[{"d":0,"r":[0,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,9],"t":2},{"d":0,"r":[0,25],"t":0},{"d":3,"r":[4,20],"t":1},{"d":4,"r":[7,16],"t":1},{"d":0,"r":[37,74],"t":0},{"d":6,"r":[4,32],"t":1},{"d":7,"r":[0,6],"t":1},{"d":0,"r":[0,7],"t":0},{"d":6,"r":[4,32],"t":1},{"d":10,"r":[8,15],"t":1},{"d":6,"r":[4,32],"t":1},{"d":12,"r":[16,22],"t":1},{"d":0,"r":[0,4],"t":0},{"d":6,"r":[4,32],"t":1},{"d":15,"r":[24,28],"t":1},{"d":6,"r":[4,32],"t":1},{"d":0,"r":[37,74],"t":0},{"d":0,"r":[26,79],"t":0},{"d":3,"r":[4,20],"t":1},{"d":20,"r":[0,5],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[{"c":{"c":[{"key":"_scope","key_source":8,"value":{"c":[{"c":"lexical","s":9,"t":"Str"}],"s":11,"t":"MetaInlines"}},{"key":"nested","key_source":13,"value":{"c":[{"c":"meta","s":14,"t":"Str"}],"s":16,"t":"MetaInlines"}}],"s":17,"t":"MetaMap"},"s":18,"t":"BlockMetadata"}]],"s":19,"t":"Div"}],"meta":{"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]} \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs b/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs index 89574cb..ce831fc 100644 --- a/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs +++ b/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs @@ -227,3 +227,69 @@ description: This is a description ); eprintln!("āœ“ LinkedHashMap fix working - key source information preserved!"); } + +#[test] +fn test_metadata_block_overall_source_info() { + // Test that the overall metadata block's source info points to the full metadata + // content (not just the opening "---\n" delimiter) + // + // This test verifies that when we have: + // --- + // title: Test + // author: Me + // --- 
+ // + // The MetaMap's source_info should point to the entire YAML content + // "title: Test\nauthor: Me\n", not just "---\n" + + let input = r#"--- +title: Test Document +author: Test Author +--- + +Some content here. +"#; + + let (pandoc, _context, _warnings) = + readers::qmd::read(input.as_bytes(), false, "test.qmd", &mut std::io::sink()) + .expect("Failed to parse"); + + // Extract metadata + let MetaValueWithSourceInfo::MetaMap { + entries, + source_info, + } = pandoc.meta + else { + panic!("Expected MetaMap"); + }; + + // Verify the overall metadata source info + // The YAML content starts at offset 4 (after "---\n") + // and should span the entire YAML content area + let meta_offset = resolve_source_offset(&source_info); + + eprintln!("\nMetadata block resolved offset: {}", meta_offset); + eprintln!("Metadata entries count: {}", entries.len()); + + // The metadata content starts at offset 4 (after "---\n") + assert_eq!( + meta_offset, 4, + "Metadata block should start at offset 4 (after opening '---\\n'), got {}", + meta_offset + ); + + // Also verify we have the expected entries + assert_eq!(entries.len(), 2, "Should have 2 metadata entries"); + + let has_title = entries.iter().any(|e| e.key == "title"); + let has_author = entries.iter().any(|e| e.key == "author"); + + assert!(has_title, "Should have 'title' entry"); + assert!(has_author, "Should have 'author' entry"); + + eprintln!("\n✅ Metadata block overall source info test passed!"); + eprintln!( + "✓ Metadata block source points to correct offset ({})", + meta_offset + ); +}