diff --git a/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs b/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs index c8119a4..03c47aa 100644 --- a/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs +++ b/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs @@ -72,7 +72,21 @@ impl DivWhitespaceConverter { let error_row = error .location .as_ref() - .map(|loc| loc.range.start.row) + .and_then(|loc| { + // Get the start offset from SourceInfo + let offset = loc.start_offset(); + // Binary search to find which line this offset is on + match line_starts.binary_search(&offset) { + Ok(idx) => Some(idx), + Err(idx) => { + if idx > 0 { + Some(idx - 1) + } else { + Some(0) + } + } + } + }) .unwrap_or(0); // The error might be on the line itself or the line before (for div fences) diff --git a/crates/quarto-error-reporting/src/diagnostic.rs b/crates/quarto-error-reporting/src/diagnostic.rs index ab2b1fd..6b8d808 100644 --- a/crates/quarto-error-reporting/src/diagnostic.rs +++ b/crates/quarto-error-reporting/src/diagnostic.rs @@ -345,7 +345,7 @@ impl DiagnosticMessage { if let Some(loc) = &self.location { // Try to map with context if available if let Some(ctx) = ctx { - if let Some(mapped) = loc.map_offset(loc.range.start.offset, ctx) { + if let Some(mapped) = loc.map_offset(loc.start_offset(), ctx) { if let Some(file) = ctx.get_file(mapped.file_id) { write!( result, @@ -359,13 +359,9 @@ impl DiagnosticMessage { } } else { // No context: show immediate location (1-indexed for display) - write!( - result, - " at {}:{}\n", - loc.range.start.row + 1, - loc.range.start.column + 1 - ) - .unwrap(); + // Note: Without context, we can't get row/column from offsets + // We could map_offset with ctx to get Location, but ctx is None here + write!(result, " at offset {}\n", loc.start_offset()).unwrap(); } } @@ -500,15 +496,12 @@ impl DiagnosticMessage { fn extract_file_id( source_info: &quarto_source_map::SourceInfo, ) -> Option { - match &source_info.mapping { - quarto_source_map::SourceMapping::Original { file_id } => Some(*file_id), - quarto_source_map::SourceMapping::Substring { parent, .. } => { + match source_info { + quarto_source_map::SourceInfo::Original { file_id, .. } => Some(*file_id), + quarto_source_map::SourceInfo::Substring { parent, .. } => { Self::extract_file_id(parent) } - quarto_source_map::SourceMapping::Transformed { parent, .. 
} => { - Self::extract_file_id(parent) - } - quarto_source_map::SourceMapping::Concat { pieces } => { + quarto_source_map::SourceInfo::Concat { pieces } => { // For concatenated sources, use the first piece's file_id pieces .first() @@ -544,8 +537,9 @@ impl DiagnosticMessage { }; // Map the location offsets back to original file positions - let start_mapped = main_location.map_offset(main_location.range.start.offset, ctx)?; - let end_mapped = main_location.map_offset(main_location.range.end.offset, ctx)?; + // map_offset expects relative offsets (0 = start of this SourceInfo's range) + let start_mapped = main_location.map_offset(0, ctx)?; + let end_mapped = main_location.map_offset(main_location.length(), ctx)?; // Determine report kind and color let (report_kind, main_color) = match self.kind { @@ -591,9 +585,10 @@ impl DiagnosticMessage { if detail_file_id == file_id { // Map detail offsets to original file positions + // map_offset expects relative offsets (0 = start of SourceInfo's range) if let (Some(detail_start), Some(detail_end)) = ( - detail_loc.map_offset(detail_loc.range.start.offset, ctx), - detail_loc.map_offset(detail_loc.range.end.offset, ctx), + detail_loc.map_offset(0, ctx), + detail_loc.map_offset(detail_loc.length(), ctx), ) { let detail_span = detail_start.location.offset..detail_end.location.offset; let detail_color = match detail.kind { @@ -787,22 +782,9 @@ mod tests { fn test_location_in_to_text_without_context() { use crate::builder::DiagnosticMessageBuilder; - // Create a location at row 10, column 5 - let location = quarto_source_map::SourceInfo::original( - quarto_source_map::FileId(0), - quarto_source_map::Range { - start: quarto_source_map::Location { - offset: 100, - row: 10, - column: 5, - }, - end: quarto_source_map::Location { - offset: 110, - row: 10, - column: 15, - }, - }, - ); + // Create a location at offsets 100-110 + let location = + quarto_source_map::SourceInfo::original(quarto_source_map::FileId(0), 100, 110); let msg = DiagnosticMessageBuilder::error("Invalid syntax") .with_location(location) @@ -810,9 +792,9 @@ mod tests { let text = msg.to_text(None); - // Without context, should show immediate location (1-indexed) + // Without context, should show offset (we can't get row/column without context) assert!(text.contains("Invalid syntax")); - assert!(text.contains("at 11:6")); // row 10 + 1, column 5 + 1 + assert!(text.contains("at offset 100")); } #[test] @@ -826,21 +808,10 @@ mod tests { Some("line 1\nline 2\nline 3\nline 4".to_string()), ); - // Create a location in that file + // Create a location in that file (offset 7 is start of "line 2") let location = quarto_source_map::SourceInfo::original( - file_id, - quarto_source_map::Range { - start: quarto_source_map::Location { - offset: 7, // Start of "line 2" - row: 1, - column: 0, - }, - end: quarto_source_map::Location { - offset: 13, - row: 1, - column: 6, - }, - }, + file_id, 7, // Start of "line 2" + 13, // End of "line 2" ); let msg = DiagnosticMessageBuilder::error("Invalid syntax") @@ -859,21 +830,8 @@ mod tests { fn test_location_in_to_json() { use crate::builder::DiagnosticMessageBuilder; - let location = quarto_source_map::SourceInfo::original( - quarto_source_map::FileId(0), - quarto_source_map::Range { - start: quarto_source_map::Location { - offset: 100, - row: 10, - column: 5, - }, - end: quarto_source_map::Location { - offset: 110, - row: 10, - column: 15, - }, - }, - ); + let location = + quarto_source_map::SourceInfo::original(quarto_source_map::FileId(0), 100, 110); let msg = 
DiagnosticMessageBuilder::error("Invalid syntax") .with_location(location) @@ -881,19 +839,16 @@ mod tests { let json = msg.to_json(); - // Should have location field with range info + // Should have location field with Original variant assert!(json.get("location").is_some()); let loc = &json["location"]; - assert!(loc.get("range").is_some()); - - // Verify the range is serialized correctly - let range = &loc["range"]; - assert_eq!(range["start"]["row"], 10); - assert_eq!(range["start"]["column"], 5); - assert_eq!(range["start"]["offset"], 100); - assert_eq!(range["end"]["row"], 10); - assert_eq!(range["end"]["column"], 15); - assert_eq!(range["end"]["offset"], 110); + + // Verify the SourceInfo is serialized correctly (as Original enum variant) + assert!(loc.get("Original").is_some()); + let original = &loc["Original"]; + assert_eq!(original["file_id"], 0); + assert_eq!(original["start_offset"], 100); + assert_eq!(original["end_offset"], 110); } #[test] diff --git a/crates/quarto-markdown-pandoc/src/pandoc/inline.rs b/crates/quarto-markdown-pandoc/src/pandoc/inline.rs index 3fbfd90..e8d939d 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/inline.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/inline.rs @@ -484,7 +484,7 @@ mod tests { use super::*; fn dummy_source_info() -> quarto_source_map::SourceInfo { - quarto_source_map::SourceInfo::original( + quarto_source_map::SourceInfo::from_range( quarto_source_map::FileId(0), quarto_source_map::Range { start: quarto_source_map::Location { diff --git a/crates/quarto-markdown-pandoc/src/pandoc/location.rs b/crates/quarto-markdown-pandoc/src/pandoc/location.rs index 18c4cf5..25047b1 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/location.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/location.rs @@ -63,6 +63,16 @@ impl SourceInfo { } } + /// Get the start offset + pub fn start_offset(&self) -> usize { + self.range.start.offset + } + + /// Get the end offset + pub fn end_offset(&self) -> usize { + self.range.end.offset + } + /// Convert to quarto-source-map::SourceInfo (temporary conversion helper) /// /// This helper bridges between pandoc::location types and quarto-source-map types. @@ -71,7 +81,7 @@ impl SourceInfo { /// Creates an Original mapping with a dummy FileId(0). /// For proper filename support, use to_source_map_info_with_mapping with a real FileId. 
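The tests above and the helpers below show the shape of the refactor: `SourceInfo` drops its stored `Range` and keeps only byte offsets, with rows and columns recovered on demand. A minimal mirror of the implied type, for orientation — the variant names and the `original`/`start_offset`/`end_offset`/`length` signatures follow the diff, but the bodies and the exact `SourcePiece` fields are assumptions:

```rust
use std::rc::Rc;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FileId(pub usize);

#[derive(Debug, Clone)]
pub enum SourceInfo {
    Original { file_id: FileId, start_offset: usize, end_offset: usize },
    Substring { parent: Rc<SourceInfo>, start_offset: usize, end_offset: usize },
    Concat { pieces: Vec<SourcePiece> },
}

#[derive(Debug, Clone)]
pub struct SourcePiece {
    pub source_info: Rc<SourceInfo>,
    pub length: usize,
}

impl SourceInfo {
    /// Two-offset constructor, as called in the updated tests above.
    pub fn original(file_id: FileId, start: usize, end: usize) -> Self {
        SourceInfo::Original { file_id, start_offset: start, end_offset: end }
    }

    pub fn start_offset(&self) -> usize {
        match self {
            SourceInfo::Original { start_offset, .. }
            | SourceInfo::Substring { start_offset, .. } => *start_offset,
            // Matches the serializer later in this diff, which writes 0
            // as the Concat start...
            SourceInfo::Concat { .. } => 0,
        }
    }

    pub fn end_offset(&self) -> usize {
        match self {
            SourceInfo::Original { end_offset, .. }
            | SourceInfo::Substring { end_offset, .. } => *end_offset,
            // ...and the sum of piece lengths as the Concat end.
            SourceInfo::Concat { pieces } => pieces.iter().map(|p| p.length).sum(),
        }
    }

    /// `map_offset(rel, ctx)` elsewhere in this diff takes offsets
    /// relative to this span: 0 is the start, `length()` is the end.
    pub fn length(&self) -> usize {
        self.end_offset() - self.start_offset()
    }
}
```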
pub fn to_source_map_info(&self) -> quarto_source_map::SourceInfo { - quarto_source_map::SourceInfo::original( + quarto_source_map::SourceInfo::from_range( quarto_source_map::FileId(0), quarto_source_map::Range { start: quarto_source_map::Location { @@ -96,7 +106,7 @@ impl SourceInfo { &self, file_id: quarto_source_map::FileId, ) -> quarto_source_map::SourceInfo { - quarto_source_map::SourceInfo::original( + quarto_source_map::SourceInfo::from_range( file_id, quarto_source_map::Range { start: quarto_source_map::Location { @@ -132,14 +142,14 @@ pub fn node_location(node: &tree_sitter::Node) -> quarto_source_map::Range { } pub fn node_source_info(node: &tree_sitter::Node) -> quarto_source_map::SourceInfo { - quarto_source_map::SourceInfo::original(quarto_source_map::FileId(0), node_location(node)) + quarto_source_map::SourceInfo::from_range(quarto_source_map::FileId(0), node_location(node)) } pub fn node_source_info_with_context( node: &tree_sitter::Node, context: &ASTContext, ) -> quarto_source_map::SourceInfo { - quarto_source_map::SourceInfo::original(context.current_file_id(), node_location(node)) + quarto_source_map::SourceInfo::from_range(context.current_file_id(), node_location(node)) } pub fn empty_range() -> Range { @@ -158,7 +168,7 @@ pub fn empty_range() -> Range { } pub fn empty_source_info() -> quarto_source_map::SourceInfo { - quarto_source_map::SourceInfo::original( + quarto_source_map::SourceInfo::from_range( quarto_source_map::FileId(0), quarto_source_map::Range { start: quarto_source_map::Location { @@ -177,15 +187,10 @@ pub fn empty_source_info() -> quarto_source_map::SourceInfo { /// Extract filename index from quarto_source_map::SourceInfo by walking to Original mapping pub fn extract_filename_index(info: &quarto_source_map::SourceInfo) -> Option { - match &info.mapping { - quarto_source_map::SourceMapping::Original { file_id } => Some(file_id.0), - quarto_source_map::SourceMapping::Substring { parent, .. } => { - extract_filename_index(parent) - } - quarto_source_map::SourceMapping::Transformed { parent, .. } => { - extract_filename_index(parent) - } - quarto_source_map::SourceMapping::Concat { pieces } => { + match info { + quarto_source_map::SourceInfo::Original { file_id, .. } => Some(file_id.0), + quarto_source_map::SourceInfo::Substring { parent, .. 
} => extract_filename_index(parent), + quarto_source_map::SourceInfo::Concat { pieces } => { // Return first non-None filename_index from pieces pieces .iter() diff --git a/crates/quarto-markdown-pandoc/src/pandoc/source_map_compat.rs b/crates/quarto-markdown-pandoc/src/pandoc/source_map_compat.rs index 4104c7f..323e018 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/source_map_compat.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/source_map_compat.rs @@ -29,7 +29,7 @@ pub fn node_to_source_info(node: &Node, file_id: FileId) -> SourceInfo { let start_pos = node.start_position(); let end_pos = node.end_position(); - SourceInfo::original( + SourceInfo::from_range( file_id, Range { start: Location { @@ -91,16 +91,16 @@ pub fn old_to_new_source_info( }; // Convert the Range (both use the same Location structure) - SourceInfo::original( + SourceInfo::from_range( file_id, Range { start: Location { - offset: old_info.range.start.offset, + offset: old_info.start_offset(), row: old_info.range.start.row, column: old_info.range.start.column, }, end: Location { - offset: old_info.range.end.offset, + offset: old_info.end_offset(), row: old_info.range.end.row, column: old_info.range.end.column, }, @@ -108,6 +108,44 @@ pub fn old_to_new_source_info( ) } +/// Convert quarto-source-map::SourceInfo to a quarto_source_map::Range, with a fallback if mapping fails. +/// +/// This is for use with PandocNativeIntermediate which uses quarto_source_map::Range. +/// Provides a fallback Range with zero row/column values if the mapping fails. +/// +/// # Arguments +/// * `source_info` - The SourceInfo to convert +/// * `ctx` - The ASTContext containing the source context +/// +/// # Returns +/// A quarto_source_map::Range with row/column information if available, or a Range with offsets only +pub fn source_info_to_qsm_range_or_fallback( + source_info: &SourceInfo, + ctx: &ASTContext, +) -> quarto_source_map::Range { + let start_mapped = source_info.map_offset(0, &ctx.source_context); + let end_mapped = source_info.map_offset(source_info.length(), &ctx.source_context); + + match (start_mapped, end_mapped) { + (Some(start), Some(end)) => quarto_source_map::Range { + start: start.location, + end: end.location, + }, + _ => quarto_source_map::Range { + start: quarto_source_map::Location { + offset: source_info.start_offset(), + row: 0, + column: 0, + }, + end: quarto_source_map::Location { + offset: source_info.end_offset(), + row: 0, + column: 0, + }, + }, + } +} + // Note: Tests for these functions will be validated through integration tests // when they're used in actual parsing modules. The tree-sitter-qmd parser // setup is too complex to mock in unit tests here. diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter.rs index b2f926e..26be6f6 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter.rs @@ -103,7 +103,7 @@ fn process_list( // but the next item might not itself be a paragraph. let mut has_loose_item = false; - let mut last_para_range: Option = None; + let mut last_para_end_row: Option = None; let mut last_item_end_row: Option = None; let mut list_items: Vec = Vec::new(); let mut is_ordered_list: Option = None; @@ -153,9 +153,9 @@ fn process_list( } } - // is the last item loose? Check the last paragraph range - if let Some(ref last_range) = last_para_range { - if last_range.end.row != child_range.start.row { + // is the last item loose? 
Check the last paragraph end row + if let Some(last_para_end) = last_para_end_row { + if last_para_end != child_range.start.row { // if the last paragraph ends on a different line than the current item starts, // then the last item was loose, mark it has_loose_item = true; @@ -186,12 +186,15 @@ fn process_list( has_loose_item = true; // technically, we don't need to worry about - // last paragraph range after setting has_loose_item, + // last paragraph end row after setting has_loose_item, // but we do it in case we want to use it later - last_para_range = None; - last_item_end_row = blocks - .last() - .map(|b| get_block_source_info(b).range.end.row); + last_para_end_row = None; + last_item_end_row = blocks.last().and_then(|b| { + let source_info = get_block_source_info(b); + source_info + .map_offset(source_info.length(), &context.source_context) + .map(|mapped| mapped.location.row) + }); list_items.push(blocks); continue; } @@ -199,22 +202,28 @@ fn process_list( // is this item possibly loose? if blocks.len() == 1 { if let Some(Block::Paragraph(para)) = blocks.first() { - // yes, so store the range and wait to finish the check on + // yes, so store the end row and wait to finish the check on // next item - last_para_range = Some(para.source_info.range.clone()); + last_para_end_row = para + .source_info + .map_offset(para.source_info.length(), &context.source_context) + .map(|mapped| mapped.location.row); } else { // if the first block is not a paragraph, it's not loose - last_para_range = None; + last_para_end_row = None; } } else { // if the item has multiple blocks (but not multiple paragraphs, // which would have been caught above), we need to reset the - // last_para_range since this item can't participate in loose detection - last_para_range = None; + // last_para_end_row since this item can't participate in loose detection + last_para_end_row = None; } - last_item_end_row = blocks - .last() - .map(|b| get_block_source_info(b).range.end.row); + last_item_end_row = blocks.last().and_then(|b| { + let source_info = get_block_source_info(b); + source_info + .map_offset(source_info.length(), &context.source_context) + .map(|mapped| mapped.location.row) + }); list_items.push(blocks); } @@ -361,7 +370,7 @@ fn process_native_inline( PandocNativeIntermediate::IntermediateBaseText(text, range) => { if let Some(_) = whitespace_re.find(&text) { Inline::Space(Space { - source_info: quarto_source_map::SourceInfo::original( + source_info: quarto_source_map::SourceInfo::from_range( context.current_file_id(), range, ), @@ -369,7 +378,7 @@ fn process_native_inline( } else { Inline::Str(Str { text: apply_smart_quotes(text), - source_info: quarto_source_map::SourceInfo::original( + source_info: quarto_source_map::SourceInfo::from_range( context.current_file_id(), range, ), @@ -431,7 +440,7 @@ fn process_native_inlines( PandocNativeIntermediate::IntermediateBaseText(text, range) => { if let Some(_) = whitespace_re.find(&text) { inlines.push(Inline::Space(Space { - source_info: quarto_source_map::SourceInfo::original( + source_info: quarto_source_map::SourceInfo::from_range( context.current_file_id(), range, ), @@ -439,7 +448,7 @@ fn process_native_inlines( } else { inlines.push(Inline::Str(Str { text: apply_smart_quotes(text), - source_info: quarto_source_map::SourceInfo::original( + source_info: quarto_source_map::SourceInfo::from_range( context.current_file_id(), range, ), diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/backslash_escape.rs 
b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/backslash_escape.rs index 8e0588a..ca62c23 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/backslash_escape.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/backslash_escape.rs @@ -20,8 +20,10 @@ pub fn process_backslash_escape( panic!("Invalid backslash escape: {}", text); } let content = &text[1..]; // remove the leading backslash - PandocNativeIntermediate::IntermediateBaseText( - content.to_string(), - node_source_info_with_context(node, context).range, - ) + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); + PandocNativeIntermediate::IntermediateBaseText(content.to_string(), range) } diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/block_quote.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/block_quote.rs index c427e90..b07d13c 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/block_quote.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/block_quote.rs @@ -46,7 +46,7 @@ pub fn process_block_quote( content.push(Block::RawBlock(RawBlock { format: "quarto_minus_metadata".to_string(), text, - source_info: quarto_source_map::SourceInfo::original( + source_info: quarto_source_map::SourceInfo::from_range( quarto_source_map::FileId(0), range, ), diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/code_fence_content.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/code_fence_content.rs index 2fc106c..d5bb5f5 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/code_fence_content.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/code_fence_content.rs @@ -44,8 +44,10 @@ pub fn process_code_fence_content( let slice_after_continuation = &input_bytes[current_location..end]; content.push_str(std::str::from_utf8(slice_after_continuation).unwrap()); } - PandocNativeIntermediate::IntermediateBaseText( - content, - node_source_info_with_context(node, context).range, - ) + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); + PandocNativeIntermediate::IntermediateBaseText(content, range) } diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/code_span.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/code_span.rs index 13839e0..b0c0744 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/code_span.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/code_span.rs @@ -26,14 +26,18 @@ pub fn process_code_span( let mut inlines: Vec<_> = children .into_iter() .map(|(node_name, child)| { - let range = node_source_info_with_context(node, context); + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); match child { PandocNativeIntermediate::IntermediateAttr(a) => { attr = a; // IntermediateUnknown here "consumes" the node ( node_name, - PandocNativeIntermediate::IntermediateUnknown(range.range.clone()), + PandocNativeIntermediate::IntermediateUnknown(range.clone()), ) } PandocNativeIntermediate::IntermediateRawFormat(raw, _) => { @@ -41,7 +45,7 @@ pub fn process_code_span( // IntermediateUnknown here "consumes" the node ( 
node_name, - PandocNativeIntermediate::IntermediateUnknown(range.range.clone()), + PandocNativeIntermediate::IntermediateUnknown(range.clone()), ) } PandocNativeIntermediate::IntermediateBaseText(text, range) => { diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/editorial_marks.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/editorial_marks.rs index bbc75f3..dde568c 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/editorial_marks.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/editorial_marks.rs @@ -44,12 +44,12 @@ macro_rules! process_editorial_mark { PandocNativeIntermediate::IntermediateBaseText(text, range) => { if let Some(_) = whitespace_re.find(&text) { content.push(Inline::Space(Space { - source_info: quarto_source_map::SourceInfo::original(context.current_file_id(), range), + source_info: quarto_source_map::SourceInfo::from_range(context.current_file_id(), range), })) } else { content.push(Inline::Str(Str { text: apply_smart_quotes(text), - source_info: quarto_source_map::SourceInfo::original(context.current_file_id(), range), + source_info: quarto_source_map::SourceInfo::from_range(context.current_file_id(), range), })) } } diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/fenced_div_block.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/fenced_div_block.rs index a7c746c..9e86aac 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/fenced_div_block.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/fenced_div_block.rs @@ -67,7 +67,7 @@ pub fn process_fenced_div_block( content.push(Block::RawBlock(RawBlock { format: "quarto_minus_metadata".to_string(), text, - source_info: quarto_source_map::SourceInfo::original( + source_info: quarto_source_map::SourceInfo::from_range( quarto_source_map::FileId(0), range, ), diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/indented_code_block.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/indented_code_block.rs index 02de385..39416ff 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/indented_code_block.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/indented_code_block.rs @@ -36,11 +36,9 @@ pub fn process_indented_code_block( let continuation_start = range .start .offset - .saturating_sub(outer_range.range.start.offset); - let continuation_end = range - .end - .offset - .saturating_sub(outer_range.range.start.offset); + .saturating_sub(outer_range.start_offset()); + let continuation_end = + range.end.offset.saturating_sub(outer_range.start_offset()); // Append content before this continuation if continuation_start > start_offset && continuation_start <= outer_string.len() diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/link_title.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/link_title.rs index 003470a..681cbfd 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/link_title.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/link_title.rs @@ -15,8 +15,10 @@ pub fn process_link_title( ) -> PandocNativeIntermediate { let title = node.utf8_text(input_bytes).unwrap().to_string(); let title = title[1..title.len() - 1].to_string(); - PandocNativeIntermediate::IntermediateBaseText( - title, - node_source_info_with_context(node, context).range, - ) + let source_info = node_source_info_with_context(node, context); + let range = 
crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); + PandocNativeIntermediate::IntermediateBaseText(title, range) } diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/list_marker.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/list_marker.rs index ba6aca6..f26d239 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/list_marker.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/list_marker.rs @@ -26,10 +26,12 @@ pub fn process_list_marker( if marker_text == "(@)" { // For example lists, we use 1 as the starting number // The actual numbering will be handled in postprocessing - return PandocNativeIntermediate::IntermediateOrderedListMarker( - 1, - node_source_info_with_context(node, context).range, + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, ); + return PandocNativeIntermediate::IntermediateOrderedListMarker(1, range); } let marker_text = marker_text @@ -39,8 +41,10 @@ pub fn process_list_marker( let marker_number: usize = marker_text .parse() .unwrap_or_else(|_| panic!("Invalid list marker number: {}", marker_text)); - PandocNativeIntermediate::IntermediateOrderedListMarker( - marker_number, - node_source_info_with_context(node, context).range, - ) + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); + PandocNativeIntermediate::IntermediateOrderedListMarker(marker_number, range) } diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/numeric_character_reference.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/numeric_character_reference.rs index a7e2de2..bf8b706 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/numeric_character_reference.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/numeric_character_reference.rs @@ -32,8 +32,10 @@ pub fn process_numeric_character_reference( None => text, // If we can't parse it, return the original text }; - PandocNativeIntermediate::IntermediateBaseText( - result_text, - node_source_info_with_context(node, context).range, - ) + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); + PandocNativeIntermediate::IntermediateBaseText(result_text, range) } diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/raw_attribute.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/raw_attribute.rs index 8a45ee2..68c51c3 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/raw_attribute.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/raw_attribute.rs @@ -13,7 +13,11 @@ pub fn process_raw_attribute( children: Vec<(String, PandocNativeIntermediate)>, context: &ASTContext, ) -> PandocNativeIntermediate { - let range = node_source_info_with_context(node, context).range; + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); for (_, child) in children { match child { PandocNativeIntermediate::IntermediateBaseText(raw, _) => { diff --git 
a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/raw_specifier.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/raw_specifier.rs index c5b36d8..fd9596d 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/raw_specifier.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/raw_specifier.rs @@ -15,15 +15,17 @@ pub fn process_raw_specifier( ) -> PandocNativeIntermediate { // like code_content but skipping first character let raw = node.utf8_text(input_bytes).unwrap().to_string(); + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); if raw.chars().nth(0) == Some('<') { PandocNativeIntermediate::IntermediateBaseText( "pandoc-reader:".to_string() + &raw[1..], - node_source_info_with_context(node, context).range, + range.clone(), ) } else { - PandocNativeIntermediate::IntermediateBaseText( - raw[1..].to_string(), - node_source_info_with_context(node, context).range, - ) + PandocNativeIntermediate::IntermediateBaseText(raw[1..].to_string(), range) } } diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/shortcode.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/shortcode.rs index 6d5dc87..81e1dfc 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/shortcode.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/shortcode.rs @@ -22,10 +22,12 @@ pub fn process_shortcode_string_arg( context: &ASTContext, ) -> PandocNativeIntermediate { let id = node.utf8_text(input_bytes).unwrap().to_string(); - PandocNativeIntermediate::IntermediateShortcodeArg( - ShortcodeArg::String(id), - node_source_info_with_context(node, context).range, - ) + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); + PandocNativeIntermediate::IntermediateShortcodeArg(ShortcodeArg::String(id), range) } // Helper function to process shortcode_string nodes @@ -40,10 +42,12 @@ pub fn process_shortcode_string( extract_quoted_text_fn() ) }; - PandocNativeIntermediate::IntermediateShortcodeArg( - ShortcodeArg::String(id), - node_source_info_with_context(node, context).range, - ) + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); + PandocNativeIntermediate::IntermediateShortcodeArg(ShortcodeArg::String(id), range) } pub fn process_shortcode_keyword_param( @@ -104,7 +108,11 @@ pub fn process_shortcode_keyword_param( } } } - let range = node_source_info_with_context(node, context).range; + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); PandocNativeIntermediate::IntermediateShortcodeArg(ShortcodeArg::KeyValue(result), range) } @@ -182,7 +190,11 @@ pub fn process_shortcode_boolean( "false" => ShortcodeArg::Boolean(false), _ => panic!("Unexpected shortcode_boolean value: {}", value), }; - let range = node_source_info_with_context(node, context).range; + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); PandocNativeIntermediate::IntermediateShortcodeArg(value, range) } @@ -192,7 
+204,11 @@ pub fn process_shortcode_number( context: &ASTContext, ) -> PandocNativeIntermediate { let value = node.utf8_text(input_bytes).unwrap(); - let range = node_source_info_with_context(node, context).range; + let source_info = node_source_info_with_context(node, context); + let range = crate::pandoc::source_map_compat::source_info_to_qsm_range_or_fallback( + &source_info, + context, + ); let Ok(num) = value.parse::() else { panic!("Invalid shortcode_number: {}", value) }; diff --git a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/text_helpers.rs b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/text_helpers.rs index 94d5a4d..7bee649 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/text_helpers.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/text_helpers.rs @@ -119,14 +119,14 @@ pub fn create_line_break_inline( let range = node_location(node); let inline = if is_hard { Inline::LineBreak(LineBreak { - source_info: quarto_source_map::SourceInfo::original( + source_info: quarto_source_map::SourceInfo::from_range( quarto_source_map::FileId(0), range, ), }) } else { Inline::SoftBreak(SoftBreak { - source_info: quarto_source_map::SourceInfo::original( + source_info: quarto_source_map::SourceInfo::from_range( quarto_source_map::FileId(0), range, ), diff --git a/crates/quarto-markdown-pandoc/src/readers/json.rs b/crates/quarto-markdown-pandoc/src/readers/json.rs index 5300f8e..5c5032f 100644 --- a/crates/quarto-markdown-pandoc/src/readers/json.rs +++ b/crates/quarto-markdown-pandoc/src/readers/json.rs @@ -18,7 +18,7 @@ use crate::pandoc::{ RawBlock, RawInline, SmallCaps, SoftBreak, Space, Span, Str, Strikeout, Strong, Subscript, Superscript, Underline, }; -use quarto_source_map::{FileId, RangeMapping, SourceMapping}; +use quarto_source_map::FileId; use serde_json::Value; use std::rc::Rc; @@ -80,8 +80,11 @@ impl SourceInfoDeserializer { /// Build the pool from the sourceInfoPool JSON array (compact format) /// - /// Compact format: {"r": [start_off, start_row, start_col, end_off, end_row, end_col], "t": type_code, "d": data} + /// New format: {"r": [start_offset, end_offset], "t": type_code, "d": data} + /// Old format: {"r": [start_off, start_row, start_col, end_off, end_row, end_col], "t": type_code, "d": data} /// ID is implicit from array index + /// + /// Note: Row/column information from old format is ignored since SourceInfo now stores only offsets. fn new(pool_json: &Value) -> Result { let pool_array = pool_json .as_array() @@ -91,45 +94,39 @@ impl SourceInfoDeserializer { // Build pool in order - parents must come before children for item in pool_array { - // Parse range from "r" array: [start_offset, start_row, start_col, end_offset, end_row, end_col] + // Parse offsets from "r" array let range_array = item .get("r") .and_then(|v| v.as_array()) .ok_or(JsonReadError::MalformedSourceInfoPool)?; - if range_array.len() != 6 { - return Err(JsonReadError::MalformedSourceInfoPool); - } - - let range = quarto_source_map::Range { - start: quarto_source_map::Location { - offset: range_array[0] - .as_u64() - .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize, - row: range_array[1] - .as_u64() - .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize, - column: range_array[2] + let (start_offset, end_offset) = match range_array.len() { + 2 => { + // New format: [start_offset, end_offset] + let start = range_array[0] .as_u64() .ok_or(JsonReadError::MalformedSourceInfoPool)? 
- as usize, - }, - end: quarto_source_map::Location { - offset: range_array[3] + as usize; + let end = range_array[1] .as_u64() .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize, - row: range_array[4] + as usize; + (start, end) + } + 6 => { + // Old format: [start_offset, start_row, start_col, end_offset, end_row, end_col] + // Extract only offsets, ignore row/column + let start = range_array[0] .as_u64() .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize, - column: range_array[5] + as usize; + let end = range_array[3] .as_u64() .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize, - }, + as usize; + (start, end) + } + _ => return Err(JsonReadError::MalformedSourceInfoPool), }; // Parse type code from "t" @@ -143,42 +140,47 @@ impl SourceInfoDeserializer { .get("d") .ok_or(JsonReadError::MalformedSourceInfoPool)?; - let mapping = match type_code { + let source_info = match type_code { 0 => { // Original: data is file_id (number) let file_id = data .as_u64() .ok_or(JsonReadError::MalformedSourceInfoPool)? as usize; - SourceMapping::Original { + quarto_source_map::SourceInfo::Original { file_id: FileId(file_id), + start_offset, + end_offset, } } 1 => { - // Substring: data is [parent_id, offset] - let data_array = data - .as_array() - .ok_or(JsonReadError::MalformedSourceInfoPool)?; - if data_array.len() != 2 { + // Substring: data is parent_id (new format) or [parent_id, offset] (old format) + // In new format, offsets are already in start_offset/end_offset above + let parent_id = if let Some(id) = data.as_u64() { + // New format: just parent_id + id as usize + } else if let Some(data_array) = data.as_array() { + // Old format: [parent_id, offset] - ignore offset, use start_offset/end_offset + if data_array.len() != 2 { + return Err(JsonReadError::MalformedSourceInfoPool); + } + data_array[0] + .as_u64() + .ok_or(JsonReadError::MalformedSourceInfoPool)? + as usize + } else { return Err(JsonReadError::MalformedSourceInfoPool); - } - let parent_id = data_array[0] - .as_u64() - .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize; - let offset = data_array[1] - .as_u64() - .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize; + }; let parent = pool .get(parent_id) .ok_or(JsonReadError::MalformedSourceInfoPool)? .clone(); - SourceMapping::Substring { + quarto_source_map::SourceInfo::Substring { parent: Rc::new(parent), - offset, + start_offset, + end_offset, } } 2 => { @@ -222,62 +224,33 @@ impl SourceInfoDeserializer { }) .collect(); - SourceMapping::Concat { pieces: pieces? } + quarto_source_map::SourceInfo::Concat { pieces: pieces? } } 3 => { - // Transformed: data is [parent_id, [[from_start, from_end, to_start, to_end], ...]] + // Transformed variant no longer exists in SourceInfo + // Convert to approximate Substring pointing to parent + // This loses the transformation mapping but preserves the parent relationship let data_array = data .as_array() .ok_or(JsonReadError::MalformedSourceInfoPool)?; - if data_array.len() != 2 { + if data_array.is_empty() { return Err(JsonReadError::MalformedSourceInfoPool); } let parent_id = data_array[0] .as_u64() .ok_or(JsonReadError::MalformedSourceInfoPool)? 
as usize; - let mapping_array = data_array[1] - .as_array() - .ok_or(JsonReadError::MalformedSourceInfoPool)?; - - let range_mappings: Result> = mapping_array - .iter() - .map(|rm_array| { - let rm = rm_array - .as_array() - .ok_or(JsonReadError::MalformedSourceInfoPool)?; - if rm.len() != 4 { - return Err(JsonReadError::MalformedSourceInfoPool); - } - Ok(RangeMapping { - from_start: rm[0] - .as_u64() - .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize, - from_end: rm[1] - .as_u64() - .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize, - to_start: rm[2] - .as_u64() - .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize, - to_end: rm[3] - .as_u64() - .ok_or(JsonReadError::MalformedSourceInfoPool)? - as usize, - }) - }) - .collect(); let parent = pool .get(parent_id) .ok_or(JsonReadError::MalformedSourceInfoPool)? .clone(); - SourceMapping::Transformed { + // Approximate with Substring + quarto_source_map::SourceInfo::Substring { parent: Rc::new(parent), - mapping: range_mappings?, + start_offset, + end_offset, } } _ => { @@ -285,7 +258,7 @@ impl SourceInfoDeserializer { } }; - pool.push(quarto_source_map::SourceInfo { range, mapping }); + pool.push(source_info); } Ok(SourceInfoDeserializer { pool }) @@ -320,7 +293,7 @@ fn make_source_info(filename_index: Option, range: Range) -> quarto_sourc column: range.end.column, }, }; - quarto_source_map::SourceInfo::original(file_id, qsm_range) + quarto_source_map::SourceInfo::from_range(file_id, qsm_range) } fn empty_range() -> Range { @@ -448,6 +421,19 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { let c = obj @@ -459,38 +445,13 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { - let (filename_index, range) = obj - .get("l") - .and_then(read_location) - .unwrap_or_else(|| (None, empty_range())); - Ok(Inline::Space(Space { - source_info: make_source_info(filename_index, range), - })) - } - "LineBreak" => { - let (filename_index, range) = obj - .get("l") - .and_then(read_location) - .unwrap_or_else(|| (None, empty_range())); - Ok(Inline::LineBreak(crate::pandoc::inline::LineBreak { - source_info: make_source_info(filename_index, range), - })) - } - "SoftBreak" => { - let (filename_index, range) = obj - .get("l") - .and_then(read_location) - .unwrap_or_else(|| (None, empty_range())); - Ok(Inline::SoftBreak(SoftBreak { - source_info: make_source_info(filename_index, range), - })) + Ok(Inline::Str(Str { text, source_info })) } + "Space" => Ok(Inline::Space(Space { source_info })), + "LineBreak" => Ok(Inline::LineBreak(crate::pandoc::inline::LineBreak { + source_info, + })), + "SoftBreak" => Ok(Inline::SoftBreak(SoftBreak { source_info })), "Emph" => { let c = obj .get("c") @@ -498,7 +459,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -508,7 +469,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -531,7 +492,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -572,7 +533,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -582,7 +543,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -592,7 +553,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -602,7 +563,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -612,7 +573,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> 
Result { @@ -622,7 +583,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -660,7 +621,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -701,7 +662,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -731,7 +692,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -774,7 +735,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -795,7 +756,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -805,7 +766,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -887,7 +848,7 @@ fn read_inline(value: &Value, deserializer: &SourceInfoDeserializer) -> Result Err(JsonReadError::UnsupportedVariant(format!("Inline: {}", t))), @@ -906,27 +867,63 @@ fn read_ast_context(value: &Value) -> Result { .as_object() .ok_or_else(|| JsonReadError::InvalidType("Expected object for ASTContext".to_string()))?; - let filenames_val = obj - .get("filenames") - .ok_or_else(|| JsonReadError::MissingField("filenames".to_string()))?; + // Read files array - each entry has "name" and optionally "line_breaks"/"total_length" + let files_val = obj + .get("files") + .ok_or_else(|| JsonReadError::MissingField("files".to_string()))?; - let filenames_arr = filenames_val + let files_arr = files_val .as_array() - .ok_or_else(|| JsonReadError::InvalidType("filenames must be array".to_string()))?; + .ok_or_else(|| JsonReadError::InvalidType("files must be array".to_string()))?; + + let mut filenames = Vec::new(); + let mut source_context = quarto_source_map::SourceContext::new(); + + for file_obj in files_arr { + let file_map = file_obj + .as_object() + .ok_or_else(|| JsonReadError::InvalidType("file entry must be object".to_string()))?; + + // Extract filename + let filename = file_map + .get("name") + .and_then(|v| v.as_str()) + .ok_or_else(|| JsonReadError::MissingField("name in file entry".to_string()))? 
+ .to_string(); + + filenames.push(filename.clone()); + + // Try to extract FileInformation fields + let has_line_breaks = file_map.get("line_breaks").is_some(); + let has_total_length = file_map.get("total_length").is_some(); + + if has_line_breaks && has_total_length { + // Deserialize FileInformation from the fields + let line_breaks: Vec = serde_json::from_value( + file_map.get("line_breaks").unwrap().clone(), + ) + .map_err(|_| { + JsonReadError::InvalidType("line_breaks must be array of numbers".to_string()) + })?; - let filenames = filenames_arr - .iter() - .map(|v| { - v.as_str() - .ok_or_else(|| JsonReadError::InvalidType("filename must be string".to_string())) - .map(|s| s.to_string()) - }) - .collect::>>()?; + let total_length: usize = serde_json::from_value( + file_map.get("total_length").unwrap().clone(), + ) + .map_err(|_| JsonReadError::InvalidType("total_length must be number".to_string()))?; + + let file_info = + quarto_source_map::FileInformation::from_parts(line_breaks, total_length); + source_context.add_file_with_info(filename, file_info); + } else { + // No FileInformation - try to read from disk + source_context.add_file(filename, None); + } + } Ok(ASTContext { filenames, example_list_counter: std::cell::Cell::new(1), - source_context: quarto_source_map::SourceContext::new(), + source_context, }) } @@ -1314,11 +1311,18 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1328,7 +1332,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1338,7 +1342,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1354,7 +1358,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result>>()?; Ok(Block::LineBlock(LineBlock { content, - source_info: make_source_info(filename_index, range), + source_info, })) } "CodeBlock" => { @@ -1379,7 +1383,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1409,7 +1413,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1419,7 +1423,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1439,7 +1443,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1449,7 +1453,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1477,7 +1481,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result>>()?; Ok(Block::DefinitionList(DefinitionList { content, - source_info: make_source_info(filename_index, range), + source_info, })) } "Header" => { @@ -1501,12 +1505,10 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result Ok(Block::HorizontalRule(HorizontalRule { - source_info: make_source_info(filename_index, range), - })), + "HorizontalRule" => Ok(Block::HorizontalRule(HorizontalRule { source_info })), "Figure" => { let c = obj .get("c") @@ -1526,7 +1528,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1566,7 +1568,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1586,7 +1588,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { @@ -1595,10 +1597,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result { let c = obj @@ -1623,7 +1622,7 @@ fn read_block(value: &Value, deserializer: &SourceInfoDeserializer) -> Result Result usize { let mut current_row = 0; let mut current_col 
= 0; - let mut byte_offset = 0; for (i, ch) in input.char_indices() { if current_row == row && current_col == column { @@ -505,16 +552,25 @@ fn calculate_byte_offset(input: &str, row: usize, column: usize) -> usize { } if ch == '\n' { + current_col += 1; + // Check if the target is at the newline position (end of line) + if current_row == row && current_col == column { + return i; + } current_row += 1; current_col = 0; } else { current_col += 1; } - byte_offset = i; } - // Return the position even if we're past the end - byte_offset + 1 + // If we're looking for EOF position, return the length + if current_row == row && current_col == column { + return input.len(); + } + + // If we couldn't find the position, clamp to EOF + input.len() } // Helper function to produce JSON-formatted error messages for use as a closure diff --git a/crates/quarto-markdown-pandoc/src/writers/json.rs b/crates/quarto-markdown-pandoc/src/writers/json.rs index 429d57f..d0ebba6 100644 --- a/crates/quarto-markdown-pandoc/src/writers/json.rs +++ b/crates/quarto-markdown-pandoc/src/writers/json.rs @@ -6,7 +6,7 @@ use crate::pandoc::{ ASTContext, Attr, Block, Caption, CitationMode, Inline, Inlines, ListAttributes, Pandoc, }; -use quarto_source_map::{FileId, Range, RangeMapping, SourceInfo, SourceMapping}; +use quarto_source_map::{FileId, SourceInfo}; use serde::Serialize; use serde_json::{Value, json}; use std::collections::HashMap; @@ -17,11 +17,15 @@ use std::collections::HashMap; /// Each unique SourceInfo is assigned an ID and stored in a pool. References to parent /// SourceInfo objects are replaced with parent_id integers. /// -/// Serializes in compact format: {"r": [6 range values], "t": type_code, "d": type_data} +/// Serializes in compact format: {"r": [2 offset values], "t": type_code, "d": type_data} /// The ID is implicit from the array index in the pool. +/// +/// Note: Row/column information is not stored in the serialized format. +/// To get row/column, the reader must map offsets through the SourceContext. 
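This is where the note above about recovering rows and columns becomes concrete: once only offsets are stored, position reporting needs the per-file line-break table that the new `files` serialization below carries. A hypothetical standalone version of that lookup — the real one presumably lives behind `SourceContext`/`FileInformation`:

```rust
/// Sketch of the offset -> (row, column) recovery described above.
/// `line_breaks[i]` is assumed to be the byte offset of the i-th '\n',
/// matching the "line_breaks" arrays serialized further down.
fn offset_to_row_col(line_breaks: &[usize], offset: usize) -> (usize, usize) {
    // The row is the number of line breaks strictly before `offset`;
    // a '\n' itself counts as sitting on the row it terminates.
    let row = match line_breaks.binary_search(&offset) {
        Ok(i) | Err(i) => i,
    };
    let row_start = if row == 0 { 0 } else { line_breaks[row - 1] + 1 };
    (row, offset - row_start)
}

#[test]
fn recovers_positions_from_001_snapshot_data() {
    // From the 001.qmd snapshot below: one line break at 24, length 25.
    let breaks = [24];
    assert_eq!(offset_to_row_col(&breaks, 0), (0, 0));
    assert_eq!(offset_to_row_col(&breaks, 10), (0, 10));
    assert_eq!(offset_to_row_col(&breaks, 25), (1, 0));
}
```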
struct SerializableSourceInfo { id: usize, - range: Range, + start_offset: usize, + end_offset: usize, mapping: SerializableSourceMapping, } @@ -33,16 +37,9 @@ impl Serialize for SerializableSourceInfo { use serde::ser::SerializeMap; let mut map = serializer.serialize_map(Some(3))?; - // Serialize range as array [start_offset, start_row, start_col, end_offset, end_row, end_col] - let range_array = [ - self.range.start.offset, - self.range.start.row, - self.range.start.column, - self.range.end.offset, - self.range.end.row, - self.range.end.column, - ]; - map.serialize_entry("r", &range_array)?; + // Serialize offsets as array [start_offset, end_offset] + let offset_array = [self.start_offset, self.end_offset]; + map.serialize_entry("r", &offset_array)?; // Serialize type code and data based on mapping variant match &self.mapping { @@ -50,9 +47,9 @@ impl Serialize for SerializableSourceInfo { map.serialize_entry("t", &0)?; map.serialize_entry("d", &file_id.0)?; } - SerializableSourceMapping::Substring { parent_id, offset } => { + SerializableSourceMapping::Substring { parent_id } => { map.serialize_entry("t", &1)?; - map.serialize_entry("d", &[parent_id, offset])?; + map.serialize_entry("d", parent_id)?; } SerializableSourceMapping::Concat { pieces } => { map.serialize_entry("t", &2)?; @@ -62,14 +59,6 @@ impl Serialize for SerializableSourceInfo { .collect(); map.serialize_entry("d", &piece_arrays)?; } - SerializableSourceMapping::Transformed { parent_id, mapping } => { - map.serialize_entry("t", &3)?; - let mapping_arrays: Vec<[usize; 4]> = mapping - .iter() - .map(|m| [m.from_start, m.from_end, m.to_start, m.to_end]) - .collect(); - map.serialize_entry("d", &[json!(*parent_id), json!(mapping_arrays)])?; - } } map.end() @@ -83,15 +72,10 @@ enum SerializableSourceMapping { }, Substring { parent_id: usize, - offset: usize, }, Concat { pieces: Vec, }, - Transformed { - parent_id: usize, - mapping: Vec, - }, } /// Serializable version of SourcePiece that uses source_info_id instead of SourceInfo. 
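Before the interning hunk below, a small self-contained check of the invariant the deserializer earlier in this diff depends on — "parents must come before children" — using pool entries in the new compact shape. The entry values are copied from the updated snapshots at the end of this diff; `Concat` piece checking is deliberately elided:

```rust
use serde_json::{json, Value};

/// The reader resolves "d" parent ids by pool index, which is only sound
/// if every parent is interned before its children. This mirrors that
/// invariant as a standalone check.
fn pool_parents_precede_children(pool: &[Value]) -> bool {
    pool.iter().enumerate().all(|(i, entry)| match entry["t"].as_u64() {
        // t=1 (Substring): "d" is the parent's pool index.
        Some(1) => entry["d"].as_u64().map_or(false, |p| (p as usize) < i),
        // t=0 (Original) has no parent; t=2 (Concat) pieces are checked
        // the same way in the real reader, elided here for brevity.
        _ => true,
    })
}

fn main() {
    let pool = vec![
        json!({"d": 0, "r": [0, 63], "t": 0}), // Original: d = file_id
        json!({"d": 0, "r": [4, 20], "t": 1}), // Substring: d = parent id
        json!({"d": 1, "r": [7, 16], "t": 1}),
    ];
    assert!(pool_parents_precede_children(&pool));
    println!("pool ordering ok");
}
```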
@@ -137,26 +121,30 @@ impl SourceInfoSerializer { return id; } - // Recursively intern parents and build the serializable mapping - let mapping = match &source_info.mapping { - SourceMapping::Original { file_id } => { - SerializableSourceMapping::Original { file_id: *file_id } - } - SourceMapping::Substring { parent, offset } => { - let parent_id = self.intern(parent); - SerializableSourceMapping::Substring { - parent_id, - offset: *offset, - } - } - SourceMapping::Transformed { parent, mapping } => { + // Extract offsets and recursively intern parents to build the serializable mapping + let (start_offset, end_offset, mapping) = match source_info { + SourceInfo::Original { + file_id, + start_offset, + end_offset, + } => ( + *start_offset, + *end_offset, + SerializableSourceMapping::Original { file_id: *file_id }, + ), + SourceInfo::Substring { + parent, + start_offset, + end_offset, + } => { let parent_id = self.intern(parent); - SerializableSourceMapping::Transformed { - parent_id, - mapping: mapping.clone(), - } + ( + *start_offset, + *end_offset, + SerializableSourceMapping::Substring { parent_id }, + ) } - SourceMapping::Concat { pieces } => { + SourceInfo::Concat { pieces } => { let serializable_pieces = pieces .iter() .map(|piece| SerializableSourcePiece { @@ -165,9 +153,13 @@ impl SourceInfoSerializer { length: piece.length, }) .collect(); - SerializableSourceMapping::Concat { - pieces: serializable_pieces, - } + ( + 0, + pieces.iter().map(|p| p.length).sum(), + SerializableSourceMapping::Concat { + pieces: serializable_pieces, + }, + ) } }; @@ -177,7 +169,8 @@ impl SourceInfoSerializer { // Add to pool self.pool.push(SerializableSourceInfo { id, - range: source_info.range.clone(), + start_offset, + end_offset, mapping, }); @@ -194,24 +187,30 @@ impl SourceInfoSerializer { } } -fn write_location(source_info: &quarto_source_map::SourceInfo) -> Value { - // Extract filename index by walking to the Original mapping - let filename_index = crate::pandoc::location::extract_filename_index(source_info); - - json!({ - "start": { - "offset": source_info.range.start.offset, - "row": source_info.range.start.row, - "column": source_info.range.start.column, - }, - "end": { - "offset": source_info.range.end.offset, - "row": source_info.range.end.row, - "column": source_info.range.end.column, - }, - "filenameIndex": filename_index, - }) -} +// NOTE: This function is currently unused and would need a SourceContext parameter +// to map offsets to row/column positions. Commenting out for now. 
+// fn write_location(source_info: &quarto_source_map::SourceInfo, ctx: &SourceContext) -> Value { +// // Extract filename index by walking to the Original mapping +// let filename_index = crate::pandoc::location::extract_filename_index(source_info); +// +// // Map start and end offsets to locations with row/column +// let start_mapped = source_info.map_offset(0, ctx).unwrap(); +// let end_mapped = source_info.map_offset(source_info.length(), ctx).unwrap(); +// +// json!({ +// "start": { +// "offset": source_info.start_offset(), +// "row": start_mapped.location.row, +// "column": start_mapped.location.column, +// }, +// "end": { +// "offset": source_info.end_offset(), +// "row": end_mapped.location.row, +// "column": end_mapped.location.column, +// }, +// "filenameIndex": filename_index, +// }) +// } fn write_attr(attr: &Attr) -> Value { json!([ @@ -730,7 +729,34 @@ fn write_pandoc(pandoc: &Pandoc, context: &ASTContext) -> Value { // Build astContext with pool and metaTopLevelKeySources let mut ast_context_obj = serde_json::Map::new(); - ast_context_obj.insert("filenames".to_string(), json!(context.filenames)); + + // Serialize files array combining filenames and FileInformation + // Each file entry has: "name", "line_breaks", "total_length" + let files_array: Vec = (0..context.filenames.len()) + .map(|idx| { + let filename = &context.filenames[idx]; + let file_info = context + .source_context + .get_file(quarto_source_map::FileId(idx)) + .and_then(|file| file.file_info.as_ref()); + + if let Some(info) = file_info { + // File with FileInformation - serialize everything + json!({ + "name": filename, + "line_breaks": info.line_breaks(), + "total_length": info.total_length() + }) + } else { + // File without FileInformation - just the name + json!({ + "name": filename + }) + } + }) + .collect(); + + ast_context_obj.insert("files".to_string(), json!(files_array)); // Only include sourceInfoPool if non-empty if !serializer.pool.is_empty() { diff --git a/crates/quarto-markdown-pandoc/tests/snapshots/json/001.qmd.snapshot b/crates/quarto-markdown-pandoc/tests/snapshots/json/001.qmd.snapshot index 2fb967c..f0e9efb 100644 --- a/crates/quarto-markdown-pandoc/tests/snapshots/json/001.qmd.snapshot +++ b/crates/quarto-markdown-pandoc/tests/snapshots/json/001.qmd.snapshot @@ -1 +1 @@ -{"astContext":{"filenames":["tests/snapshots/json/001.qmd"],"sourceInfoPool":[{"d":0,"r":[0,0,0,4,0,4],"t":0},{"d":0,"r":[4,0,4,5,0,5],"t":0},{"d":0,"r":[5,0,5,7,0,7],"t":0},{"d":0,"r":[7,0,7,8,0,8],"t":0},{"d":0,"r":[8,0,8,9,0,9],"t":0},{"d":0,"r":[9,0,9,10,0,10],"t":0},{"d":0,"r":[12,0,12,16,0,16],"t":0},{"d":0,"r":[10,0,10,18,0,18],"t":0},{"d":0,"r":[18,0,18,19,0,19],"t":0},{"d":0,"r":[19,0,19,23,0,23],"t":0},{"d":0,"r":[23,0,23,24,0,24],"t":0},{"d":[[9,0,4],[10,4,1]],"r":[0,0,0,5,0,0],"t":2},{"d":0,"r":[0,0,0,25,1,0],"t":0}]},"blocks":[{"c":[{"c":"This","s":0,"t":"Str"},{"s":1,"t":"Space"},{"c":"is","s":2,"t":"Str"},{"s":3,"t":"Space"},{"c":"a","s":4,"t":"Str"},{"s":5,"t":"Space"},{"c":[{"c":"bold","s":6,"t":"Str"}],"s":7,"t":"Strong"},{"s":8,"t":"Space"},{"c":"test.","s":11,"t":"Str"}],"s":12,"t":"Para"}],"meta":{},"pandoc-api-version":[1,23,1]} \ No newline at end of file 
+{"astContext":{"files":[{"line_breaks":[24],"name":"tests/snapshots/json/001.qmd","total_length":25}],"sourceInfoPool":[{"d":0,"r":[0,4],"t":0},{"d":0,"r":[4,5],"t":0},{"d":0,"r":[5,7],"t":0},{"d":0,"r":[7,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":0,"r":[9,10],"t":0},{"d":0,"r":[12,16],"t":0},{"d":0,"r":[10,18],"t":0},{"d":0,"r":[18,19],"t":0},{"d":0,"r":[19,23],"t":0},{"d":0,"r":[23,24],"t":0},{"d":[[9,0,4],[10,4,1]],"r":[0,5],"t":2},{"d":0,"r":[0,25],"t":0}]},"blocks":[{"c":[{"c":"This","s":0,"t":"Str"},{"s":1,"t":"Space"},{"c":"is","s":2,"t":"Str"},{"s":3,"t":"Space"},{"c":"a","s":4,"t":"Str"},{"s":5,"t":"Space"},{"c":[{"c":"bold","s":6,"t":"Str"}],"s":7,"t":"Strong"},{"s":8,"t":"Space"},{"c":"test.","s":11,"t":"Str"}],"s":12,"t":"Para"}],"meta":{},"pandoc-api-version":[1,23,1]} \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/snapshots/json/002.qmd.snapshot b/crates/quarto-markdown-pandoc/tests/snapshots/json/002.qmd.snapshot index 80f92d9..3fd26c2 100644 --- a/crates/quarto-markdown-pandoc/tests/snapshots/json/002.qmd.snapshot +++ b/crates/quarto-markdown-pandoc/tests/snapshots/json/002.qmd.snapshot @@ -1 +1 @@ -{"astContext":{"filenames":["tests/snapshots/json/002.qmd"],"metaTopLevelKeySources":{"nested":14,"title":12},"sourceInfoPool":[{"d":0,"r":[0,0,0,8,0,8],"t":0},{"d":0,"r":[8,0,8,9,0,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,0,0,9,0,0],"t":2},{"d":0,"r":[0,0,0,63,11,0],"t":0},{"d":[3,4],"r":[0,0,0,16,0,0],"t":1},{"d":[4,7],"r":[0,0,0,9,0,0],"t":1},{"d":0,"r":[0,0,0,4,0,4],"t":0},{"d":0,"r":[37,6,0,58,9,0],"t":0},{"d":[7,4],"r":[0,0,0,12,0,0],"t":1},{"d":[8,8],"r":[0,0,0,4,0,0],"t":1},{"d":0,"r":[26,4,0,63,11,0],"t":0},{"d":[3,4],"r":[0,0,0,16,0,0],"t":1},{"d":[11,0],"r":[0,0,0,5,0,0],"t":1},{"d":[7,4],"r":[0,0,0,12,0,0],"t":1},{"d":[13,0],"r":[0,0,0,6,0,0],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[]],"s":10,"t":"Div"}],"meta":{"nested":{"c":[{"c":"meta","s":6,"t":"Str"}],"s":9,"t":"MetaInlines"},"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]} \ No newline at end of file +{"astContext":{"files":[{"line_breaks":[3,20,24,25,35,36,40,53,57,58,62],"name":"tests/snapshots/json/002.qmd","total_length":63}],"metaTopLevelKeySources":{"nested":14,"title":12},"sourceInfoPool":[{"d":0,"r":[0,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,9],"t":2},{"d":0,"r":[0,63],"t":0},{"d":3,"r":[4,20],"t":1},{"d":4,"r":[7,16],"t":1},{"d":0,"r":[0,4],"t":0},{"d":0,"r":[37,58],"t":0},{"d":7,"r":[4,16],"t":1},{"d":8,"r":[8,12],"t":1},{"d":0,"r":[26,63],"t":0},{"d":3,"r":[4,20],"t":1},{"d":11,"r":[0,5],"t":1},{"d":7,"r":[4,16],"t":1},{"d":13,"r":[0,6],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[]],"s":10,"t":"Div"}],"meta":{"nested":{"c":[{"c":"meta","s":6,"t":"Str"}],"s":9,"t":"MetaInlines"},"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]} \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/snapshots/json/003.qmd.snapshot b/crates/quarto-markdown-pandoc/tests/snapshots/json/003.qmd.snapshot index 26dc690..60aea7b 100644 --- a/crates/quarto-markdown-pandoc/tests/snapshots/json/003.qmd.snapshot +++ b/crates/quarto-markdown-pandoc/tests/snapshots/json/003.qmd.snapshot @@ -1 +1 @@ 
-{"astContext":{"filenames":["tests/snapshots/json/003.qmd"],"metaTopLevelKeySources":{"title":22},"sourceInfoPool":[{"d":0,"r":[0,0,0,8,0,8],"t":0},{"d":0,"r":[8,0,8,9,0,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,0,0,9,0,0],"t":2},{"d":0,"r":[0,0,0,79,12,0],"t":0},{"d":[3,4],"r":[0,0,0,16,0,0],"t":1},{"d":[4,7],"r":[0,0,0,9,0,0],"t":1},{"d":0,"r":[37,6,0,74,10,0],"t":0},{"d":[6,4],"r":[0,0,0,28,0,0],"t":1},{"d":[7,0],"r":[0,0,0,6,0,0],"t":1},{"d":0,"r":[0,0,0,7,0,7],"t":0},{"d":[6,4],"r":[0,0,0,28,0,0],"t":1},{"d":[10,8],"r":[0,0,0,7,0,0],"t":1},{"d":[6,4],"r":[0,0,0,28,0,0],"t":1},{"d":[12,16],"r":[0,0,0,6,0,0],"t":1},{"d":0,"r":[0,0,0,4,0,4],"t":0},{"d":[6,4],"r":[0,0,0,28,0,0],"t":1},{"d":[15,24],"r":[0,0,0,4,0,0],"t":1},{"d":[6,4],"r":[0,0,0,28,0,0],"t":1},{"d":[17,6],"r":[0,0,0,22,0,0],"t":1},{"d":0,"r":[37,6,0,74,10,0],"t":0},{"d":0,"r":[26,4,0,79,12,0],"t":0},{"d":[3,4],"r":[0,0,0,16,0,0],"t":1},{"d":[21,0],"r":[0,0,0,5,0,0],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[{"c":{"c":[{"key":"_scope","key_source":8,"value":{"c":[{"c":"lexical","s":9,"t":"Str"}],"s":11,"t":"MetaInlines"}},{"key":"nested","key_source":13,"value":{"c":[{"c":"meta","s":14,"t":"Str"}],"s":16,"t":"MetaInlines"}}],"s":18,"t":"MetaMap"},"s":19,"t":"BlockMetadata"}]],"s":20,"t":"Div"}],"meta":{"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]} \ No newline at end of file +{"astContext":{"files":[{"line_breaks":[3,20,24,25,35,36,40,56,69,73,74,78],"name":"tests/snapshots/json/003.qmd","total_length":79}],"metaTopLevelKeySources":{"title":22},"sourceInfoPool":[{"d":0,"r":[0,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,9],"t":2},{"d":0,"r":[0,79],"t":0},{"d":3,"r":[4,20],"t":1},{"d":4,"r":[7,16],"t":1},{"d":0,"r":[37,74],"t":0},{"d":6,"r":[4,32],"t":1},{"d":7,"r":[0,6],"t":1},{"d":0,"r":[0,7],"t":0},{"d":6,"r":[4,32],"t":1},{"d":10,"r":[8,15],"t":1},{"d":6,"r":[4,32],"t":1},{"d":12,"r":[16,22],"t":1},{"d":0,"r":[0,4],"t":0},{"d":6,"r":[4,32],"t":1},{"d":15,"r":[24,28],"t":1},{"d":6,"r":[4,32],"t":1},{"d":17,"r":[6,28],"t":1},{"d":0,"r":[37,74],"t":0},{"d":0,"r":[26,79],"t":0},{"d":3,"r":[4,20],"t":1},{"d":21,"r":[0,5],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[{"c":{"c":[{"key":"_scope","key_source":8,"value":{"c":[{"c":"lexical","s":9,"t":"Str"}],"s":11,"t":"MetaInlines"}},{"key":"nested","key_source":13,"value":{"c":[{"c":"meta","s":14,"t":"Str"}],"s":16,"t":"MetaInlines"}}],"s":18,"t":"MetaMap"},"s":19,"t":"BlockMetadata"}]],"s":20,"t":"Div"}],"meta":{"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]} \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/snapshots/json/math-with-attr.qmd.snapshot b/crates/quarto-markdown-pandoc/tests/snapshots/json/math-with-attr.qmd.snapshot index b38b9bf..a65988a 100644 --- a/crates/quarto-markdown-pandoc/tests/snapshots/json/math-with-attr.qmd.snapshot +++ b/crates/quarto-markdown-pandoc/tests/snapshots/json/math-with-attr.qmd.snapshot @@ -1 +1 @@ 
-{"astContext":{"filenames":["tests/snapshots/json/math-with-attr.qmd"],"sourceInfoPool":[{"d":0,"r":[0,0,0,6,0,6],"t":0},{"d":0,"r":[6,0,6,7,0,7],"t":0},{"d":0,"r":[7,0,7,11,0,11],"t":0},{"d":0,"r":[11,0,11,12,0,12],"t":0},{"d":0,"r":[12,0,12,16,0,16],"t":0},{"d":0,"r":[16,0,16,17,0,17],"t":0},{"d":0,"r":[17,0,17,26,0,26],"t":0},{"d":0,"r":[26,0,26,27,0,27],"t":0},{"d":[[6,0,9],[7,9,1]],"r":[0,0,0,10,0,0],"t":2},{"d":0,"r":[27,0,27,28,0,28],"t":0},{"d":0,"r":[28,0,28,38,0,38],"t":0},{"d":0,"r":[0,0,0,0,0,0],"t":0},{"d":0,"r":[0,0,0,54,1,0],"t":0},{"d":0,"r":[55,2,0,62,2,7],"t":0},{"d":0,"r":[62,2,7,63,2,8],"t":0},{"d":0,"r":[63,2,8,67,2,12],"t":0},{"d":0,"r":[67,2,12,68,2,13],"t":0},{"d":0,"r":[68,2,13,72,2,17],"t":0},{"d":0,"r":[72,2,17,73,2,18],"t":0},{"d":0,"r":[73,2,18,82,2,27],"t":0},{"d":0,"r":[82,2,27,83,2,28],"t":0},{"d":[[19,0,9],[20,9,1]],"r":[0,0,0,10,0,0],"t":2},{"d":0,"r":[55,2,0,84,3,0],"t":0},{"d":0,"r":[85,4,0,139,6,2],"t":0},{"d":0,"r":[0,0,0,0,0,0],"t":0},{"d":0,"r":[85,4,0,155,7,0],"t":0},{"d":0,"r":[156,8,0,163,8,7],"t":0},{"d":0,"r":[163,8,7,164,8,8],"t":0},{"d":0,"r":[164,8,8,170,8,14],"t":0},{"d":0,"r":[170,8,14,171,8,15],"t":0},{"d":0,"r":[171,8,15,178,8,22],"t":0},{"d":0,"r":[178,8,22,179,8,23],"t":0},{"d":[[30,0,7],[31,7,1]],"r":[0,0,0,8,0,0],"t":2},{"d":0,"r":[179,8,23,180,8,24],"t":0},{"d":0,"r":[180,8,24,197,8,41],"t":0},{"d":0,"r":[0,0,0,0,0,0],"t":0},{"d":0,"r":[156,8,0,216,9,0],"t":0}]},"blocks":[{"c":[{"c":"Inline","s":0,"t":"Str"},{"s":1,"t":"Space"},{"c":"math","s":2,"t":"Str"},{"s":3,"t":"Space"},{"c":"with","s":4,"t":"Str"},{"s":5,"t":"Space"},{"c":"attribute:","s":8,"t":"Str"},{"s":9,"t":"Space"},{"c":[["eq-einstein",["quarto-math-with-attribute"],[]],[{"c":[{"t":"InlineMath"},"E = mc^2"],"s":10,"t":"Math"}]],"s":11,"t":"Span"}],"s":12,"t":"Para"},{"c":[{"c":"Display","s":13,"t":"Str"},{"s":14,"t":"Space"},{"c":"math","s":15,"t":"Str"},{"s":16,"t":"Space"},{"c":"with","s":17,"t":"Str"},{"s":18,"t":"Space"},{"c":"attribute:","s":21,"t":"Str"}],"s":22,"t":"Para"},{"c":[{"c":[["eq-gaussian",["quarto-math-with-attribute"],[]],[{"c":[{"t":"DisplayMath"},"\n\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}\n"],"s":23,"t":"Math"}]],"s":24,"t":"Span"}],"s":25,"t":"Para"},{"c":[{"c":"Another","s":26,"t":"Str"},{"s":27,"t":"Space"},{"c":"inline","s":28,"t":"Str"},{"s":29,"t":"Space"},{"c":"example:","s":32,"t":"Str"},{"s":33,"t":"Space"},{"c":[["eq-pythagorean",["quarto-math-with-attribute"],[]],[{"c":[{"t":"InlineMath"},"a^2 + b^2 = c^2"],"s":34,"t":"Math"}]],"s":35,"t":"Span"}],"s":36,"t":"Para"}],"meta":{},"pandoc-api-version":[1,23,1]} \ No newline at end of file 
+{"astContext":{"files":[{"line_breaks":[53,54,83,84,87,136,154,155,215],"name":"tests/snapshots/json/math-with-attr.qmd","total_length":216}],"sourceInfoPool":[{"d":0,"r":[0,6],"t":0},{"d":0,"r":[6,7],"t":0},{"d":0,"r":[7,11],"t":0},{"d":0,"r":[11,12],"t":0},{"d":0,"r":[12,16],"t":0},{"d":0,"r":[16,17],"t":0},{"d":0,"r":[17,26],"t":0},{"d":0,"r":[26,27],"t":0},{"d":[[6,0,9],[7,9,1]],"r":[0,10],"t":2},{"d":0,"r":[27,28],"t":0},{"d":0,"r":[28,38],"t":0},{"d":0,"r":[0,0],"t":0},{"d":0,"r":[0,54],"t":0},{"d":0,"r":[55,62],"t":0},{"d":0,"r":[62,63],"t":0},{"d":0,"r":[63,67],"t":0},{"d":0,"r":[67,68],"t":0},{"d":0,"r":[68,72],"t":0},{"d":0,"r":[72,73],"t":0},{"d":0,"r":[73,82],"t":0},{"d":0,"r":[82,83],"t":0},{"d":[[19,0,9],[20,9,1]],"r":[0,10],"t":2},{"d":0,"r":[55,84],"t":0},{"d":0,"r":[85,139],"t":0},{"d":0,"r":[0,0],"t":0},{"d":0,"r":[85,155],"t":0},{"d":0,"r":[156,163],"t":0},{"d":0,"r":[163,164],"t":0},{"d":0,"r":[164,170],"t":0},{"d":0,"r":[170,171],"t":0},{"d":0,"r":[171,178],"t":0},{"d":0,"r":[178,179],"t":0},{"d":[[30,0,7],[31,7,1]],"r":[0,8],"t":2},{"d":0,"r":[179,180],"t":0},{"d":0,"r":[180,197],"t":0},{"d":0,"r":[0,0],"t":0},{"d":0,"r":[156,216],"t":0}]},"blocks":[{"c":[{"c":"Inline","s":0,"t":"Str"},{"s":1,"t":"Space"},{"c":"math","s":2,"t":"Str"},{"s":3,"t":"Space"},{"c":"with","s":4,"t":"Str"},{"s":5,"t":"Space"},{"c":"attribute:","s":8,"t":"Str"},{"s":9,"t":"Space"},{"c":[["eq-einstein",["quarto-math-with-attribute"],[]],[{"c":[{"t":"InlineMath"},"E = mc^2"],"s":10,"t":"Math"}]],"s":11,"t":"Span"}],"s":12,"t":"Para"},{"c":[{"c":"Display","s":13,"t":"Str"},{"s":14,"t":"Space"},{"c":"math","s":15,"t":"Str"},{"s":16,"t":"Space"},{"c":"with","s":17,"t":"Str"},{"s":18,"t":"Space"},{"c":"attribute:","s":21,"t":"Str"}],"s":22,"t":"Para"},{"c":[{"c":[["eq-gaussian",["quarto-math-with-attribute"],[]],[{"c":[{"t":"DisplayMath"},"\n\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}\n"],"s":23,"t":"Math"}]],"s":24,"t":"Span"}],"s":25,"t":"Para"},{"c":[{"c":"Another","s":26,"t":"Str"},{"s":27,"t":"Space"},{"c":"inline","s":28,"t":"Str"},{"s":29,"t":"Space"},{"c":"example:","s":32,"t":"Str"},{"s":33,"t":"Space"},{"c":[["eq-pythagorean",["quarto-math-with-attribute"],[]],[{"c":[{"t":"InlineMath"},"a^2 + b^2 = c^2"],"s":34,"t":"Math"}]],"s":35,"t":"Span"}],"s":36,"t":"Para"}],"meta":{},"pandoc-api-version":[1,23,1]} \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/snapshots/json/table-alignment.qmd.snapshot b/crates/quarto-markdown-pandoc/tests/snapshots/json/table-alignment.qmd.snapshot index e69e8dc..e5d955f 100644 --- a/crates/quarto-markdown-pandoc/tests/snapshots/json/table-alignment.qmd.snapshot +++ b/crates/quarto-markdown-pandoc/tests/snapshots/json/table-alignment.qmd.snapshot @@ -1 +1 @@ 
-{"astContext":{"filenames":["tests/snapshots/json/table-alignment.qmd"],"sourceInfoPool":[{"d":0,"r":[2,0,2,7,0,7],"t":0},{"d":0,"r":[2,0,2,8,0,8],"t":0},{"d":0,"r":[10,0,10,14,0,14],"t":0},{"d":0,"r":[10,0,10,15,0,15],"t":0},{"d":0,"r":[17,0,17,23,0,23],"t":0},{"d":0,"r":[17,0,17,24,0,24],"t":0},{"d":0,"r":[26,0,26,33,0,33],"t":0},{"d":0,"r":[26,0,26,34,0,34],"t":0},{"d":0,"r":[74,2,2,75,2,3],"t":0},{"d":0,"r":[75,2,3,76,2,4],"t":0},{"d":[[8,0,1],[9,1,1]],"r":[0,0,0,2,0,0],"t":2},{"d":0,"r":[74,2,2,80,2,8],"t":0},{"d":0,"r":[82,2,10,83,2,11],"t":0},{"d":0,"r":[83,2,11,84,2,12],"t":0},{"d":[[12,0,1],[13,1,1]],"r":[0,0,0,2,0,0],"t":2},{"d":0,"r":[82,2,10,87,2,15],"t":0},{"d":0,"r":[89,2,17,90,2,18],"t":0},{"d":0,"r":[90,2,18,91,2,19],"t":0},{"d":[[16,0,1],[17,1,1]],"r":[0,0,0,2,0,0],"t":2},{"d":0,"r":[89,2,17,96,2,24],"t":0},{"d":0,"r":[98,2,26,99,2,27],"t":0},{"d":0,"r":[99,2,27,100,2,28],"t":0},{"d":[[20,0,1],[21,1,1]],"r":[0,0,0,2,0,0],"t":2},{"d":0,"r":[98,2,26,106,2,34],"t":0},{"d":0,"r":[110,3,2,111,3,3],"t":0},{"d":0,"r":[111,3,3,112,3,4],"t":0},{"d":[[24,0,1],[25,1,1]],"r":[0,0,0,2,0,0],"t":2},{"d":0,"r":[110,3,2,116,3,8],"t":0},{"d":0,"r":[118,3,10,119,3,11],"t":0},{"d":0,"r":[119,3,11,120,3,12],"t":0},{"d":[[28,0,1],[29,1,1]],"r":[0,0,0,2,0,0],"t":2},{"d":0,"r":[118,3,10,123,3,15],"t":0},{"d":0,"r":[125,3,17,126,3,18],"t":0},{"d":0,"r":[126,3,18,127,3,19],"t":0},{"d":[[32,0,1],[33,1,1]],"r":[0,0,0,2,0,0],"t":2},{"d":0,"r":[125,3,17,132,3,24],"t":0},{"d":0,"r":[134,3,26,135,3,27],"t":0},{"d":0,"r":[135,3,27,136,3,28],"t":0},{"d":[[36,0,1],[37,1,1]],"r":[0,0,0,2,0,0],"t":2},{"d":0,"r":[134,3,26,142,3,34],"t":0},{"d":0,"r":[0,0,0,144,4,0],"t":0}]},"blocks":[{"c":[["",[],[]],[null,[]],[[{"t":"AlignRight"},{"t":"ColWidthDefault"}],[{"t":"AlignLeft"},{"t":"ColWidthDefault"}],[{"t":"AlignCenter"},{"t":"ColWidthDefault"}],[{"t":"AlignDefault"},{"t":"ColWidthDefault"}]],[["",[],[]],[[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Right","s":0,"t":"Str"}],"s":1,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Left","s":2,"t":"Str"}],"s":3,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Center","s":4,"t":"Str"}],"s":5,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Default","s":6,"t":"Str"}],"s":7,"t":"Plain"}]]]]]],[[["",[],[]],0,[],[[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"R1","s":10,"t":"Str"}],"s":11,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"L1","s":14,"t":"Str"}],"s":15,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"C1","s":18,"t":"Str"}],"s":19,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"D1","s":22,"t":"Str"}],"s":23,"t":"Plain"}]]]],[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"R2","s":26,"t":"Str"}],"s":27,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"L2","s":30,"t":"Str"}],"s":31,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"C2","s":34,"t":"Str"}],"s":35,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"D2","s":38,"t":"Str"}],"s":39,"t":"Plain"}]]]]]]],[["",[],[]],[]]],"s":40,"t":"Table"}],"meta":{},"pandoc-api-version":[1,23,1]} \ No newline at end of file 
+{"astContext":{"files":[{"line_breaks":[35,71,107,143],"name":"tests/snapshots/json/table-alignment.qmd","total_length":144}],"sourceInfoPool":[{"d":0,"r":[2,7],"t":0},{"d":0,"r":[2,8],"t":0},{"d":0,"r":[10,14],"t":0},{"d":0,"r":[10,15],"t":0},{"d":0,"r":[17,23],"t":0},{"d":0,"r":[17,24],"t":0},{"d":0,"r":[26,33],"t":0},{"d":0,"r":[26,34],"t":0},{"d":0,"r":[74,75],"t":0},{"d":0,"r":[75,76],"t":0},{"d":[[8,0,1],[9,1,1]],"r":[0,2],"t":2},{"d":0,"r":[74,80],"t":0},{"d":0,"r":[82,83],"t":0},{"d":0,"r":[83,84],"t":0},{"d":[[12,0,1],[13,1,1]],"r":[0,2],"t":2},{"d":0,"r":[82,87],"t":0},{"d":0,"r":[89,90],"t":0},{"d":0,"r":[90,91],"t":0},{"d":[[16,0,1],[17,1,1]],"r":[0,2],"t":2},{"d":0,"r":[89,96],"t":0},{"d":0,"r":[98,99],"t":0},{"d":0,"r":[99,100],"t":0},{"d":[[20,0,1],[21,1,1]],"r":[0,2],"t":2},{"d":0,"r":[98,106],"t":0},{"d":0,"r":[110,111],"t":0},{"d":0,"r":[111,112],"t":0},{"d":[[24,0,1],[25,1,1]],"r":[0,2],"t":2},{"d":0,"r":[110,116],"t":0},{"d":0,"r":[118,119],"t":0},{"d":0,"r":[119,120],"t":0},{"d":[[28,0,1],[29,1,1]],"r":[0,2],"t":2},{"d":0,"r":[118,123],"t":0},{"d":0,"r":[125,126],"t":0},{"d":0,"r":[126,127],"t":0},{"d":[[32,0,1],[33,1,1]],"r":[0,2],"t":2},{"d":0,"r":[125,132],"t":0},{"d":0,"r":[134,135],"t":0},{"d":0,"r":[135,136],"t":0},{"d":[[36,0,1],[37,1,1]],"r":[0,2],"t":2},{"d":0,"r":[134,142],"t":0},{"d":0,"r":[0,144],"t":0}]},"blocks":[{"c":[["",[],[]],[null,[]],[[{"t":"AlignRight"},{"t":"ColWidthDefault"}],[{"t":"AlignLeft"},{"t":"ColWidthDefault"}],[{"t":"AlignCenter"},{"t":"ColWidthDefault"}],[{"t":"AlignDefault"},{"t":"ColWidthDefault"}]],[["",[],[]],[[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Right","s":0,"t":"Str"}],"s":1,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Left","s":2,"t":"Str"}],"s":3,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Center","s":4,"t":"Str"}],"s":5,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Default","s":6,"t":"Str"}],"s":7,"t":"Plain"}]]]]]],[[["",[],[]],0,[],[[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"R1","s":10,"t":"Str"}],"s":11,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"L1","s":14,"t":"Str"}],"s":15,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"C1","s":18,"t":"Str"}],"s":19,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"D1","s":22,"t":"Str"}],"s":23,"t":"Plain"}]]]],[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"R2","s":26,"t":"Str"}],"s":27,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"L2","s":30,"t":"Str"}],"s":31,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"C2","s":34,"t":"Str"}],"s":35,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"D2","s":38,"t":"Str"}],"s":39,"t":"Plain"}]]]]]]],[["",[],[]],[]]],"s":40,"t":"Table"}],"meta":{},"pandoc-api-version":[1,23,1]} \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/snapshots/json/table-caption-attr.qmd.snapshot b/crates/quarto-markdown-pandoc/tests/snapshots/json/table-caption-attr.qmd.snapshot index 88c1c5b..4abd2aa 100644 --- a/crates/quarto-markdown-pandoc/tests/snapshots/json/table-caption-attr.qmd.snapshot +++ b/crates/quarto-markdown-pandoc/tests/snapshots/json/table-caption-attr.qmd.snapshot @@ -1 +1 @@ 
-{"astContext":{"filenames":["tests/snapshots/json/table-caption-attr.qmd"],"sourceInfoPool":[{"d":0,"r":[75,4,2,80,4,7],"t":0},{"d":0,"r":[80,4,7,81,4,8],"t":0},{"d":0,"r":[81,4,8,88,4,15],"t":0},{"d":0,"r":[88,4,15,89,4,16],"t":0},{"d":0,"r":[72,3,0,115,5,0],"t":0},{"d":0,"r":[2,0,2,8,0,8],"t":0},{"d":0,"r":[8,0,8,9,0,9],"t":0},{"d":0,"r":[9,0,9,10,0,10],"t":0},{"d":0,"r":[2,0,2,11,0,11],"t":0},{"d":0,"r":[13,0,13,19,0,19],"t":0},{"d":0,"r":[19,0,19,20,0,20],"t":0},{"d":0,"r":[20,0,20,21,0,21],"t":0},{"d":0,"r":[13,0,13,22,0,22],"t":0},{"d":0,"r":[50,2,2,54,2,6],"t":0},{"d":0,"r":[54,2,6,55,2,7],"t":0},{"d":0,"r":[55,2,7,56,2,8],"t":0},{"d":0,"r":[50,2,2,59,2,11],"t":0},{"d":0,"r":[61,2,13,65,2,17],"t":0},{"d":0,"r":[65,2,17,66,2,18],"t":0},{"d":0,"r":[66,2,18,67,2,19],"t":0},{"d":0,"r":[61,2,13,70,2,22],"t":0},{"d":0,"r":[0,0,0,72,3,0],"t":0}]},"blocks":[{"c":[["",[],[["tbl-colwidths","[30,70]"]]],[null,[{"c":[{"c":"Table","s":0,"t":"Str"},{"s":1,"t":"Space"},{"c":"caption","s":2,"t":"Str"},{"s":3,"t":"Space"}],"s":4,"t":"Plain"}]],[[{"t":"AlignDefault"},{"t":"ColWidthDefault"}],[{"t":"AlignDefault"},{"t":"ColWidthDefault"}]],[["",[],[]],[[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Column","s":5,"t":"Str"},{"s":6,"t":"Space"},{"c":"1","s":7,"t":"Str"}],"s":8,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Column","s":9,"t":"Str"},{"s":10,"t":"Space"},{"c":"2","s":11,"t":"Str"}],"s":12,"t":"Plain"}]]]]]],[[["",[],[]],0,[],[[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Data","s":13,"t":"Str"},{"s":14,"t":"Space"},{"c":"1","s":15,"t":"Str"}],"s":16,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Data","s":17,"t":"Str"},{"s":18,"t":"Space"},{"c":"2","s":19,"t":"Str"}],"s":20,"t":"Plain"}]]]]]]],[["",[],[]],[]]],"s":21,"t":"Table"}],"meta":{},"pandoc-api-version":[1,23,1]} \ No newline at end of file 
+{"astContext":{"files":[{"line_breaks":[23,47,71,72,114],"name":"tests/snapshots/json/table-caption-attr.qmd","total_length":115}],"sourceInfoPool":[{"d":0,"r":[75,80],"t":0},{"d":0,"r":[80,81],"t":0},{"d":0,"r":[81,88],"t":0},{"d":0,"r":[88,89],"t":0},{"d":0,"r":[72,115],"t":0},{"d":0,"r":[2,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":0,"r":[9,10],"t":0},{"d":0,"r":[2,11],"t":0},{"d":0,"r":[13,19],"t":0},{"d":0,"r":[19,20],"t":0},{"d":0,"r":[20,21],"t":0},{"d":0,"r":[13,22],"t":0},{"d":0,"r":[50,54],"t":0},{"d":0,"r":[54,55],"t":0},{"d":0,"r":[55,56],"t":0},{"d":0,"r":[50,59],"t":0},{"d":0,"r":[61,65],"t":0},{"d":0,"r":[65,66],"t":0},{"d":0,"r":[66,67],"t":0},{"d":0,"r":[61,70],"t":0},{"d":0,"r":[0,72],"t":0}]},"blocks":[{"c":[["",[],[["tbl-colwidths","[30,70]"]]],[null,[{"c":[{"c":"Table","s":0,"t":"Str"},{"s":1,"t":"Space"},{"c":"caption","s":2,"t":"Str"},{"s":3,"t":"Space"}],"s":4,"t":"Plain"}]],[[{"t":"AlignDefault"},{"t":"ColWidthDefault"}],[{"t":"AlignDefault"},{"t":"ColWidthDefault"}]],[["",[],[]],[[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Column","s":5,"t":"Str"},{"s":6,"t":"Space"},{"c":"1","s":7,"t":"Str"}],"s":8,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Column","s":9,"t":"Str"},{"s":10,"t":"Space"},{"c":"2","s":11,"t":"Str"}],"s":12,"t":"Plain"}]]]]]],[[["",[],[]],0,[],[[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Data","s":13,"t":"Str"},{"s":14,"t":"Space"},{"c":"1","s":15,"t":"Str"}],"s":16,"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Data","s":17,"t":"Str"},{"s":18,"t":"Space"},{"c":"2","s":19,"t":"Str"}],"s":20,"t":"Plain"}]]]]]]],[["",[],[]],[]]],"s":21,"t":"Table"}],"meta":{},"pandoc-api-version":[1,23,1]} \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/test_error_corpus.rs b/crates/quarto-markdown-pandoc/tests/test_error_corpus.rs new file mode 100644 index 0000000..9085058 --- /dev/null +++ b/crates/quarto-markdown-pandoc/tests/test_error_corpus.rs @@ -0,0 +1,233 @@ +/* + * test_error_corpus.rs + * + * Tests to verify error messages from the error corpus produce proper output + */ + +use regex::Regex; +use std::fs; +use std::path::PathBuf; + +/// Test that all files in resources/error-corpus/*.qmd produce ariadne-formatted errors +/// with file:line:column information and source code snippets. 
+#[test]
+fn test_error_corpus_ariadne_output() {
+    let corpus_dir = PathBuf::from("resources/error-corpus");
+    assert!(
+        corpus_dir.exists(),
+        "Error corpus directory should exist: {}",
+        corpus_dir.display()
+    );
+
+    // Find all .qmd files in the error corpus
+    let mut qmd_files: Vec<PathBuf> = fs::read_dir(&corpus_dir)
+        .expect("Failed to read error corpus directory")
+        .filter_map(|entry| {
+            let entry = entry.ok()?;
+            let path = entry.path();
+            if path.extension().and_then(|s| s.to_str()) == Some("qmd") {
+                Some(path)
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    qmd_files.sort();
+
+    assert!(
+        !qmd_files.is_empty(),
+        "Error corpus should contain at least one .qmd file"
+    );
+
+    // Compile regex once outside the loop
+    // Pattern matches: filename.qmd:123:456 (where 123 is line, 456 is column)
+    let location_pattern = Regex::new(r"\.qmd:\d+:\d+").expect("Invalid regex pattern");
+
+    for qmd_file in &qmd_files {
+        println!("Testing error corpus file: {}", qmd_file.display());
+
+        let content = fs::read_to_string(qmd_file)
+            .unwrap_or_else(|e| panic!("Failed to read {}: {}", qmd_file.display(), e));
+
+        // Parse the file - we expect it to fail with diagnostics
+        let result = quarto_markdown_pandoc::readers::qmd::read(
+            content.as_bytes(),
+            false, // not loose mode
+            &qmd_file.to_string_lossy(),
+            &mut std::io::sink(),
+        );
+
+        match result {
+            Ok(_) => {
+                panic!(
+                    "Expected {} to produce errors, but it parsed successfully",
+                    qmd_file.display()
+                );
+            }
+            Err(diagnostics) => {
+                assert!(
+                    !diagnostics.is_empty(),
+                    "Expected diagnostics for {}",
+                    qmd_file.display()
+                );
+
+                // Create a SourceContext for rendering
+                let mut source_context = quarto_source_map::SourceContext::new();
+                source_context.add_file(qmd_file.to_string_lossy().to_string(), Some(content));
+
+                // Render each diagnostic to text
+                // Track whether at least one diagnostic has ariadne output
+                let mut has_any_ariadne = false;
+
+                for diagnostic in &diagnostics {
+                    let text_output = diagnostic.to_text(Some(&source_context));
+
+                    // Check if this diagnostic has ariadne output
+                    // Ariadne uses box drawing characters for pretty printing
+                    let has_box_chars = text_output.contains("│")
+                        || text_output.contains("─")
+                        || text_output.contains("╭")
+                        || text_output.contains("╯");
+
+                    if has_box_chars {
+                        has_any_ariadne = true;
+
+                        // If it has ariadne output, it should have file:line:column notation
+                        assert!(
+                            location_pattern.is_match(&text_output),
+                            "Ariadne output for {} should contain file:line:column notation (pattern: .qmd:NUMBER:NUMBER). Got:\n{}",
+                            qmd_file.display(),
+                            text_output
+                        );
+                    }
+                }
+
+                // At least one diagnostic should have had ariadne output
+                assert!(
+                    has_any_ariadne,
+                    "At least one diagnostic for {} should have ariadne output",
+                    qmd_file.display()
+                );
+            }
+        }
+    }
+}
+
+/// Test that all files in resources/error-corpus/*.qmd produce JSON errors
+/// with proper source location information (file_id and offsets).
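Concretely, the test below expects each diagnostic's location to serialize as the externally tagged `Original` variant of the new offset-based `SourceInfo`. A representative value looks like the following (the offset numbers here are hypothetical; only the presence of `Original`, `file_id`, `start_offset`, and `end_offset` is asserted):

    {
      "location": {
        "Original": { "file_id": 0, "start_offset": 42, "end_offset": 57 }
      }
    }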
+#[test]
+fn test_error_corpus_json_locations() {
+    let corpus_dir = PathBuf::from("resources/error-corpus");
+    assert!(
+        corpus_dir.exists(),
+        "Error corpus directory should exist: {}",
+        corpus_dir.display()
+    );
+
+    // Find all .qmd files in the error corpus
+    let mut qmd_files: Vec<PathBuf> = fs::read_dir(&corpus_dir)
+        .expect("Failed to read error corpus directory")
+        .filter_map(|entry| {
+            let entry = entry.ok()?;
+            let path = entry.path();
+            if path.extension().and_then(|s| s.to_str()) == Some("qmd") {
+                Some(path)
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    qmd_files.sort();
+
+    assert!(
+        !qmd_files.is_empty(),
+        "Error corpus should contain at least one .qmd file"
+    );
+
+    for qmd_file in &qmd_files {
+        println!("Testing JSON error locations for: {}", qmd_file.display());
+
+        let content = fs::read_to_string(qmd_file)
+            .unwrap_or_else(|e| panic!("Failed to read {}: {}", qmd_file.display(), e));
+
+        // Parse the file - we expect it to fail with diagnostics
+        let result = quarto_markdown_pandoc::readers::qmd::read(
+            content.as_bytes(),
+            false, // not loose mode
+            &qmd_file.to_string_lossy(),
+            &mut std::io::sink(),
+        );
+
+        match result {
+            Ok(_) => {
+                panic!(
+                    "Expected {} to produce errors, but it parsed successfully",
+                    qmd_file.display()
+                );
+            }
+            Err(diagnostics) => {
+                assert!(
+                    !diagnostics.is_empty(),
+                    "Expected diagnostics for {}",
+                    qmd_file.display()
+                );
+
+                // Check each diagnostic has location information
+                for diagnostic in &diagnostics {
+                    let json_value = diagnostic.to_json();
+
+                    // Check that the main error has a location field
+                    if json_value.get("location").is_some() {
+                        let location = json_value.get("location").unwrap();
+
+                        // Should have an Original variant with file_id and offsets
+                        let original = location.get("Original");
+                        assert!(
+                            original.is_some(),
+                            "Error location for {} should have Original variant. Got:\n{}",
+                            qmd_file.display(),
+                            serde_json::to_string_pretty(&json_value).unwrap()
+                        );
+
+                        let original = original.unwrap();
+                        assert!(
+                            original.get("file_id").is_some(),
+                            "Error location for {} should have file_id. Got:\n{}",
+                            qmd_file.display(),
+                            serde_json::to_string_pretty(&json_value).unwrap()
+                        );
+                        assert!(
+                            original.get("start_offset").is_some(),
+                            "Error location for {} should have start_offset. Got:\n{}",
+                            qmd_file.display(),
+                            serde_json::to_string_pretty(&json_value).unwrap()
+                        );
+                        assert!(
+                            original.get("end_offset").is_some(),
+                            "Error location for {} should have end_offset. Got:\n{}",
+                            qmd_file.display(),
+                            serde_json::to_string_pretty(&json_value).unwrap()
+                        );
+                    }
+
+                    // Check details also have location information
+                    if let Some(details) = json_value.get("details").and_then(|d| d.as_array()) {
+                        for detail in details {
+                            if let Some(detail_loc) = detail.get("location") {
+                                let original = detail_loc.get("Original");
+                                assert!(
+                                    original.is_some(),
+                                    "Detail location for {} should have Original variant.
Got:\n{}", + qmd_file.display(), + serde_json::to_string_pretty(&json_value).unwrap() + ); + } + } + } + } + } + } + } +} diff --git a/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs b/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs index 73aed64..808a2e9 100644 --- a/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs +++ b/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs @@ -10,9 +10,13 @@ use tree_sitter_qmd::MarkdownParser; /// Helper to resolve a source info reference from the pool (compact format) /// Returns (start_offset, start_row, start_col, end_offset, end_row, end_col, type_code) +/// +/// Note: The new JSON format only stores offsets, not row/column. This function computes +/// row/column from the offsets using the FileInformation. fn resolve_source_ref( source_ref: &serde_json::Value, pool: &[serde_json::Value], + file_info: &quarto_source_map::FileInformation, ) -> (usize, usize, usize, usize, usize, usize, usize) { let ref_id = source_ref .as_u64() @@ -26,13 +30,48 @@ fn resolve_source_ref( .as_u64() .expect("Expected t to be a number") as usize; + // New format: r is [start_offset, end_offset] + let start_offset = r[0].as_u64().unwrap() as usize; + let end_offset = r[1].as_u64().unwrap() as usize; + + // For Substring type (t=1), we need to recursively resolve the offsets + // through the parent chain to get the file offsets + let (absolute_start, absolute_end) = match t { + 0 => { + // Original: offsets are already absolute in the file + (start_offset, end_offset) + } + 1 => { + // Substring: need to resolve through parent + let parent_id = source_info["d"].as_u64().unwrap() as usize; + let parent = &pool[parent_id]; + let parent_r = parent["r"].as_array().unwrap(); + let parent_start = parent_r[0].as_u64().unwrap() as usize; + // Substring offsets are relative to parent + (parent_start + start_offset, parent_start + end_offset) + } + 2 => { + // Concat: use the range directly (should span all pieces) + (start_offset, end_offset) + } + _ => panic!("Unknown source info type: {}", t), + }; + + // Compute row/column from absolute offsets using FileInformation + let start_loc = file_info + .offset_to_location(absolute_start) + .expect("Failed to convert start offset to location"); + let end_loc = file_info + .offset_to_location(absolute_end) + .expect("Failed to convert end offset to location"); + ( - r[0].as_u64().unwrap() as usize, // start_offset - r[1].as_u64().unwrap() as usize, // start_row - r[2].as_u64().unwrap() as usize, // start_col - r[3].as_u64().unwrap() as usize, // end_offset - r[4].as_u64().unwrap() as usize, // end_row - r[5].as_u64().unwrap() as usize, // end_col + start_loc.offset, + start_loc.row, + start_loc.column, + end_loc.offset, + end_loc.row, + end_loc.column, t, ) } @@ -69,6 +108,9 @@ fn test_inline_source_locations() { .as_array() .expect("Expected sourceInfoPool to be an array"); + // Create FileInformation for computing row/column from offsets + let file_info = quarto_source_map::FileInformation::new(input); + // Check that the source locations are correct for the inline nodes let blocks = json_value["blocks"].as_array().unwrap(); let para = &blocks[0]; @@ -79,7 +121,7 @@ fn test_inline_source_locations() { assert_eq!(hello_str["t"], "Str"); assert_eq!(hello_str["c"], "hello"); let (start_off, _start_row, start_col, end_off, _end_row, end_col, _type) = - resolve_source_ref(&hello_str["s"], pool); + resolve_source_ref(&hello_str["s"], pool, &file_info); assert_eq!(start_col, 0); 
assert_eq!(start_off, 0); assert_eq!(end_col, 5); @@ -89,7 +131,7 @@ fn test_inline_source_locations() { let space = &inlines[1]; assert_eq!(space["t"], "Space"); let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) = - resolve_source_ref(&space["s"], pool); + resolve_source_ref(&space["s"], pool, &file_info); assert_eq!(start_col, 5); assert_eq!(start_off, 5); assert_eq!(end_col, 6); @@ -99,7 +141,7 @@ fn test_inline_source_locations() { let emph = &inlines[2]; assert_eq!(emph["t"], "Emph"); let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) = - resolve_source_ref(&emph["s"], pool); + resolve_source_ref(&emph["s"], pool, &file_info); assert_eq!(start_col, 6); assert_eq!(start_off, 6); assert_eq!(end_col, 13); @@ -111,7 +153,7 @@ fn test_inline_source_locations() { assert_eq!(world_str["t"], "Str"); assert_eq!(world_str["c"], "world"); let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) = - resolve_source_ref(&world_str["s"], pool); + resolve_source_ref(&world_str["s"], pool, &file_info); assert_eq!(start_col, 7); assert_eq!(start_off, 7); assert_eq!(end_col, 12); @@ -122,7 +164,7 @@ fn test_inline_source_locations() { assert_eq!(period["t"], "Str"); assert_eq!(period["c"], "."); let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) = - resolve_source_ref(&period["s"], pool); + resolve_source_ref(&period["s"], pool, &file_info); assert_eq!(start_col, 13); assert_eq!(start_off, 13); assert_eq!(end_col, 14); @@ -163,6 +205,9 @@ fn test_merged_strings_preserve_location() { .as_array() .expect("Expected sourceInfoPool to be an array"); + // Create FileInformation for computing row/column from offsets + let file_info = quarto_source_map::FileInformation::new(input); + let blocks = json_value["blocks"].as_array().unwrap(); let para = &blocks[0]; let inlines = para["c"].as_array().unwrap(); @@ -176,7 +221,7 @@ fn test_merged_strings_preserve_location() { assert_eq!(hello["t"], "Str"); assert_eq!(hello["c"], "hello"); let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) = - resolve_source_ref(&hello["s"], pool); + resolve_source_ref(&hello["s"], pool, &file_info); assert_eq!(start_col, 0); assert_eq!(start_off, 0); assert_eq!(end_col, 5); @@ -191,7 +236,7 @@ fn test_merged_strings_preserve_location() { assert_eq!(world["t"], "Str"); assert_eq!(world["c"], "world"); let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) = - resolve_source_ref(&world["s"], pool); + resolve_source_ref(&world["s"], pool, &file_info); assert_eq!(start_col, 6); assert_eq!(start_off, 6); assert_eq!(end_col, 11); @@ -232,6 +277,9 @@ fn test_separate_strings_keep_separate_locations() { .as_array() .expect("Expected sourceInfoPool to be an array"); + // Create FileInformation for computing row/column from offsets + let file_info = quarto_source_map::FileInformation::new(input); + let blocks = json_value["blocks"].as_array().unwrap(); let para = &blocks[0]; let inlines = para["c"].as_array().unwrap(); @@ -244,7 +292,7 @@ fn test_separate_strings_keep_separate_locations() { assert_eq!(a_str["t"], "Str"); assert_eq!(a_str["c"], "a"); let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) = - resolve_source_ref(&a_str["s"], pool); + resolve_source_ref(&a_str["s"], pool, &file_info); assert_eq!(start_col, 0); assert_eq!(start_off, 0); assert_eq!(end_col, 1); @@ -254,7 +302,7 @@ fn test_separate_strings_keep_separate_locations() { let strong = &inlines[1]; assert_eq!(strong["t"], "Strong"); let (start_off, 
_start_row, start_col, end_off, _end_row, end_col, _t) =
-        resolve_source_ref(&strong["s"], pool);
+        resolve_source_ref(&strong["s"], pool, &file_info);
     assert_eq!(start_col, 1);
     assert_eq!(start_off, 1);
     assert_eq!(end_col, 6);
@@ -265,7 +313,7 @@
     let c_str = &inlines[2];
     assert_eq!(c_str["t"], "Str");
     assert_eq!(c_str["c"], "c");
     let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) =
-        resolve_source_ref(&c_str["s"], pool);
+        resolve_source_ref(&c_str["s"], pool, &file_info);
     assert_eq!(start_col, 6);
     assert_eq!(start_off, 6);
     assert_eq!(end_col, 7);
@@ -306,6 +354,9 @@ fn test_note_source_location() {
         .as_array()
         .expect("Expected sourceInfoPool to be an array");
 
+    // Create FileInformation for computing row/column from offsets
+    let file_info = quarto_source_map::FileInformation::new(input);
+
     let blocks = json_value["blocks"].as_array().unwrap();
     let para = &blocks[0];
     let inlines = para["c"].as_array().unwrap();
@@ -324,7 +375,7 @@
     // Check Note's source location spans the entire ^[note content]
     let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) =
-        resolve_source_ref(&note["s"], pool);
+        resolve_source_ref(&note["s"], pool, &file_info);
     assert_eq!(start_col, 4);
     assert_eq!(start_off, 4);
     assert_eq!(end_col, 19);
@@ -340,7 +391,7 @@
     // CRITICAL: The Paragraph wrapper should have proper source location
     // not SourceInfo::default() which would be FileId(0) with offset 0
     let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) =
-        resolve_source_ref(&note_para["s"], pool);
+        resolve_source_ref(&note_para["s"], pool, &file_info);
 
     // The paragraph wrapper should have the same source location as the Note itself
     // since it's a synthetic wrapper for the note's content
@@ -407,6 +458,9 @@ fn test_note_reference_source_location() {
         .as_array()
         .expect("Expected sourceInfoPool to be an array");
 
+    // Create FileInformation for computing row/column from offsets
+    let file_info = quarto_source_map::FileInformation::new(input);
+
     let blocks = json_value["blocks"].as_array().unwrap();
     let para = &blocks[0];
     let inlines = para["c"].as_array().unwrap();
@@ -432,7 +486,7 @@
     // CRITICAL: The Span should have proper source location from the NoteReference
     // not SourceInfo::default() which would be FileId(0) with offset 0
     let (start_off, _start_row, start_col, end_off, _end_row, end_col, _t) =
-        resolve_source_ref(&span["s"], pool);
+        resolve_source_ref(&span["s"], pool, &file_info);
 
     // The [^note1] spans from column 10 to 18 (0-indexed)
     assert_eq!(start_col, 10);
diff --git a/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs b/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs
index 8ace709..27f815e 100644
--- a/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs
+++ b/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs
@@ -18,7 +18,7 @@ fn test_json_roundtrip_simple_paragraph() {
         blocks: vec![Block::Paragraph(Paragraph {
             content: vec![Inline::Str(Str {
                 text: "Hello, world!".to_string(),
-                source_info: SourceInfo::original(
+                source_info: SourceInfo::from_range(
                     FileId(0),
                     Range {
                         start: Location {
@@ -34,7 +34,7 @@ fn test_json_roundtrip_simple_paragraph() {
                     },
                 ),
             })],
-            source_info: SourceInfo::original(
+            source_info: SourceInfo::from_range(
                 FileId(0),
                 Range {
                     start: Location {
@@ -102,7 +102,7 @@ fn test_json_roundtrip_complex_document() {
                 content: vec![
                     Inline::Str(Str {
                         text: "This is
".to_string(), - source_info: SourceInfo::original( + source_info: SourceInfo::from_range( FileId(0), Range { start: Location { @@ -121,7 +121,7 @@ fn test_json_roundtrip_complex_document() { Inline::Strong(quarto_markdown_pandoc::pandoc::Strong { content: vec![Inline::Str(Str { text: "bold text".to_string(), - source_info: SourceInfo::original( + source_info: SourceInfo::from_range( FileId(0), Range { start: Location { @@ -137,7 +137,7 @@ fn test_json_roundtrip_complex_document() { }, ), })], - source_info: SourceInfo::original( + source_info: SourceInfo::from_range( FileId(0), Range { start: Location { @@ -155,7 +155,7 @@ fn test_json_roundtrip_complex_document() { }), Inline::Str(Str { text: ".".to_string(), - source_info: SourceInfo::original( + source_info: SourceInfo::from_range( FileId(0), Range { start: Location { @@ -172,7 +172,7 @@ fn test_json_roundtrip_complex_document() { ), }), ], - source_info: SourceInfo::original( + source_info: SourceInfo::from_range( FileId(0), Range { start: Location { @@ -191,7 +191,7 @@ fn test_json_roundtrip_complex_document() { Block::CodeBlock(quarto_markdown_pandoc::pandoc::CodeBlock { attr: ("".to_string(), vec![], HashMap::new()), text: "print('Hello, world!')".to_string(), - source_info: SourceInfo::original( + source_info: SourceInfo::from_range( FileId(0), Range { start: Location { @@ -261,7 +261,7 @@ fn test_json_write_then_read_matches_original_structure() { Block::Plain(quarto_markdown_pandoc::pandoc::Plain { content: vec![Inline::Str(Str { text: "Plain text".to_string(), - source_info: SourceInfo::original( + source_info: SourceInfo::from_range( FileId(0), Range { start: Location { @@ -277,7 +277,7 @@ fn test_json_write_then_read_matches_original_structure() { }, ), })], - source_info: SourceInfo::original( + source_info: SourceInfo::from_range( FileId(0), Range { start: Location { @@ -296,7 +296,7 @@ fn test_json_write_then_read_matches_original_structure() { Block::RawBlock(quarto_markdown_pandoc::pandoc::RawBlock { format: "html".to_string(), text: "
Raw HTML
".to_string(), - source_info: SourceInfo::original( + source_info: SourceInfo::from_range( FileId(0), Range { start: Location { @@ -348,3 +348,117 @@ fn test_json_write_then_read_matches_original_structure() { _ => panic!("Block type mismatch for second block"), } } + +/// Test that JSON roundtrip preserves source mapping capability (map_offset should work) +#[test] +fn test_json_roundtrip_preserves_source_mapping() { + let qmd_content = r#"--- +title: "Test" +--- + +Hello world +"#; + + // Create a temporary file for testing + let temp_dir = std::env::temp_dir(); + let test_file = temp_dir.join("test_json_roundtrip_mapping.qmd"); + std::fs::write(&test_file, qmd_content).expect("Failed to write test file"); + + // Step 1: Parse QMD to create initial AST with source mapping + let result = readers::qmd::read( + qmd_content.as_bytes(), + false, + &test_file.to_string_lossy(), + &mut std::io::sink(), + ); + let (pandoc1, context1, diagnostics) = result.expect("Failed to parse QMD"); + assert!(diagnostics.is_empty(), "Expected no parse errors"); + + // Step 2: Serialize to JSON + let mut json_buf = Vec::new(); + json::write(&pandoc1, &context1, &mut json_buf).expect("Failed to write JSON"); + + // Step 3: Verify that JSON contains files array with embedded FileInformation + let json_value: serde_json::Value = + serde_json::from_slice(&json_buf).expect("Failed to parse JSON"); + let files_in_json = json_value["astContext"]["files"].as_array(); + assert!( + files_in_json.is_some(), + "JSON should contain files array in astContext" + ); + let files_array = files_in_json.unwrap(); + assert_eq!(files_array.len(), 1, "Should have one file"); + + // Verify the file entry has name, line_breaks, and total_length + let file_obj = &files_array[0]; + assert!( + file_obj["name"].is_string(), + "File entry should have name field" + ); + assert!( + file_obj["line_breaks"].is_array(), + "File entry should have line_breaks array" + ); + assert!( + file_obj["total_length"].is_number(), + "File entry should have total_length" + ); + + // Step 4: Deserialize from JSON + let (pandoc2, context2) = + readers::json::read(&mut json_buf.as_slice()).expect("Failed to read JSON"); + + // Step 5: Verify that the deserialized AST has working source mapping + // Get the first block (should be a Para with "Hello world") + if let Some(first_block) = pandoc2.blocks.first() { + if let Block::Paragraph(para) = first_block { + // Verify we have a SourceInfo (just check that it has non-zero length) + assert!( + para.source_info.length() > 0, + "Para block should have source info with non-zero length after JSON roundtrip" + ); + + // The key test: map_offset should work on the deserialized SourceInfo + let mapped_start = para.source_info.map_offset(0, &context2.source_context); + assert!( + mapped_start.is_some(), + "map_offset should work after JSON roundtrip (start position). \ + This means SourceContext should have been populated with file information from disk." 
+ ); + + let mapped_end = para + .source_info + .map_offset(para.source_info.length(), &context2.source_context); + assert!( + mapped_end.is_some(), + "map_offset should work after JSON roundtrip (end position)" + ); + + // Verify the mapped locations are sensible + let start_loc = mapped_start.unwrap(); + // Row 4 is where "Hello world" starts (after frontmatter and blank line) + assert_eq!( + start_loc.location.row, 4, + "After roundtrip, paragraph should still map to correct row" + ); + + // Also test inline elements have working source mapping + if let Some(Inline::Str(str_inline)) = para.content.first() { + let inline_mapped = str_inline + .source_info + .map_offset(0, &context2.source_context); + assert!( + inline_mapped.is_some(), + "map_offset should work on inline elements after roundtrip" + ); + } + } else { + panic!("Expected Para block, got {:?}", first_block); + } + } else { + panic!("Expected at least one block in the document"); + } + + // Clean up + std::fs::remove_file(&test_file).ok(); +} diff --git a/crates/quarto-markdown-pandoc/tests/test_location_health.rs b/crates/quarto-markdown-pandoc/tests/test_location_health.rs new file mode 100644 index 0000000..693ac3d --- /dev/null +++ b/crates/quarto-markdown-pandoc/tests/test_location_health.rs @@ -0,0 +1,873 @@ +//! Self-health tests for location information +//! +//! These tests verify invariant properties that must hold for ALL well-formed +//! parsed documents. They are designed to run on any .qmd file in the test suite +//! without requiring specific knowledge about the file's contents. +//! +//! Properties tested: +//! 1. Well-formed ranges: start <= end in all dimensions +//! 2. Offset/row/column consistency: conversions are proper inverses +//! 3. Bounds checking: all locations are within valid bounds +//! 4. Nesting consistency: child ranges contained in parent ranges +//! 5. Sequential consistency: sibling nodes don't overlap +//! 6. 
SourceMapping validity: parent references exist and are valid
+
+use quarto_markdown_pandoc::pandoc::{Block, Inline, Pandoc};
+use quarto_source_map::{Range, SourceInfo};
+use std::fmt;
+
+/// Represents a violation of location health properties
+#[derive(Debug, Clone)]
+pub struct LocationHealthViolation {
+    pub category: ViolationCategory,
+    pub message: String,
+    pub location_info: Option<String>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum ViolationCategory {
+    WellFormedRange,
+    OffsetRowColConsistency,
+    BoundsCheck,
+    NestingConsistency,
+    SequentialConsistency,
+    SourceMappingValidity,
+}
+
+impl fmt::Display for LocationHealthViolation {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "[{:?}] {}", self.category, self.message)?;
+        if let Some(loc) = &self.location_info {
+            write!(f, " at {}", loc)?;
+        }
+        Ok(())
+    }
+}
+
+/// Accumulates violations found during validation
+pub struct LocationHealthValidator {
+    violations: Vec<LocationHealthViolation>,
+    source: String,
+}
+
+impl LocationHealthValidator {
+    pub fn new(source: String) -> Self {
+        Self {
+            violations: Vec::new(),
+            source,
+        }
+    }
+
+    pub fn add_violation(
+        &mut self,
+        category: ViolationCategory,
+        message: String,
+        range: Option<&Range>,
+    ) {
+        let location_info = range.map(|r| {
+            format!(
+                "offset {} (row:{},col:{}) to {} (row:{},col:{})",
+                r.start.offset,
+                r.start.row,
+                r.start.column,
+                r.end.offset,
+                r.end.row,
+                r.end.column
+            )
+        });
+
+        self.violations.push(LocationHealthViolation {
+            category,
+            message,
+            location_info,
+        });
+    }
+
+    pub fn violations(&self) -> &[LocationHealthViolation] {
+        &self.violations
+    }
+
+    pub fn has_violations(&self) -> bool {
+        !self.violations.is_empty()
+    }
+
+    pub fn source(&self) -> &str {
+        &self.source
+    }
+}
+
+/// Extract all SourceInfo from a Pandoc document by walking the AST
+pub fn extract_all_source_info(doc: &Pandoc) -> Vec<SourceInfo> {
+    let mut source_infos = Vec::new();
+
+    for block in &doc.blocks {
+        collect_source_info_from_block(block, &mut source_infos);
+    }
+
+    source_infos
+}
+
+/// Recursively collect SourceInfo from a Block and its children
+fn collect_source_info_from_block(block: &Block, source_infos: &mut Vec<SourceInfo>) {
+    match block {
+        Block::Plain(plain) => {
+            source_infos.push(plain.source_info.clone());
+            for inline in &plain.content {
+                collect_source_info_from_inline(inline, source_infos);
+            }
+        }
+        Block::Paragraph(para) => {
+            source_infos.push(para.source_info.clone());
+            for inline in &para.content {
+                collect_source_info_from_inline(inline, source_infos);
+            }
+        }
+        Block::LineBlock(line_block) => {
+            source_infos.push(line_block.source_info.clone());
+            for line in &line_block.content {
+                for inline in line {
+                    collect_source_info_from_inline(inline, source_infos);
+                }
+            }
+        }
+        Block::Header(header) => {
+            source_infos.push(header.source_info.clone());
+            for inline in &header.content {
+                collect_source_info_from_inline(inline, source_infos);
+            }
+        }
+        Block::CodeBlock(code_block) => {
+            source_infos.push(code_block.source_info.clone());
+        }
+        Block::RawBlock(raw_block) => {
+            source_infos.push(raw_block.source_info.clone());
+        }
+        Block::HorizontalRule(hr) => {
+            source_infos.push(hr.source_info.clone());
+        }
+        Block::BlockQuote(quote) => {
+            source_infos.push(quote.source_info.clone());
+            for child_block in &quote.content {
+                collect_source_info_from_block(child_block, source_infos);
+            }
+        }
+        Block::Div(div) => {
+            source_infos.push(div.source_info.clone());
+            for child_block in &div.content {
+                collect_source_info_from_block(child_block, source_infos);
+            }
+        }
+        Block::BulletList(bullet_list) => {
+            source_infos.push(bullet_list.source_info.clone());
+            for item in &bullet_list.content {
+                for child_block in item {
+                    collect_source_info_from_block(child_block, source_infos);
+                }
+            }
+        }
+        Block::OrderedList(ordered_list) => {
+            source_infos.push(ordered_list.source_info.clone());
+            for item in &ordered_list.content {
+                for child_block in item {
+                    collect_source_info_from_block(child_block, source_infos);
+                }
+            }
+        }
+        Block::DefinitionList(def_list) => {
+            source_infos.push(def_list.source_info.clone());
+            for (term, definitions) in &def_list.content {
+                for inline in term {
+                    collect_source_info_from_inline(inline, source_infos);
+                }
+                for definition in definitions {
+                    for child_block in definition {
+                        collect_source_info_from_block(child_block, source_infos);
+                    }
+                }
+            }
+        }
+        Block::Table(table) => {
+            source_infos.push(table.source_info.clone());
+            // Table headers
+            for row in &table.head.rows {
+                for cell in &row.cells {
+                    for child_block in &cell.content {
+                        collect_source_info_from_block(child_block, source_infos);
+                    }
+                }
+            }
+            // Table bodies
+            for body in &table.bodies {
+                // Body head rows
+                for row in &body.head {
+                    for cell in &row.cells {
+                        for child_block in &cell.content {
+                            collect_source_info_from_block(child_block, source_infos);
+                        }
+                    }
+                }
+                // Body body rows
+                for row in &body.body {
+                    for cell in &row.cells {
+                        for child_block in &cell.content {
+                            collect_source_info_from_block(child_block, source_infos);
+                        }
+                    }
+                }
+            }
+            // Table footer
+            for row in &table.foot.rows {
+                for cell in &row.cells {
+                    for child_block in &cell.content {
+                        collect_source_info_from_block(child_block, source_infos);
+                    }
+                }
+            }
+        }
+        Block::Figure(figure) => {
+            source_infos.push(figure.source_info.clone());
+            for child_block in &figure.content {
+                collect_source_info_from_block(child_block, source_infos);
+            }
+            // Caption has optional long (blocks)
+            if let Some(long_caption) = &figure.caption.long {
+                for child_block in long_caption {
+                    collect_source_info_from_block(child_block, source_infos);
+                }
+            }
+        }
+        Block::BlockMetadata(_)
+        | Block::NoteDefinitionPara(_)
+        | Block::NoteDefinitionFencedBlock(_)
+        | Block::CaptionBlock(_) => {
+            // TODO: handle these special block types if they have source info
+        }
+    }
+}
+
+/// Recursively collect SourceInfo from an Inline and its children
+fn collect_source_info_from_inline(inline: &Inline, source_infos: &mut Vec<SourceInfo>) {
+    match inline {
+        Inline::Str(str_node) => {
+            source_infos.push(str_node.source_info.clone());
+        }
+        Inline::Emph(emph) => {
+            source_infos.push(emph.source_info.clone());
+            for child in &emph.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Underline(underline) => {
+            source_infos.push(underline.source_info.clone());
+            for child in &underline.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Strong(strong) => {
+            source_infos.push(strong.source_info.clone());
+            for child in &strong.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Strikeout(strikeout) => {
+            source_infos.push(strikeout.source_info.clone());
+            for child in &strikeout.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Superscript(sup) => {
+            source_infos.push(sup.source_info.clone());
+            for child in &sup.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Subscript(sub) => {
+            source_infos.push(sub.source_info.clone());
+            for child in &sub.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::SmallCaps(small_caps) => {
+            source_infos.push(small_caps.source_info.clone());
+            for child in &small_caps.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Quoted(quoted) => {
+            source_infos.push(quoted.source_info.clone());
+            for child in &quoted.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Cite(cite) => {
+            source_infos.push(cite.source_info.clone());
+            for child in &cite.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Code(code) => {
+            source_infos.push(code.source_info.clone());
+        }
+        Inline::Space(space) => {
+            source_infos.push(space.source_info.clone());
+        }
+        Inline::SoftBreak(soft_break) => {
+            source_infos.push(soft_break.source_info.clone());
+        }
+        Inline::LineBreak(line_break) => {
+            source_infos.push(line_break.source_info.clone());
+        }
+        Inline::Math(math) => {
+            source_infos.push(math.source_info.clone());
+        }
+        Inline::RawInline(raw) => {
+            source_infos.push(raw.source_info.clone());
+        }
+        Inline::Link(link) => {
+            source_infos.push(link.source_info.clone());
+            for child in &link.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Image(image) => {
+            source_infos.push(image.source_info.clone());
+            for child in &image.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Note(note) => {
+            source_infos.push(note.source_info.clone());
+            for child_block in &note.content {
+                collect_source_info_from_block(child_block, source_infos);
+            }
+        }
+        Inline::Span(span) => {
+            source_infos.push(span.source_info.clone());
+            for child in &span.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Shortcode(_) => {
+            // TODO: handle shortcode if it has source info
+        }
+        Inline::NoteReference(note_ref) => {
+            source_infos.push(note_ref.source_info.clone());
+        }
+        Inline::Attr(_) => {
+            // Attr doesn't have source info - it's just metadata
+        }
+        Inline::Insert(insert) => {
+            source_infos.push(insert.source_info.clone());
+            for child in &insert.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Delete(delete) => {
+            source_infos.push(delete.source_info.clone());
+            for child in &delete.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::Highlight(highlight) => {
+            source_infos.push(highlight.source_info.clone());
+            for child in &highlight.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+        Inline::EditComment(comment) => {
+            source_infos.push(comment.source_info.clone());
+            for child in &comment.content {
+                collect_source_info_from_inline(child, source_infos);
+            }
+        }
+    }
+}
+
+// ============================================================================
+// PHASE 2: Core Property Validators
+// ============================================================================
+
+/// Validate that a Range is well-formed
+fn validate_well_formed_range(range: &Range, validator: &mut LocationHealthValidator) {
+    // Check: start.offset <= end.offset
+    if range.start.offset > range.end.offset {
+        validator.add_violation(
+            ViolationCategory::WellFormedRange,
+            format!(
+                "Start offset {} is greater than end offset {}",
+                range.start.offset, range.end.offset
+            ),
+            Some(range),
+        );
+    }
+
+    // Check: start.row <= end.row
+    if range.start.row > range.end.row {
validator.add_violation( + ViolationCategory::WellFormedRange, + format!( + "Start row {} is greater than end row {}", + range.start.row, range.end.row + ), + Some(range), + ); + } + + // Check: if same row, start.column <= end.column + if range.start.row == range.end.row && range.start.column > range.end.column { + validator.add_violation( + ViolationCategory::WellFormedRange, + format!( + "On same row {}, start column {} is greater than end column {}", + range.start.row, range.start.column, range.end.column + ), + Some(range), + ); + } +} + +/// Validate that offset and row/column are consistent for a Location +fn validate_offset_row_col_consistency( + location: &quarto_source_map::Location, + source: &str, + context: &str, + validator: &mut LocationHealthValidator, +) { + // Check: offset_to_location(offset) should give us the same row/col + if let Some(computed_loc) = + quarto_source_map::utils::offset_to_location(source, location.offset) + { + if computed_loc.row != location.row || computed_loc.column != location.column { + validator.add_violation( + ViolationCategory::OffsetRowColConsistency, + format!( + "{}: offset_to_location({}) returned (row:{}, col:{}) but expected (row:{}, col:{})", + context, + location.offset, + computed_loc.row, + computed_loc.column, + location.row, + location.column + ), + None, + ); + } + } else { + validator.add_violation( + ViolationCategory::OffsetRowColConsistency, + format!( + "{}: offset_to_location({}) returned None (offset out of bounds)", + context, location.offset + ), + None, + ); + } + + // Check: line_col_to_offset(row, col) should give us the same offset + if let Some(computed_offset) = + quarto_source_map::utils::line_col_to_offset(source, location.row, location.column) + { + if computed_offset != location.offset { + validator.add_violation( + ViolationCategory::OffsetRowColConsistency, + format!( + "{}: line_col_to_offset(row:{}, col:{}) returned offset {} but expected {}", + context, location.row, location.column, computed_offset, location.offset + ), + None, + ); + } + } else { + validator.add_violation( + ViolationCategory::OffsetRowColConsistency, + format!( + "{}: line_col_to_offset(row:{}, col:{}) returned None (row/col out of bounds)", + context, location.row, location.column + ), + None, + ); + } +} + +/// Validate that a Location is within valid bounds for the source +fn validate_location_bounds( + location: &quarto_source_map::Location, + source: &str, + context: &str, + validator: &mut LocationHealthValidator, +) { + let source_len = source.len(); + + // Check: offset <= source.len() + if location.offset > source_len { + validator.add_violation( + ViolationCategory::BoundsCheck, + format!( + "{}: offset {} exceeds source length {}", + context, location.offset, source_len + ), + None, + ); + } + + // Count number of lines in source + let num_lines = source.lines().count(); + if num_lines == 0 && location.row != 0 { + validator.add_violation( + ViolationCategory::BoundsCheck, + format!( + "{}: row {} invalid for empty file (should be 0)", + context, location.row + ), + None, + ); + } else if location.row >= num_lines && source_len > 0 { + // Allow row == num_lines for EOF position after final newline + // But if source is non-empty and we're beyond that, it's invalid + if location.row > num_lines || (location.row == num_lines && !source.ends_with('\n')) { + validator.add_violation( + ViolationCategory::BoundsCheck, + format!( + "{}: row {} exceeds number of lines {} (ends_with_newline: {})", + context, + location.row, + 
num_lines, + source.ends_with('\n') + ), + None, + ); + } + } +} + +/// Validate all core properties for a single SourceInfo +fn validate_source_info_core_properties( + source_info: &SourceInfo, + source: &str, + context: &str, + validator: &mut LocationHealthValidator, +) { + let start_offset = source_info.start_offset(); + let end_offset = source_info.end_offset(); + + // Build a Range object for compatibility with existing validation functions + // We compute the Location data from offsets using the source text + let start_location = quarto_source_map::utils::offset_to_location(source, start_offset) + .unwrap_or(quarto_source_map::Location { + offset: start_offset, + row: 0, + column: 0, + }); + + let end_location = quarto_source_map::utils::offset_to_location(source, end_offset).unwrap_or( + quarto_source_map::Location { + offset: end_offset, + row: 0, + column: 0, + }, + ); + + let range = Range { + start: start_location, + end: end_location, + }; + + // 1. Well-formed range + validate_well_formed_range(&range, validator); + + // 2. Offset/row/column consistency for start + validate_offset_row_col_consistency( + &range.start, + source, + &format!("{} start", context), + validator, + ); + + // 3. Offset/row/column consistency for end + validate_offset_row_col_consistency(&range.end, source, &format!("{} end", context), validator); + + // 4. Bounds checking for start + validate_location_bounds( + &range.start, + source, + &format!("{} start", context), + validator, + ); + + // 5. Bounds checking for end + validate_location_bounds(&range.end, source, &format!("{} end", context), validator); +} + +/// Run all core property validations on a Pandoc document +pub fn validate_core_properties(doc: &Pandoc, source: &str) -> Vec { + let mut validator = LocationHealthValidator::new(source.to_string()); + let source_infos = extract_all_source_info(doc); + + for (i, source_info) in source_infos.iter().enumerate() { + let context = format!("SourceInfo #{}", i); + validate_source_info_core_properties(source_info, source, &context, &mut validator); + } + + validator.violations().to_vec() +} + +#[cfg(test)] +mod tests { + use super::*; + use quarto_markdown_pandoc::pandoc::{ASTContext, treesitter_to_pandoc}; + use quarto_markdown_pandoc::utils::diagnostic_collector::DiagnosticCollector; + use tree_sitter_qmd::MarkdownParser; + + fn parse_qmd_helper(source: &str) -> Pandoc { + let mut parser = MarkdownParser::default(); + let input_bytes = source.as_bytes(); + let tree = parser + .parse(input_bytes, None) + .expect("Failed to parse input"); + + let context = ASTContext::anonymous(); + let mut error_collector = DiagnosticCollector::new(); + treesitter_to_pandoc( + &mut std::io::sink(), + &tree, + input_bytes, + &context, + &mut error_collector, + ) + .expect("Failed to convert to Pandoc AST") + } + + #[test] + fn test_extract_source_info_simple() { + let source = "Hello world"; + let doc = parse_qmd_helper(source); + + let source_infos = extract_all_source_info(&doc); + + // Should have at least: Paragraph, Str "Hello", Space, Str "world" + assert!( + source_infos.len() >= 4, + "Expected at least 4 source infos, got {}", + source_infos.len() + ); + } + + #[test] + fn test_extract_source_info_nested() { + let source = "This is *emphasis with **strong** inside*"; + let doc = parse_qmd_helper(source); + + let source_infos = extract_all_source_info(&doc); + + // Should have: Paragraph, multiple Str, Emph, Strong, etc. 
+ assert!( + source_infos.len() > 5, + "Expected many source infos for nested structure, got {}", + source_infos.len() + ); + } + + #[test] + fn test_core_properties_simple() { + let source = "Hello world"; + let doc = parse_qmd_helper(source); + + let violations = validate_core_properties(&doc, source); + + if !violations.is_empty() { + eprintln!("Found {} violations:", violations.len()); + for v in &violations { + eprintln!(" {}", v); + } + } + + assert!( + violations.is_empty(), + "Expected no violations for simple document" + ); + } + + #[test] + fn test_core_properties_nested() { + let source = "This is *emphasis with **strong** inside*"; + let doc = parse_qmd_helper(source); + + let violations = validate_core_properties(&doc, source); + + if !violations.is_empty() { + eprintln!("Found {} violations:", violations.len()); + for v in &violations { + eprintln!(" {}", v); + } + } + + assert!( + violations.is_empty(), + "Expected no violations for nested document" + ); + } + + #[test] + fn test_core_properties_multiline() { + let source = "Line 1\nLine 2\nLine 3"; + let doc = parse_qmd_helper(source); + + let violations = validate_core_properties(&doc, source); + + if !violations.is_empty() { + eprintln!("Found {} violations:", violations.len()); + for v in &violations { + eprintln!(" {}", v); + } + } + + assert!( + violations.is_empty(), + "Expected no violations for multiline document" + ); + } + + #[test] + fn test_core_properties_empty() { + let source = ""; + let doc = parse_qmd_helper(source); + + let violations = validate_core_properties(&doc, source); + + if !violations.is_empty() { + eprintln!("Found {} violations:", violations.len()); + for v in &violations { + eprintln!(" {}", v); + } + } + + assert!( + violations.is_empty(), + "Expected no violations for empty document" + ); + } + + #[test] + fn test_core_properties_no_trailing_newline() { + let source = "Line 1\nLine 2\nLine 3"; + let doc = parse_qmd_helper(source); + + let violations = validate_core_properties(&doc, source); + + if !violations.is_empty() { + eprintln!("Found {} violations:", violations.len()); + for v in &violations { + eprintln!(" {}", v); + } + } + + assert!( + violations.is_empty(), + "Expected no violations for document without trailing newline" + ); + } + + #[test] + fn test_core_properties_with_trailing_newline() { + let source = "Line 1\nLine 2\nLine 3\n"; + let doc = parse_qmd_helper(source); + + let violations = validate_core_properties(&doc, source); + + if !violations.is_empty() { + eprintln!("Found {} violations:", violations.len()); + for v in &violations { + eprintln!(" {}", v); + } + } + + assert!( + violations.is_empty(), + "Expected no violations for document with trailing newline" + ); + } + + #[test] + fn test_core_properties_on_smoke_tests() { + use std::fs; + use std::path::Path; + + let smoke_dir = Path::new("tests/smoke"); + if !smoke_dir.exists() { + eprintln!("Smoke test directory not found, skipping"); + return; + } + + let mut file_count = 0; + let mut total_violations = 0; + + for entry in fs::read_dir(smoke_dir).unwrap() { + let entry = entry.unwrap(); + let path = entry.path(); + + if path.extension().and_then(|s| s.to_str()) == Some("qmd") { + file_count += 1; + let source = fs::read_to_string(&path).unwrap_or_else(|e| { + panic!("Failed to read {:?}: {}", path, e); + }); + + // Try to parse; skip files that fail (they may be intentionally malformed) + let mut parser = MarkdownParser::default(); + let input_bytes = source.as_bytes(); + let tree = match parser.parse(input_bytes, 
None) { + Some(tree) => tree, + None => { + eprintln!( + "Skipping {:?}: parse returned None", + path.file_name().unwrap() + ); + continue; + } + }; + + let context = ASTContext::anonymous(); + let mut error_collector = DiagnosticCollector::new(); + let doc = match treesitter_to_pandoc( + &mut std::io::sink(), + &tree, + input_bytes, + &context, + &mut error_collector, + ) { + Ok(doc) => doc, + Err(e) => { + eprintln!( + "Skipping {:?}: conversion failed: {:?}", + path.file_name().unwrap(), + e + ); + continue; + } + }; + + let violations = validate_core_properties(&doc, &source); + + if !violations.is_empty() { + eprintln!( + "\n{:?} has {} violations:", + path.file_name().unwrap(), + violations.len() + ); + for v in &violations { + eprintln!(" {}", v); + } + total_violations += violations.len(); + } + } + } + + eprintln!("\nTested {} smoke test files", file_count); + assert_eq!( + total_violations, 0, + "Found {} total violations across {} files", + total_violations, file_count + ); + } +} diff --git a/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs b/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs index f63b3a0..89574cb 100644 --- a/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs +++ b/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs @@ -9,18 +9,16 @@ use quarto_markdown_pandoc::writers; /// Helper to resolve a SourceInfo chain to absolute file offset fn resolve_source_offset(source: &quarto_source_map::SourceInfo) -> usize { - match &source.mapping { - quarto_source_map::SourceMapping::Original { .. } => source.range.start.offset, - quarto_source_map::SourceMapping::Substring { offset, parent } => { - offset + resolve_source_offset(parent) - } - quarto_source_map::SourceMapping::Concat { .. } => { - // For concat, just use the start offset - source.range.start.offset - } - quarto_source_map::SourceMapping::Transformed { .. } => { - // For transformed, just use the start offset - source.range.start.offset + match source { + quarto_source_map::SourceInfo::Original { start_offset, .. } => *start_offset, + quarto_source_map::SourceInfo::Substring { + parent, + start_offset, + .. + } => start_offset + resolve_source_offset(parent), + quarto_source_map::SourceInfo::Concat { pieces } => { + // For concat, use the start offset of the first piece + pieces.first().map(|p| p.offset_in_concat).unwrap_or(0) } } } diff --git a/crates/quarto-source-map/src/context.rs b/crates/quarto-source-map/src/context.rs index 3c4100d..2486771 100644 --- a/crates/quarto-source-map/src/context.rs +++ b/crates/quarto-source-map/src/context.rs @@ -73,6 +73,25 @@ impl SourceContext { id } + /// Add a file with pre-computed FileInformation + /// + /// This is useful when deserializing from formats (like JSON) that include + /// serialized FileInformation, avoiding the need to recompute line breaks + /// or read from disk. + /// + /// The file is created without content (content=None), so ariadne rendering + /// won't work, but map_offset() will work using the provided FileInformation. 
+ pub fn add_file_with_info(&mut self, path: String, file_info: FileInformation) -> FileId { + let id = FileId(self.files.len()); + self.files.push(SourceFile { + path, + content: None, + file_info: Some(file_info), + metadata: FileMetadata { file_type: None }, + }); + id + } + /// Get a file by ID pub fn get_file(&self, id: FileId) -> Option<&SourceFile> { self.files.get(id.0) diff --git a/crates/quarto-source-map/src/file_info.rs b/crates/quarto-source-map/src/file_info.rs index e890ce2..f120b1d 100644 --- a/crates/quarto-source-map/src/file_info.rs +++ b/crates/quarto-source-map/src/file_info.rs @@ -42,6 +42,25 @@ impl FileInformation { } } + /// Create file information from pre-computed parts + /// + /// This is useful when deserializing from formats that store + /// line break information directly (like JSON). + /// + /// # Example + /// + /// ``` + /// use quarto_source_map::FileInformation; + /// + /// let info = FileInformation::from_parts(vec![6, 13], 20); + /// ``` + pub fn from_parts(line_breaks: Vec<usize>, total_length: usize) -> Self { + FileInformation { + line_breaks, + total_length, + } + } + /// Convert a byte offset to a Location with row and column /// /// Uses binary search to find which line contains the offset. @@ -99,6 +118,11 @@ impl FileInformation { self.total_length } + /// Get the line breaks array (byte offsets of newline characters) + pub fn line_breaks(&self) -> &[usize] { + &self.line_breaks + } + /// Get the number of lines in the file pub fn line_count(&self) -> usize { // If there are no newlines, there's 1 line diff --git a/crates/quarto-source-map/src/lib.rs b/crates/quarto-source-map/src/lib.rs index ec1aee6..ae8afa1 100644 --- a/crates/quarto-source-map/src/lib.rs +++ b/crates/quarto-source-map/src/lib.rs @@ -8,9 +8,9 @@ //! # Overview //! //! The core types are: -//! - [`SourceInfo`]: Tracks a location with its transformation history -//! - [`SourceMapping`]: Enum describing how content was transformed +//! - [`SourceInfo`]: Enum tracking a location and its transformation history //! - [`SourceContext`]: Manages files and provides content for mapping +//! - [`MappedLocation`]: Result of mapping through transformation chains //! //! # Example //! @@ -21,15 +21,13 @@ //! let mut ctx = SourceContext::new(); //! let file_id = ctx.add_file("main.qmd".into(), Some("# Hello\nWorld".into())); //! -//! // Create a source location -//! let range = Range { -//! start: Location { offset: 0, row: 0, column: 0 }, -//! end: Location { offset: 7, row: 0, column: 7 }, -//! }; -//! let info = SourceInfo::original(file_id, range.clone()); +//! // Create a source location (stores only offsets) +//! let info = SourceInfo::original(file_id, 0, 7); //! -//! // Verify the source info was created correctly -//! assert_eq!(info.range, range); +//! // Map to get row/column information +//! let mapped = info.map_offset(0, &ctx).unwrap(); +//! assert_eq!(mapped.location.row, 0); +//! assert_eq!(mapped.location.column, 0); //!
``` pub mod context; @@ -43,6 +41,6 @@ pub mod utils; pub use context::{FileMetadata, SourceContext, SourceFile}; pub use file_info::FileInformation; pub use mapping::MappedLocation; -pub use source_info::{RangeMapping, SourceInfo, SourceMapping, SourcePiece}; +pub use source_info::{SourceInfo, SourcePiece}; pub use types::{FileId, Location, Range}; pub use utils::{line_col_to_offset, offset_to_location, range_from_offsets}; diff --git a/crates/quarto-source-map/src/mapping.rs b/crates/quarto-source-map/src/mapping.rs index a8a9376..8b96de6 100644 --- a/crates/quarto-source-map/src/mapping.rs +++ b/crates/quarto-source-map/src/mapping.rs @@ -15,31 +15,37 @@ pub struct MappedLocation { impl SourceInfo { /// Map an offset in the current text back to original source pub fn map_offset(&self, offset: usize, ctx: &SourceContext) -> Option<MappedLocation> { - use crate::source_info::SourceMapping; - - match &self.mapping { - SourceMapping::Original { file_id } => { + match self { + SourceInfo::Original { + file_id, + start_offset, + .. + } => { // Direct mapping to original file let file = ctx.get_file(*file_id)?; let file_info = file.file_info.as_ref()?; + // Compute the absolute offset in the file + let absolute_offset = start_offset + offset; + // Convert offset to Location with row/column using efficient binary search - let location = file_info.offset_to_location(offset)?; + let location = file_info.offset_to_location(absolute_offset)?; Some(MappedLocation { file_id: *file_id, location, }) } - SourceMapping::Substring { + SourceInfo::Substring { parent, - offset: parent_offset, + start_offset, + .. } => { // Map to parent coordinates and recurse - let parent_offset_mapped = parent_offset + offset; - parent.map_offset(parent_offset_mapped, ctx) + let parent_offset = start_offset + offset; + parent.map_offset(parent_offset, ctx) } - SourceMapping::Concat { pieces } => { + SourceInfo::Concat { pieces } => { // Find which piece contains this offset for piece in pieces { let piece_start = piece.offset_in_concat; @@ -53,18 +59,6 @@ impl SourceInfo { } None // Offset not found in any piece } - SourceMapping::Transformed { parent, mapping } => { - // Find the mapping that contains this offset - for range_mapping in mapping { - if offset >= range_mapping.from_start && offset < range_mapping.from_end { - // Map to parent coordinates - let offset_in_range = offset - range_mapping.from_start; - let parent_offset = range_mapping.to_start + offset_in_range; - return parent.map_offset(parent_offset, ctx); - } - } - None // Offset not found in any mapping - } } } @@ -91,7 +85,7 @@ mod tests { let mut ctx = SourceContext::new(); let file_id = ctx.add_file("test.qmd".to_string(), Some("hello\nworld".to_string())); - let info = SourceInfo::original( + let info = SourceInfo::from_range( file_id, Range { start: Location { @@ -127,7 +121,7 @@ mod tests { let mut ctx = SourceContext::new(); let file_id = ctx.add_file("test.qmd".to_string(), Some("0123456789".to_string())); - let original = SourceInfo::original( + let original = SourceInfo::from_range( file_id, Range { start: Location { @@ -163,7 +157,7 @@ mod tests { let file_id1 = ctx.add_file("first.qmd".to_string(), Some("AAA".to_string())); let file_id2 = ctx.add_file("second.qmd".to_string(), Some("BBB".to_string())); - let info1 = SourceInfo::original( + let info1 = SourceInfo::from_range( file_id1, Range { start: Location { @@ -179,7 +173,7 @@ mod tests { }, ); - let info2 = SourceInfo::original( + let info2 = SourceInfo::from_range( file_id2, Range { start: Location { @@
-209,56 +203,12 @@ mod tests { assert_eq!(mapped.location.offset, 1); } - #[test] - fn test_map_offset_transformed() { - let mut ctx = SourceContext::new(); - let file_id = ctx.add_file("test.qmd".to_string(), Some("0123456789".to_string())); - - let original = SourceInfo::original( - file_id, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 10, - row: 0, - column: 10, - }, - }, - ); - - // Transform: map [0,3) to [5,8), skip everything else - use crate::source_info::RangeMapping; - let transformed = SourceInfo::transformed( - original, - vec![RangeMapping { - from_start: 0, - from_end: 3, - to_start: 5, - to_end: 8, - }], - ); - - // Map offset 0 (should map to original offset 5, which is '5') - let mapped = transformed.map_offset(0, &ctx).unwrap(); - assert_eq!(mapped.file_id, file_id); - assert_eq!(mapped.location.offset, 5); - - // Map offset 2 (should map to original offset 7, which is '7') - let mapped = transformed.map_offset(2, &ctx).unwrap(); - assert_eq!(mapped.file_id, file_id); - assert_eq!(mapped.location.offset, 7); - } - #[test] fn test_map_range() { let mut ctx = SourceContext::new(); let file_id = ctx.add_file("test.qmd".to_string(), Some("hello\nworld".to_string())); - let info = SourceInfo::original( + let info = SourceInfo::from_range( file_id, Range { start: Location { diff --git a/crates/quarto-source-map/src/source_info.rs b/crates/quarto-source-map/src/source_info.rs index 2ff33bc..faa3192 100644 --- a/crates/quarto-source-map/src/source_info.rs +++ b/crates/quarto-source-map/src/source_info.rs @@ -1,35 +1,46 @@ //! Source information with transformation tracking -use crate::types::{FileId, Location, Range}; +use crate::types::{FileId, Range}; use serde::{Deserialize, Serialize}; use std::rc::Rc; /// Source information tracking a location and its transformation history +/// +/// This enum stores only byte offsets. Row and column information is computed +/// on-demand via `map_offset()` using the FileInformation line break index. +/// +/// Design notes: +/// - Original: Points directly to a file with byte offsets +/// - Substring: Points to a range within a parent SourceInfo (offsets are relative to parent) +/// - Concat: Combines multiple SourceInfo pieces (preserves provenance when coalescing text) +/// +/// The Transformed variant was removed because it's not used in production code. +/// Text transformations (smart quotes, em-dashes) use Original SourceInfo pointing +/// to the pre-transformation text, accepting that the byte offsets are approximate. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct SourceInfo { - /// The range in the immediate/current text - pub range: Range, - /// How this range maps to its source - pub mapping: SourceMapping, -} - -/// Describes how source content was transformed -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub enum SourceMapping { +pub enum SourceInfo { /// Direct position in an original file - Original { file_id: FileId }, + /// + /// Stores only byte offsets. Use `map_offset()` to get row/column information. + Original { + file_id: FileId, + start_offset: usize, + end_offset: usize, + }, /// Substring extraction from a parent source + /// + /// Offsets are relative to the parent's text. + /// The chain of Substrings always resolves to an Original. 
Substring { parent: Rc<SourceInfo>, - offset: usize, + start_offset: usize, + end_offset: usize, }, /// Concatenation of multiple sources + /// + /// Used when coalescing adjacent text nodes while preserving + /// the fact that they came from different source locations. Concat { pieces: Vec<SourcePiece> }, - /// Transformed text with piecewise mapping - Transformed { - parent: Rc<SourceInfo>, - mapping: Vec<RangeMapping>, - }, } /// A piece of a concatenated source @@ -43,68 +54,44 @@ pub struct SourcePiece { pub length: usize, } -/// Maps a range in transformed text to parent text -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct RangeMapping { - /// Start offset in transformed text - pub from_start: usize, - /// End offset in transformed text - pub from_end: usize, - /// Start offset in parent text - pub to_start: usize, - /// End offset in parent text - pub to_end: usize, -} - impl Default for SourceInfo { fn default() -> Self { - SourceInfo::original( - FileId(0), - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 0, - row: 0, - column: 0, - }, - }, - ) + SourceInfo::Original { + file_id: FileId(0), + start_offset: 0, + end_offset: 0, + } } } impl SourceInfo { - /// Create source info for a position in an original file - pub fn original(file_id: FileId, range: Range) -> Self { - SourceInfo { - range, - mapping: SourceMapping::Original { file_id }, + /// Create source info for a position in an original file (from offsets) + pub fn original(file_id: FileId, start_offset: usize, end_offset: usize) -> Self { + SourceInfo::Original { + file_id, + start_offset, + end_offset, + } + } + + /// Create source info for a position in an original file (from Range) + /// + /// This is a compatibility helper for code that still uses Range. + /// The row and column information in the Range is ignored; only offsets are stored. + pub fn from_range(file_id: FileId, range: Range) -> Self { + SourceInfo::Original { + file_id, + start_offset: range.start.offset, + end_offset: range.end.offset, } } /// Create source info for a substring extraction pub fn substring(parent: SourceInfo, start: usize, end: usize) -> Self { - let length = end - start; - SourceInfo { - range: Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: length, - row: 0, - column: 0, - }, - }, - mapping: SourceMapping::Substring { - parent: Rc::new(parent), - offset: start, - }, + SourceInfo::Substring { + parent: Rc::new(parent), + start_offset: start, + end_offset: end, } } @@ -130,49 +117,8 @@ impl SourceInfo { }) .collect(); - let total_length = cumulative_offset; - - SourceInfo { - range: Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: total_length, - row: 0, - column: 0, - }, - }, - mapping: SourceMapping::Concat { - pieces: pieces_with_offsets, - }, - } - } - - /// Create source info for transformed text - pub fn transformed(parent: SourceInfo, mapping: Vec<RangeMapping>) -> Self { - // Find the max end offset in the transformed text - let total_length = mapping.iter().map(|m| m.from_end).max().unwrap_or(0); - - SourceInfo { - range: Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: total_length, - row: 0, - column: 0, - }, - }, - mapping: SourceMapping::Transformed { - parent: Rc::new(parent), - mapping, - }, + SourceInfo::Concat { + pieces: pieces_with_offsets, } } @@ -181,14 +127,55 @@ impl SourceInfo { /// This creates a Concat mapping that preserves both sources.
/// The resulting SourceInfo spans from the start of self to the end of other. pub fn combine(&self, other: &SourceInfo) -> Self { - let self_length = self.range.end.offset - self.range.start.offset; - let other_length = other.range.end.offset - other.range.start.offset; + let self_length = self.length(); + let other_length = other.length(); SourceInfo::concat(vec![ (self.clone(), self_length), (other.clone(), other_length), ]) } + + /// Get the length (in bytes) represented by this SourceInfo + pub fn length(&self) -> usize { + match self { + SourceInfo::Original { + start_offset, + end_offset, + .. + } => end_offset - start_offset, + SourceInfo::Substring { + start_offset, + end_offset, + .. + } => end_offset - start_offset, + SourceInfo::Concat { pieces } => pieces.iter().map(|p| p.length).sum(), + } + } + + /// Get the start offset for this SourceInfo + /// + /// For Original and Substring, returns the start_offset field. + /// For Concat, returns 0 (the concat represents a new text starting at 0). + pub fn start_offset(&self) -> usize { + match self { + SourceInfo::Original { start_offset, .. } => *start_offset, + SourceInfo::Substring { start_offset, .. } => *start_offset, + SourceInfo::Concat { .. } => 0, + } + } + + /// Get the end offset for this SourceInfo + /// + /// For Original and Substring, returns the end_offset field. + /// For Concat, returns the total length. + pub fn end_offset(&self) -> usize { + match self { + SourceInfo::Original { end_offset, .. } => *end_offset, + SourceInfo::Substring { end_offset, .. } => *end_offset, + SourceInfo::Concat { .. } => self.length(), + } + } } #[cfg(test)] @@ -212,11 +199,15 @@ mod tests { }, }; - let info = SourceInfo::original(file_id, range.clone()); + let info = SourceInfo::from_range(file_id, range.clone()); - assert_eq!(info.range, range); - match info.mapping { - SourceMapping::Original { file_id: mapped_id } => { + assert_eq!(info.start_offset(), 0); + assert_eq!(info.end_offset(), 10); + assert_eq!(info.length(), 10); + match info { + SourceInfo::Original { + file_id: mapped_id, .. + } => { assert_eq!(mapped_id, file_id); } _ => panic!("Expected Original mapping"), @@ -239,7 +230,7 @@ mod tests { }, }; - let info = SourceInfo::original(file_id, range); + let info = SourceInfo::from_range(file_id, range); let json = serde_json::to_string(&info).unwrap(); let deserialized: SourceInfo = serde_json::from_str(&json).unwrap(); @@ -261,16 +252,22 @@ mod tests { column: 100, }, }; - let parent = SourceInfo::original(file_id, parent_range); + let parent = SourceInfo::from_range(file_id, parent_range); let substring = SourceInfo::substring(parent, 10, 20); - assert_eq!(substring.range.start.offset, 0); - assert_eq!(substring.range.end.offset, 10); // length = 20 - 10 = 10 - - match substring.mapping { - SourceMapping::Substring { offset, .. } => { - assert_eq!(offset, 10); + assert_eq!(substring.start_offset(), 10); + assert_eq!(substring.end_offset(), 20); + assert_eq!(substring.length(), 10); + + match substring { + SourceInfo::Substring { + start_offset, + end_offset, + .. 
+ } => { + assert_eq!(start_offset, 10); + assert_eq!(end_offset, 20); } _ => panic!("Expected Substring mapping"), } @@ -281,7 +278,7 @@ mod tests { let file_id1 = FileId(0); let file_id2 = FileId(1); - let info1 = SourceInfo::original( + let info1 = SourceInfo::from_range( file_id1, Range { start: Location { @@ -297,7 +294,7 @@ mod tests { }, ); - let info2 = SourceInfo::original( + let info2 = SourceInfo::from_range( file_id2, Range { start: Location { @@ -315,11 +312,12 @@ mod tests { let concat = SourceInfo::concat(vec![(info1, 10), (info2, 15)]); - assert_eq!(concat.range.start.offset, 0); - assert_eq!(concat.range.end.offset, 25); // 10 + 15 + assert_eq!(concat.start_offset(), 0); + assert_eq!(concat.end_offset(), 25); // 10 + 15 + assert_eq!(concat.length(), 25); - match concat.mapping { - SourceMapping::Concat { pieces } => { + match concat { + SourceInfo::Concat { pieces } => { assert_eq!(pieces.len(), 2); assert_eq!(pieces[0].offset_in_concat, 0); assert_eq!(pieces[0].length, 10); @@ -330,113 +328,12 @@ mod tests { } } - #[test] - fn test_transformed_source_info() { - let file_id = FileId(0); - let parent = SourceInfo::original( - file_id, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 50, - row: 0, - column: 50, - }, - }, - ); - - let mapping = vec![ - RangeMapping { - from_start: 0, - from_end: 10, - to_start: 0, - to_end: 10, - }, - RangeMapping { - from_start: 10, - from_end: 20, - to_start: 20, - to_end: 30, - }, - ]; - - let transformed = SourceInfo::transformed(parent, mapping.clone()); - - assert_eq!(transformed.range.start.offset, 0); - assert_eq!(transformed.range.end.offset, 20); // max from_end - - match transformed.mapping { - SourceMapping::Transformed { mapping: m, .. } => { - assert_eq!(m, mapping); - } - _ => panic!("Expected Transformed mapping"), - } - } - - #[test] - fn test_nested_transformations() { - let file_id = FileId(0); - let original = SourceInfo::original( - file_id, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 100, - row: 0, - column: 100, - }, - }, - ); - - // Extract a substring - let substring = SourceInfo::substring(original, 10, 50); - - // Then transform it - let transformed = SourceInfo::transformed( - substring, - vec![RangeMapping { - from_start: 0, - from_end: 10, - to_start: 0, - to_end: 10, - }], - ); - - // Verify the chain: Original -> Substring -> Transformed - match &transformed.mapping { - SourceMapping::Transformed { parent, .. 
} => match &parent.mapping { - SourceMapping::Substring { - parent: grandparent, - offset, - } => { - assert_eq!(*offset, 10); - match &grandparent.mapping { - SourceMapping::Original { file_id: id } => { - assert_eq!(*id, file_id); - } - _ => panic!("Expected Original at root"), - } - } - _ => panic!("Expected Substring as parent"), - }, - _ => panic!("Expected Transformed at top level"), - } - } - #[test] fn test_combine_two_sources() { let file_id = FileId(0); // Create two separate source info objects - let info1 = SourceInfo::original( + let info1 = SourceInfo::from_range( file_id, Range { start: Location { @@ -452,7 +349,7 @@ mod tests { }, ); - let info2 = SourceInfo::original( + let info2 = SourceInfo::from_range( file_id, Range { start: Location { @@ -472,11 +369,12 @@ mod tests { let combined = info1.combine(&info2); // Should create a Concat with total length = 10 + 10 = 20 - assert_eq!(combined.range.start.offset, 0); - assert_eq!(combined.range.end.offset, 20); + assert_eq!(combined.start_offset(), 0); + assert_eq!(combined.end_offset(), 20); + assert_eq!(combined.length(), 20); - match combined.mapping { - SourceMapping::Concat { pieces } => { + match combined { + SourceInfo::Concat { pieces } => { assert_eq!(pieces.len(), 2); assert_eq!(pieces[0].length, 10); assert_eq!(pieces[0].offset_in_concat, 0); @@ -493,7 +391,7 @@ mod tests { let file_id1 = FileId(5); let file_id2 = FileId(10); - let info1 = SourceInfo::original( + let info1 = SourceInfo::from_range( file_id1, Range { start: Location { @@ -509,7 +407,7 @@ mod tests { }, ); - let info2 = SourceInfo::original( + let info2 = SourceInfo::from_range( file_id2, Range { start: Location { @@ -528,19 +426,19 @@ mod tests { let combined = info1.combine(&info2); // Verify both sources are preserved in the Concat - match combined.mapping { - SourceMapping::Concat { pieces } => { + match combined { + SourceInfo::Concat { pieces } => { assert_eq!(pieces.len(), 2); // First piece should come from file_id1 - match &pieces[0].source_info.mapping { - SourceMapping::Original { file_id } => assert_eq!(*file_id, file_id1), + match &pieces[0].source_info { + SourceInfo::Original { file_id, .. } => assert_eq!(*file_id, file_id1), _ => panic!("Expected Original mapping for first piece"), } // Second piece should come from file_id2 - match &pieces[1].source_info.mapping { - SourceMapping::Original { file_id } => assert_eq!(*file_id, file_id2), + match &pieces[1].source_info { + SourceInfo::Original { file_id, .. 
} => assert_eq!(*file_id, file_id2), _ => panic!("Expected Original mapping for second piece"), } } @@ -565,17 +463,13 @@ mod tests { }, }; - let info = SourceInfo::original(file_id, range); + let info = SourceInfo::from_range(file_id, range); let json = serde_json::to_value(&info).unwrap(); // Verify JSON structure - assert_eq!(json["range"]["start"]["offset"], 10); - assert_eq!(json["range"]["start"]["row"], 1); - assert_eq!(json["range"]["start"]["column"], 5); - assert_eq!(json["range"]["end"]["offset"], 50); - assert_eq!(json["range"]["end"]["row"], 3); - assert_eq!(json["range"]["end"]["column"], 10); - assert_eq!(json["mapping"]["Original"]["file_id"], 0); + assert_eq!(json["Original"]["file_id"], 0); + assert_eq!(json["Original"]["start_offset"], 10); + assert_eq!(json["Original"]["end_offset"], 50); // Verify round-trip let deserialized: SourceInfo = serde_json::from_value(json).unwrap(); @@ -598,22 +492,18 @@ mod tests { column: 20, }, }; - let parent = SourceInfo::original(file_id, parent_range); + let parent = SourceInfo::from_range(file_id, parent_range); let substring = SourceInfo::substring(parent, 10, 30); let json = serde_json::to_value(&substring).unwrap(); // Verify JSON structure - assert_eq!(json["range"]["start"]["offset"], 0); - assert_eq!(json["range"]["end"]["offset"], 20); // length = 30 - 10 = 20 - assert_eq!(json["mapping"]["Substring"]["offset"], 10); + assert_eq!(json["Substring"]["start_offset"], 10); + assert_eq!(json["Substring"]["end_offset"], 30); // Verify parent is serialized (with Rc, it's a full copy in JSON) - assert!(json["mapping"]["Substring"]["parent"].is_object()); - assert_eq!( - json["mapping"]["Substring"]["parent"]["mapping"]["Original"]["file_id"], - 0 - ); + assert!(json["Substring"]["parent"].is_object()); + assert_eq!(json["Substring"]["parent"]["Original"]["file_id"], 0); // Verify round-trip let deserialized: SourceInfo = serde_json::from_value(json).unwrap(); @@ -638,7 +528,7 @@ mod tests { column: 0, }, }; - let file_info = SourceInfo::original(file_id, file_range); + let file_info = SourceInfo::from_range(file_id, file_range); // Level 2: YAML frontmatter (substring of file) let yaml_info = SourceInfo::substring(file_info, 4, 150); @@ -649,14 +539,11 @@ mod tests { let json = serde_json::to_value(&value_info).unwrap(); // Verify nested structure - assert_eq!(json["mapping"]["Substring"]["offset"], 20); + assert_eq!(json["Substring"]["start_offset"], 20); + assert_eq!(json["Substring"]["end_offset"], 35); + assert_eq!(json["Substring"]["parent"]["Substring"]["start_offset"], 4); assert_eq!( - json["mapping"]["Substring"]["parent"]["mapping"]["Substring"]["offset"], - 4 - ); - assert_eq!( - json["mapping"]["Substring"]["parent"]["mapping"]["Substring"]["parent"]["mapping"]["Original"] - ["file_id"], + json["Substring"]["parent"]["Substring"]["parent"]["Original"]["file_id"], 0 ); @@ -671,7 +558,7 @@ mod tests { let file_id1 = FileId(0); let file_id2 = FileId(1); - let info1 = SourceInfo::original( + let info1 = SourceInfo::from_range( file_id1, Range { start: Location { @@ -687,7 +574,7 @@ mod tests { }, ); - let info2 = SourceInfo::original( + let info2 = SourceInfo::from_range( file_id2, Range { start: Location { @@ -707,127 +594,32 @@ mod tests { let json = serde_json::to_value(&combined).unwrap(); // Verify JSON structure - assert!(json["mapping"]["Concat"]["pieces"].is_array()); - let pieces = json["mapping"]["Concat"]["pieces"].as_array().unwrap(); + assert!(json["Concat"]["pieces"].is_array()); + let pieces = 
json["Concat"]["pieces"].as_array().unwrap(); assert_eq!(pieces.len(), 2); // First piece assert_eq!(pieces[0]["offset_in_concat"], 0); assert_eq!(pieces[0]["length"], 10); - assert_eq!( - pieces[0]["source_info"]["mapping"]["Original"]["file_id"], - 0 - ); + assert_eq!(pieces[0]["source_info"]["Original"]["file_id"], 0); // Second piece assert_eq!(pieces[1]["offset_in_concat"], 10); assert_eq!(pieces[1]["length"], 10); - assert_eq!( - pieces[1]["source_info"]["mapping"]["Original"]["file_id"], - 1 - ); + assert_eq!(pieces[1]["source_info"]["Original"]["file_id"], 1); // Verify round-trip let deserialized: SourceInfo = serde_json::from_value(json).unwrap(); assert_eq!(combined, deserialized); } - /// Test JSON serialization of Transformed mapping - #[test] - fn test_json_serialization_transformed() { - use crate::RangeMapping; - - let file_id = FileId(0); - let parent = SourceInfo::original( - file_id, - Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 20, - row: 0, - column: 20, - }, - }, - ); - - // Create a transformed source with range mappings - let mappings = vec![ - RangeMapping { - from_start: 0, - from_end: 5, - to_start: 0, - to_end: 5, - }, - RangeMapping { - from_start: 5, - from_end: 10, - to_start: 10, - to_end: 15, - }, - ]; - - let transformed = SourceInfo { - range: Range { - start: Location { - offset: 0, - row: 0, - column: 0, - }, - end: Location { - offset: 10, - row: 0, - column: 10, - }, - }, - mapping: SourceMapping::Transformed { - parent: Rc::new(parent), - mapping: mappings.clone(), - }, - }; - - let json = serde_json::to_value(&transformed).unwrap(); - - // Verify JSON structure - assert!(json["mapping"]["Transformed"]["mapping"].is_array()); - let json_mappings = json["mapping"]["Transformed"]["mapping"] - .as_array() - .unwrap(); - assert_eq!(json_mappings.len(), 2); - - // Verify first mapping - assert_eq!(json_mappings[0]["from_start"], 0); - assert_eq!(json_mappings[0]["from_end"], 5); - assert_eq!(json_mappings[0]["to_start"], 0); - assert_eq!(json_mappings[0]["to_end"], 5); - - // Verify second mapping - assert_eq!(json_mappings[1]["from_start"], 5); - assert_eq!(json_mappings[1]["from_end"], 10); - assert_eq!(json_mappings[1]["to_start"], 10); - assert_eq!(json_mappings[1]["to_end"], 15); - - // Verify parent is serialized - assert_eq!( - json["mapping"]["Transformed"]["parent"]["mapping"]["Original"]["file_id"], - 0 - ); - - // Verify round-trip - let deserialized: SourceInfo = serde_json::from_value(json).unwrap(); - assert_eq!(transformed, deserialized); - } - /// Test JSON serialization of complex nested structure (real-world example) #[test] fn test_json_serialization_complex_nested() { let file_id = FileId(0); // Simulate a .qmd file structure - let qmd_file = SourceInfo::original( + let qmd_file = SourceInfo::from_range( file_id, Range { start: Location { @@ -859,7 +651,7 @@ mod tests { // Verify this complex structure serializes assert!(json.is_object()); - assert!(json["mapping"]["Concat"].is_object()); + assert!(json["Concat"].is_object()); // Verify round-trip let deserialized: SourceInfo = serde_json::from_value(json).unwrap(); diff --git a/crates/quarto-yaml/claude-notes/implementation-plan.md b/crates/quarto-yaml/claude-notes/implementation-plan.md index 984ba83..cc06cc3 100644 --- a/crates/quarto-yaml/claude-notes/implementation-plan.md +++ b/crates/quarto-yaml/claude-notes/implementation-plan.md @@ -2,7 +2,7 @@ ## Overview -This crate implements `YamlWithSourceInfo`, a data structure that 
wraps `yaml-rust2::Yaml` with source location tracking. This uses the **owned data approach** as decided in the design discussion (see `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/session-logs/2025-10-13-yaml-lifetime-vs-owned-discussion.md`). +This crate implements `YamlWithSourceInfo`, a data structure that wraps `yaml-rust2::Yaml` with source location tracking. ## Architecture Decision: Owned Data @@ -159,9 +159,3 @@ impl MarkedEventReceiver for YamlBuilder { 3. **Unified SourceInfo** - Replace with project-wide SourceInfo type 4. **YAML tags** - Support for !expr and custom tags 5. **Multi-document** - Support YAML streams - -## References - -- Design document: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/yaml-with-source-info-design.md` -- Session log: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/session-logs/2025-10-13-yaml-lifetime-vs-owned-discussion.md` -- rust-analyzer patterns: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/rust-analyzer-owned-data-patterns.md` diff --git a/crates/quarto-yaml/src/error.rs b/crates/quarto-yaml/src/error.rs index 842fb1a..a1e5fb5 100644 --- a/crates/quarto-yaml/src/error.rs +++ b/crates/quarto-yaml/src/error.rs @@ -30,38 +30,31 @@ impl fmt::Display for Error { match self { Error::ParseError { message, location } => { write!(f, "Parse error: {}", message)?; - if let Some(loc) = location { - // Display with 1-indexed row/column - write!( - f, - " at {}:{}", - loc.range.start.row + 1, - loc.range.start.column + 1 - )?; + // TODO: Proper location display requires SourceContext to map offsets to row/column. + // For now, we only show the error message without location details. + // To fix: refactor Error type to carry SourceContext or resolve locations before creating errors. + if let Some(_loc) = location { + // Location information available but cannot display without SourceContext } Ok(()) } Error::UnexpectedEof { location } => { write!(f, "Unexpected end of input")?; - if let Some(loc) = location { - write!( - f, - " at {}:{}", - loc.range.start.row + 1, - loc.range.start.column + 1 - )?; + // TODO: Proper location display requires SourceContext to map offsets to row/column. + // For now, we only show the error message without location details. + // To fix: refactor Error type to carry SourceContext or resolve locations before creating errors. + if let Some(_loc) = location { + // Location information available but cannot display without SourceContext } Ok(()) } Error::InvalidStructure { message, location } => { write!(f, "Invalid YAML structure: {}", message)?; - if let Some(loc) = location { - write!( - f, - " at {}:{}", - loc.range.start.row + 1, - loc.range.start.column + 1 - )?; + // TODO: Proper location display requires SourceContext to map offsets to row/column. + // For now, we only show the error message without location details. + // To fix: refactor Error type to carry SourceContext or resolve locations before creating errors. + if let Some(_loc) = location { + // Location information available but cannot display without SourceContext } Ok(()) } diff --git a/crates/quarto-yaml/src/lib.rs b/crates/quarto-yaml/src/lib.rs index ae9d644..98f56c3 100644 --- a/crates/quarto-yaml/src/lib.rs +++ b/crates/quarto-yaml/src/lib.rs @@ -28,7 +28,7 @@ //! let yaml = parse(content).unwrap(); //! // Access with source location tracking //! if let Some(title) = yaml.get_hash_value("title") { -//! println!("Title at offset {}", title.source_info.range.start.offset); +//! 
println!("Title at offset {}", title.source_info.start_offset()); //! } //! ``` diff --git a/crates/quarto-yaml/src/parser.rs b/crates/quarto-yaml/src/parser.rs index 7218099..f990c41 100644 --- a/crates/quarto-yaml/src/parser.rs +++ b/crates/quarto-yaml/src/parser.rs @@ -38,7 +38,7 @@ pub fn parse(content: &str) -> Result { /// /// let yaml = parse_file("title: My Document", "config.yaml").unwrap(); /// // Filename tracking will be added in a future update -/// assert!(yaml.source_info.range.end.offset > 0); +/// assert!(yaml.source_info.end_offset() > 0); /// ``` /// /// # Errors @@ -67,7 +67,7 @@ pub fn parse_file(content: &str, filename: &str) -> Result { /// use quarto_source_map::{FileId, Location, Range}; /// /// // Create parent source info for a .qmd file -/// let parent = SourceInfo::original( +/// let parent = SourceInfo::from_range( /// FileId(1), /// Range { /// start: Location { offset: 0, row: 0, column: 0 }, @@ -107,7 +107,7 @@ fn parse_impl( // Create SourceInfo for the entire file content use quarto_source_map::{Location, Range}; - SourceInfo::original( + SourceInfo::from_range( file_id, Range { start: Location { @@ -135,6 +135,52 @@ fn parse_impl( builder.result() } +/// Helper function to create a contiguous span from start to end positions. +/// This is used for entry_span which should cover from key start to value end. +fn create_contiguous_span(start_info: &SourceInfo, end_info: &SourceInfo) -> SourceInfo { + // Extract the actual start and end offsets, handling the different SourceInfo variants + match (start_info, end_info) { + ( + SourceInfo::Original { + file_id: start_file, + start_offset: start, + .. + }, + SourceInfo::Original { + file_id: end_file, + end_offset: end, + .. + }, + ) => { + // Both are Original from the same file - create a single Original span + assert_eq!( + start_file, end_file, + "Key and value must be from the same file" + ); + SourceInfo::original(*start_file, *start, *end) + } + ( + SourceInfo::Substring { + parent: start_parent, + start_offset: start, + .. + }, + SourceInfo::Substring { + end_offset: end, .. + }, + ) => { + // Both are Substrings - they should have the same parent + // Use the first parent (they should be equivalent even if not the same Rc) + SourceInfo::substring((**start_parent).clone(), *start, *end) + } + _ => { + // Mixed types or Concat - fall back to combine which creates a Concat + // This shouldn't happen in normal YAML parsing but handle it gracefully + start_info.combine(end_info) + } + } +} + /// Builder that implements MarkedEventReceiver to construct YamlWithSourceInfo. 
struct YamlBuilder<'a> { /// The source text being parsed (reserved for future use in accurate scalar length computation) @@ -224,7 +270,7 @@ impl<'a> YamlBuilder<'a> { let start_row = marker.line(); // yaml-rust2 uses 0-based let start_column = marker.col(); // yaml-rust2 uses 0-based - SourceInfo::original( + SourceInfo::from_range( quarto_source_map::FileId(0), // Dummy FileId for now Range { start: Location { @@ -345,14 +391,8 @@ impl<'a> MarkedEventReceiver for YamlBuilder<'a> { let value_span = value.source_info.clone(); // Entry span from key start to value end - use quarto_source_map::Range; - let entry_span = SourceInfo::original( - quarto_source_map::FileId(0), // Dummy FileId - Range { - start: key_span.range.start.clone(), - end: value_span.range.end.clone(), - }, - ); + // Create a contiguous span (not a Concat) from key start to value end + let entry_span = create_contiguous_span(&key_span, &value_span); hash_entries.push(YamlHashEntry::new( key.clone(), @@ -497,21 +537,21 @@ project: // Check that source info is present // Note: row/column are 0-indexed in the new system - assert!(yaml.source_info.range.start.offset < yaml.source_info.range.end.offset); + assert!(yaml.source_info.start_offset() < yaml.source_info.end_offset()); let title = yaml.get_hash_value("title").unwrap(); // Verify the title value has a valid range - assert!(title.source_info.range.start.offset < title.source_info.range.end.offset); + assert!(title.source_info.start_offset() < title.source_info.end_offset()); } #[test] fn test_parse_with_filename() { let yaml = parse_file("title: Test", "config.yaml").unwrap(); - assert!(yaml.source_info.range.end.offset > 0); + assert!(yaml.source_info.end_offset() > 0); // Verify that we're now using Substring mapping for files - match &yaml.source_info.mapping { - quarto_source_map::SourceMapping::Substring { .. } => { + match &yaml.source_info { + SourceInfo::Substring { .. } => { // Expected: Substring mapping to parent file } _ => panic!("Expected Substring mapping for file parsing"), @@ -523,7 +563,7 @@ project: use quarto_source_map::{FileId, Location, Range}; // Simulate extracting YAML from a .qmd file at offset 100-150 - let parent = SourceInfo::original( + let parent = SourceInfo::from_range( FileId(42), Range { start: Location { @@ -543,14 +583,11 @@ project: let yaml = parse_with_parent(yaml_content, parent).unwrap(); // Verify root has Substring mapping - match &yaml.source_info.mapping { - quarto_source_map::SourceMapping::Substring { - parent: p, - offset: _, - } => { + match &yaml.source_info { + SourceInfo::Substring { parent: p, .. } => { // Parent should point to our original parent - match &p.mapping { - quarto_source_map::SourceMapping::Original { file_id } => { + match p.as_ref() { + SourceInfo::Original { file_id, .. } => { assert_eq!(file_id.0, 42); } _ => panic!("Expected parent to have Original mapping"), @@ -565,7 +602,7 @@ project: use quarto_source_map::{FileId, Location, Range}; // Parent file - let parent = SourceInfo::original( + let parent = SourceInfo::from_range( FileId(1), Range { start: Location { @@ -602,30 +639,15 @@ project: .expect("authors key not found"); // All should have Substring mappings - assert!(matches!( - project.source_info.mapping, - quarto_source_map::SourceMapping::Substring { .. } - )); - assert!(matches!( - title.source_info.mapping, - quarto_source_map::SourceMapping::Substring { .. } - )); - assert!(matches!( - authors.source_info.mapping, - quarto_source_map::SourceMapping::Substring { .. 
} - )); + assert!(matches!(project.source_info, SourceInfo::Substring { .. })); + assert!(matches!(title.source_info, SourceInfo::Substring { .. })); + assert!(matches!(authors.source_info, SourceInfo::Substring { .. })); // Array elements should also have Substring mappings if let Some(items) = authors.as_array() { assert_eq!(items.len(), 2); - assert!(matches!( - items[0].source_info.mapping, - quarto_source_map::SourceMapping::Substring { .. } - )); - assert!(matches!( - items[1].source_info.mapping, - quarto_source_map::SourceMapping::Substring { .. } - )); + assert!(matches!(items[0].source_info, SourceInfo::Substring { .. })); + assert!(matches!(items[1].source_info, SourceInfo::Substring { .. })); } else { panic!("Expected array for authors"); } @@ -637,7 +659,7 @@ project: // Parent document let parent_content = "---\ntitle: Test\nauthor: John\n---\n\nDocument content"; - let parent = SourceInfo::original( + let parent = SourceInfo::from_range( FileId(1), Range { start: Location { @@ -661,16 +683,16 @@ project: let title = yaml.get_hash_value("title").expect("title not found"); // Verify the title has a valid substring range - match &title.source_info.mapping { - quarto_source_map::SourceMapping::Substring { offset, .. } => { + match &title.source_info { + SourceInfo::Substring { start_offset, .. } => { // Offset should be relative to the yaml_content string - assert!(*offset < yaml_content.len()); + assert!(*start_offset < yaml_content.len()); } _ => panic!("Expected Substring mapping for title"), } // Check that range makes sense - assert!(title.source_info.range.start.offset < title.source_info.range.end.offset); + assert!(title.source_info.start_offset() < title.source_info.end_offset()); } #[test] @@ -678,8 +700,8 @@ project: // Parse without filename or parent - should use Original mapping let yaml = parse("title: Test").unwrap(); - match &yaml.source_info.mapping { - quarto_source_map::SourceMapping::Original { file_id } => { + match &yaml.source_info { + SourceInfo::Original { file_id, .. } => { assert_eq!(file_id.0, 0); // Anonymous FileId } _ => panic!("Expected Original mapping for anonymous parse"), @@ -689,13 +711,19 @@ project: /// Helper function to resolve a SourceInfo through the mapping chain to get /// the absolute offset in the original file. fn resolve_to_original_offset(info: &SourceInfo) -> (usize, quarto_source_map::FileId) { - match &info.mapping { - quarto_source_map::SourceMapping::Original { file_id } => { - (info.range.start.offset, *file_id) - } - quarto_source_map::SourceMapping::Substring { parent, offset } => { + match info { + SourceInfo::Original { + file_id, + start_offset, + .. + } => (*start_offset, *file_id), + SourceInfo::Substring { + parent, + start_offset, + .. 
+ } => { let (parent_offset, file_id) = resolve_to_original_offset(parent); - (parent_offset + offset, file_id) + (parent_offset + start_offset, file_id) } _ => panic!("Unsupported mapping type for offset resolution"), } @@ -718,13 +746,13 @@ project: // Verify key location assert_eq!(hello_entry.key.yaml.as_str(), Some("hello")); - let key_offset = hello_entry.key_span.range.start.offset; + let key_offset = hello_entry.key_span.start_offset(); let key_str = &yaml_content[key_offset..key_offset + 5]; assert_eq!(key_str, "hello", "Key location should point to 'hello'"); // Verify value location assert_eq!(hello_entry.value.yaml.as_str(), Some("world")); - let value_offset = hello_entry.value_span.range.start.offset; + let value_offset = hello_entry.value_span.start_offset(); let value_str = &yaml_content[value_offset..value_offset + 5]; assert_eq!(value_str, "world", "Value location should point to 'world'"); @@ -740,11 +768,11 @@ project: .find(|e| e.key.yaml.as_str() == Some("foo")) .expect("Should have 'foo' key"); - let foo_key_offset = foo_entry.key_span.range.start.offset; + let foo_key_offset = foo_entry.key_span.start_offset(); let foo_key_str = &yaml_content[foo_key_offset..foo_key_offset + 3]; assert_eq!(foo_key_str, "foo", "Key location should point to 'foo'"); - let bar_value_offset = foo_entry.value_span.range.start.offset; + let bar_value_offset = foo_entry.value_span.start_offset(); let bar_value_str = &yaml_content[bar_value_offset..bar_value_offset + 3]; assert_eq!(bar_value_str, "bar", "Value location should point to 'bar'"); @@ -754,7 +782,7 @@ project: .find(|e| e.key.yaml.as_str() == Some("count")) .expect("Should have 'count' key"); - let count_key_offset = count_entry.key_span.range.start.offset; + let count_key_offset = count_entry.key_span.start_offset(); let count_key_str = &yaml_content[count_key_offset..count_key_offset + 5]; assert_eq!( count_key_str, "count", @@ -762,18 +790,18 @@ project: ); assert_eq!(count_entry.value.yaml.as_i64(), Some(42)); - let count_value_offset = count_entry.value_span.range.start.offset; + let count_value_offset = count_entry.value_span.start_offset(); let count_value_str = &yaml_content[count_value_offset..count_value_offset + 2]; assert_eq!(count_value_str, "42", "Value location should point to '42'"); // Test 4: Verify entry spans include both key and value // The entry span should start at the key and end after the value assert!( - hello_entry.entry_span.range.start.offset <= key_offset, + hello_entry.entry_span.start_offset() <= key_offset, "Entry span should start at or before the key" ); assert!( - hello_entry.entry_span.range.end.offset >= value_offset + 5, + hello_entry.entry_span.end_offset() >= value_offset + 5, "Entry span should end at or after the value" ); } @@ -816,7 +844,7 @@ We used the following approach... let yaml_content = yaml_match.as_str(); // Create parent SourceInfo for the entire .qmd file - let parent = SourceInfo::original( + let parent = SourceInfo::from_range( FileId(123), // Simulated FileId for test.qmd Range { start: Location { @@ -844,23 +872,28 @@ We used the following approach... assert_eq!(title.yaml.as_str(), Some("My Research Paper")); // Verify that the title's location maps back through the substring chain - match &title.source_info.mapping { - quarto_source_map::SourceMapping::Substring { parent: p, offset } => { + match &title.source_info { + SourceInfo::Substring { + parent: p, + start_offset, + .. 
+ } => { // The offset should be within the YAML content - assert!(*offset < yaml_content.len()); + assert!(*start_offset < yaml_content.len()); // The parent should be another Substring pointing to the .qmd file - match &p.mapping { - quarto_source_map::SourceMapping::Substring { + match p.as_ref() { + SourceInfo::Substring { parent: grandparent, - offset: yaml_offset, + start_offset: yaml_offset, + .. } => { // This should point to the original .qmd file assert_eq!(*yaml_offset, yaml_start); // Grandparent should be the Original .qmd file - match &grandparent.mapping { - quarto_source_map::SourceMapping::Original { file_id } => { + match grandparent.as_ref() { + SourceInfo::Original { file_id, .. } => { assert_eq!(file_id.0, 123); } _ => panic!("Expected Original mapping for .qmd file"), @@ -883,8 +916,8 @@ We used the following approach... assert_eq!(theme.yaml.as_str(), Some("cosmo")); // The theme value should also have Substring mapping through the chain - match &theme.source_info.mapping { - quarto_source_map::SourceMapping::Substring { .. } => { + match &theme.source_info { + SourceInfo::Substring { .. } => { // Good - it has substring mapping } _ => panic!("Expected Substring mapping for deeply nested theme value"), diff --git a/crates/quarto-yaml/src/yaml_with_source_info.rs b/crates/quarto-yaml/src/yaml_with_source_info.rs index ee758a1..04dfa28 100644 --- a/crates/quarto-yaml/src/yaml_with_source_info.rs +++ b/crates/quarto-yaml/src/yaml_with_source_info.rs @@ -28,7 +28,7 @@ use yaml_rust2::Yaml; /// let yaml = parse("title: My Document").unwrap(); /// if let Some(title) = yaml.get_hash_value("title") { /// println!("Title: {:?}", title.yaml); -/// println!("Location: offset {}", title.source_info.range.start.offset); +/// println!("Location: offset {}", title.source_info.start_offset()); /// } /// ``` #[derive(Debug, Clone)] diff --git a/docs/writers/json.qmd b/docs/writers/json.qmd index 97e2438..8582545 100644 --- a/docs/writers/json.qmd +++ b/docs/writers/json.qmd @@ -14,7 +14,7 @@ The JSON output contains three main sections: "meta": { /* metadata */ }, "blocks": [ /* block elements */ ], "astContext": { - "filenames": [ /* array of source files */ ], + "files": [ /* array of source file information */ ], "sourceInfoPool": [ /* source location data */ ] } } @@ -35,9 +35,15 @@ Unlike Pandoc, `quarto-markdown-pandoc` tracks the exact source location of ever ```json { "astContext": { - "filenames": ["example.qmd"], + "files": [ + { + "name": "example.qmd", + "line_breaks": [11], + "total_length": 11 + } + ], "sourceInfoPool": [ - {"r": [0, 0, 0, 4, 0, 4], "t": 0, "d": 0} + {"r": [0, 5], "t": 0, "d": 0} ] }, "blocks": [ @@ -59,21 +65,21 @@ The `"s": 0` field means "look up source info at index 0 in the pool". 
Each entry in the `sourceInfoPool` array has this compact format:

```json
-{"r": [start_offset, start_row, start_col, end_offset, end_row, end_col], "t": type, "d": data}
+{"r": [start_offset, end_offset], "t": type, "d": data}
```

### Fields

-- **`r`** (range): 6-element array `[start_offset, start_row, start_col, end_offset, end_row, end_col]`
-  - All positions are 0-indexed
-  - `offset` is byte offset from start of source
-  - `row` and `col` are line and column numbers
+- **`r`** (range): 2-element array `[start_offset, end_offset]`
+  - Byte offsets from the start of the relevant source
+  - For Original: absolute offsets in the source file
+  - For Substring: relative offsets within the parent source
+  - Row and column information is computed on-demand from the file's line break index

- **`t`** (type): Integer indicating the source mapping type
  - `0` = Original (direct position in source file)
  - `1` = Substring (extracted from a parent source)
  - `2` = Concat (multiple sources joined together)
-  - `3` = Transformed (source that was modified with explicit mapping)

- **`d`** (data): Type-specific data (see below)

@@ -82,7 +88,7 @@ Each entry in the `sourceInfoPool` array has this compact format:

Represents text directly from a source file.

```json
-{"r": [0, 0, 0, 10, 0, 10], "t": 0, "d": 0}
+{"r": [0, 5], "t": 0, "d": 0}
```

-- **`d`**: The file ID (index into `astContext.filenames`)
+- **`d`**: The file ID (index into `astContext.files`)

@@ -94,46 +100,30 @@ Represents text directly from a source file.

Represents a substring extracted from another source.

```json
-{"r": [0, 0, 0, 5, 0, 5], "t": 1, "d": [3, 10]}
+{"r": [4, 17], "t": 1, "d": 3}
```

-- **`d`**: `[parent_id, offset]`
-  - `parent_id`: Index of the parent source in the pool
-  - `offset`: Byte offset within the parent where this substring starts
+- **`d`**: Index of the parent source in the pool
+- **`r`**: `[start_offset, end_offset]` within the parent source (relative offsets)

-**Example**: A 5-byte substring starting at byte 10 of source #3 (e.g., extracting YAML value from frontmatter).
+**Example**: A 13-byte substring from bytes 4-17 of source #3 (e.g., extracting a YAML value from frontmatter).

### Type 2: Concat

Represents multiple sources concatenated together.

```json
-{"r": [0, 0, 0, 10, 0, 10], "t": 2, "d": [[1, 0, 5], [2, 5, 5]]}
+{"r": [0, 10], "t": 2, "d": [[1, 0, 5], [2, 5, 5]]}
```

- **`d`**: Array of pieces, where each piece is `[source_info_id, offset_in_concat, length]`
  - `source_info_id`: Index of this piece's source in the pool
-  - `offset_in_concat`: Where this piece starts in the concatenated result
+  - `offset_in_concat`: Where this piece starts in the concatenated result (0-based)
  - `length`: Length of this piece in bytes
+- **`r`**: `[0, total_length]` - the concatenated result spans from 0 to the sum of all piece lengths

**Example**: Joining sources #1 (5 bytes) and #2 (5 bytes) to create a 10-byte result.

-### Type 3: Transformed
-
-Represents source text that was transformed (e.g., entity decoding, shortcode expansion) with explicit range mappings.
-
-```json
-{"r": [0, 0, 0, 8, 0, 8], "t": 3, "d": [4, [[0, 4, 0, 4], [4, 8, 6, 10]]]}
-```
-
-- **`d`**: `[parent_id, range_mappings]`
-  - `parent_id`: Index of the parent source in the pool
-  - `range_mappings`: Array of `[from_start, from_end, to_start, to_end]`
-    - `from_start`, `from_end`: Range in the transformed text (this source)
-    - `to_start`, `to_end`: Corresponding range in the parent text
-
-**Example**: 8 bytes of transformed text derived from bytes 0-4 and 6-10 of source #4.
-
## Complete Example

```json
{
@@ -152,12 +142,18 @@ Represents source text that was transformed (e.g., entity decoding, shortcode ex
     }
   ],
   "astContext": {
-    "filenames": ["example.qmd"],
+    "files": [
+      {
+        "name": "example.qmd",
+        "line_breaks": [],
+        "total_length": 11
+      }
+    ],
     "sourceInfoPool": [
-      {"r": [0, 0, 0, 5, 0, 5], "t": 0, "d": 0},
-      {"r": [5, 0, 5, 6, 0, 6], "t": 0, "d": 0},
-      {"r": [6, 0, 6, 11, 0, 11], "t": 0, "d": 0},
-      {"r": [0, 0, 0, 11, 0, 11], "t": 2, "d": [[0, 0, 5], [1, 5, 1], [2, 6, 5]]}
+      {"r": [0, 5], "t": 0, "d": 0},
+      {"r": [5, 6], "t": 0, "d": 0},
+      {"r": [6, 11], "t": 0, "d": 0},
+      {"r": [0, 11], "t": 2, "d": [[0, 0, 5], [1, 5, 1], [2, 6, 5]]}
     ]
   }
}
@@ -165,13 +161,34 @@ Represents source text that was transformed (e.g., entity decoding, shortcode ex

### Explanation

-- Pool entry 0: "Hello" at bytes 0-5
-- Pool entry 1: Space at byte 5-6
-- Pool entry 2: "world" at bytes 6-11
-- Pool entry 3: Concatenation of all three pieces
+- Pool entry 0: "Hello" at bytes 0-5 (Original)
+- Pool entry 1: Space at bytes 5-6 (Original)
+- Pool entry 2: "world" at bytes 6-11 (Original)
+- Pool entry 3: Concatenation of all three pieces (Concat)
 - The Para block references entry 3 (the full concatenated range)
 - Each inline element references its individual piece

+## File Information
+
+The `files` array in `astContext` contains information about each source file. Each entry has:
+
+```json
+{
+  "name": "example.qmd",      // Source file path
+  "line_breaks": [6, 13, 20], // Byte offsets of each newline character
+  "total_length": 25          // Total file size in bytes
+}
+```
+
+The `line_breaks` and `total_length` fields allow JSON consumers to convert byte offsets to row/column positions without needing access to the original source files.
+
+To convert a byte offset to (row, column) — see the sketch at the end of this page:
+1. Use binary search on `line_breaks` to find the insertion index for the offset
+2. The row is that insertion index, i.e., the number of line breaks that occur before the offset
+3. The column is the offset minus the start of that line, where a line starts one byte past the previous entry in `line_breaks` (or at 0 for the first line)
+
+This information is self-contained in the JSON, so consumers don't need to reopen the source files.
+
## Pandoc compatibility

For compatibility with tools expecting Pandoc JSON, either ignore the `"s"` fields and `astContext` section (that's what Pandoc will do) or remove them from the JSON object ahead of time.
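The stripping step is mechanical. Here is a minimal sketch of the second option in Rust, assuming the `serde_json` crate is available; the helper name `strip_source_info` is ours and not part of any published API:

```rust
use serde_json::Value;

/// Recursively delete every `"s"` key; `astContext` is removed separately
/// at the top level, leaving plain Pandoc-style JSON.
fn strip_source_info(value: &mut Value) {
    match value {
        Value::Object(map) => {
            map.remove("s");
            for child in map.values_mut() {
                strip_source_info(child);
            }
        }
        Value::Array(items) => items.iter_mut().for_each(strip_source_info),
        _ => {}
    }
}

fn main() {
    let mut doc: Value = serde_json::from_str(
        r#"{"blocks": [{"t": "Para", "c": [], "s": 3}], "astContext": {}}"#,
    )
    .unwrap();
    strip_source_info(&mut doc);
    if let Value::Object(map) = &mut doc {
        map.remove("astContext");
    }
    // Only Pandoc-compatible fields remain.
    assert!(doc.get("astContext").is_none());
    assert!(doc["blocks"][0].get("s").is_none());
}
```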
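Consumers that keep the source info and want absolute file offsets have to walk `Substring` parents back to an `Original` entry, accumulating the relative starts — the same idea as the `resolve_to_original_offset` test helper in the patch above. A minimal sketch with hand-rolled stand-in types (the real crates expose richer ones):

```rust
/// Minimal stand-ins for the pool entry types described above;
/// `start` corresponds to the first element of `"r"`.
enum Entry {
    /// `t = 0`: `d` is a file id; offsets are absolute in that file.
    Original { start: usize, file_id: usize },
    /// `t = 1`: `d` is a parent pool index; offsets are relative to it.
    Substring { start: usize, parent: usize },
    /// `t = 2`: `d` is `[pool_id, offset_in_concat, length]` pieces.
    Concat { pieces: Vec<(usize, usize, usize)> },
}

/// Resolve a pool entry's start to `(absolute_offset, file_id)` by
/// accumulating Substring offsets until an Original entry is reached.
fn resolve_start(pool: &[Entry], id: usize) -> Option<(usize, usize)> {
    match &pool[id] {
        Entry::Original { start, file_id } => Some((*start, *file_id)),
        Entry::Substring { start, parent } => {
            let (parent_start, file_id) = resolve_start(pool, *parent)?;
            Some((parent_start + start, file_id))
        }
        // A Concat's start maps to the start of its first piece.
        Entry::Concat { pieces } => {
            let (first, _, _) = *pieces.first()?;
            resolve_start(pool, first)
        }
    }
}

fn main() {
    let pool = vec![
        Entry::Original { start: 10, file_id: 0 },
        Entry::Substring { start: 4, parent: 0 },
        Entry::Concat { pieces: vec![(1, 0, 13)] },
    ];
    // The substring starts at absolute offset 10 + 4 = 14 in file 0.
    assert_eq!(resolve_start(&pool, 1), Some((14, 0)));
    // A Concat starts wherever its first piece starts.
    assert_eq!(resolve_start(&pool, 2), Some((14, 0)));
}
```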
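Finally, the offset-to-(row, column) conversion described under "File Information" fits in a few lines. A sketch, assuming 0-indexed rows and columns as documented; `offset_to_position` is an illustrative name, not an API:

```rust
/// Convert a byte offset into a 0-indexed (row, column) pair using a
/// file's `line_breaks` index.
fn offset_to_position(line_breaks: &[usize], offset: usize) -> (usize, usize) {
    // Row = number of newline characters strictly before `offset`
    // (the insertion index from the binary search in step 1).
    let row = line_breaks.partition_point(|&nl| nl < offset);
    // The line starts one byte past the previous newline, or at 0.
    let line_start = if row == 0 { 0 } else { line_breaks[row - 1] + 1 };
    (row, offset - line_start)
}

fn main() {
    // `line_breaks` for a file whose newlines sit at bytes 6, 13, and 20,
    // as in the example above.
    let line_breaks = [6, 13, 20];
    assert_eq!(offset_to_position(&line_breaks, 0), (0, 0)); // first byte
    assert_eq!(offset_to_position(&line_breaks, 7), (1, 0)); // start of line 2
    assert_eq!(offset_to_position(&line_breaks, 16), (2, 2)); // 3rd byte of line 3
}
```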