Skip to content

Commit b7a67b7

Browse files
committed
ParseContext -> ASTContext, preparing for bigger changes
1 parent 83d023e commit b7a67b7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+208
-165
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* ast_context.rs
3+
* Copyright (c) 2025 Posit, PBC
4+
*/
5+
6+
/// Context threaded through the parsing pipeline.
///
/// It carries information about the current parse operation and owns the
/// filename strings associated with it. The `filenames` vector is intended to
/// eventually deduplicate strings in the AST: nodes would store an index into
/// it instead of cloning the filename string.
#[derive(Debug, Clone, Default)]
pub struct ASTContext {
    /// Filenames registered with this context, in insertion order. The first
    /// entry is treated as the primary filename.
    pub filenames: Vec<String>,
}

impl ASTContext {
    /// Creates a context with no filenames registered.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a context whose first (primary) filename is `filename`.
    pub fn with_filename(filename: impl Into<String>) -> Self {
        Self {
            filenames: vec![filename.into()],
        }
    }

    /// Creates a context with no filenames registered.
    ///
    /// Behaves exactly like [`ASTContext::new`]; presumably the separate name
    /// exists so call sites can signal that the input has no backing file.
    pub fn anonymous() -> Self {
        Self::new()
    }

    /// Appends `filename` to the context and returns its index in `filenames`.
    ///
    /// No deduplication is performed: adding the same name twice produces two
    /// entries with distinct indices.
    pub fn add_filename(&mut self, filename: impl Into<String>) -> usize {
        // The new entry lands at the current length, so capture it before the
        // push instead of computing `len() - 1` afterwards.
        let index = self.filenames.len();
        self.filenames.push(filename.into());
        index
    }

    /// Returns the primary filename (the first entry), or `None` when no
    /// filename has been registered.
    ///
    /// The return type stays `Option<&String>` so callers can write
    /// `.cloned()` to obtain an owned `Option<String>`.
    pub fn primary_filename(&self) -> Option<&String> {
        self.filenames.first()
    }
}

crates/quarto-markdown-pandoc/src/pandoc/location.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2025 Posit, PBC
44
*/
55

6-
use crate::pandoc::parse_context::ParseContext;
6+
use crate::pandoc::ast_context::ASTContext;
77

88
////////////////////////////////////////////////////////////////////////////////////////////////////
99
// Source location tracking
@@ -85,11 +85,8 @@ pub fn node_source_info(node: &tree_sitter::Node) -> SourceInfo {
8585
SourceInfo::with_range(node_location(node))
8686
}
8787

88-
pub fn node_source_info_with_context(
89-
node: &tree_sitter::Node,
90-
context: &ParseContext,
91-
) -> SourceInfo {
92-
SourceInfo::new(context.filename.clone(), node_location(node))
88+
pub fn node_source_info_with_context(node: &tree_sitter::Node, context: &ASTContext) -> SourceInfo {
89+
SourceInfo::new(context.primary_filename().cloned(), node_location(node))
9390
}
9491

9592
pub fn empty_range() -> Range {

crates/quarto-markdown-pandoc/src/pandoc/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Copyright (c) 2025 Posit, PBC
44
*/
55

6+
pub mod ast_context;
67
pub mod attr;
78
pub mod block;
89
pub mod caption;
@@ -11,7 +12,6 @@ pub mod list;
1112
pub mod location;
1213
pub mod meta;
1314
pub mod pandoc;
14-
pub mod parse_context;
1515
pub mod shortcode;
1616
pub mod table;
1717
pub mod treesitter;
@@ -36,6 +36,6 @@ pub use crate::pandoc::table::{
3636
Alignment, Cell, ColWidth, Row, Table, TableBody, TableFoot, TableHead,
3737
};
3838

39+
pub use crate::pandoc::ast_context::ASTContext;
3940
pub use crate::pandoc::meta::{Meta, MetaValue, rawblock_to_meta};
40-
pub use crate::pandoc::parse_context::ParseContext;
4141
pub use crate::pandoc::treesitter::treesitter_to_pandoc;

crates/quarto-markdown-pandoc/src/pandoc/parse_context.rs

Lines changed: 0 additions & 27 deletions
This file was deleted.

crates/quarto-markdown-pandoc/src/pandoc/treesitter.rs

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ use crate::pandoc::treesitter_utils::text_helpers::*;
4646
use crate::pandoc::treesitter_utils::thematic_break::process_thematic_break;
4747
use crate::pandoc::treesitter_utils::uri_autolink::process_uri_autolink;
4848

49+
use crate::pandoc::ast_context::ASTContext;
4950
use crate::pandoc::block::{Block, Blocks, BulletList, OrderedList, Paragraph, Plain, RawBlock};
5051
use crate::pandoc::inline::{
5152
Delete, EditComment, Emph, Highlight, Inline, Insert, Note, RawInline, Space, Str, Strikeout,
@@ -57,7 +58,6 @@ use crate::pandoc::location::{
5758
node_source_info_with_context,
5859
};
5960
use crate::pandoc::pandoc::Pandoc;
60-
use crate::pandoc::parse_context::ParseContext;
6161
use core::panic;
6262
use once_cell::sync::Lazy;
6363
use regex::Regex;
@@ -70,7 +70,7 @@ use treesitter_utils::pandocnativeintermediate::PandocNativeIntermediate;
7070
fn process_list(
7171
node: &tree_sitter::Node,
7272
children: Vec<(String, PandocNativeIntermediate)>,
73-
context: &ParseContext,
73+
context: &ASTContext,
7474
) -> PandocNativeIntermediate {
7575
// a list is loose if it has at least one loose item
7676
// an item is loose if
@@ -222,9 +222,9 @@ fn process_list(
222222
}
223223

224224
fn process_list_item(
225-
node: &tree_sitter::Node,
225+
list_item_node: &tree_sitter::Node,
226226
children: Vec<(String, PandocNativeIntermediate)>,
227-
context: &ParseContext,
227+
context: &ASTContext,
228228
) -> PandocNativeIntermediate {
229229
let mut list_attr: Option<ListAttributes> = None;
230230
let children = children
@@ -250,19 +250,23 @@ fn process_list_item(
250250
}
251251
match child {
252252
PandocNativeIntermediate::IntermediateBlock(block) => Some(block),
253-
PandocNativeIntermediate::IntermediateMetadataString(text, range) => {
253+
PandocNativeIntermediate::IntermediateMetadataString(text, _range) => {
254254
// for now we assume it's metadata and emit it as a rawblock
255255
Some(Block::RawBlock(RawBlock {
256256
format: "quarto_minus_metadata".to_string(),
257257
text,
258-
source_info: SourceInfo::with_range(range),
258+
source_info: node_source_info_with_context(list_item_node, context),
259259
}))
260260
}
261261
_ => None,
262262
}
263263
})
264264
.collect();
265-
PandocNativeIntermediate::IntermediateListItem(children, node_location(node), list_attr)
265+
PandocNativeIntermediate::IntermediateListItem(
266+
children,
267+
node_location(list_item_node),
268+
list_attr,
269+
)
266270
}
267271

268272
// Macro for simple emphasis-like inline processing
@@ -290,19 +294,19 @@ fn process_native_inline<T: Write>(
290294
whitespace_re: &Regex,
291295
inline_buf: &mut T,
292296
node_text_fn: impl Fn() -> String,
293-
context: &ParseContext,
297+
context: &ASTContext,
294298
) -> Inline {
295299
match child {
296300
PandocNativeIntermediate::IntermediateInline(inline) => inline,
297301
PandocNativeIntermediate::IntermediateBaseText(text, range) => {
298302
if let Some(_) = whitespace_re.find(&text) {
299303
Inline::Space(Space {
300-
source_info: SourceInfo::new(context.filename.clone(), range),
304+
source_info: SourceInfo::new(context.primary_filename().cloned(), range),
301305
})
302306
} else {
303307
Inline::Str(Str {
304308
text: apply_smart_quotes(text),
305-
source_info: SourceInfo::new(context.filename.clone(), range),
309+
source_info: SourceInfo::new(context.primary_filename().cloned(), range),
306310
})
307311
}
308312
}
@@ -349,7 +353,7 @@ fn process_native_inlines<T: Write>(
349353
children: Vec<(String, PandocNativeIntermediate)>,
350354
whitespace_re: &Regex,
351355
inlines_buf: &mut T,
352-
context: &ParseContext,
356+
context: &ASTContext,
353357
) -> Vec<Inline> {
354358
let mut inlines: Vec<Inline> = Vec::new();
355359
for (_, child) in children {
@@ -361,12 +365,12 @@ fn process_native_inlines<T: Write>(
361365
PandocNativeIntermediate::IntermediateBaseText(text, range) => {
362366
if let Some(_) = whitespace_re.find(&text) {
363367
inlines.push(Inline::Space(Space {
364-
source_info: SourceInfo::new(context.filename.clone(), range),
368+
source_info: SourceInfo::new(context.primary_filename().cloned(), range),
365369
}))
366370
} else {
367371
inlines.push(Inline::Str(Str {
368372
text,
369-
source_info: SourceInfo::new(context.filename.clone(), range),
373+
source_info: SourceInfo::new(context.primary_filename().cloned(), range),
370374
}))
371375
}
372376
}
@@ -388,7 +392,7 @@ fn native_visitor<T: Write>(
388392
node: &tree_sitter::Node,
389393
children: Vec<(String, PandocNativeIntermediate)>,
390394
input_bytes: &[u8],
391-
context: &ParseContext,
395+
context: &ASTContext,
392396
) -> PandocNativeIntermediate {
393397
// TODO What sounded like a good idea with two buffers
394398
// is becoming annoying now...
@@ -436,8 +440,8 @@ fn native_visitor<T: Write>(
436440
| "code_content"
437441
| "latex_content"
438442
| "text_base" => create_base_text_from_node_text(node, input_bytes),
439-
"document" => process_document(children, context),
440-
"section" => process_section(children, context),
443+
"document" => process_document(node, children, context),
444+
"section" => process_section(node, children, context),
441445
"paragraph" => process_paragraph(node, children, context),
442446
"indented_code_block" => {
443447
process_indented_code_block(node, children, input_bytes, &indent_re, context)
@@ -453,13 +457,17 @@ fn native_visitor<T: Write>(
453457
"key_value_value" => string_as_base_text(),
454458
"link_title" => process_link_title(node, input_bytes, context),
455459
"link_text" => PandocNativeIntermediate::IntermediateInlines(native_inlines(children)),
456-
"image" => {
457-
treesitter_utils::image::process_image(&mut image_buf, node_text, children, context)
458-
}
460+
"image" => treesitter_utils::image::process_image(
461+
node,
462+
&mut image_buf,
463+
node_text,
464+
children,
465+
context,
466+
),
459467
"image_description" => {
460468
PandocNativeIntermediate::IntermediateInlines(native_inlines(children))
461469
}
462-
"inline_link" => process_inline_link(&mut link_buf, node_text, children, context),
470+
"inline_link" => process_inline_link(node, &mut link_buf, node_text, children, context),
463471
"key_value_specifier" => process_key_value_specifier(buf, children, context),
464472
"raw_specifier" => process_raw_specifier(node, input_bytes, context),
465473
"emphasis" => emphasis_inline!(
@@ -665,7 +673,7 @@ pub fn treesitter_to_pandoc<T: Write>(
665673
buf: &mut T,
666674
tree: &tree_sitter_qmd::MarkdownTree,
667675
input_bytes: &[u8],
668-
context: &ParseContext,
676+
context: &ASTContext,
669677
) -> Result<Pandoc, Vec<String>> {
670678
let result = bottomup_traverse_concrete_tree(
671679
&mut tree.walk(),

crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/attribute.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
* Copyright (c) 2025 Posit, PBC
44
*/
55

6-
use crate::pandoc::parse_context::ParseContext;
6+
use crate::pandoc::ast_context::ASTContext;
77
use crate::pandoc::treesitter_utils::pandocnativeintermediate::PandocNativeIntermediate;
88

99
/// Process attribute node to extract commonmark attributes
1010
pub fn process_attribute(
1111
children: Vec<(String, PandocNativeIntermediate)>,
12-
context: &ParseContext,
12+
_context: &ASTContext,
1313
) -> PandocNativeIntermediate {
1414
for (node, child) in children {
1515
match child {

crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/atx_heading.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
* Copyright (c) 2025 Posit, PBC
77
*/
88

9+
use crate::pandoc::ast_context::ASTContext;
910
use crate::pandoc::attr::Attr;
1011
use crate::pandoc::block::{Block, Header};
1112
use crate::pandoc::inline::Inline;
12-
use crate::pandoc::location::{SourceInfo, node_source_info_with_context};
13-
use crate::pandoc::parse_context::ParseContext;
13+
use crate::pandoc::location::node_source_info_with_context;
1414
use std::collections::HashMap;
1515
use std::io::Write;
1616

@@ -20,7 +20,7 @@ pub fn process_atx_heading<T: Write>(
2020
buf: &mut T,
2121
node: &tree_sitter::Node,
2222
children: Vec<(String, PandocNativeIntermediate)>,
23-
context: &ParseContext,
23+
context: &ASTContext,
2424
) -> PandocNativeIntermediate {
2525
let mut level = 0;
2626
let mut content: Vec<Inline> = Vec::new();

crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/backslash_escape.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33
* Copyright (c) 2025 Posit, PBC
44
*/
55

6+
use crate::pandoc::ast_context::ASTContext;
67
use crate::pandoc::location::node_source_info_with_context;
7-
use crate::pandoc::parse_context::ParseContext;
88
use crate::pandoc::treesitter_utils::pandocnativeintermediate::PandocNativeIntermediate;
99

1010
/// Process a backslash escape by removing the leading backslash
1111
pub fn process_backslash_escape(
1212
node: &tree_sitter::Node,
1313
input_bytes: &[u8],
14-
context: &ParseContext,
14+
context: &ASTContext,
1515
) -> PandocNativeIntermediate {
1616
// This is a backslash escape, we need to extract the content
1717
// by removing the backslash

crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/block_quote.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
* Copyright (c) 2025 Posit, PBC
77
*/
88

9+
use crate::pandoc::ast_context::ASTContext;
910
use crate::pandoc::block::{Block, BlockQuote, Blocks, RawBlock};
1011
use crate::pandoc::location::{SourceInfo, node_source_info_with_context};
11-
use crate::pandoc::parse_context::ParseContext;
1212
use std::io::Write;
1313

1414
use super::pandocnativeintermediate::PandocNativeIntermediate;
@@ -17,7 +17,7 @@ pub fn process_block_quote<T: Write>(
1717
buf: &mut T,
1818
node: &tree_sitter::Node,
1919
children: Vec<(String, PandocNativeIntermediate)>,
20-
context: &ParseContext,
20+
context: &ASTContext,
2121
) -> PandocNativeIntermediate {
2222
let mut content: Blocks = Vec::new();
2323
for (node_type, child) in children {

crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/citation.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,17 @@
66
* Copyright (c) 2025 Posit, PBC
77
*/
88

9+
use crate::pandoc::ast_context::ASTContext;
910
use crate::pandoc::inline::{Citation, CitationMode, Cite, Inline, Str};
1011
use crate::pandoc::location::node_source_info_with_context;
11-
use crate::pandoc::parse_context::ParseContext;
1212

1313
use super::pandocnativeintermediate::PandocNativeIntermediate;
1414

1515
pub fn process_citation<F>(
1616
node: &tree_sitter::Node,
1717
node_text: F,
1818
children: Vec<(String, PandocNativeIntermediate)>,
19-
context: &ParseContext,
19+
context: &ASTContext,
2020
) -> PandocNativeIntermediate
2121
where
2222
F: Fn() -> String,

0 commit comments

Comments
 (0)