Skip to content

Commit a0921b1

Browse files
committed
defn list checking, glob expansion
1 parent 9c85a1b commit a0921b1

File tree

10 files changed

+607
-3
lines changed

10 files changed

+607
-3
lines changed

crates/qmd-syntax-helper/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ regex = "1.10"
2424
colored = "2.1"
2525
quarto-markdown-pandoc.workspace = true
2626
include_dir = "0.7"
27+
serde = { version = "1.0", features = ["derive"] }
28+
serde_json = "1.0"
29+
glob = "0.3"
2730

2831
[lints]
2932
workspace = true
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
-- Lua filter to convert Pandoc DefinitionList AST nodes to div-based definition lists
2+
-- This produces output in the definition-list div syntax used by quarto-markdown
3+
4+
-- Refuse to run unless pandoc is at least 2.11. If the PANDOC_VERSION global
-- or its must_be_at_least method is unavailable, raise the error ourselves;
-- otherwise let pandoc perform the comparison.
if not (PANDOC_VERSION and PANDOC_VERSION.must_be_at_least) then
  error("pandoc version >=2.11 is required")
end
PANDOC_VERSION:must_be_at_least("2.11")
9+
10+
-- Rewrite one DefinitionList node as a Div carrying the `definition-list`
-- class. The Div wraps a single bullet list with one item per term; each item
-- holds the term as a paragraph followed by a nested bullet list whose items
-- are that term's definitions.
local function definition_list_to_div(def_list)
  local entries = {}

  -- def_list.content is a sequence of {term, definitions} pairs: `term` is a
  -- list of inlines and `definitions` is a list of block lists.
  for _, pair in ipairs(def_list.content) do
    local term_inlines, definitions = pair[1], pair[2]

    local nested_items = {}
    for _, definition in ipairs(definitions) do
      -- Work on copies so the incoming AST is left untouched.
      local copied = pandoc.Blocks({})
      for _, blk in ipairs(definition) do
        table.insert(copied, blk:clone())
      end

      -- An empty definition still gets one (empty) paragraph as its content.
      if #copied == 0 then
        copied = pandoc.Blocks({pandoc.Para({})})
      end

      table.insert(nested_items, copied)
    end

    -- One outer item: the term paragraph, then its definitions.
    table.insert(entries, {
      pandoc.Para(term_inlines),
      pandoc.BulletList(nested_items),
    })
  end

  return pandoc.Div(
    {pandoc.BulletList(entries)},
    pandoc.Attr('', {'definition-list'}, {})
  )
end
59+
60+
-- Filter table returned to pandoc: DefinitionList elements are handled by
-- definition_list_to_div.
return {{DefinitionList = definition_list_to_div}}
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
use anyhow::{Context, Result};
2+
use colored::Colorize;
3+
use regex::Regex;
4+
use std::path::Path;
5+
use std::process::{Command, Stdio};
6+
7+
use crate::utils::file_io::{read_file, write_file};
8+
use crate::utils::resources::ResourceManager;
9+
use quarto_markdown_pandoc::readers::json;
10+
use quarto_markdown_pandoc::writers::qmd;
11+
12+
/// Finds definition lists in document text and converts them to the
/// div-based definition-list syntax.
pub struct DefinitionListConverter {
13+
/// Matches a definition item line: a leading `:` followed by whitespace.
def_item_regex: Regex,
14+
/// Resource lookup; `convert_list` uses it to obtain the Lua filter path.
resources: ResourceManager,
15+
}
16+
17+
/// A definition list located in a document, together with its line span.
///
/// `start_line` and `end_line` are zero-based indices into the document's
/// lines (as produced by `str::lines`), and the span includes both ends.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DefinitionList {
    /// The lines of the span joined with `\n`.
    pub text: String,
    /// Zero-based index of the first line of the span.
    pub start_line: usize,
    /// Zero-based index of the last line of the span (inclusive).
    pub end_line: usize,
}
23+
24+
impl DefinitionListConverter {
25+
pub fn new() -> Result<Self> {
26+
Ok(Self {
27+
// Matches definition list items that start with `:` followed by spaces
28+
def_item_regex: Regex::new(r"^:\s+").unwrap(),
29+
resources: ResourceManager::new()?,
30+
})
31+
}
32+
33+
/// Find all definition lists in the content
34+
pub fn find_definition_lists(&self, content: &str) -> Vec<DefinitionList> {
35+
let lines: Vec<&str> = content.lines().collect();
36+
let mut lists = Vec::new();
37+
let mut i = 0;
38+
39+
while i < lines.len() {
40+
let line = lines[i];
41+
42+
// Look for a definition item (line starting with `: `)
43+
// But not div fences (`::`or `:::`)
44+
if self.def_item_regex.is_match(line) && !line.starts_with("::") {
45+
// Found a definition item, now scan backwards to find the term
46+
let mut start_idx = i;
47+
48+
// Skip back over any blank lines
49+
while start_idx > 0 && lines[start_idx - 1].trim().is_empty() {
50+
start_idx -= 1;
51+
}
52+
53+
// The line before the blank lines should be the term
54+
if start_idx > 0 {
55+
start_idx -= 1;
56+
}
57+
58+
// Now scan forward to collect all terms and definitions in this list
59+
let mut end_idx = i;
60+
i += 1;
61+
62+
loop {
63+
// Continue through continuation lines and blank lines
64+
while i < lines.len() {
65+
let line = lines[i];
66+
if line.starts_with(" ") || line.trim().is_empty() {
67+
end_idx = i;
68+
i += 1;
69+
} else {
70+
break;
71+
}
72+
}
73+
74+
// Check if the next item is part of this definition list
75+
// It should be: optional non-blank line (term), then blank lines, then `: `
76+
if i < lines.len() {
77+
let potential_term = lines[i];
78+
79+
// Not a definition line, might be next term
80+
if !self.def_item_regex.is_match(potential_term)
81+
|| potential_term.starts_with("::") {
82+
// Look ahead for a definition line
83+
let mut j = i + 1;
84+
while j < lines.len() && lines[j].trim().is_empty() {
85+
j += 1;
86+
}
87+
88+
if j < lines.len()
89+
&& self.def_item_regex.is_match(lines[j])
90+
&& !lines[j].starts_with("::") {
91+
// Found another term-definition pair
92+
end_idx = j;
93+
i = j + 1;
94+
continue;
95+
} else {
96+
// No more definition items
97+
break;
98+
}
99+
} else {
100+
// This IS a definition line (continuation of same term)
101+
end_idx = i;
102+
i += 1;
103+
continue;
104+
}
105+
} else {
106+
break;
107+
}
108+
}
109+
110+
// Extract the definition list text
111+
let list_lines = &lines[start_idx..=end_idx];
112+
let list_text = list_lines.join("\n");
113+
114+
lists.push(DefinitionList {
115+
text: list_text,
116+
start_line: start_idx,
117+
end_line: end_idx,
118+
});
119+
} else {
120+
i += 1;
121+
}
122+
}
123+
124+
lists
125+
}
126+
127+
/// Convert a single definition list by:
128+
/// 1. Running pandoc with the Lua filter to convert to JSON
129+
/// 2. Using quarto-markdown-pandoc library to convert JSON to markdown
130+
pub fn convert_list(&self, list_text: &str) -> Result<String> {
131+
use std::io::Write;
132+
133+
// Get the Lua filter path from resources
134+
let filter_path = self
135+
.resources
136+
.get_resource("filters/definition-list-to-div.lua")?;
137+
138+
// Step 1: pandoc -f markdown -t json -L filter.lua
139+
let mut pandoc = Command::new("pandoc")
140+
.args(&["-f", "markdown", "-t", "json"])
141+
.arg("-L")
142+
.arg(&filter_path)
143+
.stdin(Stdio::piped())
144+
.stdout(Stdio::piped())
145+
.stderr(Stdio::piped())
146+
.spawn()
147+
.context("Failed to spawn pandoc")?;
148+
149+
{
150+
let stdin = pandoc
151+
.stdin
152+
.as_mut()
153+
.context("Failed to get pandoc stdin")?;
154+
stdin.write_all(list_text.as_bytes())?;
155+
}
156+
157+
let pandoc_output = pandoc.wait_with_output()?;
158+
159+
if !pandoc_output.status.success() {
160+
anyhow::bail!(
161+
"pandoc failed: {}",
162+
String::from_utf8_lossy(&pandoc_output.stderr)
163+
);
164+
}
165+
166+
// Step 2: Use library to convert JSON to markdown
167+
let mut json_reader = std::io::Cursor::new(&pandoc_output.stdout);
168+
let (pandoc_ast, _ctx) = json::read(&mut json_reader)
169+
.context("Failed to parse JSON output from pandoc")?;
170+
171+
let mut output = Vec::new();
172+
qmd::write(&pandoc_ast, &mut output)
173+
.context("Failed to write markdown output")?;
174+
175+
let result = String::from_utf8(output)
176+
.context("Failed to parse output as UTF-8")?
177+
.trim_end()
178+
.to_string();
179+
180+
Ok(result)
181+
}
182+
183+
/// Process a single file
184+
pub fn process_file(
185+
&self,
186+
file_path: &Path,
187+
in_place: bool,
188+
check: bool,
189+
verbose: bool,
190+
) -> Result<()> {
191+
let content = read_file(file_path)?;
192+
let lists = self.find_definition_lists(&content);
193+
194+
if lists.is_empty() {
195+
if verbose {
196+
println!(" No definition lists found");
197+
}
198+
return Ok(());
199+
}
200+
201+
if verbose || check {
202+
println!(
203+
" Found {} definition list(s)",
204+
lists.len().to_string().yellow()
205+
);
206+
}
207+
208+
// Convert each list and build new content
209+
let mut lines: Vec<String> = content.lines().map(|s| s.to_string()).collect();
210+
let mut offset: isize = 0; // Track line offset as we modify
211+
212+
for (idx, list) in lists.iter().enumerate() {
213+
if verbose {
214+
println!(" Converting list {}...", idx + 1);
215+
}
216+
217+
let converted = self.convert_list(&list.text)?;
218+
219+
// Calculate actual line positions with offset
220+
let start = (list.start_line as isize + offset) as usize;
221+
let end = (list.end_line as isize + offset) as usize;
222+
223+
if check {
224+
println!(
225+
" List {} at lines {}-{}:",
226+
idx + 1,
227+
list.start_line,
228+
list.end_line
229+
);
230+
println!(
231+
" {} {} lines -> {} {} lines",
232+
"Original:".red(),
233+
list.end_line - list.start_line + 1,
234+
"Converted:".green(),
235+
converted.lines().count()
236+
);
237+
}
238+
239+
// Replace the list in the lines
240+
let converted_lines: Vec<String> = converted.lines().map(|s| s.to_string()).collect();
241+
let new_len = converted_lines.len();
242+
let old_len = end - start + 1;
243+
244+
// Splice in the new lines
245+
lines.splice(start..=end, converted_lines);
246+
247+
// Update offset for next list
248+
offset += new_len as isize - old_len as isize;
249+
}
250+
251+
if check {
252+
println!(" {} No changes written (--check mode)", "✓".green());
253+
return Ok(());
254+
}
255+
256+
let new_content = lines.join("\n") + "\n";
257+
258+
if in_place {
259+
write_file(file_path, &new_content)?;
260+
println!(" {} Converted {} list(s)", "✓".green(), lists.len());
261+
} else {
262+
// Output to stdout
263+
print!("{}", new_content);
264+
}
265+
266+
Ok(())
267+
}
268+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
pub mod definition_lists;
12
pub mod grid_tables;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub mod syntax_check;

0 commit comments

Comments
 (0)