Skip to content

Commit eb388f3

Browse files
committed
Update markdown renderer, TOC parser, and front matter defaults
1 parent 11ee837 commit eb388f3

File tree

9 files changed

+98
-58
lines changed

9 files changed

+98
-58
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ All notable changes to Rustyll will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.8.5] - 2025-11-26
9+
10+
### Fixed
11+
- Logic error in front matter defaults application
12+
- Table of contents generation returning empty string
13+
- Regex compilation panic in markdown renderer
14+
815
## [0.8.0] - 2025-11-20
916

1017
### Added

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "rustyll"
3-
version = "0.8.4"
3+
version = "0.8.5"
44
edition = "2021"
55
description = "A blazing fast, Jekyll-compatible static site generator written in Rust"
66
authors = ["Rustyll Team"]

debug_regex.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
use regex::Regex;
2+
3+
fn main() {
4+
let heading_regex = Regex::new(
5+
r#"(?s)<h([1-6])(?:.*?id=["']([^"']+)["'])?.*?>(.*?)</h[1-6]>"#
6+
).unwrap();
7+
8+
let html = r#"
9+
<h1 id="intro">Introduction</h1>
10+
<h2 id="overview">Overview</h2>
11+
"#;
12+
13+
println!("Testing Heading Regex:");
14+
for cap in heading_regex.captures_iter(html) {
15+
println!("Matched: {:?}", cap);
16+
}
17+
18+
let pre_regex = Regex::new(r#"(?s)<pre[^>]*>\s*<code[^>]*class="language-([^"]+)"[^>]*>(.*?)</code>\s*</pre>"#).unwrap();
19+
20+
let code_html = r#"<pre><code class="language-rust">fn main() {
21+
println!("Hello, World!");
22+
}</code></pre>"#;
23+
24+
println!("\nTesting Pre Regex:");
25+
if pre_regex.is_match(code_html) {
26+
println!("Matched!");
27+
for cap in pre_regex.captures_iter(code_html) {
28+
println!("Capture: {:?}", cap);
29+
}
30+
} else {
31+
println!("Not matched!");
32+
}
33+
}

src/front_matter/defaults.rs

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,10 @@ fn apply_default_if_matches(
5555
}
5656

5757
// Convert the default values to a FrontMatter instance and merge
58-
let yaml_str = serde_yaml::to_string(&default.values)?;
59-
let default_front_matter = FrontMatter::default();
58+
let default_front_matter: FrontMatter = serde_yaml::from_value(default.values.clone())?;
6059

61-
// Apply defaults
62-
if front_matter.title.is_none() && default_front_matter.title.is_some() {
63-
front_matter.title = default_front_matter.title.clone();
64-
}
65-
66-
if front_matter.layout.is_none() && default_front_matter.layout.is_some() {
67-
front_matter.layout = default_front_matter.layout.clone();
68-
}
60+
// Apply defaults using merge
61+
front_matter.merge(&default_front_matter);
6962

7063
Ok(())
7164
}
@@ -161,20 +154,10 @@ fn apply_defaults_from_list(
161154

162155
if let Some(default) = default {
163156
// Convert the default values to a FrontMatter instance and merge
164-
let yaml_str = serde_yaml::to_string(&default.values)?;
165-
166-
// Create a default front matter
167-
let default_front_matter = FrontMatter::default();
157+
let default_front_matter: FrontMatter = serde_yaml::from_value(default.values.clone())?;
168158

169-
// Apply defaults for title
170-
if front_matter.title.is_none() && default_front_matter.title.is_some() {
171-
front_matter.title = default_front_matter.title.clone();
172-
}
173-
174-
// Apply defaults for layout
175-
if front_matter.layout.is_none() && default_front_matter.layout.is_some() {
176-
front_matter.layout = default_front_matter.layout.clone();
177-
}
159+
// Apply defaults using merge
160+
front_matter.merge(&default_front_matter);
178161
}
179162

180163
Ok(())

src/markdown/renderer/markdown_renderer.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ lazy_static! {
1111
static ref MATH_BLOCK_REGEX: Regex = Regex::new(r"\$\$([\s\S]+?)\$\$").unwrap();
1212

1313
// Regex for typographical improvements
14-
static ref SMART_QUOTES_REGEX: Regex = Regex::new(r#"(^|[-—/\(\[\{"""\s])[''](.+?)['']([-.,:;!?\)\]\}""\s]|$)"#).unwrap();
14+
static ref SMART_QUOTES_REGEX: Regex = Regex::new(r#"(^|[-—/\(\[\{\s])['"](.+?)['"]([-.,:;!?\)\]\}\s]|$)"#).unwrap();
1515
static ref EM_DASH_REGEX: Regex = Regex::new(r"---").unwrap();
1616
static ref EN_DASH_REGEX: Regex = Regex::new(r"--").unwrap();
1717

@@ -169,8 +169,8 @@ impl<'a> MarkdownRenderer<'a> {
169169
let headings = extract_headings(html);
170170
match headings {
171171
Ok(_h) => {
172-
// In the future, use TocOptions here
173-
match generate_toc(html) {
172+
let options = crate::markdown::toc::parser::TocOptions::default();
173+
match crate::markdown::toc::parser::generate_toc_with_options(html, &options) {
174174
Ok(toc) => toc,
175175
Err(_) => String::new()
176176
}
@@ -247,6 +247,9 @@ mod tests {
247247
let markdown = "```rust\nfn main() {\n println!(\"Hello, World!\");\n}\n```";
248248
let html = renderer.render(markdown);
249249

250+
if !html.contains("<div class=\"highlight\">") {
251+
panic!("HTML does not contain highlight div. HTML: {}", html);
252+
}
250253
assert!(html.contains("<div class=\"highlight\">"));
251254
assert!(html.contains("<pre class=\"highlight rust\">"));
252255
}
@@ -259,9 +262,9 @@ mod tests {
259262
let markdown = "# Main Title\n\n## Section 1\n\nContent\n\n## Section 2\n\nMore content";
260263
let (html, toc) = renderer.render_with_toc(markdown);
261264

262-
assert!(toc.contains("<ul class=\"toc\">"));
263-
assert!(toc.contains("<a href=\"#section-1\">"));
264-
assert!(toc.contains("<a href=\"#section-2\">"));
265+
assert!(toc.contains("class=\"toc\""));
266+
assert!(toc.contains("href=\"#section-1\""));
267+
assert!(toc.contains("href=\"#section-2\""));
265268
}
266269

267270
#[test]

src/markdown/renderer/syntax.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,24 @@ impl SyntaxHighlighter {
4747

4848
/// Process HTML content to add syntax highlighting to code blocks
4949
pub fn highlight_html(&self, html: &str) -> BoxResult<String> {
50-
let pre_regex = Regex::new(r#"<pre><code( class="language-([^"]+)")?>([^<]+)</code></pre>"#)?;
50+
// Regex to match pre and code tags and capture attributes and content
51+
let pre_regex = Regex::new(r#"(?s)<pre([^>]*)>\s*<code([^>]*)>(.*?)</code>\s*</pre>"#)?;
52+
let lang_regex = Regex::new(r#"lang="([^"]+)""#)?;
53+
let class_regex = Regex::new(r#"class="language-([^"]+)""#)?;
5154

5255
let html_highlighted = pre_regex.replace_all(html, |caps: &regex::Captures| {
56+
let pre_attrs = &caps[1];
57+
let code_attrs = &caps[2];
5358
let code = html_escape::decode_html_entities(&caps[3]).to_string();
54-
let lang = caps.get(2).map(|m| m.as_str()).unwrap_or("text");
59+
60+
// Try to get lang from pre attribute first
61+
let mut lang = "text";
62+
63+
if let Some(m) = lang_regex.captures(pre_attrs) {
64+
lang = m.get(1).map(|m| m.as_str()).unwrap_or("text");
65+
} else if let Some(m) = class_regex.captures(code_attrs) {
66+
lang = m.get(1).map(|m| m.as_str()).unwrap_or("text");
67+
}
5568

5669
self.highlight_code(&code, lang)
5770
});

src/markdown/toc/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
mod generator;
2-
mod parser;
2+
pub mod parser;
33

44
pub use generator::generate_toc;
55
pub use parser::extract_headings;

src/markdown/toc/parser.rs

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@ use serde::{Serialize, Deserialize};
55

66
lazy_static! {
77
static ref HEADING_REGEX: Regex = Regex::new(
8-
r#"<h([1-6])(?:[^>]*)(?:id=["']([^"']+)["'])?(?:[^>]*)>(.*?)</h\1>"#
8+
r#"<h([1-6])([^>]*)>([\s\S]*?)</h[1-6]>"#
99
).unwrap();
1010

11+
static ref ID_REGEX: Regex = Regex::new(r#"id=["']([^"']+)["']"#).unwrap();
1112
static ref TAG_REGEX: Regex = Regex::new(r"<[^>]*>").unwrap();
1213
}
1314

@@ -139,7 +140,7 @@ impl TableOfContents {
139140
}
140141

141142
// Reverse to maintain order
142-
self.headings.reverse();
143+
// self.headings.reverse();
143144
}
144145

145146
/// Generate HTML for the table of contents
@@ -201,16 +202,18 @@ pub fn extract_headings(html: &str) -> BoxResult<Vec<(usize, String, String)>> {
201202

202203
for cap in HEADING_REGEX.captures_iter(html) {
203204
let level: usize = cap[1].parse()?;
205+
let attrs = &cap[2];
206+
let content = &cap[3];
204207

205-
// Get ID from the heading or generate from text
206-
let id = if let Some(id_match) = cap.get(2) {
207-
id_match.as_str().to_string()
208+
// Get ID from attributes
209+
let id = if let Some(id_match) = ID_REGEX.captures(attrs) {
210+
id_match[1].to_string()
208211
} else {
209212
// Generate ID from heading text
210-
generate_id_from_text(&cap[3])
213+
generate_id_from_text(content)
211214
};
212215

213-
let text = strip_html_tags(&cap[3]);
216+
let text = strip_html_tags(content);
214217

215218
headings.push((level, id, text));
216219
}
@@ -296,14 +299,8 @@ impl Default for TocOptions {
296299
}
297300

298301
/// Generate TOC with custom options
299-
pub fn generate_toc_with_options(_html: &str, _options: &TocOptions) -> BoxResult<String> {
300-
// Temporarily return empty string due to compilation issues
301-
Ok(String::new())
302-
}
303-
304-
// Original implementation commented out temporarily
305-
/*
306-
pub fn generate_toc_with_options_original(html: &str, options: &TocOptions) -> BoxResult<String> {
302+
/// by extracting headings from `html`, filtering them per `options`, and rendering a nested list.
303+
pub fn generate_toc_with_options(html: &str, options: &TocOptions) -> BoxResult<String> {
307304
let flat_headings = extract_headings(html)?;
308305

309306
// Filter headings by level and no_toc class
@@ -317,7 +314,7 @@ pub fn generate_toc_with_options_original(html: &str, options: &TocOptions) -> B
317314
}
318315

319316
let list_tag = if options.ordered_list { "ol" } else { "ul" };
320-
let mut html = format!(
317+
let mut html_out = format!(
321318
r#"<{} id="{}" class="{}">"#,
322319
list_tag, options.list_id, options.list_class
323320
);
@@ -328,19 +325,19 @@ pub fn generate_toc_with_options_original(html: &str, options: &TocOptions) -> B
328325
for (level, id, text) in filtered {
329326
// Adjust nesting
330327
while current_level < level {
331-
html.push_str(&format!(r#"<{} class="{}">"#, list_tag, options.sublist_class));
328+
html_out.push_str(&format!(r#"<{} class="{}">"#, list_tag, options.sublist_class));
332329
current_level += 1;
333330
stack_depth += 1;
334331
}
335332

336333
while current_level > level {
337-
html.push_str(&format!("</{}></li>", list_tag));
334+
html_out.push_str(&format!("</{}></li>", list_tag));
338335
current_level -= 1;
339336
stack_depth -= 1;
340337
}
341338

342339
// Add item
343-
html.push_str(&format!(
340+
html_out.push_str(&format!(
344341
"<li class=\"{}\"><a href=\"#{}\"{}>{}</a>",
345342
options.item_class,
346343
id,
@@ -357,16 +354,15 @@ pub fn generate_toc_with_options_original(html: &str, options: &TocOptions) -> B
357354

358355
// Close any remaining open tags
359356
for _ in 0..=stack_depth {
360-
html.push_str("</li>");
357+
html_out.push_str("</li>");
361358
if stack_depth > 0 {
362-
html.push_str(&format!("</{}>", list_tag));
359+
html_out.push_str(&format!("</{}>", list_tag));
363360
}
364361
}
365362

366-
html.push_str(&format!("</{}>", list_tag));
367-
Ok(html)
363+
html_out.push_str(&format!("</{}>", list_tag));
364+
Ok(html_out)
368365
}
369-
*/
370366

371367
#[cfg(test)]
372368
mod tests {
@@ -413,6 +409,11 @@ mod tests {
413409
<h1 id="usage">Usage</h1>
414410
"#;
415411

412+
let headings = extract_headings(html).unwrap();
413+
if headings.is_empty() {
414+
panic!("No headings found! HTML: {}", html);
415+
}
416+
416417
let toc = build_toc_from_html(html).unwrap();
417418
assert_eq!(toc.headings.len(), 2); // Two h1 headings
418419
assert_eq!(toc.headings[0].children.len(), 2); // Two h2 under first h1

0 commit comments

Comments
 (0)