Update markdown renderer, TOC parser, and front matter defaults

matbrgz · matbrgz · commit eb388f338163 · 2025-12-26T16:26:22.000-03:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to Rustyll will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.8.5] - 2025-11-26
+
+### Fixed
+- Logic error in front matter defaults application
+- Table of contents generation returning empty string
+- Regex compilation panic in markdown renderer
+
 ## [0.8.0] - 2025-11-20
 
 ### Added
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rustyll"
-version = "0.8.4"
+version = "0.8.5"
 edition = "2021"
 description = "A blazing fast, Jekyll-compatible static site generator written in Rust"
 authors = ["Rustyll Team"]
diff --git a/debug_regex.rs b/debug_regex.rs
@@ -0,0 +1,33 @@
+use regex::Regex;
+// Scratch program: runs two candidate regexes against sample HTML and prints what they capture.
+fn main() {
+    let heading_regex = Regex::new(
+        r#"(?s)<h([1-6])(?:.*?id=["']([^"']+)["'])?.*?>(.*?)</h[1-6]>"#
+    ).unwrap();
+    // Sample input for the pattern above (groups: level, optional id value, inner content).
+    let html = r#"
+            <h1 id="intro">Introduction</h1>
+            <h2 id="overview">Overview</h2>
+    "#;
+    // Dump the full capture set of every heading match for manual inspection.
+    println!("Testing Heading Regex:");
+    for cap in heading_regex.captures_iter(html) {
+        println!("Matched: {:?}", cap);
+    }
+    // Code-block pattern: group 1 is the name after `language-`, group 2 is the <code> body.
+    let pre_regex = Regex::new(r#"(?s)<pre[^>]*>\s*<code[^>]*class="language-([^"]+)"[^>]*>(.*?)</code>\s*</pre>"#).unwrap();
+    // Sample input spans several lines, so `(.*?)` only matches because of the `(?s)` flag.
+    let code_html = r#"<pre><code class="language-rust">fn main() {
+    println!("Hello, World!");
+}</code></pre>"#;
+    // Report whether the pattern matches at all, then dump each capture set.
+    println!("\nTesting Pre Regex:");
+    if pre_regex.is_match(code_html) {
+        println!("Matched!");
+        for cap in pre_regex.captures_iter(code_html) {
+             println!("Capture: {:?}", cap);
+        }
+    } else {
+        println!("Not matched!");
+    }
+}
diff --git a/src/front_matter/defaults.rs b/src/front_matter/defaults.rs
@@ -55,17 +55,10 @@ fn apply_default_if_matches(
     }
     
     // Convert the default values to a FrontMatter instance and merge
-    let yaml_str = serde_yaml::to_string(&default.values)?;
-    let default_front_matter = FrontMatter::default();
+    let default_front_matter: FrontMatter = serde_yaml::from_value(default.values.clone())?;
     
-    // Apply defaults
-    if front_matter.title.is_none() && default_front_matter.title.is_some() {
-        front_matter.title = default_front_matter.title.clone();
-    }
-    
-    if front_matter.layout.is_none() && default_front_matter.layout.is_some() {
-        front_matter.layout = default_front_matter.layout.clone();
-    }
+    // Apply defaults using merge
+    front_matter.merge(&default_front_matter);
     
     Ok(())
 }
@@ -161,20 +154,10 @@ fn apply_defaults_from_list(
     
     if let Some(default) = default {
         // Convert the default values to a FrontMatter instance and merge
-        let yaml_str = serde_yaml::to_string(&default.values)?;
-        
-        // Create a default front matter
-        let default_front_matter = FrontMatter::default();
+        let default_front_matter: FrontMatter = serde_yaml::from_value(default.values.clone())?;
         
-        // Apply defaults for title
-        if front_matter.title.is_none() && default_front_matter.title.is_some() {
-            front_matter.title = default_front_matter.title.clone();
-        }
-        
-        // Apply defaults for layout
-        if front_matter.layout.is_none() && default_front_matter.layout.is_some() {
-            front_matter.layout = default_front_matter.layout.clone();
-        }
+        // Apply defaults using merge
+        front_matter.merge(&default_front_matter);
     }
     
     Ok(())
diff --git a/src/markdown/renderer/markdown_renderer.rs b/src/markdown/renderer/markdown_renderer.rs
@@ -11,7 +11,7 @@ lazy_static! {
     static ref MATH_BLOCK_REGEX: Regex = Regex::new(r"\$\$([\s\S]+?)\$\$").unwrap();
     
     // Regex for typographical improvements
-    static ref SMART_QUOTES_REGEX: Regex = Regex::new(r#"(^|[-—/\(\[\{"""\s])[''](.+?)['']([-.,:;!?\)\]\}""\s]|$)"#).unwrap();
+    static ref SMART_QUOTES_REGEX: Regex = Regex::new(r#"(^|[-—/\(\[\{\s])['"](.+?)['"]([-.,:;!?\)\]\}\s]|$)"#).unwrap();
     static ref EM_DASH_REGEX: Regex = Regex::new(r"---").unwrap();
     static ref EN_DASH_REGEX: Regex = Regex::new(r"--").unwrap();
     
@@ -169,8 +169,8 @@ impl<'a> MarkdownRenderer<'a> {
         let headings = extract_headings(html);
         match headings {
             Ok(_h) => {
-                // In the future, use TocOptions here
-                match generate_toc(html) {
+                let options = crate::markdown::toc::parser::TocOptions::default();
+                match crate::markdown::toc::parser::generate_toc_with_options(html, &options) {
                     Ok(toc) => toc,
                     Err(_) => String::new()
                 }
@@ -247,6 +247,9 @@ mod tests {
         let markdown = "```rust\nfn main() {\n    println!(\"Hello, World!\");\n}\n```";
         let html = renderer.render(markdown);
         
+        if !html.contains("<div class=\"highlight\">") {
+            panic!("HTML does not contain highlight div. HTML: {}", html);
+        }
         assert!(html.contains("<div class=\"highlight\">"));
         assert!(html.contains("<pre class=\"highlight rust\">"));
     }
@@ -259,9 +262,9 @@ mod tests {
         let markdown = "# Main Title\n\n## Section 1\n\nContent\n\n## Section 2\n\nMore content";
         let (html, toc) = renderer.render_with_toc(markdown);
         
-        assert!(toc.contains("<ul class=\"toc\">"));
-        assert!(toc.contains("<a href=\"#section-1\">"));
-        assert!(toc.contains("<a href=\"#section-2\">"));
+        assert!(toc.contains("class=\"toc\""));
+        assert!(toc.contains("href=\"#section-1\""));
+        assert!(toc.contains("href=\"#section-2\""));
     }
     
     #[test]
diff --git a/src/markdown/renderer/syntax.rs b/src/markdown/renderer/syntax.rs
@@ -47,11 +47,24 @@ impl SyntaxHighlighter {
     
     /// Process HTML content to add syntax highlighting to code blocks
     pub fn highlight_html(&self, html: &str) -> BoxResult<String> {
-        let pre_regex = Regex::new(r#"<pre><code( class="language-([^"]+)")?>([^<]+)</code></pre>"#)?;
+        // Regex to match pre and code tags and capture attributes and content
+        let pre_regex = Regex::new(r#"(?s)<pre([^>]*)>\s*<code([^>]*)>(.*?)</code>\s*</pre>"#)?;
+        let lang_regex = Regex::new(r#"lang="([^"]+)""#)?;
+        let class_regex = Regex::new(r#"class="language-([^"]+)""#)?;
         
         let html_highlighted = pre_regex.replace_all(html, |caps: &regex::Captures| {
+            let pre_attrs = &caps[1];
+            let code_attrs = &caps[2];
             let code = html_escape::decode_html_entities(&caps[3]).to_string();
-            let lang = caps.get(2).map(|m| m.as_str()).unwrap_or("text");
+            
+            // Try to get lang from pre attribute first
+            let mut lang = "text";
+            
+            if let Some(m) = lang_regex.captures(pre_attrs) {
+                lang = m.get(1).map(|m| m.as_str()).unwrap_or("text");
+            } else if let Some(m) = class_regex.captures(code_attrs) {
+                lang = m.get(1).map(|m| m.as_str()).unwrap_or("text");
+            }
             
             self.highlight_code(&code, lang)
         });
diff --git a/src/markdown/toc/mod.rs b/src/markdown/toc/mod.rs
@@ -1,5 +1,5 @@
 mod generator;
-mod parser;
+pub mod parser;
 
 pub use generator::generate_toc;
 pub use parser::extract_headings;
diff --git a/src/markdown/toc/parser.rs b/src/markdown/toc/parser.rs
@@ -5,9 +5,10 @@ use serde::{Serialize, Deserialize};
 
 lazy_static! {
     static ref HEADING_REGEX: Regex = Regex::new(
-        r#"<h([1-6])(?:[^>]*)(?:id=["']([^"']+)["'])?(?:[^>]*)>(.*?)</h\1>"#
+        r#"<h([1-6])([^>]*)>([\s\S]*?)</h[1-6]>"#
     ).unwrap();
 
+    static ref ID_REGEX: Regex = Regex::new(r#"id=["']([^"']+)["']"#).unwrap();
     static ref TAG_REGEX: Regex = Regex::new(r"<[^>]*>").unwrap();
 }
 
@@ -139,7 +140,7 @@ impl TableOfContents {
         }
 
         // Reverse to maintain order
-        self.headings.reverse();
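+        // self.headings.reverse(); // NOTE(review): disabled in this change; the "Reverse to maintain order" comment above is now stale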
     }
 
     /// Generate HTML for the table of contents
@@ -201,16 +202,18 @@ pub fn extract_headings(html: &str) -> BoxResult<Vec<(usize, String, String)>> {
 
     for cap in HEADING_REGEX.captures_iter(html) {
         let level: usize = cap[1].parse()?;
+        let attrs = &cap[2];
+        let content = &cap[3];
 
-        // Get ID from the heading or generate from text
-        let id = if let Some(id_match) = cap.get(2) {
-            id_match.as_str().to_string()
+        // Get ID from attributes
+        let id = if let Some(id_match) = ID_REGEX.captures(attrs) {
+            id_match[1].to_string()
         } else {
             // Generate ID from heading text
-            generate_id_from_text(&cap[3])
+            generate_id_from_text(content)
         };
 
-        let text = strip_html_tags(&cap[3]);
+        let text = strip_html_tags(content);
 
         headings.push((level, id, text));
     }
@@ -296,14 +299,8 @@ impl Default for TocOptions {
 }
 
 /// Generate TOC with custom options
-pub fn generate_toc_with_options(_html: &str, _options: &TocOptions) -> BoxResult<String> {
-    // Temporarily return empty string due to compilation issues
-    Ok(String::new())
-}
-
-// Original implementation commented out temporarily
-/*
-pub fn generate_toc_with_options_original(html: &str, options: &TocOptions) -> BoxResult<String> {
+/// by rendering the headings extracted from `html` as a nested list of anchor links.
+pub fn generate_toc_with_options(html: &str, options: &TocOptions) -> BoxResult<String> {
     let flat_headings = extract_headings(html)?;
 
     // Filter headings by level and no_toc class
@@ -317,7 +314,7 @@ pub fn generate_toc_with_options_original(html: &str, options: &TocOptions) -> B
     }
 
     let list_tag = if options.ordered_list { "ol" } else { "ul" };
-    let mut html = format!(
+    let mut html_out = format!(
         r#"<{} id="{}" class="{}">"#,
         list_tag, options.list_id, options.list_class
     );
@@ -328,19 +325,19 @@ pub fn generate_toc_with_options_original(html: &str, options: &TocOptions) -> B
     for (level, id, text) in filtered {
         // Adjust nesting
         while current_level < level {
-            html.push_str(&format!(r#"<{} class="{}">"#, list_tag, options.sublist_class));
+            html_out.push_str(&format!(r#"<{} class="{}">"#, list_tag, options.sublist_class));
             current_level += 1;
             stack_depth += 1;
         }
 
         while current_level > level {
-            html.push_str(&format!("</{}></li>", list_tag));
+            html_out.push_str(&format!("</{}></li>", list_tag));
             current_level -= 1;
             stack_depth -= 1;
         }
 
         // Add item
-        html.push_str(&format!(
+        html_out.push_str(&format!(
             "<li class=\"{}\"><a href=\"#{}\"{}>{}</a>",
             options.item_class,
             id,
@@ -357,16 +354,15 @@ pub fn generate_toc_with_options_original(html: &str, options: &TocOptions) -> B
 
     // Close any remaining open tags
     for _ in 0..=stack_depth {
-        html.push_str("</li>");
+        html_out.push_str("</li>");
         if stack_depth > 0 {
-            html.push_str(&format!("</{}>", list_tag));
+            html_out.push_str(&format!("</{}>", list_tag));
         }
     }
 
-    html.push_str(&format!("</{}>", list_tag));
-    Ok(html)
+    html_out.push_str(&format!("</{}>", list_tag));
+    Ok(html_out)
 }
-*/
 
 #[cfg(test)]
 mod tests {
@@ -413,6 +409,11 @@ mod tests {
             <h1 id="usage">Usage</h1>
         "#;
 
+        let headings = extract_headings(html).unwrap();
+        if headings.is_empty() {
+             panic!("No headings found! HTML: {}", html);
+        }
+
         let toc = build_toc_from_html(html).unwrap();
         assert_eq!(toc.headings.len(), 2); // Two h1 headings
         assert_eq!(toc.headings[0].children.len(), 2); // Two h2 under first h1