@@ -6,11 +6,11 @@ use crate::config::{ensure_dir, get_cwd};
66use crate :: error:: { Result , SkillcError } ;
77use crate :: index:: { self , SCHEMA_VERSION } ;
88use crate :: logging:: { LogEntry , get_run_id, init_log_db, log_access_with_fallback} ;
9+ use crate :: markdown;
910use crate :: resolver:: { ResolvedSkill , resolve_skill} ;
1011use crate :: { OutputFormat , verbose} ;
1112use chrono:: Utc ;
1213use crossterm:: style:: Stylize ;
13- use lazy_regex:: { Lazy , Regex , lazy_regex} ;
1414use rusqlite:: { Connection , params} ;
1515use serde:: Serialize ;
1616use std:: fs;
@@ -19,9 +19,6 @@ use std::path::{Path, PathBuf};
1919use std:: time:: Instant ;
2020use walkdir:: WalkDir ;
2121
22- /// Regex for parsing markdown headings (validated at compile time).
23- static HEADING_RE : Lazy < Regex > = lazy_regex ! ( r"^(#{1,6})\s+(.+)$" ) ;
24-
2522/// Search result entry.
2623#[ derive( Debug , Serialize ) ]
2724pub struct SearchResult {
@@ -356,22 +353,12 @@ fn index_markdown(conn: &Connection, source_dir: &Path, file_path: &Path) -> Res
356353
357354 let lines: Vec < & str > = content. lines ( ) . collect ( ) ;
358355
359- // Find all headings with their positions
360- let mut headings: Vec < ( usize , usize , String ) > = Vec :: new ( ) ; // (line_num, level, text)
361- for ( i, line) in lines. iter ( ) . enumerate ( ) {
362- if let Some ( caps) = HEADING_RE . captures ( line) {
363- let level = caps
364- . get ( 1 )
365- . ok_or_else ( || SkillcError :: Internal ( "regex group 1 missing" . into ( ) ) ) ?
366- . as_str ( )
367- . len ( ) ;
368- let text = caps
369- . get ( 2 )
370- . ok_or_else ( || SkillcError :: Internal ( "regex group 2 missing" . into ( ) ) ) ?
371- . as_str ( )
372- . to_string ( ) ;
373- headings. push ( ( i, level, text) ) ;
374- }
356+ // Find all headings with their positions using AST parsing.
357+ // This avoids false positives from code blocks.
358+ let mut headings: Vec < ( usize , usize , String ) > = Vec :: new ( ) ; // (line_idx, level, text)
359+ for heading in markdown:: extract_headings ( & content) {
360+ let line_idx = heading. line . saturating_sub ( 1 ) ;
361+ headings. push ( ( line_idx, heading. level , heading. text ) ) ;
375362 }
376363
377364 if headings. is_empty ( ) {
@@ -716,6 +703,7 @@ fn build_fts_query(query: &str) -> String {
716703#[ cfg( test) ]
717704mod tests {
718705 use super :: * ;
706+ use tempfile:: TempDir ;
719707
720708 #[ test]
721709 fn test_build_fts_query_simple ( ) {
@@ -744,4 +732,56 @@ mod tests {
744732 let hash = compute_hash16 ( & path) ;
745733 assert_eq ! ( hash. len( ) , 16 ) ;
746734 }
735+
/// Regression test: `index_markdown` must not treat `#`-prefixed lines inside a
/// fenced code block as headings.
///
/// The fixture contains two real `##` headings separated by a fenced block that
/// holds a heading-looking line. The test checks three things:
/// 1. the heading before the fence is indexed exactly once with the expected
///    `start_line` / `end_line`,
/// 2. the heading-looking line inside the fence is not present in the
///    `headings` table at all,
/// 3. the heading after the fence is still indexed.
#[test]
fn test_index_markdown_ignores_code_block_headings() {
    let temp = TempDir::new().unwrap();
    let source_dir = temp.path();
    let file_path = source_dir.join("perf.md");

    // 1-based fixture lines: 1 = "# Title", 3 = "## Typst Performance Profiling",
    // 5-7 = fenced block (line 6 looks like a heading), 11 = "## Next Section".
    let content = r#"# Title

## Typst Performance Profiling

```md
## Not a heading
```

More text

## Next Section
Text
"#;
    std::fs::write(&file_path, content).unwrap();

    // Minimal in-memory schema holding the tables this test touches.
    // NOTE(review): presumably mirrors the schema the real index builder
    // creates -- keep the two in sync if that schema changes.
    let conn = Connection::open_in_memory().unwrap();
    conn.execute(
        "CREATE VIRTUAL TABLE sections USING fts5(file, section, content, tokenize='unicode61')",
        [],
    )
    .unwrap();
    conn.execute(
        "CREATE TABLE headings (
            id INTEGER PRIMARY KEY,
            file TEXT NOT NULL,
            text TEXT NOT NULL,
            level INTEGER NOT NULL,
            start_line INTEGER NOT NULL,
            end_line INTEGER NOT NULL
        )",
        [],
    )
    .unwrap();
    conn.execute(
        "CREATE INDEX idx_headings_text ON headings(text COLLATE NOCASE)",
        [],
    )
    .unwrap();

    index_markdown(&conn, source_dir, &file_path).unwrap();

    // The real heading preceding the fence: indexed once, with the section
    // bounds the original test pinned.
    let headings = index::query_headings(&conn, "Typst Performance Profiling", None).unwrap();
    assert_eq!(headings.len(), 1);
    assert_eq!(headings[0].start_line, 3);
    assert_eq!(headings[0].end_line, 11);

    // The heading-looking line inside the fenced block must not be indexed.
    // This is the behavior the test is named for, so check it directly instead
    // of relying only on the neighbouring section's end_line.
    let fenced = index::query_headings(&conn, "Not a heading", None).unwrap();
    assert!(
        fenced.is_empty(),
        "heading-like line inside a fenced code block was indexed"
    );

    // Indexing must resume normally once the fence is closed.
    let following = index::query_headings(&conn, "Next Section", None).unwrap();
    assert_eq!(following.len(), 1);
}
747787}
0 commit comments