@@ -116,6 +116,9 @@ def extract_file_references(self, content: str, source_file: str) -> list[FilePa
116116 - tests/...
117117 - Relative paths: ../path/to/file
118118
119+ Content inside fenced code blocks is excluded to avoid false positives
120+ from code examples that reference hypothetical or template paths.
121+
119122 Args:
120123 content: Markdown file content
121124 source_file: Path to source file
@@ -125,34 +128,64 @@ def extract_file_references(self, content: str, source_file: str) -> list[FilePa
125128 """
126129 references = []
127130
128- # Regex patterns for file paths
129- patterns = [
131+ # Inline code patterns – these target explicit backtick-wrapped paths in prose
132+ inline_patterns = [
130133 r'`(src/[^`]+\.(cs|csproj|axaml|json))`' ,
131134 r'`(docs/[^`]+\.md)`' ,
132135 r'`(tests/[^`]+\.(cs|csproj))`' ,
133- r'\]\((\.\./[^)]+\.md)\)' , # Relative markdown links
136+ ]
137+
138+ # Link patterns – match markdown link syntax [text](path)
139+ link_patterns = [
140+ r'\]\((\.\./[^)]+\.md)\)' , # Relative markdown links
134141 r'\]\(([^)]+\.(cs|md|json|axaml))\)' , # Any file in markdown links
135142 ]
136143
137- for line_num , line in enumerate (content .split ('\n ' ), start = 1 ):
138- for pattern in patterns :
144+ lines = content .split ('\n ' )
145+ in_code_fence = False
146+
147+ for line_num , line in enumerate (lines , start = 1 ):
148+ # Track fenced code block boundaries
149+ stripped = line .strip ()
150+ if stripped .startswith ('```' ) or stripped .startswith ('~~~' ):
151+ in_code_fence = not in_code_fence
152+ continue
153+
154+ # Skip all extraction while inside a fenced code block –
155+ # paths and links there are illustrative/hypothetical, not real refs.
156+ if in_code_fence :
157+ continue
158+
159+ # Extract inline backtick patterns (prose references)
160+ for pattern in inline_patterns :
139161 for match in re .finditer (pattern , line ):
140162 referenced_path = match .group (1 )
163+ path_type = "relative" if referenced_path .startswith ('../' ) else (
164+ "absolute" if referenced_path .startswith ('/' ) else "project_relative"
165+ )
166+ references .append (FilePathReference (
167+ source_file = source_file ,
168+ line_number = line_num ,
169+ referenced_path = referenced_path ,
170+ path_type = path_type ,
171+ exists = False
172+ ))
141173
142- # Determine path type
143- if referenced_path .startswith ('../' ):
144- path_type = "relative"
145- elif referenced_path .startswith ('/' ):
146- path_type = "absolute"
147- else :
148- path_type = "project_relative"
149-
174+ # Extract link patterns; strip inline code spans first to avoid
175+ # matching syntax written inside backticks.
176+ line_no_inline = re .sub (r'`[^`]+`' , '' , line )
177+ for pattern in link_patterns :
178+ for match in re .finditer (pattern , line_no_inline ):
179+ referenced_path = match .group (1 )
180+ path_type = "relative" if referenced_path .startswith ('../' ) else (
181+ "absolute" if referenced_path .startswith ('/' ) else "project_relative"
182+ )
150183 references .append (FilePathReference (
151184 source_file = source_file ,
152185 line_number = line_num ,
153186 referenced_path = referenced_path ,
154187 path_type = path_type ,
155- exists = False # Will be validated later
188+ exists = False
156189 ))
157190
158191 return references
0 commit comments