@@ -116,6 +116,9 @@ def extract_file_references(self, content: str, source_file: str) -> list[FilePa
116116 - tests/...
117117 - Relative paths: ../path/to/file
118118
119+ Markdown links inside fenced code blocks or inline code spans are skipped to avoid false positives
120+ from code examples that reference hypothetical or template paths; backtick-wrapped paths are still extracted.
121+
119122 Args:
120123 content: Markdown file content
121124 source_file: Path to source file
@@ -125,36 +128,62 @@ def extract_file_references(self, content: str, source_file: str) -> list[FilePa
125128 """
126129 references = []
127130
128- # Regex patterns for file paths
129- patterns = [
131+ # Inline code patterns – these target explicit backtick-wrapped paths in prose
132+ inline_patterns = [
130133 r'`(src/[^`]+\.(cs|csproj|axaml|json))`' ,
131134 r'`(docs/[^`]+\.md)`' ,
132135 r'`(tests/[^`]+\.(cs|csproj))`' ,
133- r'\]\((\.\./[^)]+\.md)\)' , # Relative markdown links
136+ ]
137+
138+ # Link patterns – match markdown link syntax [text](path)
139+ link_patterns = [
140+ r'\]\((\.\./[^)]+\.md)\)' , # Relative markdown links
134141 r'\]\(([^)]+\.(cs|md|json|axaml))\)' , # Any file in markdown links
135142 ]
136143
137- for line_num , line in enumerate (content .split ('\n ' ), start = 1 ):
138- for pattern in patterns :
139- for match in re .finditer (pattern , line ):
140- referenced_path = match .group (1 )
144+ lines = content .split ('\n ' )
145+ in_code_fence = False
141146
142- # Determine path type
143- if referenced_path .startswith ('../' ):
144- path_type = "relative"
145- elif referenced_path .startswith ('/' ):
146- path_type = "absolute"
147- else :
148- path_type = "project_relative"
147+ for line_num , line in enumerate (lines , start = 1 ):
148+ # Track fenced code block boundaries
149+ stripped = line .strip ()
150+ if stripped .startswith ('```' ) or stripped .startswith ('~~~' ):
151+ in_code_fence = not in_code_fence
152+ continue
149153
154+ # Always extract inline backtick patterns (safe – content is inside backticks)
155+ for pattern in inline_patterns :
156+ for match in re .finditer (pattern , line ):
157+ referenced_path = match .group (1 )
158+ path_type = "relative" if referenced_path .startswith ('../' ) else (
159+ "absolute" if referenced_path .startswith ('/' ) else "project_relative"
160+ )
150161 references .append (FilePathReference (
151162 source_file = source_file ,
152163 line_number = line_num ,
153164 referenced_path = referenced_path ,
154165 path_type = path_type ,
155- exists = False # Will be validated later
166+ exists = False
156167 ))
157168
169+ # Only extract link patterns outside fenced code blocks
170+ if not in_code_fence :
171+ # Temporarily remove inline code spans to avoid matching inside them
172+ line_no_inline = re .sub (r'`[^`]+`' , '' , line )
173+ for pattern in link_patterns :
174+ for match in re .finditer (pattern , line_no_inline ):
175+ referenced_path = match .group (1 )
176+ path_type = "relative" if referenced_path .startswith ('../' ) else (
177+ "absolute" if referenced_path .startswith ('/' ) else "project_relative"
178+ )
179+ references .append (FilePathReference (
180+ source_file = source_file ,
181+ line_number = line_num ,
182+ referenced_path = referenced_path ,
183+ path_type = path_type ,
184+ exists = False
185+ ))
186+
158187 return references
159188
160189 def extract_links (self , content : str ) -> list [str ]:
0 commit comments