99from difflib import unified_diff
1010from enum import Enum
1111from pathlib import Path
12- from typing import Optional
12+ from typing import Optional , Generator , Callable
1313
1414from render import Renderer , RenderStatus , MissingMetadataError
1515from scanner import Scanner
1616
1717from aws_doc_sdk_examples_tools .doc_gen import DocGen
18+ from aws_doc_sdk_examples_tools .metadata_errors import MetadataError
19+ from collections import defaultdict
20+ import re
21+
22+ # Folders to exclude from processing (can be extended as needed)
23+ EXCLUDED_FOLDERS = {'.kiro' , '.git' , 'node_modules' , '__pycache__' }
24+
25+
def apply_folder_exclusion_patches():
    """
    Monkey-patch aws_doc_sdk_examples_tools so file discovery skips the
    folders listed in EXCLUDED_FOLDERS.

    Replaces ``validator_config.skip`` and ``file_utils.get_files`` in place.
    This runs at import time of this module, so anything that uses those
    names afterwards gets the patched behavior.
    """
    from aws_doc_sdk_examples_tools import file_utils, validator_config
    from aws_doc_sdk_examples_tools.fs import Fs, PathFs

    def patched_skip(path: Path) -> bool:
        """Skip paths inside excluded folders, then apply the original rules."""
        # Any excluded folder appearing anywhere in the path disqualifies it.
        if any(excluded_folder in path.parts for excluded_folder in EXCLUDED_FOLDERS):
            return True

        # Original skip logic: unknown extension or explicitly ignored file name.
        return (
            path.suffix.lower() not in validator_config.EXT_LOOKUP
            or path.name in validator_config.IGNORE_FILES
        )

    def patched_get_files(
        root: Path, skip: Callable[[Path], bool] = lambda _: False, fs: Fs = PathFs()
    ) -> Generator[Path, None, None]:
        """Yield files under ``root`` that pass both the folder-exclusion
        rules and the caller-supplied ``skip`` predicate.

        BUGFIX: the previous version silently ignored the ``skip`` argument,
        so callers passing their own predicate got unfiltered results. The
        default predicate rejects nothing, so default behavior is unchanged.
        """
        for path in file_utils.walk_with_gitignore(root, fs=fs):
            if not patched_skip(path) and not skip(path):
                yield path

    # Apply the patches.
    validator_config.skip = patched_skip
    file_utils.get_files = patched_get_files

    excluded_list = ", ".join(sorted(EXCLUDED_FOLDERS))
    print(f"Applied folder exclusion: {excluded_list} folders excluded")
57+
58+
# Apply folder exclusion patches when module is imported.
# NOTE(review): import-time side effect — merely importing this module mutates
# aws_doc_sdk_examples_tools' validator_config.skip and file_utils.get_files
# for the whole process.
apply_folder_exclusion_patches()
1861
1962
2063# Default to not using Rich
2669logging .basicConfig (level = os .environ .get ("LOGLEVEL" , "INFO" ).upper (), force = True )
2770
2871
class UnmatchedSnippetTagError(MetadataError):
    """A snippet-start or snippet-end tag whose counterpart is missing."""

    def __init__(self, file, id, tag=None, line=None, tag_type=None):
        super().__init__(file=file, id=id)
        # tag_type names which half of the pair was found: 'start' or 'end'.
        self.tag, self.line, self.tag_type = tag, line, tag_type

    def message(self):
        """Human-readable description of the unmatched tag."""
        return (
            f"Unmatched snippet-{self.tag_type} "
            f"tag '{self.tag}' at line {self.line}"
        )
81+
82+
class DuplicateSnippetTagError(MetadataError):
    """The same snippet tag was opened (or closed) more than once in a file."""

    def __init__(self, file, id, tag=None, line=None):
        super().__init__(file=file, id=id)
        self.tag, self.line = tag, line

    def message(self):
        """Human-readable description of the duplicated tag."""
        return f"Duplicate snippet tag '{self.tag}' found at line {self.line}"
91+
92+
def validate_snippet_tags(doc_gen: DocGen):
    """Validate snippet-start/snippet-end pairs across all files.

    Scans every file that DocGen would process and reports:
      * duplicate snippet-start / snippet-end tags within one file,
      * snippet-start tags with no matching snippet-end, and vice versa.

    Files are scanned directly because DocGen.snippets only contains tags
    that already formed valid pairs.

    :param doc_gen: DocGen whose root and fs define the file set to scan.
    :return: list of MetadataError instances (empty when everything pairs up).
    """
    # We need to scan files directly since DocGen.snippets only contains valid pairs.
    from aws_doc_sdk_examples_tools.file_utils import get_files
    from aws_doc_sdk_examples_tools.validator_config import skip

    # Compile once instead of per line: matches "# snippet-start:[tag]" or
    # "// snippet-start:[tag]" comment styles (and the matching -end form).
    start_pattern = re.compile(r'(#|//)\s*snippet-start:\[([^\]]+)\]')
    end_pattern = re.compile(r'(#|//)\s*snippet-end:\[([^\]]+)\]')

    errors = []

    for file_path in get_files(doc_gen.root, skip, fs=doc_gen.fs):
        try:
            lines = doc_gen.fs.read(file_path).splitlines()
        except Exception:
            # Skip files that can't be read (binary files, etc.).
            continue

        snippet_starts = {}  # tag -> line number of first snippet-start
        snippet_ends = {}  # tag -> line number of first snippet-end

        for line_num, line in enumerate(lines, 1):
            start_match = start_pattern.search(line)
            if start_match:
                tag = start_match.group(2)
                if tag in snippet_starts:
                    # Same tag opened twice in one file.
                    errors.append(DuplicateSnippetTagError(
                        file=file_path,
                        id=f"Duplicate snippet-start tag in {file_path}",
                        tag=tag,
                        line=line_num,
                    ))
                else:
                    snippet_starts[tag] = line_num

            end_match = end_pattern.search(line)
            if end_match:
                tag = end_match.group(2)
                if tag in snippet_ends:
                    # Same tag closed twice in one file.
                    errors.append(DuplicateSnippetTagError(
                        file=file_path,
                        id=f"Duplicate snippet-end tag in {file_path}",
                        tag=tag,
                        line=line_num,
                    ))
                else:
                    snippet_ends[tag] = line_num

        # Every snippet-start needs a corresponding snippet-end...
        for tag, start_line in snippet_starts.items():
            if tag not in snippet_ends:
                errors.append(UnmatchedSnippetTagError(
                    file=file_path,
                    id=f"Unclosed snippet-start in {file_path}",
                    tag=tag,
                    line=start_line,
                    tag_type='start',
                ))

        # ...and every snippet-end needs a corresponding snippet-start.
        for tag, end_line in snippet_ends.items():
            if tag not in snippet_starts:
                errors.append(UnmatchedSnippetTagError(
                    file=file_path,
                    id=f"Unmatched snippet-end in {file_path}",
                    tag=tag,
                    line=end_line,
                    tag_type='end',
                ))

    return errors
171+
172+
29173def prepare_scanner (doc_gen : DocGen ) -> Optional [Scanner ]:
30174 for path in (doc_gen .root / ".doc_gen/metadata" ).glob ("*_metadata.yaml" ):
31175 doc_gen .process_metadata (path )
32176 doc_gen .collect_snippets ()
33177 doc_gen .validate ()
178+
179+ # Validate snippet tag pairs
180+ snippet_errors = validate_snippet_tags (doc_gen )
181+ if snippet_errors :
182+ doc_gen .errors .extend (snippet_errors )
183+
34184 if doc_gen .errors :
35185 error_strings = [str (error ) for error in doc_gen .errors ]
36186 failed_list = "\n " .join (f"DocGen Error: { e } " for e in error_strings )
@@ -200,4 +350,4 @@ def make_diff(renderer, id):
200350 current = renderer .read_current ().split ("\n " )
201351 expected = renderer .readme_text .split ("\n " )
202352 diff = unified_diff (current , expected , f"{ id } /current" , f"{ id } /expected" )
203- return "\n " .join (diff )
353+ return "\n " .join (diff )
0 commit comments