22from collections import defaultdict
33from dataclasses import asdict
44from pathlib import Path
5- from typing import Any , DefaultDict , Dict , List
5+ from typing import Any , DefaultDict , Dict , List , Set , Tuple
66
7+ import difflib
78import logging
89import yaml
910
1213
1314logging .basicConfig (level = logging .INFO )
1415
15- logger = logging .getLogger (__file__ )
16+ logger = logging .getLogger (Path ( __file__ ). name )
1617
1718
1819def write_many (root : Path , to_write : Dict [str , str ]):
@@ -111,6 +112,56 @@ def excerpt_dict(excerpt: Dict) -> Dict:
111112 return reordered
112113
113114
def collect_yaml(root: Path) -> Dict[str, Dict]:
    """
    Load every YAML file found under ``<root>/.doc_gen/metadata``.

    Args:
        root: Directory that contains the ``.doc_gen/metadata`` tree.

    Returns:
        Mapping from each file's path relative to ``root`` (as a string) to
        the value produced by ``yaml.safe_load``. The mapping is empty when
        the metadata directory does not exist. Files that fail to parse are
        logged as errors and left out of the mapping.
    """
    metadata_dir = root / ".doc_gen" / "metadata"
    parsed: Dict[str, Dict] = {}

    if metadata_dir.exists():
        # "**/*.yaml" walks the metadata directory recursively.
        for yaml_path in metadata_dir.glob("**/*.yaml"):
            key = str(yaml_path.relative_to(root))
            with open(yaml_path, "r") as handle:
                try:
                    parsed[key] = yaml.safe_load(handle)
                except yaml.YAMLError as e:
                    # Best effort: report the bad file and keep scanning.
                    logger.error(f"Error parsing YAML file { yaml_path } : { e } ")

    return parsed
133+
134+
def report_yaml_differences(
    before_values: Dict[str, Dict], after_values: Dict[str, Dict]
) -> List[Tuple[str, str]]:
    """
    Compare before and after YAML values and return a list of differences.

    Args:
        before_values: Dictionary of YAML content before changes, keyed by file path
        after_values: Dictionary of YAML content after changes, keyed by file path

    Returns:
        List of tuples containing (file_path, difference_type) where difference_type is
        'added', 'removed', or 'modified'. Entries are ordered by file_path so the
        report is the same on every run. Files present on both sides with equal
        content are not listed.
    """
    differences: List[Tuple[str, str]] = []

    # Sort the union of paths: plain set iteration order is not stable across runs.
    for file_path in sorted(before_values.keys() | after_values.keys()):
        # Decide by key membership first. A stored value may itself be None
        # (for example an empty YAML document), so comparing .get() results
        # would make an added or removed file look unchanged.
        if file_path not in before_values:
            differences.append((file_path, "added"))
        elif file_path not in after_values:
            differences.append((file_path, "removed"))
        elif before_values[file_path] != after_values[file_path]:
            differences.append((file_path, "modified"))

    return differences
163+
164+
114165def main ():
115166 parser = ArgumentParser (
116167 description = "Build a DocGen instance and normalize the metadata."
@@ -123,9 +174,21 @@ def main():
123174 if not root .is_dir ():
124175 logger .error (f"Expected { root } to be a directory." )
125176
177+ before_values = collect_yaml (root )
126178 doc_gen = DocGen .from_root (root )
127179 writes = prepare_write (doc_gen .examples )
128180 write_many (root , writes )
181+ after_values = collect_yaml (root )
182+
183+ if before_values != after_values :
184+ differences = report_yaml_differences (before_values , after_values )
185+ logger .error (f"YAML content changed in { len (differences )} files after writing:" )
186+ for file_path , diff_type in differences :
187+ logger .error (f" - { file_path } : { diff_type } " )
188+ else :
189+ logger .info (
190+ f"Metadata for { root .name } has been normalized and verified for consistency."
191+ )
129192
130193
131194if __name__ == "__main__" :
0 commit comments