awsdocs · cpyle0819 · Jul 10, 2025 · Jul 10, 2025
diff --git a/aws_doc_sdk_examples_tools/yaml_writer.py b/aws_doc_sdk_examples_tools/yaml_writer.py
@@ -2,8 +2,9 @@
 from collections import defaultdict
 from dataclasses import asdict
 from pathlib import Path
-from typing import Any, DefaultDict, Dict, List
+from typing import Any, DefaultDict, Dict, List, Set, Tuple
 
+import difflib
 import logging
 import yaml
 
@@ -12,7 +13,7 @@
 
 logging.basicConfig(level=logging.INFO)
 
-logger = logging.getLogger(__file__)
+logger = logging.getLogger(Path(__file__).name)  # file name only, not the full path
 
 
 def write_many(root: Path, to_write: Dict[str, str]):
@@ -111,6 +112,45 @@ def excerpt_dict(excerpt: Dict) -> Dict:
     return reordered
 
 
+def collect_yaml(root: Path) -> Dict[str, Dict]:
+    """Load all YAML files under <root>/.doc_gen/metadata, keyed by root-relative path.
+
+    Files that fail to parse are logged and left out of the result; a missing
+    metadata directory yields an empty dict.
+    """
+    metadata_dir = root / ".doc_gen" / "metadata"
+    yaml_files: Dict[str, Dict] = {}
+    if not metadata_dir.exists():
+        return yaml_files
+    for yaml_path in metadata_dir.glob("**/*.yaml"):
+        # Decode explicitly as UTF-8 so the result does not depend on the locale.
+        with open(yaml_path, "r", encoding="utf-8") as file:
+            try:
+                yaml_files[str(yaml_path.relative_to(root))] = yaml.safe_load(file)
+            except yaml.YAMLError as e:
+                logger.error(f"Error parsing YAML file {yaml_path}: {e}")
+    return yaml_files
+
+
+def report_yaml_differences(
+    before_values: Dict[str, Dict], after_values: Dict[str, Dict]
+) -> List[Tuple[str, str]]:
+    """Return (path, kind) pairs, sorted by path, for files that differ.
+
+    `kind` is "added", "removed" or "modified". Key membership is checked
+    before content so an empty file (parsed as None) is still reported.
+    """
+    differences: List[Tuple[str, str]] = []
+    for file_path in sorted(set(before_values) | set(after_values)):
+        if file_path not in before_values:
+            differences.append((file_path, "added"))
+        elif file_path not in after_values:
+            differences.append((file_path, "removed"))
+        elif before_values[file_path] != after_values[file_path]:
+            differences.append((file_path, "modified"))
+    return differences
+
+
 def main():
     parser = ArgumentParser(
         description="Build a DocGen instance and normalize the metadata."
@@ -123,9 +163,21 @@ def main():
     if not root.is_dir():
         logger.error(f"Expected {root} to be a directory.")
 
+    before_values = collect_yaml(root)
     doc_gen = DocGen.from_root(root)
     writes = prepare_write(doc_gen.examples)
     write_many(root, writes)
+    after_values = collect_yaml(root)
+
+    if before_values != after_values:
+        differences = report_yaml_differences(before_values, after_values)
+        logger.error(f"YAML content changed in {len(differences)} files after writing:")
+        for file_path, diff_type in differences:
+            logger.error(f"  - {file_path}: {diff_type}")
+    else:
+        logger.info(
+            f"Metadata for {root.name} has been normalized and verified for consistency."
+        )
 
 
 if __name__ == "__main__":

diff --git a/aws_doc_sdk_examples_tools/yaml_writer_test.py b/aws_doc_sdk_examples_tools/yaml_writer_test.py
@@ -2,7 +2,10 @@
 import pytest
 
 from aws_doc_sdk_examples_tools.doc_gen import DocGen
-from aws_doc_sdk_examples_tools.yaml_writer import prepare_write
+from aws_doc_sdk_examples_tools.yaml_writer import (
+    prepare_write,
+    report_yaml_differences,
+)
 
 
 ROOT = Path(__file__).parent / "test_resources" / "doc_gen_test"
@@ -46,3 +49,42 @@ def test_doc_gen(sample_doc_gen: DocGen):
     }
 
     assert writes == expected_writes
+
+
+def test_report_yaml_differences_with_changes():
+    """Added, removed and modified files are each reported with the right label."""
+    old_state = {
+        "file1.yaml": {"key1": "value1"},
+        "file2.yaml": {"key2": "value2"},
+        "file3.yaml": {"key3": "value3"},
+    }
+
+    new_state = {
+        "file1.yaml": {"key1": "changed_value"},  # content differs -> modified
+        "file3.yaml": {"key3": "value3"},  # identical -> not reported
+        "file4.yaml": {"key4": "value4"},  # only in new_state -> added
+        # file2.yaml is absent here -> removed
+    }
+
+    wanted = sorted(
+        [
+            ("file1.yaml", "modified"),
+            ("file2.yaml", "removed"),
+            ("file4.yaml", "added"),
+        ]
+    )
+
+    reported = report_yaml_differences(old_state, new_state)
+
+    # Sort before comparing so the assertion does not depend on the order
+    # in which the differences are returned.
+    assert sorted(reported) == wanted
+
+
+def test_report_yaml_differences_no_changes():
+    """Identical before/after mappings produce an empty difference list."""
+    unchanged = {"file1.yaml": {"key": "value"}}
+    # Pass an equal but distinct dict so the check compares contents, not identity.
+    same_again = {"file1.yaml": {"key": "value"}}
+
+    assert report_yaml_differences(unchanged, same_again) == []