Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 54 additions & 2 deletions aws_doc_sdk_examples_tools/yaml_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
from collections import defaultdict
from dataclasses import asdict
from pathlib import Path
from typing import Any, DefaultDict, Dict, List
from typing import Any, DefaultDict, Dict, List, Set, Tuple

import difflib
import logging
import yaml

Expand All @@ -12,7 +13,7 @@

# Configure the root logger so INFO-level (and higher) messages are emitted.
logging.basicConfig(level=logging.INFO)

# NOTE(review): the two assignments below look like the removed and added
# sides of one diff line; if both run, the second assignment is the one
# that remains bound to `logger`.
logger = logging.getLogger(__file__)
# Name the logger after this file's base name instead of its full path.
logger = logging.getLogger(Path(__file__).name)


def write_many(root: Path, to_write: Dict[str, str]):
Expand Down Expand Up @@ -111,6 +112,45 @@ def excerpt_dict(excerpt: Dict) -> Dict:
return reordered


def collect_yaml(root: Path) -> Dict[str, Dict]:
    """Parse every ``*.yaml`` file under ``<root>/.doc_gen/metadata``.

    Args:
        root: Directory that contains the ``.doc_gen/metadata`` tree.

    Returns:
        A mapping from each file's path relative to ``root`` (as a string)
        to the value returned by ``yaml.safe_load`` for that file. An empty
        mapping is returned when the metadata directory does not exist.
        Files that fail to parse are logged as errors and left out of the
        mapping. Note that ``yaml.safe_load`` yields ``None`` for an empty
        document, so values are not guaranteed to be dictionaries.
    """
    yaml_files: Dict[str, Dict] = {}
    metadata_dir = root / ".doc_gen" / "metadata"

    if not metadata_dir.exists():
        return yaml_files

    for yaml_path in metadata_dir.glob("**/*.yaml"):
        rel_path = yaml_path.relative_to(root)

        # Decode as UTF-8 explicitly: without it, text-mode open() falls back
        # to the platform locale encoding, so the same metadata file could
        # parse differently (or fail to decode) from one machine to another.
        with open(yaml_path, "r", encoding="utf-8") as file:
            try:
                content = yaml.safe_load(file)
                yaml_files[str(rel_path)] = content
            except yaml.YAMLError as e:
                logger.error(f"Error parsing YAML file {yaml_path}: {e}")

    return yaml_files


def report_yaml_differences(
    before_values: Dict[str, Dict], after_values: Dict[str, Dict]
) -> List[Tuple[str, str]]:
    """Classify how each file changed between two parsed-YAML snapshots.

    Args:
        before_values: Mapping of file path to parsed content before the write.
        after_values: Mapping of file path to parsed content after the write.

    Returns:
        A list of ``(file_path, change)`` tuples, ordered by ``file_path``,
        where ``change`` is ``"added"`` (path only in ``after_values``),
        ``"removed"`` (path only in ``before_values``) or ``"modified"``
        (path in both with unequal content). Unchanged files are omitted.
    """
    differences: List[Tuple[str, str]] = []

    # Sort the union of paths so the report order is reproducible; plain set
    # iteration order of strings varies between interpreter runs.
    for file_path in sorted(set(before_values) | set(after_values)):
        # Decide by key membership first. Comparing `.get()` results would
        # treat a missing path and a path whose parsed content is None
        # (e.g. an empty YAML file) as equal and hide the add/remove.
        if file_path not in before_values:
            differences.append((file_path, "added"))
        elif file_path not in after_values:
            differences.append((file_path, "removed"))
        elif before_values[file_path] != after_values[file_path]:
            differences.append((file_path, "modified"))

    return differences


def main():
parser = ArgumentParser(
description="Build a DocGen instance and normalize the metadata."
Expand All @@ -123,9 +163,21 @@ def main():
if not root.is_dir():
logger.error(f"Expected {root} to be a directory.")

before_values = collect_yaml(root)
doc_gen = DocGen.from_root(root)
writes = prepare_write(doc_gen.examples)
write_many(root, writes)
after_values = collect_yaml(root)

if before_values != after_values:
differences = report_yaml_differences(before_values, after_values)
logger.error(f"YAML content changed in {len(differences)} files after writing:")
for file_path, diff_type in differences:
logger.error(f" - {file_path}: {diff_type}")
else:
logger.info(
f"Metadata for {root.name} has been normalized and verified for consistency."
)


if __name__ == "__main__":
Expand Down
44 changes: 43 additions & 1 deletion aws_doc_sdk_examples_tools/yaml_writer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
import pytest

from aws_doc_sdk_examples_tools.doc_gen import DocGen
from aws_doc_sdk_examples_tools.yaml_writer import prepare_write
from aws_doc_sdk_examples_tools.yaml_writer import (
prepare_write,
report_yaml_differences,
)


# Path to the test_resources/doc_gen_test directory that sits next to this test file.
ROOT = Path(__file__).parent / "test_resources" / "doc_gen_test"
Expand Down Expand Up @@ -46,3 +49,42 @@ def test_doc_gen(sample_doc_gen: DocGen):
}

assert writes == expected_writes


def test_report_yaml_differences_with_changes():
    """A modified, a removed, and an added file are each reported with the matching label."""
    snapshot_before = {
        "file1.yaml": {"key1": "value1"},
        "file2.yaml": {"key2": "value2"},
        "file3.yaml": {"key3": "value3"},
    }
    # Relative to the snapshot above: file1 has new content, file2 is gone,
    # file3 is untouched, and file4 is new.
    snapshot_after = {
        "file1.yaml": {"key1": "changed_value"},
        "file3.yaml": {"key3": "value3"},
        "file4.yaml": {"key4": "value4"},
    }

    # Compare in sorted order so the assertion does not depend on the order
    # in which the function happens to report the files.
    reported = sorted(report_yaml_differences(snapshot_before, snapshot_after))
    wanted = sorted(
        [
            ("file1.yaml", "modified"),
            ("file2.yaml", "removed"),
            ("file4.yaml", "added"),
        ]
    )

    assert reported == wanted


def test_report_yaml_differences_no_changes():
    """Two equal snapshots produce an empty difference report."""
    snapshot_before = {"file1.yaml": {"key": "value"}}
    snapshot_after = {"file1.yaml": {"key": "value"}}

    assert report_yaml_differences(snapshot_before, snapshot_after) == []