diff --git a/.gitignore b/.gitignore index 704c243..6f79950 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,4 @@ __pycache__ *.egg-info .idea/ build/ -dist/ -.ailly_iam_policy -*.log \ No newline at end of file +dist/ \ No newline at end of file diff --git a/aws_doc_sdk_examples_tools/lliam/domain/commands.py b/aws_doc_sdk_examples_tools/lliam/domain/commands.py index 2f52d10..d351c7d 100644 --- a/aws_doc_sdk_examples_tools/lliam/domain/commands.py +++ b/aws_doc_sdk_examples_tools/lliam/domain/commands.py @@ -25,3 +25,9 @@ class UpdateReservoir(Command): root: Path batches: List[str] packages: List[str] + + +@dataclass +class DedupeReservoir(Command): + root: Path + packages: List[str] diff --git a/aws_doc_sdk_examples_tools/lliam/entry_points/lliam_app.py b/aws_doc_sdk_examples_tools/lliam/entry_points/lliam_app.py index ac67686..bc90464 100644 --- a/aws_doc_sdk_examples_tools/lliam/entry_points/lliam_app.py +++ b/aws_doc_sdk_examples_tools/lliam/entry_points/lliam_app.py @@ -81,6 +81,23 @@ def update_reservoir( handle_domain_errors(errors) +@app.command() +def dedupe_reservoir( + iam_tributary_root: str, + packages: Annotated[ + Optional[str], typer.Option(help="Comma delimited list of packages to update") + ] = None, +) -> None: + """ + Enumerate fields that must be unique (e.g. 
def make_title_abbreviation(example: "Example", counter: Counter):
    """Return a title abbreviation for ``example`` not yet handed out via ``counter``.

    The first example seen with a given ``title_abbrev`` keeps it unchanged.
    Each later one gets a numeric suffix: ``"Title (2)"``, ``"Title (3)"``, ...

    Args:
        example: Object whose ``title_abbrev`` is used as the base name.
        counter: Shared tally, mutated in place. It counts how often each base
            abbreviation was requested and also records every suffixed name
            that was returned, so a generated name never repeats one that is
            already in use.

    Returns:
        The abbreviation to store on the example.
    """
    base = example.title_abbrev
    seen = counter[base]
    counter[base] += 1
    if not seen:
        return base

    # Skip suffixes that are already taken, e.g. when another example's
    # original title_abbrev is literally "Title (2)". Reading a missing key
    # from a Counter yields 0 and does not insert the key.
    suffix = seen + 1
    candidate = f"{base} ({suffix})"
    while counter[candidate]:
        suffix += 1
        candidate = f"{base} ({suffix})"
    counter[candidate] += 1
    return candidate


def handle_dedupe_reservoir(cmd: "DedupeReservoir", uow: None):
    """Make ``title_abbrev`` unique across the selected examples and write them.

    Loads the DocGen metadata found under ``cmd.root``. When ``cmd.packages``
    is non-empty, only examples whose metadata file name starts with one of
    those package names followed by ``_metadata.yaml`` are processed;
    otherwise every example is processed. The rewritten examples are passed to
    ``prepare_write`` / ``write_many``.

    Args:
        cmd: Command carrying the metadata root and the optional package filter.
        uow: Not used by this handler.
    """
    doc_gen = DocGen.from_root(cmd.root)
    wanted_packages = set(cmd.packages or ())

    def is_selected(example: "Example") -> bool:
        if not wanted_packages:
            return True
        # An example without a metadata file cannot be matched to a package,
        # so an explicit package filter must exclude it rather than let it
        # slip through.
        if not example.file:
            return False
        package = example.file.name.split("_metadata.yaml")[0]
        return package in wanted_packages

    title_abbrev_counts: Counter = Counter()
    examples: Dict[str, "Example"] = {}

    # Iterate in DocGen order so the first occurrence keeps the plain name.
    for example_id, example in doc_gen.examples.items():
        if not is_selected(example):
            continue
        examples[example_id] = replace(
            example,
            title_abbrev=make_title_abbreviation(example, title_abbrev_counts),
        )

    writes = prepare_write(examples)
    write_many(cmd.root, writes)
a/aws_doc_sdk_examples_tools/lliam/service_layer/messagebus.py b/aws_doc_sdk_examples_tools/lliam/service_layer/messagebus.py index 2579ad9..9f551a8 100644 --- a/aws_doc_sdk_examples_tools/lliam/service_layer/messagebus.py +++ b/aws_doc_sdk_examples_tools/lliam/service_layer/messagebus.py @@ -3,6 +3,7 @@ from aws_doc_sdk_examples_tools.lliam.domain import commands from aws_doc_sdk_examples_tools.lliam.service_layer import ( create_prompts, + dedupe_reservoir, update_doc_gen, run_ailly, unit_of_work, @@ -33,4 +34,5 @@ def handle_command(command: commands.Command, uow: Optional[unit_of_work.FsUnitO commands.CreatePrompts: create_prompts.create_prompts, commands.RunAilly: run_ailly.handle_run_ailly, commands.UpdateReservoir: update_doc_gen.handle_update_reservoir, + commands.DedupeReservoir: dedupe_reservoir.handle_dedupe_reservoir, } diff --git a/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py b/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py index 6fa3378..c8703cf 100644 --- a/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py +++ b/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py @@ -1,9 +1,11 @@ +from dataclasses import replace import json import logging from collections import Counter from pathlib import Path from typing import Dict, Iterable, List +from aws_doc_sdk_examples_tools.lliam.adapters.repository import DEFAULT_METADATA_PREFIX from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many from aws_doc_sdk_examples_tools.lliam.config import ( @@ -15,19 +17,22 @@ logger = logging.getLogger(__name__) +Updates = Dict[str, List[Dict[str, str]]] + IAM_LANGUAGE = "IAMPolicyGrammar" -def examples_from_updates(updates: List[Dict]) -> Iterable[Example]: +def examples_from_updates(updates: Updates) -> Iterable[Example]: """ Takes a list of example metadata updates and returns an iterable of examples with the applied updates. 
""" indexed_updates = {} - for item in updates: - if "id" in item: - indexed_updates[item["id"]] = item + for update_list in updates.values(): + for item in update_list: + if "id" in item: + indexed_updates[item["id"]] = item examples = [ Example( @@ -43,45 +48,55 @@ def examples_from_updates(updates: List[Dict]) -> Iterable[Example]: return examples -def make_title_abbreviation(old: Example, new: Example, abbreviations: Counter): - language = old.languages[IAM_LANGUAGE] +def get_source_title(example: Example) -> str: + language = example.languages[IAM_LANGUAGE] version = language.versions[0] source = version.source - source_title = source.title if source else "" - base = f"{new.title_abbrev} (from '{source_title}' guide)" - abbreviations[base] += 1 - count = abbreviations[base] - return f"{base} ({count})" if count > 1 else base + return source.title if source else "" def update_examples(doc_gen: DocGen, examples: Iterable[Example]) -> Dict[str, Example]: """ Merge a subset of example properties into a DocGen instance. """ - title_abbrevs = Counter( - [example.title_abbrev for example in doc_gen.examples.values()] - ) - updated = {} + for example in examples: - if doc_gen_example := doc_gen.examples.get(example.id): - doc_gen_example.title = example.title - doc_gen_example.title_abbrev = make_title_abbreviation( - old=doc_gen_example, new=example, abbreviations=title_abbrevs + if example.id in doc_gen.examples: + source_title = get_source_title(doc_gen.examples[example.id]) + # This is a hack. TCA is replacing AWS with &AWS;, which entity converter + # then does another pass on. So we end up with things like "&AWS; &GLUlong;" + # which render as "AWS AWS Glue". We should look at this closer when time permits. 
def update_doc_gen(doc_gen: "DocGen", updates: "Updates") -> Dict[str, "Example"]:
    """Apply ``updates`` to ``doc_gen`` and return the resulting examples.

    Args:
        doc_gen: Already-loaded DocGen instance to merge the updates into.
        updates: Mapping of package name to a list of per-example update dicts.

    Returns:
        Whatever ``update_examples`` returns for the examples built from
        ``updates`` by ``examples_from_updates``.
    """
    examples = examples_from_updates(updates)
    return update_examples(doc_gen, examples)


def merge_updates(a: "Updates", b: "Updates") -> "Updates":
    """Combine two update mappings into a new one without touching the inputs.

    For a package present in both mappings, the entries from ``b`` are
    appended after those from ``a``.

    Args:
        a: Mapping of package name to a list of update dicts.
        b: Mapping with the same shape, merged on top of ``a``.

    Returns:
        A new mapping whose lists are fresh copies, so later merges into the
        result cannot mutate the lists owned by ``a`` or ``b``.
    """
    # Copy each list: dict(a) alone would share the list objects with ``a``.
    merged: "Updates" = {
        package_name: list(update_list) for package_name, update_list in a.items()
    }
    for package_name, update_list in b.items():
        # Assumption: Updates will not conflict.
        merged.setdefault(package_name, []).extend(update_list)
    return merged
""" xval = value.replace("&", "&") - xtree = xml_tree.fromstring(f"{xval}") + try: + xtree = xml_tree.fromstring(f"{xval}") + except Exception as e: + print(xval) + raise e blocks = ( xtree.findall(".//programlisting") + xtree.findall(".//code") diff --git a/aws_doc_sdk_examples_tools/yaml_writer.py b/aws_doc_sdk_examples_tools/yaml_writer.py index 1c74921..64be9c1 100644 --- a/aws_doc_sdk_examples_tools/yaml_writer.py +++ b/aws_doc_sdk_examples_tools/yaml_writer.py @@ -2,9 +2,8 @@ from collections import defaultdict from dataclasses import asdict from pathlib import Path -from typing import Any, DefaultDict, Dict, List, Set, Tuple +from typing import Any, DefaultDict, Dict, List, Tuple -import difflib import logging import yaml @@ -146,7 +145,8 @@ def report_yaml_differences( elif file_path not in after_values: differences.append((file_path, "removed")) else: - differences.append((file_path, "modified")) + diff = f"{before}\n\n---\n\n{after}" + differences.append((file_path, diff)) return differences @@ -172,8 +172,8 @@ def main(): if before_values != after_values: differences = report_yaml_differences(before_values, after_values) logger.error(f"YAML content changed in {len(differences)} files after writing:") - for file_path, diff_type in differences: - logger.error(f" - {file_path}: {diff_type}") + for difference in differences: + logger.error(difference) else: logger.info( f"Metadata for {root.name} has been normalized and verified for consistency." 
diff --git a/aws_doc_sdk_examples_tools/yaml_writer_test.py b/aws_doc_sdk_examples_tools/yaml_writer_test.py index d15c19e..65815b8 100644 --- a/aws_doc_sdk_examples_tools/yaml_writer_test.py +++ b/aws_doc_sdk_examples_tools/yaml_writer_test.py @@ -72,12 +72,11 @@ def test_report_yaml_differences_with_changes(): differences.sort() expected = [ - ("file1.yaml", "modified"), + ("file1.yaml", "{'key1': 'value1'}\n\n---\n\n{'key1': 'changed_value'}"), ("file2.yaml", "removed"), ("file4.yaml", "added"), ] expected.sort() - assert differences == expected