diff --git a/.gitignore b/.gitignore
index 704c243..6f79950 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,4 @@ __pycache__
*.egg-info
.idea/
build/
-dist/
-.ailly_iam_policy
-*.log
\ No newline at end of file
+dist/
\ No newline at end of file
diff --git a/aws_doc_sdk_examples_tools/lliam/domain/commands.py b/aws_doc_sdk_examples_tools/lliam/domain/commands.py
index 2f52d10..d351c7d 100644
--- a/aws_doc_sdk_examples_tools/lliam/domain/commands.py
+++ b/aws_doc_sdk_examples_tools/lliam/domain/commands.py
@@ -25,3 +25,9 @@ class UpdateReservoir(Command):
root: Path
batches: List[str]
packages: List[str]
+
+
+@dataclass
+class DedupeReservoir(Command):
+ """Command requesting that example metadata under ``root`` be de-duplicated."""
+ # Root path the handler loads its DocGen from.
+ root: Path
+ # Package names to restrict the run to; an empty list means no filtering.
+ packages: List[str]
diff --git a/aws_doc_sdk_examples_tools/lliam/entry_points/lliam_app.py b/aws_doc_sdk_examples_tools/lliam/entry_points/lliam_app.py
index ac67686..bc90464 100644
--- a/aws_doc_sdk_examples_tools/lliam/entry_points/lliam_app.py
+++ b/aws_doc_sdk_examples_tools/lliam/entry_points/lliam_app.py
@@ -81,6 +81,23 @@ def update_reservoir(
handle_domain_errors(errors)
+@app.command()
+def dedupe_reservoir(
+    iam_tributary_root: str,
+    packages: Annotated[
+        Optional[str],
+        typer.Option(help="Comma delimited list of packages to dedupe"),
+    ] = None,
+) -> None:
+    """
+    Make fields that must be unique (currently title_abbrev) unique by
+    appending a counter to duplicates.
+    """
+    # Translate the CLI arguments into a domain command; the handler
+    # registered for DedupeReservoir on the message bus does the work.
+    cmd = commands.DedupeReservoir(
+        root=Path(iam_tributary_root),
+        packages=parse_package_names(packages),
+    )
+    handle_domain_errors(messagebus.handle(cmd))
+
+
def handle_domain_errors(errors: List[errors.DomainError]):
if errors:
for error in errors:
diff --git a/aws_doc_sdk_examples_tools/lliam/service_layer/dedupe_reservoir.py b/aws_doc_sdk_examples_tools/lliam/service_layer/dedupe_reservoir.py
new file mode 100644
index 0000000..68359e0
--- /dev/null
+++ b/aws_doc_sdk_examples_tools/lliam/service_layer/dedupe_reservoir.py
@@ -0,0 +1,43 @@
+from collections import Counter
+from dataclasses import replace
+import logging
+from typing import Dict
+
+from aws_doc_sdk_examples_tools.doc_gen import DocGen
+from aws_doc_sdk_examples_tools.lliam.domain.commands import DedupeReservoir
+from aws_doc_sdk_examples_tools.metadata import Example
+from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many
+
+logger = logging.getLogger(__name__)
+
+
+def make_title_abbreviation(example: Example, counter: Counter):
+    """
+    Return a title abbreviation for ``example`` that has not been returned before.
+
+    ``counter`` records every abbreviation seen or returned so far and is
+    updated in place. The first example with a given abbreviation keeps it
+    unchanged; later ones get a " (N)" suffix, starting at " (2)".
+    """
+    base = example.title_abbrev
+    seen = counter[base]
+    counter[base] += 1
+    if not seen:
+        return base
+
+    suffix = seen + 1
+    abbrev = f"{base} ({suffix})"
+    # A suffixed name can collide with an abbreviation that already exists
+    # verbatim (for example "X (2)" next to two plain "X" entries), so keep
+    # counting until the candidate is unused.
+    while counter[abbrev]:
+        suffix += 1
+        abbrev = f"{base} ({suffix})"
+    # Record the generated name too, so a later example carrying exactly
+    # this text is treated as a duplicate.
+    counter[abbrev] += 1
+    return abbrev
+
+
+def handle_dedupe_reservoir(cmd: DedupeReservoir, uow: None):
+    """
+    Rewrite the metadata under ``cmd.root`` with de-duplicated title abbreviations.
+
+    Loads the DocGen at ``cmd.root``, selects examples (optionally limited to
+    ``cmd.packages``), passes each one through ``make_title_abbreviation`` in
+    iteration order, and writes the result back. ``uow`` is not used.
+    """
+    doc_gen = DocGen.from_root(cmd.root)
+
+    def is_selected(candidate: Example) -> bool:
+        # With no package filter, or no file to derive a package name from,
+        # the example is always kept.
+        if not (cmd.packages and candidate.file):
+            return True
+        package = candidate.file.name.split("_metadata.yaml")[0]
+        return package in cmd.packages
+
+    selected: Dict[str, Example] = {
+        example_id: candidate
+        for example_id, candidate in doc_gen.examples.items()
+        if is_selected(candidate)
+    }
+
+    # One shared counter so numbering runs across all selected examples.
+    seen_abbrevs: Counter = Counter()
+    deduped: Dict[str, Example] = {
+        example_id: replace(
+            candidate,
+            title_abbrev=make_title_abbreviation(candidate, seen_abbrevs),
+        )
+        for example_id, candidate in selected.items()
+    }
+
+    write_many(cmd.root, prepare_write(deduped))
diff --git a/aws_doc_sdk_examples_tools/lliam/service_layer/messagebus.py b/aws_doc_sdk_examples_tools/lliam/service_layer/messagebus.py
index 2579ad9..9f551a8 100644
--- a/aws_doc_sdk_examples_tools/lliam/service_layer/messagebus.py
+++ b/aws_doc_sdk_examples_tools/lliam/service_layer/messagebus.py
@@ -3,6 +3,7 @@
from aws_doc_sdk_examples_tools.lliam.domain import commands
from aws_doc_sdk_examples_tools.lliam.service_layer import (
create_prompts,
+ dedupe_reservoir,
update_doc_gen,
run_ailly,
unit_of_work,
@@ -33,4 +34,5 @@ def handle_command(command: commands.Command, uow: Optional[unit_of_work.FsUnitO
commands.CreatePrompts: create_prompts.create_prompts,
commands.RunAilly: run_ailly.handle_run_ailly,
commands.UpdateReservoir: update_doc_gen.handle_update_reservoir,
+ commands.DedupeReservoir: dedupe_reservoir.handle_dedupe_reservoir,
}
diff --git a/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py b/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py
index 6fa3378..c8703cf 100644
--- a/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py
+++ b/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py
@@ -1,9 +1,11 @@
+from dataclasses import replace
import json
import logging
from collections import Counter
from pathlib import Path
from typing import Dict, Iterable, List
+from aws_doc_sdk_examples_tools.lliam.adapters.repository import DEFAULT_METADATA_PREFIX
from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many
from aws_doc_sdk_examples_tools.lliam.config import (
@@ -15,19 +17,22 @@
logger = logging.getLogger(__name__)
+Updates = Dict[str, List[Dict[str, str]]]
+
IAM_LANGUAGE = "IAMPolicyGrammar"
-def examples_from_updates(updates: List[Dict]) -> Iterable[Example]:
+def examples_from_updates(updates: Updates) -> Iterable[Example]:
"""
Takes a list of example metadata updates and returns an
iterable of examples with the applied updates.
"""
indexed_updates = {}
- for item in updates:
- if "id" in item:
- indexed_updates[item["id"]] = item
+ for update_list in updates.values():
+ for item in update_list:
+ if "id" in item:
+ indexed_updates[item["id"]] = item
examples = [
Example(
@@ -43,45 +48,55 @@ def examples_from_updates(updates: List[Dict]) -> Iterable[Example]:
return examples
-def make_title_abbreviation(old: Example, new: Example, abbreviations: Counter):
- language = old.languages[IAM_LANGUAGE]
+def get_source_title(example: Example) -> str:
+ """Return the source title of the example's first IAM_LANGUAGE version, or "" when that version has no source."""
+ language = example.languages[IAM_LANGUAGE]
version = language.versions[0]
source = version.source
- source_title = source.title if source else ""
- base = f"{new.title_abbrev} (from '{source_title}' guide)"
- abbreviations[base] += 1
- count = abbreviations[base]
- return f"{base} ({count})" if count > 1 else base
+ return source.title if source else ""
def update_examples(doc_gen: DocGen, examples: Iterable[Example]) -> Dict[str, Example]:
"""
Merge a subset of example properties into a DocGen instance.
"""
- title_abbrevs = Counter(
- [example.title_abbrev for example in doc_gen.examples.values()]
- )
- updated = {}
+
for example in examples:
- if doc_gen_example := doc_gen.examples.get(example.id):
- doc_gen_example.title = example.title
- doc_gen_example.title_abbrev = make_title_abbreviation(
- old=doc_gen_example, new=example, abbreviations=title_abbrevs
+ if example.id in doc_gen.examples:
+ source_title = get_source_title(doc_gen.examples[example.id])
+ # This is a hack. TCA is replacing AWS with &AWS;, which entity converter
+ # then does another pass on. So we end up with things like "&AWS; &GLUlong;"
+ # which render as "AWS AWS Glue". We should look at this closer when time permits.
+ source_title = source_title.replace("&AWS;", "AWS")
+ # No uniqueness counter is applied here; the abbreviation is only
+ # tagged with the source guide title.
+ new_abbrev = f"{example.title_abbrev} (from '{source_title}' guide)"
+ doc_gen_example = replace(
+ doc_gen.examples[example.id],
+ title=example.title,
+ title_abbrev=new_abbrev,
+ synopsis=example.synopsis,
)
- doc_gen_example.synopsis = example.synopsis
- updated[doc_gen_example.id] = doc_gen_example
+ # Store the updated copy back on doc_gen under the same id.
+ doc_gen.examples[example.id] = doc_gen_example
else:
logger.warning(f"Could not find example with id: {example.id}")
- return updated
+ # The whole mapping is returned, including examples that were not updated.
+ return doc_gen.examples
-def update_doc_gen(doc_gen_root: Path, updates: List[Dict]) -> Dict[str, Example]:
- doc_gen = DocGen.from_root(doc_gen_root)
+def update_doc_gen(doc_gen: DocGen, updates: Updates) -> Dict[str, Example]:
+ """Turn ``updates`` into examples, merge them into ``doc_gen``, and return the mapping produced by ``update_examples``."""
examples = examples_from_updates(updates)
updated_examples = update_examples(doc_gen, examples)
return updated_examples
+def merge_updates(a: Updates, b: Updates) -> Updates:
+    """
+    Combine two package-name -> update-list mappings into a new mapping.
+
+    Packages present in both inputs get ``a``'s updates followed by ``b``'s.
+    Neither input mapping nor any of their lists is modified or shared with
+    the result; the individual update dicts are not copied.
+    """
+    # Copy every list up front: a plain dict(a) would share the lists, so
+    # the extend() below would silently grow the caller's data.
+    merged: Updates = {
+        package_name: list(updates) for package_name, updates in a.items()
+    }
+    for package_name, updates in b.items():
+        # Assumption: Updates will not conflict.
+        merged.setdefault(package_name, []).extend(updates)
+    return merged
+
+
def handle_update_reservoir(cmd: UpdateReservoir, uow: None):
update_files = (
[AILLY_DIR_PATH / f"updates_{batch}.json" for batch in cmd.batches]
@@ -93,23 +108,29 @@ def handle_update_reservoir(cmd: UpdateReservoir, uow: None):
logger.warning("No IAM update files found to process")
return
+ doc_gen = DocGen.from_root(cmd.root)
+
+ combined_updates: Updates = {}
+
for update_file in sorted(update_files):
if update_file.exists():
- logger.info(f"Processing updates from {update_file.name}")
- updates = json.loads(update_file.read_text())
+ updates: Updates = json.loads(update_file.read_text())
if cmd.packages:
- updates = [
- update
- for package, update_list in updates.items()
- if package in cmd.packages
- for update in update_list
- ]
+ updates = {
+ package_name: update_list
+ for package_name, update_list in updates.items()
+ if package_name in cmd.packages
+ }
+
if not updates:
logger.warning(f"No matching updates to run in {update_file.name}")
continue
- examples = update_doc_gen(doc_gen_root=cmd.root, updates=updates)
- writes = prepare_write(examples)
- write_many(cmd.root, writes)
+ combined_updates = merge_updates(combined_updates, updates)
+
else:
logger.warning(f"Update file not found: {update_file}")
+
+ updated_examples = update_doc_gen(doc_gen=doc_gen, updates=combined_updates)
+ writes = prepare_write(updated_examples)
+ write_many(cmd.root, writes)
diff --git a/aws_doc_sdk_examples_tools/metadata_validator.py b/aws_doc_sdk_examples_tools/metadata_validator.py
index cdd1302..5c91198 100755
--- a/aws_doc_sdk_examples_tools/metadata_validator.py
+++ b/aws_doc_sdk_examples_tools/metadata_validator.py
@@ -163,7 +163,11 @@ def _validate_aws_entity_usage(value: str) -> bool:
If these counts differ, there's an invalid usage.
"""
xval = value.replace("&", "&")
- xtree = xml_tree.fromstring(f"{xval}")
+ try:
+ xtree = xml_tree.fromstring(f"{xval}")
+ except Exception as e:
+ print(xval)
+ raise e
blocks = (
xtree.findall(".//programlisting")
+ xtree.findall(".//code")
diff --git a/aws_doc_sdk_examples_tools/yaml_writer.py b/aws_doc_sdk_examples_tools/yaml_writer.py
index 1c74921..64be9c1 100644
--- a/aws_doc_sdk_examples_tools/yaml_writer.py
+++ b/aws_doc_sdk_examples_tools/yaml_writer.py
@@ -2,9 +2,8 @@
from collections import defaultdict
from dataclasses import asdict
from pathlib import Path
-from typing import Any, DefaultDict, Dict, List, Set, Tuple
+from typing import Any, DefaultDict, Dict, List, Tuple
-import difflib
import logging
import yaml
@@ -146,7 +145,8 @@ def report_yaml_differences(
elif file_path not in after_values:
differences.append((file_path, "removed"))
else:
- differences.append((file_path, "modified"))
+ diff = f"{before}\n\n---\n\n{after}"
+ differences.append((file_path, diff))
return differences
@@ -172,8 +172,8 @@ def main():
if before_values != after_values:
differences = report_yaml_differences(before_values, after_values)
logger.error(f"YAML content changed in {len(differences)} files after writing:")
- for file_path, diff_type in differences:
- logger.error(f" - {file_path}: {diff_type}")
+ for difference in differences:
+ logger.error(difference)
else:
logger.info(
f"Metadata for {root.name} has been normalized and verified for consistency."
diff --git a/aws_doc_sdk_examples_tools/yaml_writer_test.py b/aws_doc_sdk_examples_tools/yaml_writer_test.py
index d15c19e..65815b8 100644
--- a/aws_doc_sdk_examples_tools/yaml_writer_test.py
+++ b/aws_doc_sdk_examples_tools/yaml_writer_test.py
@@ -72,12 +72,11 @@ def test_report_yaml_differences_with_changes():
differences.sort()
expected = [
- ("file1.yaml", "modified"),
+ ("file1.yaml", "{'key1': 'value1'}\n\n---\n\n{'key1': 'changed_value'}"),
("file2.yaml", "removed"),
("file4.yaml", "added"),
]
expected.sort()
-
assert differences == expected