Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,4 @@ __pycache__
*.egg-info
.idea/
build/
dist/
.ailly_iam_policy
*.log
dist/
6 changes: 6 additions & 0 deletions aws_doc_sdk_examples_tools/lliam/domain/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,9 @@ class UpdateReservoir(Command):
root: Path
batches: List[str]
packages: List[str]


@dataclass
class DedupeReservoir(Command):
    """Command asking for duplicated unique fields in a reservoir to be numbered."""

    # Root directory from which the handler loads the DocGen metadata.
    root: Path
    # Package names to restrict the run to; an empty list applies no filter.
    packages: List[str]
17 changes: 17 additions & 0 deletions aws_doc_sdk_examples_tools/lliam/entry_points/lliam_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,23 @@ def update_reservoir(
handle_domain_errors(errors)


@app.command()
def dedupe_reservoir(
    iam_tributary_root: str,
    packages: Annotated[
        Optional[str], typer.Option(help="Comma delimited list of packages to update")
    ] = None,
) -> None:
    """
    Enumerate fields that must be unique (e.g. title_abbrev)
    """
    # Build the domain command from the raw CLI arguments: the root becomes a
    # Path and the comma-delimited package string is parsed into names.
    command = commands.DedupeReservoir(
        root=Path(iam_tributary_root),
        packages=parse_package_names(packages),
    )
    # Dispatch through the message bus and report any domain errors it returns.
    handle_domain_errors(messagebus.handle(command))


def handle_domain_errors(errors: List[errors.DomainError]):
if errors:
for error in errors:
Expand Down
43 changes: 43 additions & 0 deletions aws_doc_sdk_examples_tools/lliam/service_layer/dedupe_reservoir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from collections import Counter
from dataclasses import replace
import logging
from typing import Dict

from aws_doc_sdk_examples_tools.doc_gen import DocGen
from aws_doc_sdk_examples_tools.lliam.domain.commands import DedupeReservoir
from aws_doc_sdk_examples_tools.metadata import Example
from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many

logger = logging.getLogger(__name__)


def make_title_abbreviation(example: Example, counter: Counter) -> str:
    """
    Return a title abbreviation for ``example`` that is unique within ``counter``.

    The first example seen with a given ``title_abbrev`` keeps it unchanged.
    Later examples with the same abbreviation get a numeric suffix, starting
    at ``" (2)"``. Every abbreviation handed out is recorded in ``counter``,
    so a generated name never collides with an abbreviation that was already
    seen (for example a pre-existing ``"Foo (2)"``).

    Args:
        example: Object whose ``title_abbrev`` attribute is the base name.
        counter: Shared tally of abbreviations seen so far; mutated in place.

    Returns:
        The abbreviation to use for ``example``.
    """
    base = example.title_abbrev
    seen = counter[base]
    counter[base] += 1
    if not seen:
        return base

    # Start from the occurrence number, then skip any suffix that is already
    # taken. Reading a missing Counter key yields 0 and does not insert it.
    suffix = seen + 1
    candidate = f"{base} ({suffix})"
    while counter[candidate]:
        suffix += 1
        candidate = f"{base} ({suffix})"

    # Record the generated name so later examples cannot reuse it.
    counter[candidate] += 1
    return candidate


def handle_dedupe_reservoir(cmd: DedupeReservoir, uow: None):
    """
    Number repeated title abbreviations in a reservoir and write the result back.

    Loads the DocGen metadata under ``cmd.root``, optionally narrows it to the
    packages named in ``cmd.packages``, passes each selected example through
    ``make_title_abbreviation``, and writes the resulting examples back under
    ``cmd.root``. The ``uow`` argument is not used.
    """
    doc_gen = DocGen.from_root(cmd.root)

    def is_selected(example: Example) -> bool:
        # The package filter only applies when packages were requested and the
        # example records its metadata file; every other example is kept.
        if not (cmd.packages and example.file):
            return True
        # The package name is the file name up to "_metadata.yaml".
        package = example.file.name.split("_metadata.yaml")[0]
        return package in cmd.packages

    selected: Dict[str, Example] = {
        example_id: example
        for example_id, example in doc_gen.examples.items()
        if is_selected(example)
    }

    # One counter is shared across all selected examples so repeats are
    # numbered in iteration order.
    seen_abbrevs: Counter = Counter()
    deduped: Dict[str, Example] = {
        example_id: replace(
            example,
            title_abbrev=make_title_abbreviation(example, seen_abbrevs),
        )
        for example_id, example in selected.items()
    }

    write_many(cmd.root, prepare_write(deduped))
2 changes: 2 additions & 0 deletions aws_doc_sdk_examples_tools/lliam/service_layer/messagebus.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from aws_doc_sdk_examples_tools.lliam.domain import commands
from aws_doc_sdk_examples_tools.lliam.service_layer import (
create_prompts,
dedupe_reservoir,
update_doc_gen,
run_ailly,
unit_of_work,
Expand Down Expand Up @@ -33,4 +34,5 @@ def handle_command(command: commands.Command, uow: Optional[unit_of_work.FsUnitO
commands.CreatePrompts: create_prompts.create_prompts,
commands.RunAilly: run_ailly.handle_run_ailly,
commands.UpdateReservoir: update_doc_gen.handle_update_reservoir,
commands.DedupeReservoir: dedupe_reservoir.handle_dedupe_reservoir,
}
91 changes: 56 additions & 35 deletions aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from dataclasses import replace
import json
import logging
from collections import Counter
from pathlib import Path
from typing import Dict, Iterable, List

from aws_doc_sdk_examples_tools.lliam.adapters.repository import DEFAULT_METADATA_PREFIX
from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many

from aws_doc_sdk_examples_tools.lliam.config import (
Expand All @@ -15,19 +17,22 @@

logger = logging.getLogger(__name__)

Updates = Dict[str, List[Dict[str, str]]]

IAM_LANGUAGE = "IAMPolicyGrammar"


def examples_from_updates(updates: List[Dict]) -> Iterable[Example]:
def examples_from_updates(updates: Updates) -> Iterable[Example]:
"""
Takes a mapping of package name to a list of example metadata updates
and returns an iterable of examples with the applied updates.
"""

indexed_updates = {}
for item in updates:
if "id" in item:
indexed_updates[item["id"]] = item
for update_list in updates.values():
for item in update_list:
if "id" in item:
indexed_updates[item["id"]] = item

examples = [
Example(
Expand All @@ -43,45 +48,55 @@ def examples_from_updates(updates: List[Dict]) -> Iterable[Example]:
return examples


def make_title_abbreviation(old: Example, new: Example, abbreviations: Counter):
language = old.languages[IAM_LANGUAGE]
def get_source_title(example: Example) -> str:
language = example.languages[IAM_LANGUAGE]
version = language.versions[0]
source = version.source
source_title = source.title if source else ""
base = f"{new.title_abbrev} (from '{source_title}' guide)"
abbreviations[base] += 1
count = abbreviations[base]
return f"{base} ({count})" if count > 1 else base
return source.title if source else ""


def update_examples(doc_gen: DocGen, examples: Iterable[Example]) -> Dict[str, Example]:
"""
Merge a subset of example properties into a DocGen instance.
"""
title_abbrevs = Counter(
[example.title_abbrev for example in doc_gen.examples.values()]
)
updated = {}

for example in examples:
if doc_gen_example := doc_gen.examples.get(example.id):
doc_gen_example.title = example.title
doc_gen_example.title_abbrev = make_title_abbreviation(
old=doc_gen_example, new=example, abbreviations=title_abbrevs
if example.id in doc_gen.examples:
source_title = get_source_title(doc_gen.examples[example.id])
# This is a hack. TCA is replacing AWS with &AWS;, which entity converter
# then does another pass on. So we end up with things like "&AWS; &GLUlong;"
# which render as "AWS AWS Glue". We should look at this closer when time permits.
source_title = source_title.replace("&AWS;", "AWS")
new_abbrev = f"{example.title_abbrev} (from '{source_title}' guide)"
doc_gen_example = replace(
doc_gen.examples[example.id],
title=example.title,
title_abbrev=new_abbrev,
synopsis=example.synopsis,
)
doc_gen_example.synopsis = example.synopsis
updated[doc_gen_example.id] = doc_gen_example
doc_gen.examples[example.id] = doc_gen_example
else:
logger.warning(f"Could not find example with id: {example.id}")
return updated
return doc_gen.examples


def update_doc_gen(doc_gen_root: Path, updates: List[Dict]) -> Dict[str, Example]:
doc_gen = DocGen.from_root(doc_gen_root)
def update_doc_gen(doc_gen: DocGen, updates: Updates) -> Dict[str, Example]:
examples = examples_from_updates(updates)
updated_examples = update_examples(doc_gen, examples)
return updated_examples


def merge_updates(a: Updates, b: Updates) -> Updates:
    """
    Combine two update mappings into a new one without modifying either input.

    For a package present in both mappings, the updates from ``b`` are
    appended after those from ``a``.

    Args:
        a: Mapping of package name to its list of updates.
        b: Mapping of package name to its list of updates, merged after ``a``.

    Returns:
        A new mapping whose lists are independent copies, so later changes to
        the result never leak back into ``a`` or ``b``.
    """
    # Copy each list: a shallow dict copy would share the list objects, and
    # extending them below would silently mutate the caller's data.
    merged: Updates = {
        package_name: list(updates) for package_name, updates in a.items()
    }
    for package_name, updates in b.items():
        # Assumption: Updates will not conflict.
        merged.setdefault(package_name, []).extend(updates)
    return merged


def handle_update_reservoir(cmd: UpdateReservoir, uow: None):
update_files = (
[AILLY_DIR_PATH / f"updates_{batch}.json" for batch in cmd.batches]
Expand All @@ -93,23 +108,29 @@ def handle_update_reservoir(cmd: UpdateReservoir, uow: None):
logger.warning("No IAM update files found to process")
return

doc_gen = DocGen.from_root(cmd.root)

combined_updates: Updates = {}

for update_file in sorted(update_files):
if update_file.exists():
logger.info(f"Processing updates from {update_file.name}")
updates = json.loads(update_file.read_text())
updates: Updates = json.loads(update_file.read_text())
if cmd.packages:
updates = [
update
for package, update_list in updates.items()
if package in cmd.packages
for update in update_list
]
updates = {
package_name: update_list
for package_name, update_list in updates.items()
if package_name in cmd.packages
}

if not updates:
logger.warning(f"No matching updates to run in {update_file.name}")
continue
examples = update_doc_gen(doc_gen_root=cmd.root, updates=updates)

writes = prepare_write(examples)
write_many(cmd.root, writes)
combined_updates = merge_updates(combined_updates, updates)

else:
logger.warning(f"Update file not found: {update_file}")

updated_examples = update_doc_gen(doc_gen=doc_gen, updates=combined_updates)
writes = prepare_write(updated_examples)
write_many(cmd.root, writes)
6 changes: 5 additions & 1 deletion aws_doc_sdk_examples_tools/metadata_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,11 @@ def _validate_aws_entity_usage(value: str) -> bool:
If these counts differ, there's an invalid usage.
"""
xval = value.replace("&", "&")
xtree = xml_tree.fromstring(f"<fake><para>{xval}</para></fake>")
try:
xtree = xml_tree.fromstring(f"<fake><para>{xval}</para></fake>")
except Exception as e:
print(xval)
raise e
blocks = (
xtree.findall(".//programlisting")
+ xtree.findall(".//code")
Expand Down
10 changes: 5 additions & 5 deletions aws_doc_sdk_examples_tools/yaml_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
from collections import defaultdict
from dataclasses import asdict
from pathlib import Path
from typing import Any, DefaultDict, Dict, List, Set, Tuple
from typing import Any, DefaultDict, Dict, List, Tuple

import difflib
import logging
import yaml

Expand Down Expand Up @@ -146,7 +145,8 @@ def report_yaml_differences(
elif file_path not in after_values:
differences.append((file_path, "removed"))
else:
differences.append((file_path, "modified"))
diff = f"{before}\n\n---\n\n{after}"
differences.append((file_path, diff))

return differences

Expand All @@ -172,8 +172,8 @@ def main():
if before_values != after_values:
differences = report_yaml_differences(before_values, after_values)
logger.error(f"YAML content changed in {len(differences)} files after writing:")
for file_path, diff_type in differences:
logger.error(f" - {file_path}: {diff_type}")
for difference in differences:
logger.error(difference)
else:
logger.info(
f"Metadata for {root.name} has been normalized and verified for consistency."
Expand Down
3 changes: 1 addition & 2 deletions aws_doc_sdk_examples_tools/yaml_writer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,11 @@ def test_report_yaml_differences_with_changes():
differences.sort()

expected = [
("file1.yaml", "modified"),
("file1.yaml", "{'key1': 'value1'}\n\n---\n\n{'key1': 'changed_value'}"),
("file2.yaml", "removed"),
("file4.yaml", "added"),
]
expected.sort()

assert differences == expected


Expand Down