def resolve_requested_batches(batch_names: List[str]) -> List[Path]:
    """
    Turn the requested batch names into batch directories under AILLY_DIR_PATH.

    Args:
        batch_names: Names of batch directories, relative to AILLY_DIR_PATH.
            When empty, every directory in AILLY_DIR_PATH whose name starts
            with BATCH_PREFIX is selected instead.

    Returns:
        The resolved batch directory paths.

    Raises:
        FileNotFoundError: A named batch does not exist.
        NotADirectoryError: A named batch exists but is not a directory.
    """
    if batch_names:
        resolved: List[Path] = []
        for name in batch_names:
            candidate = AILLY_DIR_PATH / name
            # Explicitly requested batches must exist and be directories.
            if not candidate.exists():
                raise FileNotFoundError(candidate)
            if not candidate.is_dir():
                raise NotADirectoryError(candidate)
            resolved.append(candidate)
        return resolved

    # Nothing requested explicitly: discover every batch directory.
    return [
        entry
        for entry in AILLY_DIR_PATH.iterdir()
        if entry.is_dir() and entry.name.startswith(BATCH_PREFIX)
    ]
def parse_fenced_blocks(content: str, fence="===") -> List[List[str]]:
    """
    Collect the lines that sit between pairs of fence marker lines.

    A line counts as a fence when, after stripping surrounding whitespace, it
    equals ``fence``. Fences alternate between opening and closing a block.
    Lines inside a block are kept unmodified; lines outside are ignored. A
    block that is opened but never closed is discarded.

    Args:
        content: Full text to scan.
        fence: Marker that opens and closes a block.

    Returns:
        One list of lines per closed block, in order of appearance.
    """
    finished = []
    open_block = None  # None while outside a fence, a list while inside one

    for raw_line in content.splitlines():
        if raw_line.strip() != fence:
            if open_block is not None:
                open_block.append(raw_line)
            continue

        # Fence line: either start a new block or close the current one.
        if open_block is None:
            open_block = []
        else:
            finished.append(open_block)
            open_block = None

    return finished
def parse_package_name(policy_update: Dict[str, str]) -> Optional[str]:
    """
    Extract the package name from a parsed policy update's ``id``.

    The id is split on ``.``; the first segment must be ``iam-policies`` and
    the second segment is returned as the package name
    (e.g. ``iam-policies.SomeGuide.1`` -> ``SomeGuide``).

    Args:
        policy_update: Parsed key/value pairs for one example, including ``id``.

    Returns:
        The package name, or None when the update is empty, is not a dict,
        has no usable string ``id``, or the id does not have the
        ``iam-policies.<package>`` shape.
    """
    if not policy_update or not isinstance(policy_update, dict):
        return None

    example_id = policy_update.get("id")
    if not example_id or not isinstance(example_id, str):
        return None

    id_parts = [part.strip() for part in example_id.split(".")]

    # An id without a "." has no package segment; bail out instead of
    # raising IndexError on id_parts[1].
    if len(id_parts) < 2 or id_parts[0] != "iam-policies":
        return None

    return id_parts[1]  # The package name, hopefully.
def format_duration(seconds: float) -> str:
    """
    Render a duration in seconds as a timedelta string, left-padded with zeros
    to at least eight characters (e.g. 5 -> "00:00:05").

    Longer renderings (fractional seconds, whole days) are returned unpadded.
    """
    rendered = str(timedelta(seconds=seconds))
    return rendered.rjust(8, "0")
def handle_update_reservoir(cmd: UpdateReservoir, uow: None):
    """
    Apply generated metadata updates to the DocGen files under ``cmd.root``.

    Update files are ``updates_<batch>.json`` in AILLY_DIR_PATH: one per name
    in ``cmd.batches`` when given, otherwise every file matching
    ``updates_<BATCH_PREFIX>*.json``. Files are processed in sorted order.
    For each file the updates are merged via ``update_doc_gen`` and the
    resulting examples are written with ``prepare_write`` / ``write_many``.

    Args:
        cmd: Command carrying ``batches``, ``packages`` and ``root``.
        uow: Unused.
    """
    if cmd.batches:
        update_files = [
            AILLY_DIR_PATH / f"updates_{batch}.json" for batch in cmd.batches
        ]
    else:
        update_files = list(AILLY_DIR_PATH.glob(f"updates_{BATCH_PREFIX}*.json"))

    if not update_files:
        logger.warning("No IAM update files found to process")
        return

    for update_file in sorted(update_files):
        if not update_file.exists():
            logger.warning(f"Update file not found: {update_file}")
            continue

        logger.info(f"Processing updates from {update_file.name}")
        loaded = json.loads(update_file.read_text(encoding="utf-8"))

        if isinstance(loaded, dict):
            # The file maps package name -> list of updates. Always flatten it
            # (filtering by package only when packages were requested):
            # update_doc_gen expects a list of update dicts, and iterating the
            # mapping itself would yield package-name strings.
            updates = [
                update
                for package, update_list in loaded.items()
                if not cmd.packages or package in cmd.packages
                for update in update_list
            ]
        else:
            # NOTE(review): presumably an already-flat list of updates; it
            # carries no package information, so it is passed through as-is.
            updates = loaded

        if not updates:
            logger.warning(f"No matching updates to run in {update_file.name}")
            continue

        examples = update_doc_gen(doc_gen_root=cmd.root, updates=updates)

        writes = prepare_write(examples)
        write_many(cmd.root, writes)
+service => IAM +category => Security +languages => JSON, AWS CLI +=== + +## Implementation + +Here's how you would implement this policy: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::example-bucket", + "arn:aws:s3:::example-bucket/*" + ] + } + ] +} +``` + +## Additional Notes + +Remember to replace 'example-bucket' with your actual bucket name when using this policy. +""" + +SAMPLE_PROMPT_RESPONSE_2 = """# IAM Policy Example for EC2 Instance Management + +This example demonstrates how to create an IAM policy for EC2 instance management. + +## Policy Details + +=== +title => Manage EC2 Instances in a Specific Region +title_abbrev => Region Specific EC2 +synopsis => This example shows how to create an IAM policy that allows management of EC2 instances in a specific AWS region. +description => This policy grants permissions to view, start, stop, and reboot EC2 instances in a specific region. It's useful for operations teams who need to manage instance lifecycles. +service => IAM +category => Security +languages => JSON, AWS CLI +=== + +## Implementation + +Here's how you would implement this policy: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:DescribeInstances", + "ec2:StartInstances", + "ec2:StopInstances", + "ec2:RebootInstances" + ], + "Resource": "*", + "Condition": { + "StringEquals": { + "aws:RequestedRegion": "us-west-2" + } + } + } + ] +} +``` + +## Additional Notes + +Modify the region condition to match your specific requirements. 
def create_sample_ailly_files(ailly_dir):
    """
    Write the two sample Ailly responses into ``ailly_dir``.

    Both file names start with ``iam-policies.SomeGuide.`` and end with
    ``.md.ailly.md``.

    Args:
        ailly_dir: Existing directory (a Path) to write the sample files into.
    """
    samples = {
        "iam-policies.SomeGuide.1.md.ailly.md": SAMPLE_PROMPT_RESPONSE_1,
        "iam-policies.SomeGuide.2.md.ailly.md": SAMPLE_PROMPT_RESPONSE_2,
    }
    for file_name, content in samples.items():
        # Write as UTF-8 explicitly: the parser reads these files as UTF-8,
        # while open()'s default encoding depends on the platform locale.
        with open(ailly_dir / file_name, "w", encoding="utf-8") as f:
            f.write(content)
def test_update_examples_title_abbrev(doc_gen_tributary: DocGen):
    """Check that update_examples rewrites title_abbrev and appends the source-title suffix."""
    example_id = "iam_policies_example"

    # Incoming update that only carries a new title_abbrev
    incoming = Example(
        id=example_id,
        file=None,
        languages={},
        title_abbrev="Updated Title Abbrev",
    )

    update_examples(doc_gen_tributary, [incoming])

    # The stored example should now carry the new abbreviation plus the
    # "(from '<source title>' docs)" suffix.
    expected = "Updated Title Abbrev (from 'AWS Account Management' docs)"
    assert doc_gen_tributary.examples[example_id].title_abbrev == expected
a/aws_doc_sdk_examples_tools/test_resources/doc_gen_tributary_test/.doc_gen/metadata/AccountControlApiDoc_metadata.yaml b/aws_doc_sdk_examples_tools/test_resources/doc_gen_tributary_test/.doc_gen/metadata/AccountControlApiDoc_metadata.yaml new file mode 100644 index 0000000..e62adbf --- /dev/null +++ b/aws_doc_sdk_examples_tools/test_resources/doc_gen_tributary_test/.doc_gen/metadata/AccountControlApiDoc_metadata.yaml @@ -0,0 +1,21 @@ +iam_policies_example: + category: IAMPolicy + languages: + IAMPolicyGrammar: + versions: + - authors: + - alias: amazonian@amazon.com + name: Sir Peccy + excerpts: + - description: test + owner: AWS/Documentation/Accounts Management Docs + sdk_version: 1 + source: + title: AWS Account Management + url: https://code.amazon.com/packages/AccountControlApiDoc + services: + iam: {} + synopsis: This identity-based policy allows the attached identity to retrieve the + billing alternate contact information for a specific account within an organization. + title: Allow retrieval of a specific alternate contact type for an account + title_abbrev: Allow retrieval of a specific alternate contact type for an account \ No newline at end of file