diff --git a/aws_doc_sdk_examples_tools/agent/README.md b/aws_doc_sdk_examples_tools/agent/README.md deleted file mode 100644 index 6955f13..0000000 --- a/aws_doc_sdk_examples_tools/agent/README.md +++ /dev/null @@ -1,78 +0,0 @@ -# Ailly Prompt Workflow - -This project automates the process of generating, running, parsing, and applying [Ailly](https://www.npmjs.com/package/@ailly/cli) prompt outputs to an AWS DocGen project. It combines all steps into one streamlined command using a single Python script. - ---- - -## 📦 Overview - -This tool: -1. **Generates** Ailly prompts from DocGen snippets. -2. **Runs** Ailly CLI to get enhanced metadata. -3. **Parses** Ailly responses into structured JSON. -4. **Updates** your DocGen examples with the new metadata. - -All of this is done with one command. - ---- - -## ✅ Prerequisites - -- Python 3.8+ -- Node.js and npm (for `npx`) -- A DocGen project directory - ---- - -## 🚀 Usage - -From your project root, run: - -```bash -python -m aws_doc_sdk_examples_tools.agent.bin.main \ - /path/to/your/docgen/project \ - --system-prompts path/to/system_prompt.txt -``` - -### 🔧 Arguments - -- `iam_tributary_root`: Path to the root directory of your IAM policy tributary -- `--system-prompts`: List of system prompt files or strings to include in the Ailly configuration -- `--skip-generation`: Skip the prompt generation and Ailly execution steps (useful for reprocessing existing outputs) - -Run `python -m aws_doc_sdk_examples_tools.agent.bin.main update --help` for more info. - ---- - -## 🗂 What This Does - -Under the hood, this script: - -1. Creates a directory `.ailly_iam_policy` containing: - - One Markdown file per snippet. - - A `.aillyrc` configuration file. - -2. Runs `npx @ailly/cli` to generate `.ailly.md` outputs. - -3. Parses the Ailly `.ailly.md` files into a single `iam_updates.json` file. - -4. 
from pathlib import Path
from subprocess import run
from typing import List

import typer

from aws_doc_sdk_examples_tools.agent.make_prompts import make_prompts
from aws_doc_sdk_examples_tools.agent.process_ailly_files import process_ailly_files
from aws_doc_sdk_examples_tools.agent.update_doc_gen import update_doc_gen
from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many

app = typer.Typer()

# All generated prompts, Ailly output, and the parsed updates JSON live here.
AILLY_DIR = ".ailly_iam_policy"
AILLY_DIR_PATH = Path(AILLY_DIR)
IAM_UPDATES_PATH = AILLY_DIR_PATH / "iam_updates.json"


@app.command()
def update(
    iam_tributary_root: str,
    system_prompts: List[str] = [],
    skip_generation: bool = False,
) -> None:
    """
    Generate new IAM policy metadata for a tributary.

    Pipeline: write one prompt per new snippet, run the Ailly CLI over them,
    parse the .ailly.md outputs into iam_updates.json, merge the updates into
    the DocGen examples, and write the results back out.

    Args:
        iam_tributary_root: Root directory of the IAM policy tributary
            (a DocGen project).
        system_prompts: File paths (read and inlined) or literal strings to
            include as system prompts in the generated .aillyrc.
        skip_generation: Skip prompt generation and the Ailly run; only
            re-parse existing outputs and apply them.
    """
    doc_gen_root = Path(iam_tributary_root)

    if not skip_generation:
        make_prompts(
            doc_gen_root=doc_gen_root,
            system_prompts=system_prompts,
            out_dir=AILLY_DIR_PATH,
            language="IAMPolicyGrammar",
        )
        # BUG FIX: the executable and its argument must be separate argv
        # elements. The original passed "npx @ailly/cli@1.7.0-rc1" as a single
        # list element; with shell=False, subprocess treats that whole string
        # (space included) as the executable name, so the command could never
        # be found. check=True makes a failed Ailly run raise instead of
        # silently continuing with stale or missing output files.
        run(
            ["npx", "@ailly/cli@1.7.0-rc1", "--root", AILLY_DIR],
            check=True,
        )

    process_ailly_files(
        input_dir=str(AILLY_DIR_PATH), output_file=str(IAM_UPDATES_PATH)
    )

    doc_gen = update_doc_gen(
        doc_gen_root=doc_gen_root, iam_updates_path=IAM_UPDATES_PATH
    )

    writes = prepare_write(doc_gen.examples)
    write_many(doc_gen_root, writes)


if __name__ == "__main__":
    app()
def write_prompts(doc_gen: "DocGen", out_dir: Path, language: str) -> None:
    """Write one Markdown prompt file per newly extracted example snippet.

    Args:
        doc_gen: DocGen instance with snippets already collected.
        out_dir: Directory that receives one ``<example_id>.md`` per example.
        language: The DocGen language key whose first version/excerpt/snippet
            file supplies the prompt body.
    """
    examples = doc_gen.examples
    snippets = doc_gen.snippets
    for example_id, example in examples.items():
        # TCXContentAnalyzer prefixes new metadata title/title_abbrev entries
        # with DEFAULT_METADATA_PREFIX. Checking this here to make sure we're
        # only running the LLM tool on new extractions.
        title = example.title or ""
        title_abbrev = example.title_abbrev or ""
        if title.startswith(DEFAULT_METADATA_PREFIX) and title_abbrev.startswith(
            DEFAULT_METADATA_PREFIX
        ):
            try:
                snippet_key = (
                    example.languages[language]
                    .versions[0]
                    .excerpts[0]
                    .snippet_files[0]
                    .replace("/", ".")
                )
                snippet = snippets[snippet_key]
            except (KeyError, IndexError) as err:
                # ROBUSTNESS FIX: one malformed example (missing language,
                # version, excerpt, or snippet file) previously aborted the
                # entire run; skip it and keep going.
                logger.warning(
                    "Skipping %s: missing snippet data (%s)", example_id, err
                )
                continue
            prompt_path = out_dir / f"{example_id}.md"
            prompt_path.write_text(snippet.code, encoding="utf-8")


def setup_ailly(system_prompts: List[str], out_dir: Path) -> None:
    """Create the .aillyrc configuration file (YAML front matter + prompts)."""
    fence = "---"
    options = {
        "isolated": "true",
        "mcp": {
            "awslabs.aws-documentation-mcp-server": {
                "type": "stdio",
                "command": "uvx",
                "args": ["awslabs.aws-documentation-mcp-server@latest"],
            }
        },
    }
    options_block = yaml.dump(options).strip()
    prompts_block = "\n".join(system_prompts)

    content = f"{fence}\n{options_block}\n{fence}\n{prompts_block}"

    aillyrc_path = out_dir / ".aillyrc"
    aillyrc_path.write_text(content, encoding="utf-8")


def read_files(values: List[str]) -> List[str]:
    """Resolve each value: file paths become their contents, other strings
    pass through unchanged."""
    contents = []
    for value in values:
        if os.path.isfile(value):
            with open(value, "r", encoding="utf-8") as f:
                contents.append(f.read())
        else:
            contents.append(value)
    return contents


def validate_root_path(doc_gen_root: Path) -> None:
    """Fail fast if the DocGen root is not a directory.

    BUG FIX: the original used a bare ``assert``, which is stripped when
    Python runs with -O, silently disabling the check. Raise explicitly.
    """
    if not doc_gen_root.is_dir():
        raise NotADirectoryError(f"DocGen root is not a directory: {doc_gen_root}")


def make_prompts(
    doc_gen_root: Path, system_prompts: List[str], out_dir: Path, language: str
) -> None:
    """Generate prompts and configuration files for Ailly.

    Args:
        doc_gen_root: Root of the DocGen project to read snippets from.
        system_prompts: File paths or literal strings for the .aillyrc.
        out_dir: Output directory (created if needed).
        language: DocGen language key to pull snippets for.

    Raises:
        NotADirectoryError: If ``doc_gen_root`` is not a directory.
    """
    validate_root_path(doc_gen_root)
    out_dir.mkdir(parents=True, exist_ok=True)
    resolved_prompts = read_files(system_prompts)
    setup_ailly(resolved_prompts, out_dir)
    doc_gen = make_doc_gen(doc_gen_root)
    write_prompts(doc_gen=doc_gen, out_dir=out_dir, language=language)
"""
Parse generated Ailly output for key: value pairs.

This module processes *.md.ailly.md files, extracts key-value pairs,
converts them to JSON entries in an array, and writes the JSON array
to a specified output file.
"""

import json
import logging
from pathlib import Path
from typing import Any, Dict, List, Set

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

EXPECTED_KEYS: Set[str] = {"title", "title_abbrev"}
VALUE_PREFIXES: Dict[str, str] = {"title": "", "title_abbrev": "", "synopsis": ""}


class MissingExpectedKeys(Exception):
    """Raised when a fenced block lacks one of the EXPECTED_KEYS."""

    pass


def parse_fenced_blocks(content: str, fence="===") -> List[List[str]]:
    """Return the groups of lines found between consecutive pairs of fence
    markers; text outside any fence pair is ignored."""
    collected: List[List[str]] = []
    current: List[str] = []
    in_block = False

    for raw_line in content.splitlines():
        if raw_line.strip() == fence:
            if in_block:
                collected.append(current)
                current = []
            in_block = not in_block
        elif in_block:
            current.append(raw_line)

    return collected


def parse_block_lines(
    block: List[str], key_pairs: Dict[str, str], expected_keys=EXPECTED_KEYS
):
    """Extract ``key => value`` pairs from one fenced block into ``key_pairs``.

    Raises:
        MissingExpectedKeys: If, after parsing, any expected key is absent.
    """
    for entry in block:
        if "=>" not in entry:
            continue
        key, _, value = entry.partition("=>")
        key_pairs[key.strip()] = value.strip()
    if missing := expected_keys - key_pairs.keys():
        raise MissingExpectedKeys(missing)


def parse_ailly_file(
    file_path: str, value_prefixes: Dict[str, str] = VALUE_PREFIXES
) -> Dict[str, Any]:
    """
    Parse an .md.ailly.md file and extract key-value pairs that are between
    === fence markers. Each pair is assumed to be on one line, formatted as
    `key => value` — a convention entirely dependent on the LLM output
    written by Ailly.

    Args:
        file_path: Path to the .md.ailly.md file

    Returns:
        Dictionary containing the extracted key-value pairs (empty or partial
        if parsing failed; errors are logged, not raised).
    """
    extracted: Dict[str, str] = {}

    try:
        content = Path(file_path).read_text(encoding="utf-8")

        for fenced in parse_fenced_blocks(content):
            parse_block_lines(fenced, extracted)

        for key, prefix in value_prefixes.items():
            if key in extracted:
                extracted[key] = f"{prefix}{extracted[key]}"

        # NOTE(review): the parsed title_abbrev is deliberately replaced with
        # the title here (matching the original behavior) — confirm intended.
        extracted["title_abbrev"] = extracted["title"]
        extracted["id"] = Path(file_path).name.split(".md.ailly.md")[0]
        extracted["_source_file"] = file_path

    except Exception as e:
        logger.error(f"Error parsing file {file_path}", exc_info=e)

    return extracted


def process_ailly_files(
    input_dir: str, output_file: str, file_pattern: str = "*.md.ailly.md"
) -> None:
    """
    Process all matching files in the input directory and write the parsed
    results as a JSON array to the output file.

    Args:
        input_dir: Directory containing .md.ailly.md files
        output_file: Path to the output JSON file
        file_pattern: Pattern to match files (default: "*.md.ailly.md")
    """
    collected: List[Dict[str, Any]] = []

    try:
        for candidate in Path(input_dir).glob(file_pattern):
            logger.info(f"Processing file: {candidate}")
            parsed = parse_ailly_file(str(candidate))
            if parsed:
                collected.append(parsed)

        with open(output_file, "w", encoding="utf-8") as out_handle:
            json.dump(collected, out_handle, indent=2)

        logger.info(
            f"Successfully processed {len(collected)} files. Output written to {output_file}"
        )

    except Exception as e:
        logger.error("Error processing files", exc_info=e)
import json
import tempfile
import pytest
from pathlib import Path

from aws_doc_sdk_examples_tools.agent.process_ailly_files import (
    process_ailly_files,
    VALUE_PREFIXES,
)


@pytest.fixture
def test_environment():
    """Yield a temp workspace: an input dir pre-populated with two sample
    .md.ailly.md files and a path for the output JSON.

    The TemporaryDirectory context manager removes everything on teardown.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        ailly_dir = temp_path / "ailly_dir"
        ailly_dir.mkdir(exist_ok=True)

        # Output file is only a path here; process_ailly_files creates it.
        output_path = temp_path / "iam_updates.json"

        create_sample_ailly_files(ailly_dir)

        yield {
            "ailly_dir": ailly_dir,
            "output_path": output_path,
            "temp_dir": temp_path,
        }


def create_sample_ailly_files(ailly_dir):
    """Write two representative Ailly output files into ``ailly_dir``.

    Each file mimics real LLM output: prose plus one ``===``-fenced block of
    ``key => value`` metadata lines, which is what the parser extracts.
    """
    # Sample file 1
    file1_path = ailly_dir / "example1.md.ailly.md"
    with open(file1_path, "w") as f:
        f.write(
            """# IAM Policy Example for S3 Bucket Access

This example demonstrates how to create an IAM policy that grants read-only access to an S3 bucket.

## Policy Details

===
title => Grant Read-Only Access to an S3 Bucket
title_abbrev => S3 Read-Only
synopsis => This example shows how to create an IAM policy that grants read-only access to objects in an S3 bucket.
description => This policy grants permissions to list and get objects from a specific S3 bucket. It's useful for scenarios where users need to view but not modify bucket contents.
service => IAM
category => Security
languages => JSON, AWS CLI
===

## Implementation

Here's how you would implement this policy:

```json
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:ListBucket"
            ],
            "Resource": [
                "arn:aws:s3:::example-bucket",
                "arn:aws:s3:::example-bucket/*"
            ]
        }
    ]
}
```

## Additional Notes

Remember to replace 'example-bucket' with your actual bucket name when using this policy.
"""
        )

    # Sample file 2
    file2_path = ailly_dir / "example2.md.ailly.md"
    with open(file2_path, "w") as f:
        f.write(
            """# IAM Policy Example for EC2 Instance Management

This example demonstrates how to create an IAM policy for EC2 instance management.

## Policy Details

===
title => Manage EC2 Instances in a Specific Region
title_abbrev => Region Specific EC2
synopsis => This example shows how to create an IAM policy that allows management of EC2 instances in a specific AWS region.
description => This policy grants permissions to view, start, stop, and reboot EC2 instances in a specific region. It's useful for operations teams who need to manage instance lifecycles.
service => IAM
category => Security
languages => JSON, AWS CLI
===

## Implementation

Here's how you would implement this policy:

```json
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "ec2:DescribeInstances",
                "ec2:StartInstances",
                "ec2:StopInstances",
                "ec2:RebootInstances"
            ],
            "Resource": "*",
            "Condition": {
                "StringEquals": {
                    "aws:RequestedRegion": "us-west-2"
                }
            }
        }
    ]
}
```

## Additional Notes

Modify the region condition to match your specific requirements.
"""
        )
-""" - ) - - -def test_process_ailly_files(test_environment): - ailly_dir = test_environment["ailly_dir"] - output_path = test_environment["output_path"] - - process_ailly_files(str(ailly_dir), str(output_path)) - - assert output_path.exists() - - with open(output_path, "r") as f: - results = json.load(f) - - assert len(results) == 2 - - example1 = next((item for item in results if item.get("id") == "example1"), None) - example2 = next((item for item in results if item.get("id") == "example2"), None) - - assert example1 is not None - assert example2 is not None - assert ( - example1["title"] - == f"{VALUE_PREFIXES.get('title')}Grant Read-Only Access to an S3 Bucket" - ) - assert ( - example2["title"] - == f"{VALUE_PREFIXES.get('title')}Manage EC2 Instances in a Specific Region" - ) diff --git a/aws_doc_sdk_examples_tools/agent/update_doc_gen.py b/aws_doc_sdk_examples_tools/agent/update_doc_gen.py deleted file mode 100644 index 0a78ab4..0000000 --- a/aws_doc_sdk_examples_tools/agent/update_doc_gen.py +++ /dev/null @@ -1,57 +0,0 @@ -import json -import logging -from pathlib import Path -from typing import Iterable - -from aws_doc_sdk_examples_tools.doc_gen import DocGen, Example - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def examples_from_updates(updates_path: Path) -> Iterable[Example]: - """ - Take a path to a file containing a list of example metadata updates - and returns an iterable of examples with the applied updates. 
- """ - updates = json.loads(updates_path.read_text()) - - if isinstance(updates, list): - updates_dict = {} - for item in updates: - if "id" in item: - updates_dict[item["id"]] = item - updates = updates_dict - - examples = [ - Example( - id=id, - file=Path(), - languages={}, - title=update.get("title"), - title_abbrev=update.get("title_abbrev"), - synopsis=update.get("synopsis"), - ) - for id, update in updates.items() - ] - return examples - - -def update_examples(doc_gen: DocGen, examples: Iterable[Example]) -> None: - """ - Merge a subset of example properties into a DocGen instance. - """ - for example in examples: - if doc_gen_example := doc_gen.examples.get(example.id): - doc_gen_example.title = example.title - doc_gen_example.title_abbrev = example.title_abbrev - doc_gen_example.synopsis = example.synopsis - else: - logger.warning(f"Could not find example with id: {example.id}") - - -def update_doc_gen(doc_gen_root: Path, iam_updates_path: Path) -> DocGen: - doc_gen = DocGen.from_root(doc_gen_root) - examples = examples_from_updates(iam_updates_path) - update_examples(doc_gen, examples) - return doc_gen