From 9e29cd0ab5cdd1a4d914a5be96d6126b0ab75a14 Mon Sep 17 00:00:00 2001 From: Corey Pyle Date: Thu, 24 Apr 2025 14:05:13 -0400 Subject: [PATCH 1/3] Add script for generating new titles/descriptions for snippet metadata. --- .../scripts/base_prompt.txt | 11 ++ .../scripts/policy.json.example.md | 20 ++++ aws_doc_sdk_examples_tools/scripts/retry.py | 57 ++++++++++ .../scripts/snippet_summarize.py | 107 ++++++++++++++++++ requirements.txt | 3 + 5 files changed, 198 insertions(+) create mode 100644 aws_doc_sdk_examples_tools/scripts/base_prompt.txt create mode 100644 aws_doc_sdk_examples_tools/scripts/policy.json.example.md create mode 100644 aws_doc_sdk_examples_tools/scripts/retry.py create mode 100644 aws_doc_sdk_examples_tools/scripts/snippet_summarize.py diff --git a/aws_doc_sdk_examples_tools/scripts/base_prompt.txt b/aws_doc_sdk_examples_tools/scripts/base_prompt.txt new file mode 100644 index 0000000..05e5ac2 --- /dev/null +++ b/aws_doc_sdk_examples_tools/scripts/base_prompt.txt @@ -0,0 +1,11 @@ +Provide a 'title', 'title_abbrev', and 'description' for this example in json format. +Title should be a title, title_abbrev should be a 1-5 word variation on the title, and description should +explain in one paragraph what the code is going. Provide the json raw, without markdown fences. + +Sample result: + +{ + "title": "Get an object by name from an Amazon S3 bucket", + "title_abbrev": "Get an object", + "description": "Use the AWS SDK for JavaScript to get an object from an Amazon S3 bucket. Steps are included that demonstrate how to split large downloads up into multiple parts." +} \ No newline at end of file diff --git a/aws_doc_sdk_examples_tools/scripts/policy.json.example.md b/aws_doc_sdk_examples_tools/scripts/policy.json.example.md new file mode 100644 index 0000000..1258d7d --- /dev/null +++ b/aws_doc_sdk_examples_tools/scripts/policy.json.example.md @@ -0,0 +1,20 @@ +# Example output of the snippet_summarize script for IAM policies +``` +[ + { + "title": "Allows Amazon SNS to send messages to an Amazon SQS dead-letter queue", + "title_abbrev": "Allows SNS to SQS messages", + "description": "This resource-based policy allows Amazon SNS to send messages to a specific Amazon SQS dead-letter queue. The policy grants the SNS service principal permission to perform the SQS:SendMessage action on the queue named [MyDeadLetterQueue], but only when the source ARN matches the specified SNS topic [MyTopic]. This policy would be attached to the SQS queue to enable it to receive messages from the SNS topic when message delivery fails. Replace [us-east-2] with your specific AWS Region." + }, + { + "title": "Allows managing log delivery and related resources", + "title_abbrev": "Allows log delivery management", + "description": "This identity-based policy allows managing CloudWatch Logs delivery configurations and related resources. The policy grants permissions to create, get, update, delete, and list log deliveries, as well as manage resource policies for a specific log group [SampleLogGroupName]. It also allows creating service-linked roles, tagging Firehose delivery streams, and managing bucket policies for a specific S3 bucket [bucket-name]. Replace [region] and [account-id] with your specific AWS Region and account ID, and [bucket-name] with your actual S3 bucket name." + }, + { + "title": "Allows Amazon SNS to send messages to an SQS dead-letter queue", + "title_abbrev": "SNS to SQS permissions", + "description": "This resource-based policy allows Amazon SNS to send messages to a specific Amazon SQS queue that serves as a dead-letter queue. The policy grants the SNS service permission to perform the SQS:SendMessage action on the queue named [MyDeadLetterQueue] in the [us-east-2] Region. The policy includes a condition that restricts this permission to messages originating from the SNS topic named [MyTopic] in the same Region and account." + } +] +``` \ No newline at end of file diff --git a/aws_doc_sdk_examples_tools/scripts/retry.py b/aws_doc_sdk_examples_tools/scripts/retry.py new file mode 100644 index 0000000..6e2dcc7 --- /dev/null +++ b/aws_doc_sdk_examples_tools/scripts/retry.py @@ -0,0 +1,57 @@ +import logging +from functools import wraps +import random +import time +from typing import Callable, Type, Tuple + +logger = logging.getLogger(__name__) + + +def retry_with_backoff( + exceptions: Tuple[Type[Exception], ...], + max_retries: int = 5, + initial_delay: float = 1.0, + backoff_factor: float = 2.0, + max_delay: float = 60.0, + jitter: float = 0.1, +) -> Callable: + """ + Decorator for retrying a function with exponential backoff upon specified exceptions. + + :param exceptions: Tuple of exception classes to catch. + :param max_retries: Maximum number of retry attempts. + :param initial_delay: Initial delay between retries in seconds. + :param backoff_factor: Factor by which the delay increases after each retry. + :param max_delay: Maximum delay between retries in seconds. + :param jitter: Random jitter added to delay to prevent thundering herd problem. + :return: Decorated function with retry logic. + + This method was AI generated, and reviewed by a human. + """ + + def decorator(func: Callable) -> Callable: + @wraps(func) + def wrapper(*args, **kwargs): + delay = initial_delay + for attempt in range(1, max_retries + 1): + try: + return func(*args, **kwargs) + except exceptions as e: + if attempt == max_retries: + logger.error( + f"Function '{func.__name__}' failed after {max_retries} attempts." + ) + raise + else: + sleep_time = min(delay, max_delay) + sleep_time += jitter * (2 * random.random() - 1) # Add jitter + logger.warning( + f"Attempt {attempt} for function '{func.__name__}' failed with {e.__class__.__name__}: {e} " + f"Retrying in {sleep_time:.2f} seconds..." + ) + time.sleep(sleep_time) + delay *= backoff_factor + + return wrapper + + return decorator diff --git a/aws_doc_sdk_examples_tools/scripts/snippet_summarize.py b/aws_doc_sdk_examples_tools/scripts/snippet_summarize.py new file mode 100644 index 0000000..134ffcf --- /dev/null +++ b/aws_doc_sdk_examples_tools/scripts/snippet_summarize.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python + +import argparse +import json +import logging +import os +from pathlib import Path +from typing import Dict, Optional + +import boto3 +from botocore.exceptions import ClientError + +from aws_doc_sdk_examples_tools.doc_gen import DocGen, Snippet +from aws_doc_sdk_examples_tools.scripts.retry import retry_with_backoff + + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class BedrockRuntime: + def __init__(self, model_id: str = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"): + self.client = boto3.client("bedrock-runtime") + self.model_id = model_id + self.base_prompt = Path( + os.path.dirname(__file__), "base_prompt.txt" + ).read_text() + self.conversation = [{"role": "user", "content": [{"text": self.base_prompt}]}] + + def converse(self, conversation): + self.conversation.extend(conversation) + response = self.client.converse( + modelId=self.model_id, + messages=self.conversation, + inferenceConfig={"maxTokens": 512, "temperature": 0.5, "topP": 0.9}, + ) + response_text = response["output"]["message"]["content"][0]["text"] + return response_text + + +def make_doc_gen(root: Path): + doc_gen = DocGen.from_root(root) + doc_gen.collect_snippets() + return doc_gen + + +@retry_with_backoff(exceptions=(ClientError,), max_retries=10) +def generate_snippet_description( + bedrock_runtime: BedrockRuntime, snippet: Snippet, prompt: Optional[str] +) -> Dict: + content = ( + [{"text": prompt}, {"text": snippet.code}] + if prompt + else [{"text": snippet.code}] + ) + conversation = [ + { + "role": "user", + "content": content, + } + ] + + response_text = bedrock_runtime.converse(conversation) + + try: + # This assumes the response is JSON, which couples snippet + # description generation to a specific prompt. + return json.loads(response_text) + except Exception as e: + logger.warning(f"Failed to parse response. Response: {response_text}") + return {} + + +def generate_descriptions(snippets: Dict[str, Snippet], prompt: Optional[str]): + runtime = BedrockRuntime() + results = [] + for snippet_id, snippet in snippets.items(): + response = generate_snippet_description(runtime, snippet, prompt) + results.append(response) + # Just need a few results for the demo. + if len(results) == 3: + break + print(results) + + +def main(doc_gen_root: Path, prompt: Optional[Path]): + doc_gen = make_doc_gen(doc_gen_root) + prompt_text = prompt.read_text() if prompt and prompt.exists() else None + generate_descriptions(doc_gen.snippets, prompt_text) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Generate new titles and descriptions for DocGen snippets" + ) + parser.add_argument( + "--doc-gen-root", required=True, help="Path to DocGen ready project" + ) + parser.add_argument( + "--prompt", + help="Path to an additional prompt to be used for refining the output", + ) + args = parser.parse_args() + + doc_gen_root = Path(args.doc_gen_root) + prompt = Path(args.prompt) if args.prompt else None + main(doc_gen_root, prompt) diff --git a/requirements.txt b/requirements.txt index 284010a..9c3d10e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ black==24.3.0 +boto3==1.37.38 +botocore==1.37.38 flake8==6.1.0 mypy==1.8.0 mypy-extensions==1.0.0 @@ -6,5 +8,6 @@ pathspec==0.11.2 pytest==8.0.0 PyYAML==6.0.1 requests==2.32.0 +types-boto3==1.37.38 types-PyYAML==6.0.12.12 yamale==4.0.4 From 402aaf2481a0dc85cc06c28309405d000b0bd2bd Mon Sep 17 00:00:00 2001 From: Corey Pyle Date: Mon, 28 Apr 2025 13:00:41 -0400 Subject: [PATCH 2/3] Switch to using Ailly instead of boto3. --- .../scripts/base_prompt.txt | 11 -- .../scripts/make_prompts.py | 88 ++++++++++++++ aws_doc_sdk_examples_tools/scripts/retry.py | 57 ---------- .../scripts/snippet_summarize.py | 107 ------------------ requirements.txt | 3 - 5 files changed, 88 insertions(+), 178 deletions(-) delete mode 100644 aws_doc_sdk_examples_tools/scripts/base_prompt.txt create mode 100644 aws_doc_sdk_examples_tools/scripts/make_prompts.py delete mode 100644 aws_doc_sdk_examples_tools/scripts/retry.py delete mode 100644 aws_doc_sdk_examples_tools/scripts/snippet_summarize.py diff --git a/aws_doc_sdk_examples_tools/scripts/base_prompt.txt b/aws_doc_sdk_examples_tools/scripts/base_prompt.txt deleted file mode 100644 index 05e5ac2..0000000 --- a/aws_doc_sdk_examples_tools/scripts/base_prompt.txt +++ /dev/null @@ -1,11 +0,0 @@ -Provide a 'title', 'title_abbrev', and 'description' for this example in json format. -Title should be a title, title_abbrev should be a 1-5 word variation on the title, and description should -explain in one paragraph what the code is going. Provide the json raw, without markdown fences. - -Sample result: - -{ - "title": "Get an object by name from an Amazon S3 bucket", - "title_abbrev": "Get an object", - "description": "Use the AWS SDK for JavaScript to get an object from an Amazon S3 bucket. Steps are included that demonstrate how to split large downloads up into multiple parts." -} \ No newline at end of file diff --git a/aws_doc_sdk_examples_tools/scripts/make_prompts.py b/aws_doc_sdk_examples_tools/scripts/make_prompts.py new file mode 100644 index 0000000..6506217 --- /dev/null +++ b/aws_doc_sdk_examples_tools/scripts/make_prompts.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +import argparse +import logging +import os +from pathlib import Path +from typing import Dict, List + +from aws_doc_sdk_examples_tools.doc_gen import DocGen, Snippet + +# Setup logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def make_doc_gen(root: Path) -> DocGen: + """Create and return a DocGen instance from the given root directory.""" + doc_gen = DocGen.from_root(root) + doc_gen.collect_snippets() + return doc_gen + + +def write_prompts(snippets: Dict[str, Snippet], out: Path) -> None: + """Write each snippet's code into a separate Markdown file.""" + out.mkdir(parents=True, exist_ok=True) + for snippet_id, snippet in snippets.items(): + snippet_path = out / f"{snippet_id}.md" + snippet_path.write_text(snippet.code, encoding="utf-8") + + +def setup_ailly(system_prompts: List[str], out: Path) -> None: + """Create the .aillyrc configuration file.""" + fence = "---" + options = {"isolated": "true"} + options_block = "\n".join(f"{key}: {value}" for key, value in options.items()) + prompts_block = "\n".join(system_prompts) + + content = f"{fence}\n{options_block}\n{fence}\n{prompts_block}" + + aillyrc_path = out / ".aillyrc" + aillyrc_path.parent.mkdir(parents=True, exist_ok=True) + aillyrc_path.write_text(content, encoding="utf-8") + + +def parse_prompts_arg(values: List[str]) -> List[str]: + """Parse system prompts from a list of strings or file paths.""" + prompts = [] + for value in values: + if os.path.isfile(value): + with open(value, "r", encoding="utf-8") as f: + prompts.append(f.read()) + else: + prompts.append(value) + return prompts + + +def main(doc_gen_root: Path, system_prompts: List[str], out: str = ".ailly_prompts") -> None: + """Generate prompts and configuration files for Ailly.""" + out_path = Path(out) + setup_ailly(system_prompts, out_path) + + doc_gen = make_doc_gen(doc_gen_root) + write_prompts(doc_gen.snippets, out_path) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Write Ailly prompts for DocGen snippets and parse the results." + ) + parser.add_argument( + "--doc-gen-root", required=True, + help="Path to a DocGen ready project." + ) + parser.add_argument( + "--system-prompts", nargs="+", required=True, + help="List of prompt strings or file paths to store in a .aillyrc file." + ) + parser.add_argument( + "--out", + default=".ailly_prompts", + help="Directory where Ailly prompt files will be written. Defaults to '.ailly_prompts'." + ) + + args = parser.parse_args() + + doc_gen_root = Path(args.doc_gen_root) + system_prompts = parse_prompts_arg(args.system_prompts) + main(doc_gen_root, system_prompts, out=args.out) diff --git a/aws_doc_sdk_examples_tools/scripts/retry.py b/aws_doc_sdk_examples_tools/scripts/retry.py deleted file mode 100644 index 6e2dcc7..0000000 --- a/aws_doc_sdk_examples_tools/scripts/retry.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging -from functools import wraps -import random -import time -from typing import Callable, Type, Tuple - -logger = logging.getLogger(__name__) - - -def retry_with_backoff( - exceptions: Tuple[Type[Exception], ...], - max_retries: int = 5, - initial_delay: float = 1.0, - backoff_factor: float = 2.0, - max_delay: float = 60.0, - jitter: float = 0.1, -) -> Callable: - """ - Decorator for retrying a function with exponential backoff upon specified exceptions. - - :param exceptions: Tuple of exception classes to catch. - :param max_retries: Maximum number of retry attempts. - :param initial_delay: Initial delay between retries in seconds. - :param backoff_factor: Factor by which the delay increases after each retry. - :param max_delay: Maximum delay between retries in seconds. - :param jitter: Random jitter added to delay to prevent thundering herd problem. - :return: Decorated function with retry logic. - - This method was AI generated, and reviewed by a human. - """ - - def decorator(func: Callable) -> Callable: - @wraps(func) - def wrapper(*args, **kwargs): - delay = initial_delay - for attempt in range(1, max_retries + 1): - try: - return func(*args, **kwargs) - except exceptions as e: - if attempt == max_retries: - logger.error( - f"Function '{func.__name__}' failed after {max_retries} attempts." - ) - raise - else: - sleep_time = min(delay, max_delay) - sleep_time += jitter * (2 * random.random() - 1) # Add jitter - logger.warning( - f"Attempt {attempt} for function '{func.__name__}' failed with {e.__class__.__name__}: {e} " - f"Retrying in {sleep_time:.2f} seconds..." - ) - time.sleep(sleep_time) - delay *= backoff_factor - - return wrapper - - return decorator diff --git a/aws_doc_sdk_examples_tools/scripts/snippet_summarize.py b/aws_doc_sdk_examples_tools/scripts/snippet_summarize.py deleted file mode 100644 index 134ffcf..0000000 --- a/aws_doc_sdk_examples_tools/scripts/snippet_summarize.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python - -import argparse -import json -import logging -import os -from pathlib import Path -from typing import Dict, Optional - -import boto3 -from botocore.exceptions import ClientError - -from aws_doc_sdk_examples_tools.doc_gen import DocGen, Snippet -from aws_doc_sdk_examples_tools.scripts.retry import retry_with_backoff - - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class BedrockRuntime: - def __init__(self, model_id: str = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"): - self.client = boto3.client("bedrock-runtime") - self.model_id = model_id - self.base_prompt = Path( - os.path.dirname(__file__), "base_prompt.txt" - ).read_text() - self.conversation = [{"role": "user", "content": [{"text": self.base_prompt}]}] - - def converse(self, conversation): - self.conversation.extend(conversation) - response = self.client.converse( - modelId=self.model_id, - messages=self.conversation, - inferenceConfig={"maxTokens": 512, "temperature": 0.5, "topP": 0.9}, - ) - response_text = response["output"]["message"]["content"][0]["text"] - return response_text - - -def make_doc_gen(root: Path): - doc_gen = DocGen.from_root(root) - doc_gen.collect_snippets() - return doc_gen - - -@retry_with_backoff(exceptions=(ClientError,), max_retries=10) -def generate_snippet_description( - bedrock_runtime: BedrockRuntime, snippet: Snippet, prompt: Optional[str] -) -> Dict: - content = ( - [{"text": prompt}, {"text": snippet.code}] - if prompt - else [{"text": snippet.code}] - ) - conversation = [ - { - "role": "user", - "content": content, - } - ] - - response_text = bedrock_runtime.converse(conversation) - - try: - # This assumes the response is JSON, which couples snippet - # description generation to a specific prompt. - return json.loads(response_text) - except Exception as e: - logger.warning(f"Failed to parse response. Response: {response_text}") - return {} - - -def generate_descriptions(snippets: Dict[str, Snippet], prompt: Optional[str]): - runtime = BedrockRuntime() - results = [] - for snippet_id, snippet in snippets.items(): - response = generate_snippet_description(runtime, snippet, prompt) - results.append(response) - # Just need a few results for the demo. - if len(results) == 3: - break - print(results) - - -def main(doc_gen_root: Path, prompt: Optional[Path]): - doc_gen = make_doc_gen(doc_gen_root) - prompt_text = prompt.read_text() if prompt and prompt.exists() else None - generate_descriptions(doc_gen.snippets, prompt_text) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Generate new titles and descriptions for DocGen snippets" - ) - parser.add_argument( - "--doc-gen-root", required=True, help="Path to DocGen ready project" - ) - parser.add_argument( - "--prompt", - help="Path to an additional prompt to be used for refining the output", - ) - args = parser.parse_args() - - doc_gen_root = Path(args.doc_gen_root) - prompt = Path(args.prompt) if args.prompt else None - main(doc_gen_root, prompt) diff --git a/requirements.txt b/requirements.txt index 9c3d10e..284010a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,4 @@ black==24.3.0 -boto3==1.37.38 -botocore==1.37.38 flake8==6.1.0 mypy==1.8.0 mypy-extensions==1.0.0 @@ -8,6 +6,5 @@ pathspec==0.11.2 pytest==8.0.0 PyYAML==6.0.1 requests==2.32.0 -types-boto3==1.37.38 types-PyYAML==6.0.12.12 yamale==4.0.4 From e7e83483c9b4f6af4036536d7ff3349f125a2522 Mon Sep 17 00:00:00 2001 From: Corey Pyle Date: Mon, 28 Apr 2025 13:04:43 -0400 Subject: [PATCH 3/3] Change example title. --- aws_doc_sdk_examples_tools/scripts/policy.json.example.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws_doc_sdk_examples_tools/scripts/policy.json.example.md b/aws_doc_sdk_examples_tools/scripts/policy.json.example.md index 1258d7d..da7a414 100644 --- a/aws_doc_sdk_examples_tools/scripts/policy.json.example.md +++ b/aws_doc_sdk_examples_tools/scripts/policy.json.example.md @@ -1,4 +1,4 @@ -# Example output of the snippet_summarize script for IAM policies +# Example generated IAM policy descriptions ``` [ {