Add script for generating new titles/descriptions for snippet metadata.

cpyle0819 · cpyle0819 · commit 9e29cd0ab5cd · 2025-04-25T14:26:36.000-04:00
diff --git a/aws_doc_sdk_examples_tools/scripts/base_prompt.txt b/aws_doc_sdk_examples_tools/scripts/base_prompt.txt
@@ -0,0 +1,11 @@
+Provide a 'title', 'title_abbrev', and 'description' for this example in json format.
+Title should be a title, title_abbrev should be a 1-5 word variation on the title, and description should
+explain in one paragraph what the code is doing. Provide the json raw, without markdown fences.
+
+Sample result:
+
+{ 
+    "title": "Get an object by name from an Amazon S3 bucket",
+    "title_abbrev": "Get an object",
+    "description": "Use the AWS SDK for JavaScript to get an object from an Amazon S3 bucket. Steps are included that demonstrate how to split large downloads up into multiple parts."
+}
diff --git a/aws_doc_sdk_examples_tools/scripts/policy.json.example.md b/aws_doc_sdk_examples_tools/scripts/policy.json.example.md
@@ -0,0 +1,20 @@
+# Example output of the snippet_summarize script for IAM policies
+```
+[
+  {
+    "title": "Allows Amazon SNS to send messages to an Amazon SQS dead-letter queue",
+    "title_abbrev": "Allows SNS to SQS messages",
+    "description": "This resource-based policy allows Amazon SNS to send messages to a specific Amazon SQS dead-letter queue. The policy grants the SNS service principal permission to perform the SQS:SendMessage action on the queue named [MyDeadLetterQueue], but only when the source ARN matches the specified SNS topic [MyTopic]. This policy would be attached to the SQS queue to enable it to receive messages from the SNS topic when message delivery fails. Replace [us-east-2] with your specific AWS Region."
+  },
+  {
+    "title": "Allows managing log delivery and related resources",
+    "title_abbrev": "Allows log delivery management",
+    "description": "This identity-based policy allows managing CloudWatch Logs delivery configurations and related resources. The policy grants permissions to create, get, update, delete, and list log deliveries, as well as manage resource policies for a specific log group [SampleLogGroupName]. It also allows creating service-linked roles, tagging Firehose delivery streams, and managing bucket policies for a specific S3 bucket [bucket-name]. Replace [region] and [account-id] with your specific AWS Region and account ID, and [bucket-name] with your actual S3 bucket name."
+  },
+  {
+    "title": "Allows Amazon SNS to send messages to an SQS dead-letter queue",
+    "title_abbrev": "SNS to SQS permissions",
+    "description": "This resource-based policy allows Amazon SNS to send messages to a specific Amazon SQS queue that serves as a dead-letter queue. The policy grants the SNS service permission to perform the SQS:SendMessage action on the queue named [MyDeadLetterQueue] in the [us-east-2] Region. The policy includes a condition that restricts this permission to messages originating from the SNS topic named [MyTopic] in the same Region and account."
+  }
+]
+```
diff --git a/aws_doc_sdk_examples_tools/scripts/retry.py b/aws_doc_sdk_examples_tools/scripts/retry.py
@@ -0,0 +1,57 @@
+import logging
+from functools import wraps
+import random
+import time
+from typing import Callable, Type, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+def retry_with_backoff(
+    exceptions: Tuple[Type[Exception], ...],
+    max_retries: int = 5,
+    initial_delay: float = 1.0,
+    backoff_factor: float = 2.0,
+    max_delay: float = 60.0,
+    jitter: float = 0.1,
+) -> Callable:
+    """
+    Decorator for retrying a function with exponential backoff upon specified exceptions.
+
+    :param exceptions: Tuple of exception classes to catch.
+    :param max_retries: Maximum number of calls in total, including the first (minimum 1).
+    :param initial_delay: Delay before the first retry, in seconds.
+    :param backoff_factor: Factor by which the delay increases after each retry.
+    :param max_delay: Cap on the delay in seconds, applied before jitter is added.
+    :param jitter: Maximum seconds randomly added to or removed from each delay (floored at 0).
+    :return: Decorator; the wrapped call re-raises the last exception once attempts run out.
+
+    This method was AI generated, and reviewed by a human.
+    """
+
+    def decorator(func: Callable) -> Callable:
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            delay = initial_delay
+            attempts = max(1, max_retries)  # Always call func at least once.
+            for attempt in range(1, attempts + 1):
+                try:
+                    return func(*args, **kwargs)
+                except exceptions as e:
+                    if attempt == attempts:
+                        logger.error(
+                            f"Function '{func.__name__}' failed after {attempts} attempts."
+                        )
+                        raise
+                    jittered = min(delay, max_delay) + jitter * random.uniform(-1, 1)
+                    sleep_time = max(0.0, jittered)  # sleep() rejects negatives
+                    logger.warning(
+                        f"Attempt {attempt} for function '{func.__name__}' failed with {e.__class__.__name__}: {e} "
+                        f"Retrying in {sleep_time:.2f} seconds..."
+                    )
+                    time.sleep(sleep_time)
+                    delay *= backoff_factor
+
+        return wrapper
+
+    return decorator
diff --git a/aws_doc_sdk_examples_tools/scripts/snippet_summarize.py b/aws_doc_sdk_examples_tools/scripts/snippet_summarize.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Dict, Optional 
+
+import boto3
+from botocore.exceptions import ClientError
+
+from aws_doc_sdk_examples_tools.doc_gen import DocGen, Snippet
+from aws_doc_sdk_examples_tools.scripts.retry import retry_with_backoff
+
+
+logging.basicConfig(level=logging.INFO)  # Runs at import time, not only as a script.
+logger = logging.getLogger(__name__)
+
+
+class BedrockRuntime:  # Wraps the Bedrock Converse API with a fixed base prompt.
+    def __init__(self, model_id: str = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"):
+        self.client = boto3.client("bedrock-runtime")
+        self.model_id = model_id
+        self.base_prompt = Path(
+            os.path.dirname(__file__), "base_prompt.txt"
+        ).read_text()
+        self.conversation = [{"role": "user", "content": [{"text": self.base_prompt}]}]
+
+    def converse(self, conversation):
+        """Send the base prompt plus `conversation`; return the first reply text."""
+        # Kept off self.conversation: extending it would resend earlier snippets.
+        response = self.client.converse(
+            modelId=self.model_id,
+            messages=[*self.conversation, *conversation],
+            inferenceConfig={"maxTokens": 512, "temperature": 0.5, "topP": 0.9},
+        )
+        return response["output"]["message"]["content"][0]["text"]
+
+
+def make_doc_gen(root: Path):  # Build a DocGen for `root` and collect its snippets.
+    doc_gen = DocGen.from_root(root)
+    doc_gen.collect_snippets()  # Presumably fills doc_gen.snippets, read by main().
+    return doc_gen
+
+
+@retry_with_backoff(exceptions=(ClientError,), max_retries=10)
+def generate_snippet_description(
+    bedrock_runtime: BedrockRuntime, snippet: Snippet, prompt: Optional[str]
+) -> Dict:
+    """
+    Ask the model to describe one snippet and parse its JSON reply.
+
+    :param bedrock_runtime: Runtime whose converse() returns the reply text.
+    :param snippet: Snippet whose code is sent to the model.
+    :param prompt: Optional extra instructions, sent ahead of the code.
+    :return: The parsed JSON reply, or {} when the reply is not valid JSON.
+
+    A ClientError raised by the call is retried by the decorator (max_retries=10).
+    """
+    content = [{"text": prompt}] if prompt else []
+    content.append({"text": snippet.code})
+    response_text = bedrock_runtime.converse([{"role": "user", "content": content}])
+
+    try:
+        # This assumes the response is JSON, which couples snippet
+        # description generation to a specific prompt.
+        return json.loads(response_text)
+    except json.JSONDecodeError:
+        logger.warning(f"Failed to parse response. Response: {response_text}")
+        return {}
+
+
+def generate_descriptions(snippets: Dict[str, Snippet], prompt: Optional[str]):
+    """Print the descriptions generated for at most the first three snippets."""
+    runtime = BedrockRuntime()
+    results = []
+    for snippet in snippets.values():
+        # Just need a few results for the demo.
+        if len(results) == 3:
+            break
+        results.append(generate_snippet_description(runtime, snippet, prompt))
+    print(results)
+
+
+def main(doc_gen_root: Path, prompt: Optional[Path]):  # NOTE: a missing prompt file is ignored.
+    doc_gen = make_doc_gen(doc_gen_root)
+    prompt_text = prompt.read_text() if prompt and prompt.exists() else None
+    generate_descriptions(doc_gen.snippets, prompt_text)
+
+
+if __name__ == "__main__":  # Command-line entry point.
+    parser = argparse.ArgumentParser(
+        description="Generate new titles and descriptions for DocGen snippets"
+    )
+    parser.add_argument(
+        "--doc-gen-root", required=True, help="Path to DocGen ready project"
+    )
+    parser.add_argument(
+        "--prompt",
+        help="Path to an additional prompt to be used for refining the output",
+    )
+    args = parser.parse_args()
+    # argparse yields strings; wrap them in Path and keep an omitted --prompt as None.
+    doc_gen_root = Path(args.doc_gen_root)
+    prompt = Path(args.prompt) if args.prompt else None
+    main(doc_gen_root, prompt)
diff --git a/requirements.txt b/requirements.txt
@@ -1,10 +1,13 @@
 black==24.3.0
+boto3==1.37.38
+botocore==1.37.38
 flake8==6.1.0
 mypy==1.8.0
 mypy-extensions==1.0.0
 pathspec==0.11.2
 pytest==8.0.0
 PyYAML==6.0.1
 requests==2.32.0
+types-boto3==1.37.38
 types-PyYAML==6.0.12.12
 yamale==4.0.4