Skip to content

Commit 333e1ed

Browse files
committed
Add script for generating new titles/descriptions for snippet metadata.
1 parent ca76286 commit 333e1ed

File tree

3 files changed

+95
-0
lines changed

3 files changed

+95
-0
lines changed

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
black==24.3.0
2+
boto3==1.37.38
3+
botocore==1.37.38
24
flake8==6.1.0
35
mypy==1.8.0
46
mypy-extensions==1.0.0
57
pathspec==0.11.2
68
pytest==8.0.0
79
PyYAML==6.0.1
810
requests==2.32.0
11+
types-boto3==1.37.38
912
types-PyYAML==6.0.12.12
1013
yamale==4.0.4

scripts/base_prompt.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Provide a 'title', 'title_abbrev', and 'description' for this example in json format.
2+
Title should be a title, title_abbrev should be a 1-5 word variation on the title, and description should
3+
explain in one paragraph what the code is doing. Provide the json raw, without markdown fences.
4+
5+
Sample result:
6+
7+
{
8+
"title": "Get an object by name from an Amazon S3 bucket",
9+
"title_abbrev": "Get an object",
10+
"description": "Use the AWS SDK for JavaScript to get an object from an Amazon S3 bucket. Steps are included that demonstrate how to split large downloads up into multiple parts."
11+
}

scripts/snippet_summarize.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env python
2+
3+
import argparse
4+
import logging
5+
import os
6+
from pathlib import Path
7+
from typing import Dict, Optional
8+
9+
import boto3
10+
from botocore.exceptions import ClientError
11+
12+
from aws_doc_sdk_examples_tools.doc_gen import DocGen, Snippet
13+
14+
15+
logging.basicConfig(level=logging.INFO)
16+
logger = logging.getLogger(__name__)
17+
18+
19+
def make_doc_gen(root: Path):
    """Build a DocGen for the project at *root* and collect its snippets.

    Args:
        root: Directory handed to ``DocGen.from_root``.

    Returns:
        The DocGen instance after ``collect_snippets()`` has been called on it.
    """
    generator = DocGen.from_root(root)
    # Snippets are gathered up front so callers can read `.snippets` directly.
    generator.collect_snippets()
    return generator
23+
24+
25+
def generate_descriptions(
    snippets: Dict[str, Snippet],
    prompt: Optional[str],
    *,
    model_id: str = "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    region_name: str = "us-west-2",
):
    """Ask a Bedrock model for a title/description for each snippet.

    For every snippet, a single user message is sent through the Bedrock
    Runtime ``converse`` API. The message consists of the base prompt (read
    from ``base_prompt.txt`` next to this script), the optional extra
    *prompt*, and the snippet's code. Snippets whose request fails, or whose
    response does not have the expected shape, are logged and skipped so one
    bad snippet does not abort the whole run.

    Args:
        snippets: Mapping of snippet ID to snippet; only ``snippet.code`` is
            sent to the model. The ID is used in log messages.
        prompt: Additional instructions appended after the base prompt, or
            ``None``/empty to send the base prompt alone.
        model_id: Bedrock model or inference-profile ID to invoke.
        region_name: AWS Region used for the ``bedrock-runtime`` client.

    Returns:
        The list of raw response texts, in iteration order of *snippets*,
        for the snippets that succeeded. The list is also printed to stdout.
    """
    client = boto3.client("bedrock-runtime", region_name=region_name)
    base_prompt = (Path(__file__).parent / "base_prompt.txt").read_text(
        encoding="utf-8"
    )
    # Loop-invariant, so build it once.
    inference_config = {"maxTokens": 512, "temperature": 0.5, "topP": 0.9}

    results = []
    for snippet_id, snippet in snippets.items():
        content = [{"text": base_prompt}]
        if prompt:
            content.append({"text": prompt})
        content.append({"text": snippet.code})
        conversation = [{"role": "user", "content": content}]

        try:
            response = client.converse(
                modelId=model_id,
                messages=conversation,
                inferenceConfig=inference_config,
            )
        except Exception as err:
            # Deliberately broad: this is a best-effort batch job, and
            # ClientError (service-side failures) is already an Exception.
            logger.warning(
                "ERROR: Can't invoke '%s' for snippet '%s'. Name: %s, Reason: %s",
                model_id,
                snippet_id,
                type(err).__name__,
                err,
            )
            continue

        try:
            # Extract the text of the first content block of the reply.
            response_text = response["output"]["message"]["content"][0]["text"]
        except (KeyError, IndexError, TypeError) as err:
            logger.warning(
                "Unexpected response shape from '%s' for snippet '%s'. "
                "Name: %s, Reason: %s",
                model_id,
                snippet_id,
                type(err).__name__,
                err,
            )
            continue

        results.append(response_text)

    print(results)
    return results
58+
59+
60+
def main(doc_gen_root: Path, prompt: Optional[Path] = None):
    """Collect snippets from a DocGen project and generate descriptions.

    Args:
        doc_gen_root: Root directory of the DocGen-ready project.
        prompt: Optional path to a file with extra prompt text. When it is
            ``None`` or the file does not exist, only the base prompt is
            used; a missing file is reported with a warning rather than
            being silently ignored.
    """
    doc_gen = make_doc_gen(doc_gen_root)

    prompt_text = None
    if prompt is not None:
        try:
            prompt_text = prompt.read_text(encoding="utf-8")
        except FileNotFoundError:
            # Keep the run going without the extra prompt, but tell the user
            # their --prompt argument had no effect.
            logger.warning(
                "Prompt file '%s' does not exist; continuing without it.", prompt
            )

    generate_descriptions(doc_gen.snippets, prompt_text)
64+
65+
66+
if __name__ == "__main__":
    # Command-line entry point: parse the two options and hand off to main().
    cli = argparse.ArgumentParser(
        description="Generate new titles and descriptions for DocGen snippets"
    )
    cli.add_argument(
        "--doc-gen-root",
        required=True,
        help="Path to DocGen ready project",
    )
    cli.add_argument(
        "--prompt",
        help="Path to an additional prompt to be used for refining the output",
    )
    parsed = cli.parse_args()

    # --prompt is optional; an absent or empty value means "no extra prompt".
    extra_prompt = None
    if parsed.prompt:
        extra_prompt = Path(parsed.prompt)

    main(Path(parsed.doc_gen_root), extra_prompt)

0 commit comments

Comments
 (0)