
Commit 9668adb

Add JSON parsing of output.
1 parent 333e1ed commit 9668adb

File tree

1 file changed: +62 -26 lines changed

scripts/snippet_summarize.py

Lines changed: 62 additions & 26 deletions
@@ -1,10 +1,11 @@
 #!/usr/bin/env python
 
 import argparse
+import json
 import logging
 import os
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict, List, NewType, Optional, TypedDict
 
 import boto3
 from botocore.exceptions import ClientError
@@ -16,43 +17,78 @@
 logger = logging.getLogger(__name__)
 
 
+class ConversationEntry(TypedDict):
+    role: str
+    content: str
+
+
+Conversation = NewType("Conversation", List[ConversationEntry])
+
+
+class BedrockRuntime:
+    def __init__(self, model_id: str = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"):
+        self.client = boto3.client("bedrock-runtime")
+        self.model_id = model_id
+        self.base_prompt = Path(
+            os.path.dirname(__file__), "base_prompt.txt"
+        ).read_text()
+        self.conversation = [
+            ConversationEntry({"role": "user", "content": [{"text": self.base_prompt}]})
+        ]
+
+    def converse(self, conversation: Conversation):
+        self.conversation.extend(conversation)
+        response = self.client.converse(
+            modelId=self.model_id,
+            messages=self.conversation,
+            inferenceConfig={"maxTokens": 512, "temperature": 0.5, "topP": 0.9},
+        )
+        response_text = response["output"]["message"]["content"][0]["text"]
+        return response_text
+
+
 def make_doc_gen(root: Path):
     doc_gen = DocGen.from_root(root)
     doc_gen.collect_snippets()
     return doc_gen
 
 
+def generate_snippet_description(
+    bedrock_runtime: BedrockRuntime, snippet: Snippet, prompt: Optional[str]
+) -> Dict:
+    content = (
+        [{"text": prompt}, {"text": snippet.code}]
+        if prompt
+        else [{"text": snippet.code}]
+    )
+    conversation = [
+        {
+            "role": "user",
+            "content": content,
+        }
+    ]
+
+    response_text = bedrock_runtime.converse(conversation)
+
+    try:
+        # This assumes the response is JSON, which couples snippet
+        # description generation to a specific prompt.
+        return json.loads(response_text)
+    except Exception as e:
+        logger.warning("Failed to parse response (%s): %s", e, response_text)
+        return {}
+
+
 def generate_descriptions(snippets: Dict[str, Snippet], prompt: Optional[str]):
-    client = boto3.client("bedrock-runtime", region_name="us-west-2")
-    base_prompt = Path(os.path.dirname(__file__), "base_prompt.txt").read_text()
-    model_id = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
+    runtime = BedrockRuntime()
     results = []
     for snippet_id, snippet in snippets.items():
-        content = [{"text": base_prompt}]
-        if prompt:
-            content.append({"text": prompt})
-        content.append({"text": snippet.code})
-        conversation = [
-            {
-                "role": "user",
-                "content": content,
-            }
-        ]
-
         try:
-            response = client.converse(
-                modelId=model_id,
-                messages=conversation,
-                inferenceConfig={"maxTokens": 512, "temperature": 0.5, "topP": 0.9},
-            )
-
-            # Extract and print the response text.
-            response_text = response["output"]["message"]["content"][0]["text"]
-            results.append(response_text)
-
+            response = generate_snippet_description(runtime, snippet, prompt)
+            results.append(response)
         except (ClientError, Exception) as e:
             logger.warning(
-                f"ERROR: Can't invoke '{model_id}'. Name: {type(e).__name__}, Reason: {e}"
+                f"ERROR: Can't invoke '{runtime.model_id}'. Name: {type(e).__name__}, Reason: {e}"
             )
     print(results)
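The substance of this commit is the JSON-parsing fallback in generate_snippet_description: the model's reply is decoded with json.loads, and anything that is not valid JSON degrades to an empty dict instead of aborting the run. A minimal stand-alone sketch of that behaviour (parse_model_output is a hypothetical helper used only for illustration, not part of the script):

import json
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


def parse_model_output(response_text: str) -> dict:
    # Mirror the commit's fallback: a reply that is not valid JSON is logged
    # and replaced with an empty dict so one bad snippet does not stop the batch.
    try:
        return json.loads(response_text)
    except json.JSONDecodeError:
        logger.warning("Failed to parse response: %s", response_text)
        return {}


print(parse_model_output('{"title": "List buckets", "summary": "Calls s3.list_buckets."}'))
# {'title': 'List buckets', 'summary': 'Calls s3.list_buckets.'}

print(parse_model_output("Sure! Here is a summary of the snippet ..."))
# {} (and a warning is logged)

As the in-diff comment notes, this couples description generation to a prompt that reliably asks for JSON output; a reply that is not JSON is silently collapsed to {} in the results list.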
