10 changes: 10 additions & 0 deletions backend/config.py
@@ -2,6 +2,15 @@
# Setting to True will stream a mock response instead of calling the OpenAI API
# TODO: Should only be set to true when value is 'True', not any arbitrary truthy value
import os
import boto3
from botocore.exceptions import BotoCoreError, ClientError


def has_valid_aws_credentials():
    # Check whether boto3 can resolve working AWS credentials (environment
    # variables, ~/.aws/credentials, or an instance profile) by calling STS.
    sts = boto3.client('sts')
    try:
        sts.get_caller_identity()
        return True
    except (BotoCoreError, ClientError):
        return False

NUM_VARIANTS = 2

@@ -10,6 +19,7 @@
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", None)
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", None)
OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", None)
AWS_CREDENTIALS = has_valid_aws_credentials()

# Image generation (optional)
REPLICATE_API_KEY = os.environ.get("REPLICATE_API_KEY", None)
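A note on the credential check above: boto3 resolves credentials through its standard chain (environment variables, ~/.aws/credentials, or an instance/role profile), so the STS call succeeds whenever any of those are configured. A minimal sketch for exercising it locally, assuming the backend package is on the path; the region value is only an example, not something this PR requires:

# Hypothetical smoke test, not part of this PR. Assumes AWS_ACCESS_KEY_ID /
# AWS_SECRET_ACCESS_KEY (or a shared credentials file) are already set.
import os

os.environ.setdefault("AWS_DEFAULT_REGION", "us-east-1")

from config import has_valid_aws_credentials

print("AWS credentials detected:", has_valid_aws_credentials())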
12 changes: 11 additions & 1 deletion backend/evals/core.py
@@ -1,9 +1,10 @@
from config import ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY
from config import ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, AWS_CREDENTIALS
from llm import (
    Llm,
    stream_claude_response,
    stream_gemini_response,
    stream_openai_response,
    stream_bedrock_response,
)
from prompts import assemble_prompt
from prompts.types import Stack
@@ -46,6 +47,15 @@ async def process_chunk(_: str):
            callback=lambda x: process_chunk(x),
            model=model,
        )
    elif model == Llm.BEDROCK_CLAUDE_3_5_SONNET_2024_06_20:
        if not AWS_CREDENTIALS:
            raise Exception("AWS credentials not found")

        completion = await stream_bedrock_response(
            prompt_messages,
            callback=lambda x: process_chunk(x),
            model=model,
        )
    else:
        if not OPENAI_API_KEY:
            raise Exception("OpenAI API key not found")
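For context, a minimal sketch of driving the new branch end to end; it calls stream_bedrock_response directly with the enum member added in this PR (the prompt content is illustrative, not from the diff):

# Sketch only: exercise the Bedrock path the same way the elif branch above does.
import asyncio

from llm import Llm, stream_bedrock_response


async def print_chunk(chunk: str) -> None:
    print(chunk, end="", flush=True)


async def main() -> None:
    messages = [{"role": "user", "content": "Generate a minimal HTML page."}]
    completion = await stream_bedrock_response(
        messages,
        callback=print_chunk,
        model=Llm.BEDROCK_CLAUDE_3_5_SONNET_2024_06_20,
    )
    print(f"\nDuration: {completion['duration']:.1f}s")


asyncio.run(main())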
113 changes: 96 additions & 17 deletions backend/llm.py
@@ -11,6 +11,8 @@
from image_processing.utils import process_image
from google import genai
from google.genai import types
from boto3 import Session
import json

from utils import pprint_prompt

@@ -29,6 +31,7 @@ class Llm(Enum):
    CLAUDE_3_5_SONNET_2024_10_22 = "claude-3-5-sonnet-20241022"
    GEMINI_2_0_FLASH_EXP = "gemini-2.0-flash-exp"
    O1_2024_12_17 = "o1-2024-12-17"
    BEDROCK_CLAUDE_3_5_SONNET_2024_06_20 = "anthropic.claude-3-5-sonnet-20240620-v1:0"
@marcelovicentegc (Author) commented on Feb 4, 2025:

Not using the latest Claude 3.5 Sonnet here (v2, 2024-10-22) because I still don't have access to it on AWS to test it, but it should work either way.

@abi we could work on adding a dropdown menu (or something similar) on the front-end to let users select the model they want to use based on what's available in their environment. Let me know if something like this is already on the radar. It occurs to me that this would be especially useful for Bedrock, since equivalent models such as DeepSeek R1 should be available there soon.
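One possible backend half of that idea, sketched from the flags already in config.py; the helper name and return shape are hypothetical, not part of this PR. The front-end dropdown could then be populated from an endpoint that returns this list.

# Hypothetical helper: report which models this deployment can serve, based on
# the credentials/keys that config.py already detects at startup.
from config import ANTHROPIC_API_KEY, AWS_CREDENTIALS, GEMINI_API_KEY, OPENAI_API_KEY
from llm import Llm


def available_models() -> list[str]:
    models: list[str] = []
    if OPENAI_API_KEY:
        models.append(Llm.O1_2024_12_17.value)
    if ANTHROPIC_API_KEY:
        models.append(Llm.CLAUDE_3_5_SONNET_2024_10_22.value)
    if GEMINI_API_KEY:
        models.append(Llm.GEMINI_2_0_FLASH_EXP.value)
    if AWS_CREDENTIALS:
        models.append(Llm.BEDROCK_CLAUDE_3_5_SONNET_2024_06_20.value)
    return models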



class Completion(TypedDict):
@@ -91,24 +94,13 @@ async def stream_openai_response(

completion_time = time.time() - start_time
return {"duration": completion_time, "code": full_response}



# TODO: Have a seperate function that translates OpenAI messages to Claude messages
async def stream_claude_response(
messages: List[ChatCompletionMessageParam],
api_key: str,
callback: Callable[[str], Awaitable[None]],
model: Llm,
) -> Completion:
start_time = time.time()
client = AsyncAnthropic(api_key=api_key)

# Base parameters
max_tokens = 8192
temperature = 0.0

# Translate OpenAI messages to Claude messages

def process_claude_messages(messages: List[ChatCompletionMessageParam]) -> tuple[str, List[dict]]:
    """
    Process messages for Claude by converting image URLs to base64 data
    and removing the image URL parameter from the message.
    """
    # Deep copy messages to avoid modifying the original list
    cloned_messages = copy.deepcopy(messages)

@@ -139,6 +131,27 @@ async def stream_claude_response(
"data": base64_data,
}

return system_prompt, claude_messages


# TODO: Have a separate function that translates OpenAI messages to Claude messages
async def stream_claude_response(
    messages: List[ChatCompletionMessageParam],
    api_key: str,
    callback: Callable[[str], Awaitable[None]],
    model: Llm,
) -> Completion:
    start_time = time.time()
    client = AsyncAnthropic(api_key=api_key)

    # Base parameters
    max_tokens = 8192
    temperature = 0.0

    # Translate OpenAI messages to Claude messages
    system_prompt, claude_messages = process_claude_messages(messages)

    # Stream Claude response
    async with client.messages.stream(
        model=model.value,
@@ -300,3 +313,69 @@ async def stream_gemini_response(
await callback(response.text) # type: ignore
completion_time = time.time() - start_time
return {"duration": completion_time, "code": full_response}


async def stream_bedrock_response(
    messages: List[ChatCompletionMessageParam],
    callback: Callable[[str], Awaitable[None]],
    model: Llm,
) -> Completion:
    print(f"Invoking {model} on AWS Bedrock")
    start_time = time.time()

    # Initialize Bedrock runtime client
    session = Session()

    # Expect configuration from environment variables or ~/.aws/credentials
    bedrock_client = session.client(
        service_name='bedrock-runtime',
    )

    full_response = ""

    system_prompt, claude_messages = process_claude_messages(messages)

    body = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 8192,
        "messages": claude_messages,
        "temperature": 0.0,
        "system": system_prompt,
    }

    # Convert the payload to bytes
    body_bytes = json.dumps(body).encode('utf-8')

    response = bedrock_client.invoke_model_with_response_stream(
        body=body_bytes,
        contentType='application/json',
        accept='application/json',
        modelId=model.value,
        trace='DISABLED',
    )

    # Stream text deltas back through the callback; surface any error events
    for event in response['body']:
        if 'chunk' in event:
            chunk = event['chunk']['bytes'].decode('utf-8')
            if chunk:
                chunk_obj = json.loads(chunk)
                if chunk_obj.get('delta', {}).get('type') == 'text_delta':
                    response_text = chunk_obj['delta']['text']
                    full_response += response_text
                    await callback(response_text)
        elif 'internalServerException' in event:
            raise Exception(event['internalServerException']['message'])
        elif 'modelStreamErrorException' in event:
            raise Exception(event['modelStreamErrorException']['message'])
        elif 'validationException' in event:
            raise Exception(event['validationException']['message'])
        elif 'throttlingException' in event:
            raise Exception(event['throttlingException']['message'])
        elif 'modelTimeoutException' in event:
            raise Exception(event['modelTimeoutException']['message'])
        elif 'serviceUnavailableException' in event:
            raise Exception(event['serviceUnavailableException']['message'])

    completion_time = time.time() - start_time

    return {"duration": completion_time, "code": full_response}
95 changes: 93 additions & 2 deletions backend/poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions backend/pyproject.toml
@@ -22,6 +22,7 @@ types-pillow = "^10.2.0.20240520"
aiohttp = "^3.9.5"
pydantic = "^2.10"
google-genai = "^0.3.0"
boto3 = "^1.36.11"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.3"