Merge pull request #38 from DefangLabs/jordan/bedrock-access-gateway

jordanstephens · web-flow · commit 196c5f295e9e · 2025-04-02T11:00:23.000-07:00
Migrate to Bedrock
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -16,7 +16,8 @@
                 "SECRET_KEY": "supersecret",
                 "SESSION_COOKIE_SECURE": "0",
                 "OPENAI_BASE_URL": "http://localhost:8080/api/v1",
-                "OPENAI_API_KEY": "secret"
+                "OPENAI_API_KEY": "secret",
+                "MODEL": "anthropic.claude-3-sonnet-20240229-v1:0"
             },
             "args": [
                 "run",
diff --git a/app/rag_system.py b/app/rag_system.py
@@ -7,6 +7,7 @@
 import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
 
+openai.api_base = os.getenv("OPENAI_BASE_URL")
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
 class RAGSystem:
@@ -134,7 +135,7 @@ def answer_query_stream(self, query):
             messages.append(system_message)
 
             stream = openai.ChatCompletion.create(
-                model="gpt-4-turbo",
+                model=os.getenv("MODEL"),
                 messages=messages,
                 temperature=0.5,
                 max_tokens=2048,
diff --git a/compose.dev.yaml b/compose.dev.yaml
@@ -1,27 +1,40 @@
 services:
-  rag-chatbot:
-    build:
-      context: ./app
-      shm_size: "16gb"
-    ports:
-      - target: 5050
-        published: 5050 # MacOS AirPlay uses port 5000
-        protocol: tcp
-        mode: ingress
+  app:
+    extends:
+      file: compose.yaml
+      service: app
+    env_file:
+      - .env
     environment:
       ASK_TOKEN: asktoken
       FLASK_APP: app.py
       SECRET_KEY: supersecret
       SEGMENT_WRITE_KEY: ${SEGMENT_WRITE_KEY} # Set your Segment write key here or in the .env file
       SESSION_COOKIE_SECURE: 0
-      OPENAI_API_KEY: ${OPENAI_API_KEY} # Set your OpenAI API key here or in the .env file
+      OPENAI_BASE_URL: "http://llm:5051/api/v1"
       SENTENCE_TRANSFORMERS_HOME: /app/sentence-transformers
     volumes:
       - type: bind
         source: ./app
         target: /app
     command: flask run --host=0.0.0.0 --port=5050
-    deploy:
-      resources:
-        reservations:
-          memory: 4G
+
+  llm:
+    extends:
+      file: compose.yaml
+      service: llm
+    env_file:
+      - .env
+    ports:
+      - target: 5051
+        published: 5051
+        protocol: tcp
+        mode: ingress
+    environment:
+      - AWS_REGION=us-west-2
+      - AWS_PROFILE=defang-lab
+      - PORT=5051
+    volumes:
+      - type: bind
+        source: ~/.aws
+        target: /root/.aws
diff --git a/compose.yaml b/compose.yaml
@@ -1,5 +1,5 @@
 services:
-  rag-chatbot:
+  app:
     restart: always
     domainname: ask.defang.io
     x-defang-dns-role: arn:aws:iam::258338292852:role/dnsadmin-39a19c3
@@ -19,6 +19,8 @@ services:
       SEGMENT_WRITE_KEY:
       SESSION_COOKIE_SECURE: 1
       OPENAI_API_KEY: ${OPENAI_API_KEY} # Set your OpenAI API key here or in the .env file
+      OPENAI_BASE_URL: "http://llm/api/v1"
+      MODEL: "anthropic.claude-3-sonnet-20240229-v1:0"
     command: uwsgi --http 0.0.0.0:5050 --wsgi-file app.py --callable app --processes 4 --threads 2
     deploy:
       resources:
@@ -30,3 +32,16 @@ services:
       timeout: 10s
       retries: 5
       #start_period: 40s
+
+  llm:
+    image: defangio/openai-access-gateway
+    x-defang-llm: true
+    ports:
+      - target: 80
+        published: 80
+        protocol: tcp
+        mode: host
+    environment:
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost/health"]