Add logic to return generated-image metadata from the API to the UI

codinglabsong · codinglabsong · commit 3bff07152b16 · 2025-08-20T09:49:31.000-07:00
diff --git a/api/llm/agent.py b/api/llm/agent.py
@@ -1,5 +1,7 @@
 import atexit
 import os
+import re
+from datetime import datetime
 from functools import lru_cache
 from typing import List, Optional
 
@@ -68,9 +70,7 @@ def get_agent():
     return _agent_executor
 
 
-def chat_with_agent(
-    message: str, user_id: str = "default", selected_images: Optional[List[dict]] = None
-) -> str:
+def chat_with_agent(message: str, user_id: str = "default", selected_images: Optional[List[dict]] = None) -> tuple[str, Optional[dict]]:
     """
     Send a message to the agent and get a response.
 
@@ -80,7 +80,7 @@ def chat_with_agent(
         selected_images: List of selected image objects (optional)
 
     Returns:
-        The agent's response as a string
+        Tuple of (agent_response, generated_image_data)
     """
     agent = get_agent()
 
@@ -89,9 +89,7 @@ def chat_with_agent(
     if selected_images and len(selected_images) > 0:
         image_context = "\n\nSelected Images:\n"
         for i, img in enumerate(selected_images, 1):
-            image_context += (
-                f"{i}. {img.get('title', 'Untitled')} (ID: {img.get('id', 'unknown')})\n"
-            )
+            image_context += f"{i}. {img.get('title', 'Untitled')} (ID: {img.get('id', 'unknown')})\n"
             image_context += f"   Type: {img.get('type', 'unknown')}\n"
             image_context += f"   Description: {img.get('description', 'No description')}\n"
             if img.get("url"):
@@ -103,20 +101,86 @@ def chat_with_agent(
     config = {"configurable": {"thread_id": user_id}}
 
     # Get response from agent
-    response = agent.invoke(
-        {"messages": [{"role": "user", "content": full_message}]}, config=config
-    )
+    response = agent.invoke({"messages": [{"role": "user", "content": full_message}]}, config=config)
 
     # Extract the last message from the agent
+    agent_response = "I'm sorry, I couldn't process your request. Please try again."
+    generated_image_data = None
+
     if response and "messages" in response and len(response["messages"]) > 0:
         last_message = response["messages"][-1]
         # Handle both AIMessage objects and dictionaries
         if hasattr(last_message, "content"):
-            return last_message.content
+            agent_response = last_message.content
         elif isinstance(last_message, dict) and "content" in last_message:
-            return last_message["content"]
-
-    return "I'm sorry, I couldn't process your request. Please try again."
+            agent_response = last_message["content"]
+
+        # Check if any tools were used (image generation)
+        if "intermediate_steps" in response and response["intermediate_steps"]:
+            for step in response["intermediate_steps"]:
+                if len(step) >= 2 and "generate_image" in str(step[0]):
+                    # Extract image data from the tool result
+                    tool_result = step[1]
+                    if "Image ID:" in tool_result:
+                        # Parse the image ID and title from the response
+                        image_id_match = re.search(r"Image ID: ([a-f0-9-]+)", tool_result)
+                        title_match = re.search(r"Title: (.+?)(?:\n|$)", tool_result)
+
+                        if image_id_match:
+                            image_id = image_id_match.group(1)
+                            title = title_match.group(1) if title_match else "Generated Image"
+
+                            # Get metadata from S3
+                            import boto3
+
+                            s3_client = boto3.client(
+                                "s3",
+                                region_name=os.environ.get("AWS_REGION", "us-east-1"),
+                                aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
+                                aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
+                            )
+
+                            bucket_name = os.environ.get("AWS_S3_BUCKET_NAME")
+                            if bucket_name:
+                                try:
+                                    # Get metadata from S3
+                                    metadata_response = s3_client.head_object(Bucket=bucket_name, Key=f"users/{user_id}/images/{image_id}")
+                                    metadata = metadata_response.get("Metadata", {})
+
+                                    # Generate presigned URL
+                                    presigned_url = s3_client.generate_presigned_url(
+                                        "get_object",
+                                        Params={
+                                            "Bucket": bucket_name,
+                                            "Key": f"users/{user_id}/images/{image_id}",
+                                        },
+                                        ExpiresIn=7200,  # 2 hours
+                                    )
+
+                                    generated_image_data = {
+                                        "id": image_id,
+                                        "url": presigned_url,
+                                        "title": metadata.get("title", title),
+                                        "description": f"AI-generated image: {metadata.get('generationPrompt', 'Based on your request')}",
+                                        "timestamp": metadata.get("uploadedAt", datetime.now().isoformat()),
+                                        "type": "generated",
+                                    }
+                                except Exception as e:
+                                    print(f"Error getting S3 metadata: {e}")
+                                    # Fallback to basic data
+                                    generated_image_data = {
+                                        "id": image_id,
+                                        "url": "",  # Will be empty if we can't generate URL
+                                        "title": title,
+                                        "description": "AI-generated image",
+                                        "timestamp": datetime.now().isoformat(),
+                                        "type": "generated",
+                                    }
+                                    # Add error message to agent response
+                                    agent_response += "\n\n⚠️ Note: I generated the image successfully,\
+                                        but there was an issue retrieving it from the database."
+
+    return agent_response, generated_image_data
 
 
 if __name__ == "__main__":
diff --git a/api/llm/prompt.py b/api/llm/prompt.py
@@ -4,4 +4,9 @@
 You are a helpful AI image editing assistant. You help users with image editing
 tasks and provide guidance on how to modify their images.
 
+You can generate images using the generate_image tool. However, remember that
+you are only allowed to generate one image per user's request. You are NOT allowed
+to generate more than one image per user's request, no matter how many images the user
+wants to generate per request (e.g. generate 10 images for me based on this one image).
+
 """
diff --git a/api/llm/tools.py b/api/llm/tools.py
@@ -21,6 +21,7 @@ def generate_image(
         prompt: str,
         user_id: str,
         image_url: str,
+        title: str = "Generated Image",
     ) -> str:
         """
         Generate an image based on a prompt.
@@ -37,9 +38,7 @@ def generate_image(
         }
 
         # Generate image using Replicate
-        version = (
-            "stability-ai/sdxl:" "7762fd07cf82c948538e41f63f77d685e02b063e37e496e96eefd46c929f9bdc"
-        )
+        version = "stability-ai/sdxl:" "7762fd07cf82c948538e41f63f77d685e02b063e37e496e96eefd46c929f9bdc"
         output = replicate.run(
             version,
             input=input,
@@ -68,16 +67,18 @@ def generate_image(
         # Upload to S3
         try:
             s3_result = upload_generated_image_to_s3(
-                image_data=image_data, image_id=image_id, user_id=user_id, prompt=prompt
+                image_data=image_data,
+                image_id=image_id,
+                user_id=user_id,
+                prompt=prompt,
+                title=title,
             )
 
             if s3_result["success"]:
                 return f"Image generated successfully! User can find it his/her gallery. \
-                    Image ID: {image_id}"
+                    Image ID: {image_id}, Title: {title}"
             else:
-                return (
-                    f"Image generated but failed to save: {s3_result.get('error', 'Unknown error')}"
-                )
+                return f"Image generated but failed to save: {s3_result.get('error', 'Unknown error')}"
 
         except Exception as e:
             return f"Image generated but failed to save to storage: {str(e)}"
diff --git a/api/llm/utils.py b/api/llm/utils.py
@@ -6,9 +6,7 @@
 from botocore.exceptions import ClientError
 
 
-def upload_generated_image_to_s3(
-    image_data: bytes, image_id: str, user_id: str, prompt: str
-) -> Dict[str, Any]:
+def upload_generated_image_to_s3(image_data: bytes, image_id: str, user_id: str, prompt: str, title: str = "Generated Image") -> Dict[str, Any]:
     """
     Upload a generated image to S3.
 
@@ -17,9 +15,10 @@ def upload_generated_image_to_s3(
         image_id: Unique identifier for the image
         user_id: User identifier
         prompt: The prompt used to generate the image
+        title: Custom title for the image
 
     Returns:
-        Dict with success status and URL or error message
+        Dict with success status, URL, and metadata or error message
     """
     try:
         # Initialize S3 client
@@ -44,7 +43,7 @@ def upload_generated_image_to_s3(
             Body=image_data,
             ContentType="image/png",
             Metadata={
-                "title": "Generated Image",  # TODO: add title to the image provided by agent
+                "title": title,
                 "imageId": image_id,
                 "userId": user_id,
                 "uploadedAt": datetime.now().isoformat(),
@@ -60,7 +59,11 @@ def upload_generated_image_to_s3(
             ExpiresIn=7200,  # 2 hours
         )
 
-        return {"success": True, "url": presigned_url, "image_id": image_id}
+        # Get metadata from S3
+        metadata_response = s3_client.head_object(Bucket=bucket_name, Key=key)
+        metadata = metadata_response.get("Metadata", {})
+
+        return {"success": True, "url": presigned_url, "image_id": image_id, "metadata": metadata}
 
     except ClientError as e:
         return {"success": False, "error": str(e)}
diff --git a/api/pyproject.toml b/api/pyproject.toml
@@ -40,11 +40,11 @@ where = ["."]
 include = ["llm*", "server*"]
 
 [tool.black]
-line-length = 100
+line-length = 150
 target-version = ["py310"]
 
 [tool.ruff]
-line-length = 100
+line-length = 150
 target-version = "py310"
 fix = true
 unsafe-fixes = true
diff --git a/api/server/main.py b/api/server/main.py
@@ -18,9 +18,19 @@ class ChatRequest(BaseModel):
     user_id: Optional[str] = None
 
 
+class GeneratedImage(BaseModel):
+    id: str
+    url: str
+    title: str
+    description: str
+    timestamp: str
+    type: str = "generated"
+
+
 class ChatResponse(BaseModel):
     response: str
     status: str = "success"
+    generated_image: Optional[GeneratedImage] = None
 
 
 @app.get("/")
@@ -42,18 +52,24 @@ async def chat_endpoint(request: ChatRequest):
         request: ChatRequest containing message, selected_images, and user_id
 
     Returns:
-        ChatResponse with AI response and status.
+        ChatResponse with AI response, status, and optional generated image metadata.
     """
     try:
         # Use the LLM agent to get a response
         user_id = request.user_id or "default"
-        response = chat_with_agent(
+        response, generated_image_data = chat_with_agent(
             message=request.message,
             user_id=user_id,
             selected_images=request.selected_images,
         )
 
-        return ChatResponse(response=response, status="success")
+        # Create response with optional generated image
+        chat_response = ChatResponse(response=response, status="success")
+
+        if generated_image_data:
+            chat_response.generated_image = GeneratedImage(**generated_image_data)
+
+        return chat_response
 
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
diff --git a/src/app/page.tsx b/src/app/page.tsx
@@ -122,6 +122,24 @@ export default function Home() {
 
       const aiMessage = createMessage(apiResponse.response, "agent");
       addMessage(aiMessage);
+
+      // Handle generated image if present
+      if (apiResponse.generated_image) {
+        const generatedImage: ImageItem = {
+          id: apiResponse.generated_image.id,
+          url: apiResponse.generated_image.url,
+          title: apiResponse.generated_image.title,
+          description: apiResponse.generated_image.description,
+          timestamp: new Date(apiResponse.generated_image.timestamp),
+          type: "generated" as const,
+        };
+
+        // Add to images list
+        setImages((prev) => [...prev, generatedImage]);
+
+        // Scroll to show the new image
+        setTimeout(scrollToRight, 100);
+      }
     } catch (error) {
       console.error("Error getting AI response:", error);
       const fallbackMessage = createMessage(
diff --git a/src/lib/actions.ts b/src/lib/actions.ts
@@ -6,6 +6,7 @@ import {
   GetObjectCommand,
 } from "@aws-sdk/client-s3";
 import { getSignedUrl } from "@aws-sdk/s3-request-presigner";
+import type { GeneratedImage } from "./types";
 
 interface ChatRequest {
   message: string;
@@ -23,6 +24,7 @@ interface ChatRequest {
 interface ChatResponse {
   response: string;
   status: string;
+  generated_image?: GeneratedImage;
 }
 
 interface UploadResponse {
diff --git a/src/lib/types.ts b/src/lib/types.ts
@@ -13,3 +13,12 @@ export interface ImageItem {
   timestamp: Date;
   type: "uploaded" | "generated" | "sample";
 }
+
+export interface GeneratedImage {
+  id: string;
+  url: string;
+  title: string;
+  description: string;
+  timestamp: string;
+  type: string;
+}