morphik-org
diff --git a/core/agent.py b/core/agent.py
Lines changed: 175 additions & 8 deletions
diff --git a/core/api.py b/core/api.py
Lines changed: 3 additions & 3 deletions
diff --git a/core/tools/document_tools.py b/core/tools/document_tools.py
Lines changed: 33 additions & 6 deletions
diff --git a/ee/ui-component/app/test/agent-chat/page.tsx b/ee/ui-component/app/test/agent-chat/page.tsx
Lines changed: 5 additions & 0 deletions
@@ -35,6 +35,7 @@ def __init__(
         model: str = None,
     ):
         self.document_service = document_service
+        self.sources = {}
         # Load settings
         self.settings = get_settings()
         self.model = model or self.settings.AGENT_MODEL
@@ -56,7 +57,6 @@ def __init__(
                 }
             )
 
-        # TODO: Evaluate and improve the prompt here please!
         # System prompt
         self.system_prompt = """
 You are Morphik, an intelligent research assistant. You can use the following tools to help answer user queries:
@@ -68,20 +68,70 @@ def __init__(
 - list_graphs: list available knowledge graphs
 - save_to_memory: save important information to persistent memory
 - list_documents: list documents accessible to you
+
 Use function calls to invoke these tools when needed. When you have gathered all necessary information,
-provide a clear, concise final answer. Include all relevant details and cite your sources.
-Always use markdown formatting.
+instead of providing a direct text response, you must return a structured response with display objects.
+
+Your response should be a JSON array of display objects, each with:
+1. "type": either "text" or "image"
+2. "content": for text objects, this is markdown content; for image objects, this is a base64-encoded image
+3. "source": the source ID of the chunk where you found this information
+
+Example response format:
+```json
+[
+  {
+    "type": "text",
+    "content": "## Introduction to the Topic\nHere is some detailed information...",
+    "source": "doc123-chunk1"
+  },
+  {
+    "type": "text",
+    "content": "This analysis shows that...",
+    "source": "doc456-chunk2"
+  }
+]
+```
+
+When you use retrieve_chunks, you'll get source IDs for each chunk. Use these IDs in your response.
+For example, if you see "Source ID: doc123-chunk4" for important information, attribute it in your response.
+
+Always attribute the information to its specific source. Break your response into multiple display objects
+when citing different sources. Use markdown formatting for text content to improve readability.
 """.strip()
 
     async def _execute_tool(self, name: str, args: dict, auth: AuthContext):
         """Dispatch tool calls, injecting document_service and auth."""
         match name:
             case "retrieve_chunks":
-                return await retrieve_chunks(document_service=self.document_service, auth=auth, **args)
+                content, sources = await retrieve_chunks(document_service=self.document_service, auth=auth, **args)
+                self.sources.update(sources)
+                return content
             case "retrieve_document":
-                return await retrieve_document(document_service=self.document_service, auth=auth, **args)
+                result = await retrieve_document(document_service=self.document_service, auth=auth, **args)
+                # Add document as a source if it's a successful retrieval
+                if isinstance(result, str) and not result.startswith("Document") and not result.startswith("Error"):
+                    doc_id = args.get("document_id", "unknown")
+                    source_id = f"doc{doc_id}-full"
+                    self.sources[source_id] = {
+                        "document_id": doc_id,
+                        "document_name": f"Full Document {doc_id}",
+                        "chunk_number": "full",
+                    }
+                return result
             case "document_analyzer":
-                return await document_analyzer(document_service=self.document_service, auth=auth, **args)
+                result = await document_analyzer(document_service=self.document_service, auth=auth, **args)
+                # Track document being analyzed as a source
+                if args.get("document_id"):
+                    doc_id = args.get("document_id")
+                    analysis_type = args.get("analysis_type", "analysis")
+                    source_id = f"doc{doc_id}-{analysis_type}"
+                    self.sources[source_id] = {
+                        "document_id": doc_id,
+                        "document_name": f"Document {doc_id} ({analysis_type})",
+                        "analysis_type": analysis_type,
+                    }
+                return result
             case "execute_code":
                 res = await execute_code(**args)
                 return res["content"]
@@ -133,8 +183,125 @@ async def run(self, query: str, auth: AuthContext) -> str:
             # If no tool call, return final content
             if not getattr(msg, "tool_calls", None):
                 logger.info("No tool calls detected, returning final content")
-                # Return final content and the history
-                return msg.content, tool_history
+
+                # Parse the response as display objects if possible
+                display_objects = []
+                default_text = ""
+
+                try:
+                    # Check if the response is JSON formatted
+                    import re
+
+                    # Try to extract JSON content if present using a regex pattern for common JSON formats
+                    json_pattern = r'\[\s*{.*}\s*\]|\{\s*".*"\s*:.*\}'
+                    json_match = re.search(json_pattern, msg.content, re.DOTALL)
+
+                    if json_match:
+                        potential_json = json_match.group(0)
+                        parsed_content = json.loads(potential_json)
+
+                        # Handle both array and object formats
+                        if isinstance(parsed_content, list):
+                            for item in parsed_content:
+                                if isinstance(item, dict) and "type" in item and "content" in item:
+                                    # Convert to standardized display object format
+                                    display_obj = {
+                                        "type": item.get("type", "text"),
+                                        "content": item.get("content", ""),
+                                        "source": item.get("source", "agent-response"),
+                                    }
+                                    if "caption" in item and item["type"] == "image":
+                                        display_obj["caption"] = item["caption"]
+                                    if item["type"] == "image":
+                                        display_obj["content"] = self.sources[item["source"]]["content"]
+                                    display_objects.append(display_obj)
+                        elif (
+                            isinstance(parsed_content, dict)
+                            and "type" in parsed_content
+                            and "content" in parsed_content
+                        ):
+                            # Single display object
+                            display_obj = {
+                                "type": parsed_content.get("type", "text"),
+                                "content": parsed_content.get("content", ""),
+                                "source": parsed_content.get("source", "agent-response"),
+                            }
+                            if "caption" in parsed_content and parsed_content["type"] == "image":
+                                display_obj["caption"] = parsed_content["caption"]
+                            if item["type"] == "image":
+                                display_obj["content"] = self.sources[item["source"]]["content"]
+                            display_objects.append(display_obj)
+
+                    # If no display objects were created, treat the entire content as text
+                    if not display_objects:
+                        default_text = msg.content
+                except (json.JSONDecodeError, ValueError) as e:
+                    logger.warning(f"Failed to parse response as JSON: {e}")
+                    default_text = msg.content
+
+                # If no structured display objects were found, create a default text object
+                if not display_objects and default_text:
+                    display_objects.append({"type": "text", "content": default_text, "source": "agent-response"})
+
+                # Create sources from the collected source IDs in display objects
+                sources = []
+                seen_source_ids = set()
+
+                for obj in display_objects:
+                    source_id = obj.get("source")
+                    if source_id and source_id != "agent-response" and source_id not in seen_source_ids:
+                        seen_source_ids.add(source_id)
+                        # Extract document info from source ID if available
+                        if "-" in source_id:
+                            parts = source_id.split("-", 1)
+                            doc_id = parts[0].replace("doc", "")
+                            sources.append(
+                                {
+                                    "sourceId": source_id,
+                                    "documentName": f"Document {doc_id}",
+                                    "documentId": doc_id,
+                                    "content": self.sources.get(source_id, {"content": ""})["content"],
+                                }
+                            )
+                        else:
+                            sources.append(
+                                {
+                                    "sourceId": source_id,
+                                    "documentName": "Referenced Source",
+                                    "documentId": "unknown",
+                                    "content": self.sources.get(source_id, {"content": ""})["content"],
+                                }
+                            )
+
+                # Add agent response source if not already included
+                if "agent-response" not in seen_source_ids:
+                    sources.append(
+                        {
+                            "sourceId": "agent-response",
+                            "documentName": "Agent Response",
+                            "documentId": "system",
+                            "content": msg.content,
+                        }
+                    )
+
+                # Add sources from document chunks used during the session
+                for source_id, source_info in self.sources.items():
+                    if source_id not in seen_source_ids:
+                        sources.append(
+                            {
+                                "sourceId": source_id,
+                                "documentName": source_info.get("document_name", "Unknown Document"),
+                                "documentId": source_info.get("document_id", "unknown"),
+                            }
+                        )
+
+                # Return final content, tool history, display objects and sources
+                return {
+                    "response": msg.content,
+                    "tool_history": tool_history,
+                    "display_objects": display_objects,
+                    "sources": sources,
+                }
 
             call = msg.tool_calls[0]
             name = call.function.name
 
@@ -966,9 +966,9 @@ async def agent_query(request: AgentQueryRequest, auth: AuthContext = Depends(ve
     if settings.MODE == "cloud" and auth.user_id:
         await check_and_increment_limits(auth, "agent", 1)
     # Use shared agent instance and pass auth to run
-    response_content, tool_history = await morphik_agent.run(request.query, auth)
-    # Return both in the response dictionary
-    return {"response": response_content, "tool_history": tool_history}
+    response = await morphik_agent.run(request.query, auth)
+    # Return the complete response dictionary
+    return response
 
 
 @app.post("/documents", response_model=List[Document])
 
@@ -60,6 +60,7 @@ async def retrieve_chunks(
             folder_name=folder_name,
             end_user_id=end_user_id,
         )
+        sources = {}
 
         # Format the results for LiteLLM tool response
         content = []
@@ -68,28 +69,54 @@ async def retrieve_chunks(
         content.append({"type": "text", "text": f"Found {len(chunks)} relevant chunks:"})
 
         for chunk in chunks:
+            # Create a unique source ID for this chunk
+            source_id = f"doc{chunk.document_id}-chunk{chunk.chunk_number}"
+
+            # Store source information
+            sources[source_id] = {
+                "document_id": chunk.document_id,
+                "document_name": chunk.filename or "Unnamed Document",
+                "chunk_number": chunk.chunk_number,
+                "score": chunk.score,
+                "content": chunk.content,
+            }
+
+            chunk_content = [{"type": "text", "text": f"Source ID: {source_id}"}]
+
             # Check if this is an image chunk
             if chunk.metadata.get("is_image", False):
                 # Add image to content
                 if chunk.content.startswith("data:"):
                     # Already in data URL format
-                    content.append({"type": "image_url", "image_url": {"url": chunk.content}})
+                    chunk_content.append({"type": "image_url", "image_url": {"url": chunk.content}})
                 else:
                     # Assuming it's base64, convert to data URL format
-                    # TODO: potential bug here, if the base64 image is not a png
-                    content.append(
+                    chunk_content.append(
                         {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{chunk.content}"}}
                     )
+
+                # Tell the agent this is a reference to an image
+                chunk_content.append(
+                    {
+                        "type": "text",
+                        "text": f"This is an image from {chunk.filename or 'Unnamed'} (Score: {chunk.score:.2f}). "
+                        + f"When referencing this image, cite source: {source_id}",
+                    }
+                )
             else:
                 # Add text content with metadata
-                text = f"Document: {chunk.filename or 'Unnamed'} (Score: {chunk.score:.2f})\n\n{chunk.content}"
-                content.append(
+                text = f"Document: {chunk.filename or 'Unnamed'} (Score: {chunk.score:.2f})\n"
+                text += f"When referencing this content, cite source: {source_id}\n\n"
+                text += chunk.content
+
+                chunk_content.append(
                     {
                         "type": "text",
                         "text": text,
                     }
                 )
-        return content
+            content.extend(chunk_content)
+        return content, sources
     except Exception as e:
         raise ToolError(f"Error retrieving chunks: {str(e)}")
 
 
@@ -0,0 +1,5 @@
+import AgentChatTestView from "../../../components/chat/AgentChatTestView";
+
+export default function AgentChatTestPage() { // Default export of this page module; takes no props.
+  return <AgentChatTestView />; // Renders the imported AgentChatTestView, passing it no props.
+}