Skip to content

Commit c29b01e

Browse files
inline images (#149)
1 parent 7ab30f1 commit c29b01e

File tree

7 files changed

+607
-76
lines changed

7 files changed

+607
-76
lines changed

core/agent.py

Lines changed: 175 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def __init__(
3535
model: str = None,
3636
):
3737
self.document_service = document_service
38+
self.sources = {}
3839
# Load settings
3940
self.settings = get_settings()
4041
self.model = model or self.settings.AGENT_MODEL
@@ -56,7 +57,6 @@ def __init__(
5657
}
5758
)
5859

59-
# TODO: Evaluate and improve the prompt here please!
6060
# System prompt
6161
self.system_prompt = """
6262
You are Morphik, an intelligent research assistant. You can use the following tools to help answer user queries:
@@ -68,20 +68,70 @@ def __init__(
6868
- list_graphs: list available knowledge graphs
6969
- save_to_memory: save important information to persistent memory
7070
- list_documents: list documents accessible to you
71+
7172
Use function calls to invoke these tools when needed. When you have gathered all necessary information,
72-
provide a clear, concise final answer. Include all relevant details and cite your sources.
73-
Always use markdown formatting.
73+
instead of providing a direct text response, you must return a structured response with display objects.
74+
75+
Your response should be a JSON array of display objects, each with:
76+
1. "type": either "text" or "image"
77+
2. "content": for text objects, this is markdown content; for image objects, this is a base64-encoded image
78+
3. "source": the source ID of the chunk where you found this information
79+
80+
Example response format:
81+
```json
82+
[
83+
{
84+
"type": "text",
85+
"content": "## Introduction to the Topic\nHere is some detailed information...",
86+
"source": "doc123-chunk1"
87+
},
88+
{
89+
"type": "text",
90+
"content": "This analysis shows that...",
91+
"source": "doc456-chunk2"
92+
}
93+
]
94+
```
95+
96+
When you use retrieve_chunks, you'll get source IDs for each chunk. Use these IDs in your response.
97+
For example, if you see "Source ID: doc123-chunk4" for important information, attribute it in your response.
98+
99+
Always attribute the information to its specific source. Break your response into multiple display objects
100+
when citing different sources. Use markdown formatting for text content to improve readability.
74101
""".strip()
75102

76103
async def _execute_tool(self, name: str, args: dict, auth: AuthContext):
77104
"""Dispatch tool calls, injecting document_service and auth."""
78105
match name:
79106
case "retrieve_chunks":
80-
return await retrieve_chunks(document_service=self.document_service, auth=auth, **args)
107+
content, sources = await retrieve_chunks(document_service=self.document_service, auth=auth, **args)
108+
self.sources.update(sources)
109+
return content
81110
case "retrieve_document":
82-
return await retrieve_document(document_service=self.document_service, auth=auth, **args)
111+
result = await retrieve_document(document_service=self.document_service, auth=auth, **args)
112+
# Add document as a source if it's a successful retrieval
113+
if isinstance(result, str) and not result.startswith("Document") and not result.startswith("Error"):
114+
doc_id = args.get("document_id", "unknown")
115+
source_id = f"doc{doc_id}-full"
116+
self.sources[source_id] = {
117+
"document_id": doc_id,
118+
"document_name": f"Full Document {doc_id}",
119+
"chunk_number": "full",
120+
}
121+
return result
83122
case "document_analyzer":
84-
return await document_analyzer(document_service=self.document_service, auth=auth, **args)
123+
result = await document_analyzer(document_service=self.document_service, auth=auth, **args)
124+
# Track document being analyzed as a source
125+
if args.get("document_id"):
126+
doc_id = args.get("document_id")
127+
analysis_type = args.get("analysis_type", "analysis")
128+
source_id = f"doc{doc_id}-{analysis_type}"
129+
self.sources[source_id] = {
130+
"document_id": doc_id,
131+
"document_name": f"Document {doc_id} ({analysis_type})",
132+
"analysis_type": analysis_type,
133+
}
134+
return result
85135
case "execute_code":
86136
res = await execute_code(**args)
87137
return res["content"]
@@ -133,8 +183,125 @@ async def run(self, query: str, auth: AuthContext) -> str:
133183
# If no tool call, return final content
134184
if not getattr(msg, "tool_calls", None):
135185
logger.info("No tool calls detected, returning final content")
136-
# Return final content and the history
137-
return msg.content, tool_history
186+
187+
# Parse the response as display objects if possible
188+
display_objects = []
189+
default_text = ""
190+
191+
try:
192+
# Check if the response is JSON formatted
193+
import re
194+
195+
# Try to extract JSON content if present using a regex pattern for common JSON formats
196+
json_pattern = r'\[\s*{.*}\s*\]|\{\s*".*"\s*:.*\}'
197+
json_match = re.search(json_pattern, msg.content, re.DOTALL)
198+
199+
if json_match:
200+
potential_json = json_match.group(0)
201+
parsed_content = json.loads(potential_json)
202+
203+
# Handle both array and object formats
204+
if isinstance(parsed_content, list):
205+
for item in parsed_content:
206+
if isinstance(item, dict) and "type" in item and "content" in item:
207+
# Convert to standardized display object format
208+
display_obj = {
209+
"type": item.get("type", "text"),
210+
"content": item.get("content", ""),
211+
"source": item.get("source", "agent-response"),
212+
}
213+
if "caption" in item and item["type"] == "image":
214+
display_obj["caption"] = item["caption"]
215+
if item["type"] == "image":
216+
display_obj["content"] = self.sources[item["source"]]["content"]
217+
display_objects.append(display_obj)
218+
elif (
219+
isinstance(parsed_content, dict)
220+
and "type" in parsed_content
221+
and "content" in parsed_content
222+
):
223+
# Single display object
224+
display_obj = {
225+
"type": parsed_content.get("type", "text"),
226+
"content": parsed_content.get("content", ""),
227+
"source": parsed_content.get("source", "agent-response"),
228+
}
229+
if "caption" in parsed_content and parsed_content["type"] == "image":
230+
display_obj["caption"] = parsed_content["caption"]
231+
if item["type"] == "image":
232+
display_obj["content"] = self.sources[item["source"]]["content"]
233+
display_objects.append(display_obj)
234+
235+
# If no display objects were created, treat the entire content as text
236+
if not display_objects:
237+
default_text = msg.content
238+
except (json.JSONDecodeError, ValueError) as e:
239+
logger.warning(f"Failed to parse response as JSON: {e}")
240+
default_text = msg.content
241+
242+
# If no structured display objects were found, create a default text object
243+
if not display_objects and default_text:
244+
display_objects.append({"type": "text", "content": default_text, "source": "agent-response"})
245+
246+
# Create sources from the collected source IDs in display objects
247+
sources = []
248+
seen_source_ids = set()
249+
250+
for obj in display_objects:
251+
source_id = obj.get("source")
252+
if source_id and source_id != "agent-response" and source_id not in seen_source_ids:
253+
seen_source_ids.add(source_id)
254+
# Extract document info from source ID if available
255+
if "-" in source_id:
256+
parts = source_id.split("-", 1)
257+
doc_id = parts[0].replace("doc", "")
258+
sources.append(
259+
{
260+
"sourceId": source_id,
261+
"documentName": f"Document {doc_id}",
262+
"documentId": doc_id,
263+
"content": self.sources.get(source_id, {"content": ""})["content"],
264+
}
265+
)
266+
else:
267+
sources.append(
268+
{
269+
"sourceId": source_id,
270+
"documentName": "Referenced Source",
271+
"documentId": "unknown",
272+
"content": self.sources.get(source_id, {"content": ""})["content"],
273+
}
274+
)
275+
276+
# Add agent response source if not already included
277+
if "agent-response" not in seen_source_ids:
278+
sources.append(
279+
{
280+
"sourceId": "agent-response",
281+
"documentName": "Agent Response",
282+
"documentId": "system",
283+
"content": msg.content,
284+
}
285+
)
286+
287+
# Add sources from document chunks used during the session
288+
for source_id, source_info in self.sources.items():
289+
if source_id not in seen_source_ids:
290+
sources.append(
291+
{
292+
"sourceId": source_id,
293+
"documentName": source_info.get("document_name", "Unknown Document"),
294+
"documentId": source_info.get("document_id", "unknown"),
295+
}
296+
)
297+
298+
# Return final content, tool history, display objects and sources
299+
return {
300+
"response": msg.content,
301+
"tool_history": tool_history,
302+
"display_objects": display_objects,
303+
"sources": sources,
304+
}
138305

139306
call = msg.tool_calls[0]
140307
name = call.function.name

core/api.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -966,9 +966,9 @@ async def agent_query(request: AgentQueryRequest, auth: AuthContext = Depends(ve
966966
if settings.MODE == "cloud" and auth.user_id:
967967
await check_and_increment_limits(auth, "agent", 1)
968968
# Use shared agent instance and pass auth to run
969-
response_content, tool_history = await morphik_agent.run(request.query, auth)
970-
# Return both in the response dictionary
971-
return {"response": response_content, "tool_history": tool_history}
969+
response = await morphik_agent.run(request.query, auth)
970+
# Return the complete response dictionary
971+
return response
972972

973973

974974
@app.post("/documents", response_model=List[Document])

core/tools/document_tools.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ async def retrieve_chunks(
6060
folder_name=folder_name,
6161
end_user_id=end_user_id,
6262
)
63+
sources = {}
6364

6465
# Format the results for LiteLLM tool response
6566
content = []
@@ -68,28 +69,54 @@ async def retrieve_chunks(
6869
content.append({"type": "text", "text": f"Found {len(chunks)} relevant chunks:"})
6970

7071
for chunk in chunks:
72+
# Create a unique source ID for this chunk
73+
source_id = f"doc{chunk.document_id}-chunk{chunk.chunk_number}"
74+
75+
# Store source information
76+
sources[source_id] = {
77+
"document_id": chunk.document_id,
78+
"document_name": chunk.filename or "Unnamed Document",
79+
"chunk_number": chunk.chunk_number,
80+
"score": chunk.score,
81+
"content": chunk.content,
82+
}
83+
84+
chunk_content = [{"type": "text", "text": f"Source ID: {source_id}"}]
85+
7186
# Check if this is an image chunk
7287
if chunk.metadata.get("is_image", False):
7388
# Add image to content
7489
if chunk.content.startswith("data:"):
7590
# Already in data URL format
76-
content.append({"type": "image_url", "image_url": {"url": chunk.content}})
91+
chunk_content.append({"type": "image_url", "image_url": {"url": chunk.content}})
7792
else:
7893
# Assuming it's base64, convert to data URL format
79-
# TODO: potential bug here, if the base64 image is not a png
80-
content.append(
94+
chunk_content.append(
8195
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{chunk.content}"}}
8296
)
97+
98+
# Tell the agent this is a reference to an image
99+
chunk_content.append(
100+
{
101+
"type": "text",
102+
"text": f"This is an image from {chunk.filename or 'Unnamed'} (Score: {chunk.score:.2f}). "
103+
+ f"When referencing this image, cite source: {source_id}",
104+
}
105+
)
83106
else:
84107
# Add text content with metadata
85-
text = f"Document: {chunk.filename or 'Unnamed'} (Score: {chunk.score:.2f})\n\n{chunk.content}"
86-
content.append(
108+
text = f"Document: {chunk.filename or 'Unnamed'} (Score: {chunk.score:.2f})\n"
109+
text += f"When referencing this content, cite source: {source_id}\n\n"
110+
text += chunk.content
111+
112+
chunk_content.append(
87113
{
88114
"type": "text",
89115
"text": text,
90116
}
91117
)
92-
return content
118+
content.extend(chunk_content)
119+
return content, sources
93120
except Exception as e:
94121
raise ToolError(f"Error retrieving chunks: {str(e)}")
95122

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import AgentChatTestView from "../../../components/chat/AgentChatTestView";
2+
3+
export default function AgentChatTestPage() {
4+
return <AgentChatTestView />;
5+
}

0 commit comments

Comments
 (0)