Commit d15368a
chore: Updating documentation, adding exception handling for Vector Stores in RAG Tool, more tests on migration, and migrate off of inference_api for context_retriever for RAG (#3367)
# What does this PR do?

- Updating documentation on migration from the RAG Tool to the Vector Stores and Files APIs
- Adding exception handling for Vector Stores in the RAG Tool
- Adding more tests on migration from the RAG Tool to Vector Stores
- Migrating off of inference_api for context_retriever for RAG

## Test Plan

Integration and unit tests added.

Signed-off-by: Francisco Javier Arceo <[email protected]>
1 parent f31bcc1 commit d15368a

File tree

5 files changed: +360 -50 lines changed

docs/source/building_applications/rag.md

Lines changed: 21 additions & 0 deletions

@@ -93,10 +93,31 @@ chunks_response = client.vector_io.query(
 
 ### Using the RAG Tool
 
+> **⚠️ DEPRECATION NOTICE**: The RAG Tool is being deprecated in favor of directly using the OpenAI-compatible Search
+> API. We recommend migrating to the OpenAI APIs for better compatibility and future support.
+
 A better way to ingest documents is to use the RAG Tool. This tool allows you to ingest documents from URLs, files, etc.
 and automatically chunks them into smaller pieces. More examples for how to format a RAGDocument can be found in the
 [appendix](#more-ragdocument-examples).
 
+#### OpenAI API Integration & Migration
+
+The RAG tool has been updated to use OpenAI-compatible APIs. This provides several benefits:
+
+- **Files API Integration**: Documents are now uploaded using OpenAI's file upload endpoints
+- **Vector Stores API**: Vector storage operations use OpenAI's vector store format with configurable chunking strategies
+- **Error Resilience**: When processing multiple documents, individual failures are logged but don't crash the operation; failed documents are skipped while successful ones continue processing
+
+**Migration Path:**
+We recommend migrating to the OpenAI-compatible Search API for:
+1. **Better OpenAI Ecosystem Integration**: Direct compatibility with OpenAI tools and workflows, including the Responses API
+2. **Future-Proofing**: Continued support and feature development
+3. **Full OpenAI Compatibility**: Vector Stores, Files, and Search APIs are fully compatible with OpenAI's Responses API
+
+The OpenAI APIs are used under the hood, so you can continue to use your existing RAG Tool code with minimal changes.
+However, we recommend updating your code to use the new OpenAI-compatible APIs for better long-term support. If any
+documents fail to process, they will be logged in the response but will not cause the entire operation to fail.
+
 ```python
 from llama_stack_client import RAGDocument
 

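To make the migration path concrete, here is a minimal sketch of the direct OpenAI-compatible flow the updated docs recommend. It uses the standard `openai` client; the base URL, file name, model-free chunking numbers, and vector store name are illustrative placeholders, not values taken from this commit:

```python
# Hypothetical migration sketch: upload a file, attach it to a vector store,
# then search it, using OpenAI-compatible endpoints instead of the RAG Tool.
from openai import OpenAI

# Point the client at a Llama Stack server's OpenAI-compatible base URL
# (placeholder URL; adjust to your deployment).
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

# Files API: replaces RAGDocument ingestion.
with open("my_document.txt", "rb") as f:
    uploaded = client.files.create(file=f, purpose="assistants")

# Vector Stores API: attach the file with a static chunking strategy,
# mirroring what the RAG Tool now does under the hood.
store = client.vector_stores.create(name="my-docs")
client.vector_stores.files.create(
    vector_store_id=store.id,
    file_id=uploaded.id,
    chunking_strategy={
        "type": "static",
        "static": {"max_chunk_size_tokens": 512, "chunk_overlap_tokens": 128},
    },
)

# Search API: replaces rag_tool queries.
results = client.vector_stores.search(store.id, query="What does the document say?")
```

Note that recent versions of the `openai` SDK expose vector stores at the top level; older versions place these calls under `client.beta.vector_stores`.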
llama_stack/providers/inline/tool_runtime/rag/context_retriever.py

Lines changed: 6 additions & 6 deletions

@@ -8,7 +8,7 @@
 from jinja2 import Template
 
 from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.inference import UserMessage
+from llama_stack.apis.inference import OpenAIUserMessageParam
 from llama_stack.apis.tools.rag_tool import (
     DefaultRAGQueryGeneratorConfig,
     LLMRAGQueryGeneratorConfig,
@@ -61,16 +61,16 @@ async def llm_rag_query_generator(
     messages = [interleaved_content_as_str(content)]
 
     template = Template(config.template)
-    content = template.render({"messages": messages})
+    rendered_content: str = template.render({"messages": messages})
 
     model = config.model
-    message = UserMessage(content=content)
-    response = await inference_api.chat_completion(
-        model_id=model,
+    message = OpenAIUserMessageParam(content=rendered_content)
+    response = await inference_api.openai_chat_completion(
+        model=model,
         messages=[message],
         stream=False,
     )
 
-    query = response.completion_message.content
+    query = response.choices[0].message.content
 
     return query
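The substantive change above is the response shape: the native `chat_completion` returned a `completion_message`, while `openai_chat_completion` returns an OpenAI-style `choices` list. A minimal sketch of the equivalent call through the standard `openai` client, with a placeholder base URL and model ID:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

response = client.chat.completions.create(
    model="llama3.2:3b",  # placeholder model ID
    messages=[{"role": "user", "content": "Rewrite this as a search query: ..."}],
    stream=False,
)

# OpenAI-compatible responses nest the text under choices[0].message.content,
# which is exactly what the updated query generator now reads.
print(response.choices[0].message.content)
```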

llama_stack/providers/inline/tool_runtime/rag/memory.py

Lines changed: 87 additions & 44 deletions

@@ -45,10 +45,7 @@
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.memory.vector_store import (
-    content_from_doc,
-    parse_data_url,
-)
+from llama_stack.providers.utils.memory.vector_store import parse_data_url
 
 from .config import RagToolRuntimeConfig
 from .context_retriever import generate_rag_query
@@ -60,6 +57,47 @@ def make_random_string(length: int = 8):
     return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
 
 
+async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
+    """Get raw binary data and mime type from a RAGDocument for file upload."""
+    if isinstance(doc.content, URL):
+        if doc.content.uri.startswith("data:"):
+            parts = parse_data_url(doc.content.uri)
+            mime_type = parts["mimetype"]
+            data = parts["data"]
+
+            if parts["is_base64"]:
+                file_data = base64.b64decode(data)
+            else:
+                file_data = data.encode("utf-8")
+
+            return file_data, mime_type
+        else:
+            async with httpx.AsyncClient() as client:
+                r = await client.get(doc.content.uri)
+                r.raise_for_status()
+                mime_type = r.headers.get("content-type", "application/octet-stream")
+                return r.content, mime_type
+    else:
+        if isinstance(doc.content, str):
+            content_str = doc.content
+        else:
+            content_str = interleaved_content_as_str(doc.content)
+
+        if content_str.startswith("data:"):
+            parts = parse_data_url(content_str)
+            mime_type = parts["mimetype"]
+            data = parts["data"]
+
+            if parts["is_base64"]:
+                file_data = base64.b64decode(data)
+            else:
+                file_data = data.encode("utf-8")
+
+            return file_data, mime_type
+        else:
+            return content_str.encode("utf-8"), "text/plain"
+
+
 class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
     def __init__(
         self,
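The new `raw_data_from_doc` helper normalizes every `RAGDocument` content variant into a `(bytes, mime_type)` tuple ready for file upload. A hedged usage sketch follows; the document IDs, URI, and import paths are assumptions based on the surrounding code, not taken verbatim from this commit:

```python
# Illustrative inputs and the (bytes, mime_type) results raw_data_from_doc
# would produce for them. IDs and URIs are made-up examples; run inside an
# async function.
from llama_stack.apis.common.content_types import URL
from llama_stack.apis.tools import RAGDocument

# Plain string content -> UTF-8 bytes, typed text/plain.
doc = RAGDocument(document_id="doc-1", content="hello world")
data, mime_type = await raw_data_from_doc(doc)  # (b"hello world", "text/plain")

# data: URL -> decoded payload (base64 or percent-encoded) with its declared mime type.
doc = RAGDocument(document_id="doc-2", content=URL(uri="data:text/plain;base64,aGVsbG8="))
data, mime_type = await raw_data_from_doc(doc)  # (b"hello", "text/plain")

# http(s) URL -> body fetched with httpx; mime type taken from the Content-Type
# header, falling back to application/octet-stream when the header is missing.
doc = RAGDocument(document_id="doc-3", content=URL(uri="https://example.com/notes.txt"))
data, mime_type = await raw_data_from_doc(doc)
```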
@@ -95,46 +133,52 @@ async def insert(
             return
 
         for doc in documents:
-            if isinstance(doc.content, URL):
-                if doc.content.uri.startswith("data:"):
-                    parts = parse_data_url(doc.content.uri)
-                    file_data = base64.b64decode(parts["data"]) if parts["is_base64"] else parts["data"].encode()
-                    mime_type = parts["mimetype"]
-                else:
-                    async with httpx.AsyncClient() as client:
-                        response = await client.get(doc.content.uri)
-                        file_data = response.content
-                        mime_type = doc.mime_type or response.headers.get("content-type", "application/octet-stream")
-            else:
-                content_str = await content_from_doc(doc)
-                file_data = content_str.encode("utf-8")
-                mime_type = doc.mime_type or "text/plain"
-
-            file_extension = mimetypes.guess_extension(mime_type) or ".txt"
-            filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}")
-
-            file_obj = io.BytesIO(file_data)
-            file_obj.name = filename
-
-            upload_file = UploadFile(file=file_obj, filename=filename)
-
-            created_file = await self.files_api.openai_upload_file(
-                file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
-            )
-
-            chunking_strategy = VectorStoreChunkingStrategyStatic(
-                static=VectorStoreChunkingStrategyStaticConfig(
-                    max_chunk_size_tokens=chunk_size_in_tokens,
-                    chunk_overlap_tokens=chunk_size_in_tokens // 4,
+            try:
+                try:
+                    file_data, mime_type = await raw_data_from_doc(doc)
+                except Exception as e:
+                    log.error(f"Failed to extract content from document {doc.document_id}: {e}")
+                    continue
+
+                file_extension = mimetypes.guess_extension(mime_type) or ".txt"
+                filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}")
+
+                file_obj = io.BytesIO(file_data)
+                file_obj.name = filename
+
+                upload_file = UploadFile(file=file_obj, filename=filename)
+
+                try:
+                    created_file = await self.files_api.openai_upload_file(
+                        file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
+                    )
+                except Exception as e:
+                    log.error(f"Failed to upload file for document {doc.document_id}: {e}")
+                    continue
+
+                chunking_strategy = VectorStoreChunkingStrategyStatic(
+                    static=VectorStoreChunkingStrategyStaticConfig(
+                        max_chunk_size_tokens=chunk_size_in_tokens,
+                        chunk_overlap_tokens=chunk_size_in_tokens // 4,
+                    )
                 )
-            )
 
-            await self.vector_io_api.openai_attach_file_to_vector_store(
-                vector_store_id=vector_db_id,
-                file_id=created_file.id,
-                attributes=doc.metadata,
-                chunking_strategy=chunking_strategy,
-            )
+                try:
+                    await self.vector_io_api.openai_attach_file_to_vector_store(
+                        vector_store_id=vector_db_id,
+                        file_id=created_file.id,
+                        attributes=doc.metadata,
+                        chunking_strategy=chunking_strategy,
+                    )
+                except Exception as e:
+                    log.error(
+                        f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
+                    )
+                    continue
+
+            except Exception as e:
+                log.error(f"Unexpected error processing document {doc.document_id}: {e}")
+                continue
 
     async def query(
         self,
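The restructured loop applies a skip-and-continue pattern at each stage: extract, upload, and attach are guarded separately, so one failing document is logged and skipped while the rest of the batch proceeds. A stripped-down sketch of the same pattern with generic, made-up names, independent of the Llama Stack types:

```python
import logging

log = logging.getLogger(__name__)


async def ingest_all(documents, extract, upload, attach):
    """Process each document through three stages; log and skip on failure."""
    for doc in documents:
        try:
            try:
                data = await extract(doc)
            except Exception as e:
                log.error(f"extract failed for {doc}: {e}")
                continue
            try:
                file_id = await upload(data)
            except Exception as e:
                log.error(f"upload failed for {doc}: {e}")
                continue
            try:
                await attach(file_id)
            except Exception as e:
                log.error(f"attach failed for {doc}: {e}")
                continue
        except Exception as e:
            # Outer guard catches anything the stage-level handlers missed.
            log.error(f"unexpected error for {doc}: {e}")
            continue
```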
@@ -274,7 +318,6 @@ async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvoc
         if query_config:
             query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
         else:
-            # handle someone passing an empty dict
             query_config = RAGQueryConfig()
 
         query = kwargs["query"]
@@ -285,6 +328,6 @@ async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvoc
         )
 
         return ToolInvocationResult(
-            content=result.content,
+            content=result.content or [],
             metadata=result.metadata,
         )
