Skip to content

Commit 1c6bb8d

Browse files
ds-juliamayds-bogdan-banasiakmhordynskiKonrad Czarnotads-sebastianchwilczynski
authored
feat: integrate weaviate (#581)
Co-authored-by: Bogdan Banasiak <[email protected]> Co-authored-by: Mateusz Hordyński <[email protected]> Co-authored-by: Konrad Czarnota <[email protected]> Co-authored-by: ds-sebastianchwilczynski <[email protected]>
1 parent 968da61 commit 1c6bb8d

File tree

16 files changed

+4480
-2956
lines changed

16 files changed

+4480
-2956
lines changed

.github/workflows/shared-packages.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,22 @@ jobs:
144144
--health-interval 10s
145145
--health-timeout 5s
146146
--health-retries 5
147+
weaviate:
148+
image: cr.weaviate.io/semitechnologies/weaviate:1.30.6
149+
env:
150+
QUERY_DEFAULTS_LIMIT: 25
151+
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: true
152+
PERSISTENCE_DATA_PATH: /var/lib/weaviate
153+
ENABLE_API_BASED_MODULES: true
154+
CLUSTER_HOSTNAME: node1
155+
ports:
156+
- 8080:8080
157+
- 50051:50051
158+
options: >-
159+
--health-cmd "wget --no-verbose --tries=1 --spider http://localhost:8080/v1/.well-known/ready || exit 1"
160+
--health-interval 10s
161+
--health-timeout 5s
162+
--health-retries 5
147163
148164
steps:
149165
- uses: actions/checkout@v4

.libraries-whitelist.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,6 @@ mirakuru
1010
psycopg
1111
pytest-postgresql
1212
python-bidi
13+
psycopg-binary
14+
psycopg-pool
15+
griffe

docs/api_reference/core/vector-stores.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,6 @@
1212

1313
::: ragbits.core.vector_stores.qdrant.QdrantVectorStore
1414

15-
::: ragbits.core.vector_stores.pgvector.PgVectorStore
15+
::: ragbits.core.vector_stores.pgvector.PgVectorStore
16+
17+
::: ragbits.core.vector_stores.weaviate.WeaviateVectorStore
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
"""
2+
Ragbits Document Search Example: Weaviate
3+
4+
This example demonstrates how to use the `DocumentSearch` class to search for documents with a more advanced setup.
5+
We will use the `LiteLLMEmbedder` class to embed the documents and the query, and the `WeaviateVectorStore` class to
6+
store the embeddings.
7+
8+
The script performs the following steps:
9+
10+
1. Create a list of documents.
11+
2. Initialize the `LiteLLMEmbedder` class with the OpenAI `text-embedding-3-small` embedding model.
12+
3. Initialize the `WeaviateVectorStore` class with a `WeaviateAsyncClient` local instance and an index name.
13+
4. Initialize the `DocumentSearch` class with the embedder and the vector store.
14+
5. Ingest the documents into the `DocumentSearch` instance.
15+
6. List all documents in the vector store.
16+
7. Search for documents using a query.
17+
8. Print the list of all documents and the search results.
18+
19+
To run the script, execute the following command:
20+
21+
```bash
22+
uv run examples/document-search/weaviate_db.py
23+
```
24+
25+
Requires a local Weaviate instance to be running; instructions on how to set it up can be found here: https://weaviate.io/developers/weaviate/quickstart/local
26+
"""
27+
28+
# /// script
29+
# requires-python = ">=3.10"
30+
# dependencies = [
31+
# "ragbits-document-search",
32+
# "ragbits-core[weaviate]",
33+
# ]
34+
# ///
35+
36+
import asyncio
37+
38+
import weaviate
39+
40+
from ragbits.core.audit import set_trace_handlers
41+
from ragbits.core.embeddings.dense import LiteLLMEmbedder
42+
from ragbits.core.vector_stores.base import VectorStoreOptions
43+
from ragbits.core.vector_stores.weaviate import WeaviateVectorStore
44+
from ragbits.document_search import DocumentSearch, DocumentSearchOptions
45+
from ragbits.document_search.documents.document import DocumentMeta
46+
47+
set_trace_handlers("cli")
48+
49+
documents = [
50+
DocumentMeta.from_literal(
51+
"""
52+
RIP boiled water. You will be mist.
53+
"""
54+
),
55+
DocumentMeta.from_literal(
56+
"""
57+
Why doesn't James Bond fart in bed? Because it would blow his cover.
58+
"""
59+
),
60+
DocumentMeta.from_literal(
61+
"""
62+
Why programmers don't like to swim? Because they're scared of the floating points.
63+
"""
64+
),
65+
DocumentMeta.from_literal(
66+
"""
67+
This one is completely unrelated.
68+
"""
69+
),
70+
]
71+
72+
73+
async def main() -> None:
    """
    Ingest the sample documents into Weaviate, list them, and run one similarity search.

    The function builds a `WeaviateVectorStore` on the "jokes" index, wraps it in a
    `DocumentSearch`, ingests the module-level `documents`, prints the stored contents,
    and finally prints the elements returned for a single query.
    """
    # The client is created before the embedder, matching the original construction order.
    store = WeaviateVectorStore(
        client=weaviate.use_async_with_local(),
        index_name="jokes",
        embedder=LiteLLMEmbedder(model_name="text-embedding-3-small"),
    )
    search_engine = DocumentSearch(vector_store=store)

    await search_engine.ingest(documents)

    stored_entries = await store.list()
    stored_contents = [entry.metadata["content"] for entry in stored_entries]

    print()
    print("All documents:")
    print(stored_contents)

    query = "I'm boiling my water and I need a joke"
    # Ask the store for at most two hits, using a score threshold of 0.6.
    search_options = DocumentSearchOptions(
        vector_store_options=VectorStoreOptions(k=2, score_threshold=0.6),
    )
    matches = await search_engine.search(query, options=search_options)
    matched_texts = [match.text_representation for match in matches]

    print()
    print(f"Documents similar to: {query}")
    print(matched_texts)
108+
109+
if __name__ == "__main__":
110+
asyncio.run(main())

packages/ragbits-core/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## Unreleased
44

5+
- Integrate Weaviate vector store (#347)
56
- Remove numpy dependency (#666)
67
- Fix typing in LLM generate_streaming (#628)
78
- Fix typing in LLM generate (#568)

packages/ragbits-core/pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ dependencies = [
3939
"litellm>=1.55.0,<2.0.0",
4040
"aiohttp>=3.10.8,<4.0.0",
4141
"filetype>=1.2.0,<2.0.0",
42+
"griffe>=1.7.3,<2.0.0"
4243
]
4344

4445
[project.urls]
@@ -94,6 +95,9 @@ hf = [
9495
s3 = [
9596
"boto3>=1.35.42,<2.0.0",
9697
]
98+
weaviate = [
99+
"weaviate-client>=4.15.4,<5.0.0",
100+
]
97101

98102
[tool.uv]
99103
dev-dependencies = [

packages/ragbits-core/src/ragbits/core/llms/litellm.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -337,18 +337,28 @@ async def _get_litellm_response(
337337
) -> ModelResponse | CustomStreamWrapper:
338338
entrypoint = self.router or litellm
339339

340+
# Prepare kwargs for the completion call
341+
completion_kwargs = {
342+
"messages": conversation,
343+
"model": self.model_name,
344+
"response_format": response_format,
345+
"tools": tools,
346+
"stream": stream,
347+
**options.dict(),
348+
}
349+
350+
# Only add these parameters if we're not using a router
351+
# Router instances have these configured at initialization time
352+
if self.router is None:
353+
if self.api_base is not None:
354+
completion_kwargs["base_url"] = self.api_base
355+
if self.api_key is not None:
356+
completion_kwargs["api_key"] = self.api_key
357+
if self.api_version is not None:
358+
completion_kwargs["api_version"] = self.api_version
359+
340360
try:
341-
response = await entrypoint.acompletion(
342-
messages=conversation,
343-
model=self.model_name,
344-
base_url=self.api_base,
345-
api_key=self.api_key,
346-
api_version=self.api_version,
347-
response_format=response_format,
348-
tools=tools,
349-
stream=stream,
350-
**options.dict(),
351-
)
361+
response = await entrypoint.acompletion(**completion_kwargs)
352362
except litellm.openai.APIConnectionError as exc:
353363
raise LLMConnectionError() from exc
354364
except litellm.openai.APIStatusError as exc:

packages/ragbits-core/src/ragbits/core/vector_stores/base.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,20 @@ class VectorStoreEntry(BaseModel):
2727
image_bytes: SerializableBytes | None = None
2828
metadata: dict = {}
2929

30+
@pydantic.model_validator(mode="after")
def validate_metadata_serializable(self) -> Self:
    """
    Check that the entry can be serialized to JSON.

    The check dumps the whole model with `model_dump_json()`, so a failure in any
    field (not only `metadata`) is reported through the same error message.

    Returns:
        The validated entry itself.

    Raises:
        ValueError: If dumping the model to JSON raises any exception.
    """
    try:
        self.model_dump_json()
    except Exception as exc:
        # Keep the original exception chained so the root cause stays visible.
        reason = str(exc)
        raise ValueError(f"Metadata must be JSON serializable. Error: {reason}") from exc
    else:
        return self
43+
3044
@pydantic.model_validator(mode="after")
3145
def text_or_image_required(self) -> Self:
3246
"""

0 commit comments

Comments
 (0)