Skip to content

Commit f4c08d6

Browse files
committed
feature #398 [Store] Add Weaviate (Guikingone)
This PR was merged into the main branch. Discussion ---------- [Store] Add Weaviate | Q | A | ------------- | --- | Bug fix? | no | New feature? | yes | Docs? | yes | Issues | None | License | MIT Hi 👋🏻 This PR aims to introduce the support for `Weaviate` as a store. Commits ------- 7c64af3 feat(store): Weaviate support
2 parents 5ba97a6 + 7c64af3 commit f4c08d6

File tree

10 files changed

+526
-1
lines changed

10 files changed

+526
-1
lines changed

examples/.env

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,7 @@ CHROMADB_PORT=8001
128128
CLICKHOUSE_HOST=http://symfony:[email protected]:8123
129129
CLICKHOUSE_DATABASE=symfony
130130
CLICKHOUSE_TABLE=symfony
131+
132+
# Weaviate (store)
133+
WEAVIATE_HOST=http://127.0.0.1:8080
134+
WEAVIATE_API_KEY=symfony

examples/compose.yaml

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ services:
6868
MINIO_SECRET_KEY: minioadmin
6969
ports:
7070
- '9001:9001'
71-
- '9000:9000'
7271
volumes:
7372
- minio_vlm:/minio_data
7473
command: minio server /minio_data --console-address ":9001"
@@ -145,9 +144,29 @@ services:
145144
ports:
146145
- '8108:8108'
147146

147+
weaviate:
148+
image: cr.weaviate.io/semitechnologies/weaviate:1.32.4
149+
command: ['--host', '0.0.0.0', '--port', '8080', '--scheme', 'http']
150+
environment:
151+
QUERY_DEFAULTS_LIMIT: 25
152+
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'false'
153+
AUTHENTICATION_APIKEY_ENABLED: 'true'
154+
AUTHENTICATION_APIKEY_ALLOWED_KEYS: 'symfony'
155+
AUTHENTICATION_APIKEY_USERS: 'symfony'
156+
PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
157+
ENABLE_API_BASED_MODULES: 'true'
158+
CLUSTER_HOSTNAME: 'node1'
159+
RAFT_ENABLE_ONE_NODE_RECOVERY: 'true' # See https://github.com/weaviate/weaviate/issues/5491#issuecomment-2416929309
160+
volumes:
161+
- weaviate_data:/var/lib/weaviate
162+
ports:
163+
- '8080:8080'
164+
- '50051:50051'
165+
148166
volumes:
149167
typesense_data:
150168
etcd_vlm:
151169
minio_vlm:
152170
milvus_vlm:
153171
chroma_vlm:
172+
weaviate_data:

examples/rag/weaviate.php

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Agent\Toolbox\AgentProcessor;
14+
use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
15+
use Symfony\AI\Agent\Toolbox\Toolbox;
16+
use Symfony\AI\Fixtures\Movies;
17+
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
18+
use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
19+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
20+
use Symfony\AI\Platform\Message\Message;
21+
use Symfony\AI\Platform\Message\MessageBag;
22+
use Symfony\AI\Store\Bridge\Weaviate\Store;
23+
use Symfony\AI\Store\Document\Metadata;
24+
use Symfony\AI\Store\Document\TextDocument;
25+
use Symfony\AI\Store\Document\Vectorizer;
26+
use Symfony\AI\Store\Indexer;
27+
use Symfony\Component\Uid\Uuid;
28+
29+
require_once dirname(__DIR__).'/bootstrap.php';

// Connect to Weaviate and create the "Movies" collection schema.
$store = new Store(
    httpClient: http_client(),
    endpointUrl: env('WEAVIATE_HOST'),
    apiKey: env('WEAVIATE_API_KEY'),
    collection: 'Movies',
);
$store->setup();

// Turn every fixture movie into a text document; the raw movie array is kept as metadata.
$documents = [];
foreach (Movies::all() as $movie) {
    $content = implode(\PHP_EOL, [
        'Title: '.$movie['title'],
        'Director: '.$movie['director'],
        'Description: '.$movie['description'],
    ]);

    $documents[] = new TextDocument(id: Uuid::v4(), content: $content, metadata: new Metadata($movie));
}

// Vectorize the documents with the OpenAI embeddings model and push them into the store.
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$embeddings = new Embeddings();
$indexer = new Indexer(new Vectorizer($platform, $embeddings), $store, logger());
$indexer->index($documents);

// Wire an agent whose only tool is a similarity search backed by the same store.
$processor = new AgentProcessor(
    new Toolbox([new SimilaritySearch($platform, $embeddings, $store)], logger: logger()),
);
$agent = new Agent($platform, new Gpt(Gpt::GPT_4O_MINI), [$processor], [$processor], logger());

// Ask a question that has to be answered through the similarity search tool.
$result = $agent->call(new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of technology?'),
));

echo $result->getContent().\PHP_EOL;

src/ai-bundle/config/options.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,16 @@
337337
->end()
338338
->end()
339339
->end()
340+
->arrayNode('weaviate')
341+
->useAttributeAsKey('name')
342+
->arrayPrototype()
343+
->children()
344+
->scalarNode('endpoint')->cannotBeEmpty()->end()
345+
->scalarNode('api_key')->isRequired()->end()
346+
->scalarNode('collection')->isRequired()->end()
347+
->end()
348+
->end()
349+
->end()
340350
->end()
341351
->end()
342352
->arrayNode('indexer')

src/ai-bundle/src/AiBundle.php

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
use Symfony\AI\Store\Bridge\Qdrant\Store as QdrantStore;
6363
use Symfony\AI\Store\Bridge\SurrealDb\Store as SurrealDbStore;
6464
use Symfony\AI\Store\Bridge\Typesense\Store as TypesenseStore;
65+
use Symfony\AI\Store\Bridge\Weaviate\Store as WeaviateStore;
6566
use Symfony\AI\Store\Document\Vectorizer;
6667
use Symfony\AI\Store\Indexer;
6768
use Symfony\AI\Store\StoreInterface;
@@ -946,6 +947,24 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde
946947
$container->setDefinition('ai.store.'.$type.'.'.$name, $definition);
947948
}
948949
}
950+
951+
if ('weaviate' === $type) {
952+
foreach ($stores as $name => $store) {
953+
$arguments = [
954+
new Reference('http_client'),
955+
$store['endpoint'],
956+
$store['api_key'],
957+
$store['collection'],
958+
];
959+
960+
$definition = new Definition(WeaviateStore::class);
961+
$definition
962+
->addTag('ai.store')
963+
->setArguments($arguments);
964+
965+
$container->setDefinition('ai.store.'.$type.'.'.$name, $definition);
966+
}
967+
}
949968
}
950969

951970
/**

src/ai-bundle/tests/DependencyInjection/AiBundleTest.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,13 @@ private function getFullConfig(): array
528528
'dimensions' => 768,
529529
],
530530
],
531+
'weaviate' => [
532+
'my_weaviate_store' => [
533+
'endpoint' => 'http://localhost:8080',
534+
'api_key' => 'bar',
535+
'collection' => 'my_weaviate_collection',
536+
],
537+
],
531538
],
532539
'indexer' => [
533540
'my_text_indexer' => [

src/store/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ CHANGELOG
4646
- Qdrant
4747
- SurrealDB
4848
- Typesense
49+
- Weaviate
4950
* Add Retrieval Augmented Generation (RAG) support:
5051
- Document embedding storage
5152
- Similarity search for relevant documents

src/store/doc/index.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ You can find more advanced usage in combination with an Agent using the store fo
4848
* `Similarity Search with SurrealDB (RAG)`_
4949
* `Similarity Search with Symfony Cache (RAG)`_
5050
* `Similarity Search with Typesense (RAG)`_
51+
* `Similarity Search with Weaviate (RAG)`_
5152

5253
.. note::
5354

@@ -72,6 +73,7 @@ Supported Stores
7273
* `SurrealDB`_
7374
* `Symfony Cache`_
7475
* `Typesense`_
76+
* `Weaviate`_
7577

7678
.. note::
7779

@@ -113,6 +115,7 @@ This leads to a store implementing two methods::
113115
.. _`Similarity Search with Qdrant (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/qdrant.php
114116
.. _`Similarity Search with SurrealDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/surrealdb.php
115117
.. _`Similarity Search with Typesense (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/typesense.php
118+
.. _`Similarity Search with Weaviate (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/weaviate.php
116119
.. _`Azure AI Search`: https://azure.microsoft.com/products/ai-services/ai-search
117120
.. _`Chroma`: https://www.trychroma.com/
118121
.. _`MariaDB`: https://mariadb.org/projects/mariadb-vector/
@@ -128,3 +131,4 @@ This leads to a store implementing two methods::
128131
.. _`Typesense`: https://typesense.org/
129132
.. _`GitHub`: https://github.com/symfony/ai/issues/16
130133
.. _`Symfony Cache`: https://symfony.com/doc/current/components/cache.html
134+
.. _`Weaviate`: https://weaviate.io/
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Bridge\Weaviate;
13+
14+
use Symfony\AI\Platform\Vector\NullVector;
15+
use Symfony\AI\Platform\Vector\Vector;
16+
use Symfony\AI\Store\Document\Metadata;
17+
use Symfony\AI\Store\Document\VectorDocument;
18+
use Symfony\AI\Store\Exception\InvalidArgumentException;
19+
use Symfony\AI\Store\ManagedStoreInterface;
20+
use Symfony\AI\Store\StoreInterface;
21+
use Symfony\Component\Uid\Uuid;
22+
use Symfony\Contracts\HttpClient\HttpClientInterface;
23+
24+
/**
25+
* @author Guillaume Loulier <[email protected]>
26+
*/
27+
final readonly class Store implements ManagedStoreInterface, StoreInterface
{
    /**
     * @param HttpClientInterface $httpClient  Client used for every call to the Weaviate HTTP API
     * @param string              $endpointUrl Base URL of the Weaviate instance (a trailing slash is tolerated)
     * @param string              $apiKey      API key sent as a bearer token with every request
     * @param string              $collection  Name of the Weaviate class (collection) this store operates on
     */
    public function __construct(
        private HttpClientInterface $httpClient,
        private string $endpointUrl,
        #[\SensitiveParameter] private string $apiKey,
        private string $collection,
    ) {
    }

    /**
     * Creates the collection by posting its class name to the schema endpoint.
     *
     * @param array<string, mixed> $options Must be empty, this store has no setup options
     *
     * @throws InvalidArgumentException When any option is given
     */
    public function setup(array $options = []): void
    {
        if ([] !== $options) {
            throw new InvalidArgumentException('No supported options.');
        }

        $this->request('POST', 'v1/schema', [
            'class' => $this->collection,
        ]);
    }

    /**
     * Sends all given documents to the batch objects endpoint in a single request.
     */
    public function add(VectorDocument ...$documents): void
    {
        $this->request('POST', 'v1/batch/objects', [
            'fields' => [
                'ALL',
            ],
            'objects' => array_map($this->convertToIndexableArray(...), $documents),
        ]);
    }

    /**
     * Runs a GraphQL "nearVector" search against the collection.
     *
     * @param array<string, mixed> $options Currently not used by this store
     *
     * @return VectorDocument[]
     */
    public function query(Vector $vector, array $options = []): array
    {
        $results = $this->request('POST', 'v1/graphql', [
            'query' => \sprintf('{
                Get {
                    %s (
                        nearVector: {
                            vector: [%s]
                        }
                    ) {
                        uuid,
                        vector,
                        _metadata
                    }
                }
            }', $this->collection, implode(', ', $vector->getData())),
        ]);

        return array_map($this->convertToVectorDocument(...), $results['data']['Get'][$this->collection]);
    }

    /**
     * Deletes the collection through the schema endpoint.
     */
    public function drop(): void
    {
        $this->request('DELETE', \sprintf('v1/schema/%s', $this->collection), []);
    }

    /**
     * Sends an authenticated request and returns the decoded response body.
     *
     * @param array<string, mixed> $payload JSON body; omitted from the request when empty
     *
     * @return array<string, mixed>
     */
    private function request(string $method, string $endpoint, array $payload): array
    {
        // Normalize both sides so that an endpoint URL configured with a trailing slash
        // does not produce a "host//path" URL.
        $url = \sprintf('%s/%s', rtrim($this->endpointUrl, '/'), ltrim($endpoint, '/'));

        $finalPayload = [
            'auth_bearer' => $this->apiKey,
        ];

        if ([] !== $payload) {
            $finalPayload['json'] = $payload;
        }

        $result = $this->httpClient->request($method, $url, $finalPayload);

        return $result->toArray();
    }

    /**
     * Maps a document to the object structure sent to the batch endpoint.
     *
     * The id and vector are also stored as regular properties ("uuid" and "vector")
     * because query() selects them back as properties.
     *
     * @return array<string, mixed>
     *
     * @throws \JsonException When the metadata cannot be encoded as JSON
     */
    private function convertToIndexableArray(VectorDocument $document): array
    {
        $id = $document->id->toRfc4122();
        $vector = $document->vector->getData();

        return [
            'class' => $this->collection,
            'id' => $id,
            'vector' => $vector,
            'properties' => [
                'uuid' => $id,
                'vector' => $vector,
                // Fail loudly instead of silently storing "false" when encoding fails.
                '_metadata' => json_encode($document->metadata->getArrayCopy(), \JSON_THROW_ON_ERROR),
            ],
        ];
    }

    /**
     * Rebuilds a document from one entry of the GraphQL result.
     *
     * @param array<string, mixed> $data
     *
     * @throws InvalidArgumentException When the entry has no "uuid" field
     * @throws \JsonException           When the stored metadata is not valid JSON
     */
    private function convertToVectorDocument(array $data): VectorDocument
    {
        // The identifier is read from the "uuid" property, so the message names that field.
        $id = $data['uuid'] ?? throw new InvalidArgumentException('Missing "uuid" field in the document data.');

        $vector = isset($data['vector']) ? new Vector($data['vector']) : new NullVector();

        // Entries without metadata, or with a non-array payload, yield empty metadata
        // rather than passing null to the Metadata constructor.
        $metadata = [];
        if (isset($data['_metadata'])) {
            $decoded = json_decode($data['_metadata'], true, flags: \JSON_THROW_ON_ERROR);
            $metadata = \is_array($decoded) ? $decoded : [];
        }

        return new VectorDocument(Uuid::fromString($id), $vector, new Metadata($metadata));
    }
}

0 commit comments

Comments
 (0)