Skip to content

Commit 01614e0

Browse files
committed
feature #257 [Store] Add PSR-6 support for vector store (Guikingone)
This PR was squashed before being merged into the main branch. Discussion ---------- [Store] Add PSR-6 support for vector store | Q | A | ------------- | --- | Bug fix? | no | New feature? | yes | Docs? | yes | Issues | None | License | MIT Hi 👋🏻 This PR aims to introduce a `CacheStore` based on PSR-6. It's heavily inspired by the `CacheStore` from the `MessageStore` layer. This store introduces a refactoring of the `InMemoryStore` so that both stores share common logic (as we can't rely on the `CacheItemPool` implementation). Thanks for the feedback. Commits ------- 3d82bf8 [Store] Add PSR-6 support for vector store
2 parents bf726dd + 3d82bf8 commit 01614e0

File tree

13 files changed

+494
-115
lines changed

13 files changed

+494
-115
lines changed

examples/rag/cache.php

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
 * (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Agent\Toolbox\AgentProcessor;
14+
use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
15+
use Symfony\AI\Agent\Toolbox\Toolbox;
16+
use Symfony\AI\Fixtures\Movies;
17+
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
18+
use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
19+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
20+
use Symfony\AI\Platform\Message\Message;
21+
use Symfony\AI\Platform\Message\MessageBag;
22+
use Symfony\AI\Store\CacheStore;
23+
use Symfony\AI\Store\Document\Metadata;
24+
use Symfony\AI\Store\Document\TextDocument;
25+
use Symfony\AI\Store\Document\Vectorizer;
26+
use Symfony\AI\Store\Indexer;
27+
use Symfony\Component\Cache\Adapter\ArrayAdapter;
28+
use Symfony\Component\Uid\Uuid;
29+
30+
require_once dirname(__DIR__).'/bootstrap.php';

// initialize the store: a PSR-6 backed vector store on top of an in-process array cache
$store = new CacheStore(new ArrayAdapter());

// create one text document per movie fixture
// ($documents is initialized up front so it is defined even when there are no movies)
$documents = [];
foreach (Movies::all() as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// create embeddings for documents and index them into the store
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$vectorizer = new Vectorizer($platform, $embeddings = new Embeddings());
$indexer = new Indexer($vectorizer, $store, logger());
$indexer->index($documents);

$model = new Gpt(Gpt::GPT_4O_MINI);

// expose the store to the agent through the SimilaritySearch tool
$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = new Toolbox([$similaritySearch], logger: logger());
$processor = new AgentProcessor($toolbox);
$agent = new Agent($platform, $model, [$processor], [$processor], logger());

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$result = $agent->call($messages);

echo $result->getContent().\PHP_EOL;

src/ai-bundle/config/options.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,15 @@
161161
->end()
162162
->end()
163163
->end()
164+
->arrayNode('cache')
165+
->normalizeKeys(false)
166+
->useAttributeAsKey('name')
167+
->arrayPrototype()
168+
->children()
169+
->scalarNode('service')->cannotBeEmpty()->defaultValue('cache.app')->end()
170+
->end()
171+
->end()
172+
->end()
164173
->arrayNode('chroma_db')
165174
->normalizeKeys(false)
166175
->useAttributeAsKey('name')

src/ai-bundle/src/AiBundle.php

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
use Symfony\AI\Store\Bridge\Qdrant\Store as QdrantStore;
4949
use Symfony\AI\Store\Bridge\SurrealDb\Store as SurrealDbStore;
5050
use Symfony\AI\Store\Bridge\Typesense\Store as TypesenseStore;
51+
use Symfony\AI\Store\CacheStore;
5152
use Symfony\AI\Store\Document\Vectorizer;
5253
use Symfony\AI\Store\Indexer;
5354
use Symfony\AI\Store\InMemoryStore;
@@ -471,6 +472,21 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde
471472
}
472473
}
473474

475+
if ('cache' === $type) {
476+
foreach ($stores as $name => $store) {
477+
$arguments = [
478+
new Reference($store['service']),
479+
];
480+
481+
$definition = new Definition(CacheStore::class);
482+
$definition
483+
->addTag('ai.store')
484+
->setArguments($arguments);
485+
486+
$container->setDefinition('ai.store.'.$type.'.'.$name, $definition);
487+
}
488+
}
489+
474490
if ('chroma_db' === $type) {
475491
foreach ($stores as $name => $store) {
476492
$definition = new Definition(ChromaDbStore::class);

src/ai-bundle/tests/DependencyInjection/AiBundleTest.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,11 @@ private function getFullConfig(): array
201201
'vector_field' => 'contentVector',
202202
],
203203
],
204+
'cache' => [
205+
'my_cache_store' => [
206+
'service' => 'cache.system',
207+
],
208+
],
204209
'chroma_db' => [
205210
'my_chroma_store' => [
206211
'collection' => 'my_collection',

src/store/CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ CHANGELOG
2828
- Orchestrates document processing pipeline
2929
- Accepts TextDocuments, vectorizes and stores in chunks
3030
- Configurable batch processing
31-
* Add `InMemoryStore` implementation with multiple distance algorithms:
31+
* Add `InMemoryStore` and `CacheStore` implementations with multiple distance algorithms:
3232
- Cosine similarity
3333
- Angular distance
3434
- Euclidean distance

src/store/composer.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@
4040
"mongodb/mongodb": "^1.21 || ^2.0",
4141
"phpstan/phpstan": "^2.0",
4242
"phpunit/phpunit": "^11.5",
43-
"probots-io/pinecone-php": "^1.0"
43+
"probots-io/pinecone-php": "^1.0",
44+
"symfony/cache": "^7.3"
4445
},
4546
"config": {
4647
"sort-packages": true

src/store/doc/index.rst

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,17 @@ You can find more advanced usage in combination with an Agent using the store fo
4343
* `Similarity Search with MongoDB (RAG)`_
4444
* `Similarity Search with Neo4j (RAG)`_
4545
* `Similarity Search with Pinecone (RAG)`_
46+
* `Similarity Search with PSR-6 Cache (RAG)`_
4647
* `Similarity Search with Qdrant (RAG)`_
4748
* `Similarity Search with SurrealDB (RAG)`_
4849
* `Similarity Search with Typesense (RAG)`_
4950

51+
.. note::
52+
53+
Both `InMemory` and `PSR-6 cache` vector stores will load all the data into the
54+
memory of the PHP process. They can be used only when the amount of data fits in the
55+
PHP memory limit, typically for testing.
56+
5057
Supported Stores
5158
----------------
5259

@@ -59,6 +66,7 @@ Supported Stores
5966
* `Neo4j`_
6067
* `Pinecone`_ (requires `probots-io/pinecone-php` as additional dependency)
6168
* `Postgres`_ (requires `ext-pdo`)
69+
* `PSR-6 Cache`_
6270
* `Qdrant`_
6371
* `SurrealDB`_
6472
* `Typesense`_
@@ -97,12 +105,13 @@ This leads to a store implementing two methods::
97105
.. _`Retrieval Augmented Generation`: https://de.wikipedia.org/wiki/Retrieval-Augmented_Generation
98106
.. _`Similarity Search with MariaDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/mariadb.php
99107
.. _`Similarity Search with MongoDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/mongodb.php
100-
.. _`Similarity Search with Pinecone (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/pinecone.php
101108
.. _`Similarity Search with Meilisearch (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/meilisearch.php
102-
.. _`Similarity Search with SurrealDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/surrealdb.php
103109
.. _`Similarity Search with memory storage (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/in-memory.php
104-
.. _`Similarity Search with Qdrant (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/qdrant.php
105110
.. _`Similarity Search with Neo4j (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/neo4j.php
111+
.. _`Similarity Search with Pinecone (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/pinecone.php
112+
.. _`Similarity Search with PSR-6 Cache (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/cache.php
113+
.. _`Similarity Search with Qdrant (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/qdrant.php
114+
.. _`Similarity Search with SurrealDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/surrealdb.php
106115
.. _`Similarity Search with Typesense (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/typesense.php
107116
.. _`Azure AI Search`: https://azure.microsoft.com/products/ai-services/ai-search
108117
.. _`Chroma`: https://www.trychroma.com/
@@ -117,3 +126,4 @@ This leads to a store implementing two methods::
117126
.. _`Neo4j`: https://neo4j.com/
118127
.. _`Typesense`: https://typesense.org/
119128
.. _`GitHub`: https://github.com/symfony/ai/issues/16
129+
.. _`PSR-6 Cache`: https://www.php-fig.org/psr/psr-6/

src/store/src/CacheStore.php

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
 * (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store;
13+
14+
use Psr\Cache\CacheItemPoolInterface;
15+
use Symfony\AI\Platform\Vector\Vector;
16+
use Symfony\AI\Store\Document\Metadata;
17+
use Symfony\AI\Store\Document\VectorDocument;
18+
use Symfony\AI\Store\Exception\RuntimeException;
19+
use Symfony\Component\Uid\Uuid;
20+
use Symfony\Contracts\Cache\CacheInterface;
21+
22+
/**
 * Vector store that keeps all documents inside a single PSR-6 cache item.
 *
 * Every document is stored as a plain array (id, vector, metadata) in one list
 * saved under the configured cache key; queries load that whole list and let
 * the DistanceCalculator rank it against the query vector.
 *
 * @author Guillaume Loulier <[email protected]>
 */
final readonly class CacheStore implements VectorStoreInterface
{
    /**
     * @param CacheInterface&CacheItemPoolInterface $cache              Cache pool holding the stored documents
     * @param DistanceCalculator                    $distanceCalculator Ranks stored documents against a query vector
     * @param string                                $cacheKey           Key of the single cache item that contains every document
     *
     * @throws RuntimeException When the PSR-6 pool interface is not available
     */
    public function __construct(
        private CacheInterface&CacheItemPoolInterface $cache,
        private DistanceCalculator $distanceCalculator = new DistanceCalculator(),
        private string $cacheKey = '_vectors',
    ) {
        // NOTE(review): the $cache parameter type already references this interface,
        // so this guard may never be reachable in practice - confirm before relying on it.
        if (!interface_exists(CacheItemPoolInterface::class)) {
            throw new RuntimeException('For using the CacheStore as vector store, a PSR-6 cache implementation is required. Try running "composer require symfony/cache" or another PSR-6 compatible cache.');
        }
    }

    /**
     * Appends the given documents to the list stored under the cache key.
     *
     * This is a plain read-modify-write of one cache item; the item is saved
     * even when no document is passed.
     */
    public function add(VectorDocument ...$documents): void
    {
        // Fetch the item once and use it for both the read and the write. This avoids a
        // second lookup and avoids persisting an intermediate empty list on a cache miss.
        $cacheItem = $this->cache->getItem($this->cacheKey);

        // A missing item yields null, which is treated as "nothing stored yet" (same as query()).
        $existingVectors = $cacheItem->get() ?? [];

        // Documents are flattened to scalars/arrays before being handed to the cache.
        $newVectors = array_map(static fn (VectorDocument $document): array => [
            'id' => $document->id->toRfc4122(),
            'vector' => $document->vector->getData(),
            'metadata' => $document->metadata->getArrayCopy(),
        ], $documents);

        $cacheItem->set([
            ...$existingVectors,
            ...$newVectors,
        ]);

        $this->cache->save($cacheItem);
    }

    /**
     * Rebuilds every stored document and returns them ranked by the distance calculator.
     *
     * @param array{
     *     maxItems?: positive-int
     * } $options If maxItems is provided, only the top N results will be returned
     *
     * @return VectorDocument[]
     */
    public function query(Vector $vector, array $options = []): array
    {
        // null (nothing stored under the key) is treated as an empty store
        $documents = $this->cache->getItem($this->cacheKey)->get() ?? [];

        $vectorDocuments = array_map(static fn (array $document): VectorDocument => new VectorDocument(
            id: Uuid::fromString($document['id']),
            vector: new Vector($document['vector']),
            metadata: new Metadata($document['metadata']),
        ), $documents);

        return $this->distanceCalculator->calculate($vectorDocuments, $vector, $options['maxItems'] ?? null);
    }
}

src/store/src/DistanceCalculator.php

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
 * (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store;
13+
14+
use Symfony\AI\Platform\Vector\Vector;
15+
use Symfony\AI\Store\Document\VectorDocument;
16+
17+
/**
 * Ranks vector documents by their distance to a query vector, using the
 * distance strategy chosen at construction time.
 *
 * @author Guillaume Loulier <[email protected]>
 */
final readonly class DistanceCalculator
{
    /**
     * @param DistanceStrategy $strategy Distance function used to compare vectors
     */
    public function __construct(
        private DistanceStrategy $strategy = DistanceStrategy::COSINE_DISTANCE,
    ) {
    }

    /**
     * Orders the documents by ascending distance to the given vector.
     *
     * @param VectorDocument[] $documents
     * @param ?int             $maxItems  If maxItems is provided, only the top N results will be returned
     *
     * @return VectorDocument[] Documents sorted from smallest to largest distance
     */
    public function calculate(array $documents, Vector $vector, ?int $maxItems = null): array
    {
        $strategy = match ($this->strategy) {
            DistanceStrategy::COSINE_DISTANCE => $this->cosineDistance(...),
            DistanceStrategy::ANGULAR_DISTANCE => $this->angularDistance(...),
            DistanceStrategy::EUCLIDEAN_DISTANCE => $this->euclideanDistance(...),
            DistanceStrategy::MANHATTAN_DISTANCE => $this->manhattanDistance(...),
            DistanceStrategy::CHEBYSHEV_DISTANCE => $this->chebyshevDistance(...),
        };

        // Pair each document with its distance so the distance is computed once per document.
        $currentEmbeddings = array_map(
            static fn (VectorDocument $vectorDocument): array => [
                'distance' => $strategy($vectorDocument, $vector),
                'document' => $vectorDocument,
            ],
            $documents,
        );

        usort(
            $currentEmbeddings,
            static fn (array $embedding, array $nextEmbedding): int => $embedding['distance'] <=> $nextEmbedding['distance'],
        );

        if (null !== $maxItems && $maxItems < \count($currentEmbeddings)) {
            $currentEmbeddings = \array_slice($currentEmbeddings, 0, $maxItems);
        }

        return array_map(
            static fn (array $embedding): VectorDocument => $embedding['document'],
            $currentEmbeddings,
        );
    }

    /**
     * Cosine distance, i.e. one minus the cosine similarity.
     */
    private function cosineDistance(VectorDocument $embedding, Vector $against): float
    {
        return 1 - $this->cosineSimilarity($embedding, $against);
    }

    /**
     * Cosine similarity between the document vector and the query vector.
     *
     * The result is always a finite value in [-1, 1]: a zero-magnitude vector
     * yields 0.0 instead of NAN (0/0), and floating-point overshoot is clamped
     * so that acos() in angularDistance() never receives an out-of-range value.
     */
    private function cosineSimilarity(VectorDocument $embedding, Vector $against): float
    {
        $currentEmbeddingVectors = $embedding->vector->getData();
        $againstVectors = $against->getData();

        $dotProduct = array_sum(array_map(
            static fn (float $a, float $b): float => $a * $b,
            $currentEmbeddingVectors,
            $againstVectors,
        ));

        // Euclidean length of a list of components.
        $length = static fn (array $values): float => sqrt(array_sum(array_map(
            static fn (float $value): float => $value ** 2,
            $values,
        )));

        $denominator = $length($currentEmbeddingVectors) * $length($againstVectors);

        // The similarity is undefined when either vector has no magnitude; report
        // "no similarity" rather than letting NAN leak into the sort comparator.
        if (0.0 === $denominator) {
            return 0.0;
        }

        // Rounding can push the ratio marginally outside [-1, 1] for (anti)parallel vectors.
        return max(-1.0, min(1.0, $dotProduct / $denominator));
    }

    /**
     * Angle between the two vectors, divided by pi.
     */
    private function angularDistance(VectorDocument $embedding, Vector $against): float
    {
        $cosineSimilarity = $this->cosineSimilarity($embedding, $against);

        return fdiv(acos($cosineSimilarity), \M_PI);
    }

    /**
     * Square root of the summed squared component differences.
     */
    private function euclideanDistance(VectorDocument $embedding, Vector $against): float
    {
        return sqrt(array_sum(array_map(
            static fn (float $a, float $b): float => ($a - $b) ** 2,
            $embedding->vector->getData(),
            $against->getData(),
        )));
    }

    /**
     * Sum of the absolute component differences.
     */
    private function manhattanDistance(VectorDocument $embedding, Vector $against): float
    {
        return array_sum(array_map(
            static fn (float $a, float $b): float => abs($a - $b),
            $embedding->vector->getData(),
            $against->getData(),
        ));
    }

    /**
     * Largest absolute component difference (0.0 when there are no components).
     */
    private function chebyshevDistance(VectorDocument $embedding, Vector $against): float
    {
        $absoluteDifferences = array_map(
            static fn (float $currentValue, float $againstValue): float => abs($currentValue - $againstValue),
            $embedding->vector->getData(),
            $against->getData(),
        );

        // max() rejects an empty array, so keep the 0.0 result for empty vectors explicit.
        return [] === $absoluteDifferences ? 0.0 : max($absoluteDifferences);
    }
}

0 commit comments

Comments
 (0)