Skip to content

Commit 54708b0

Browse files
committed
feature #109 [Store] Add InMemory (Guikingone)
This PR was squashed before being merged into the main branch. Discussion ---------- [Store] Add InMemory | Q | A | ------------- | --- | Bug fix? | no | New feature? | yes | Docs? | yes | Issues | None | License | MIT Hi 👋🏻 This PR aims to introduce an `InMemoryStore` to store vectors (mostly used for test environments and/or POCs). The implementation is based on `cosine` similarity (other search algorithms can be used but will require a dedicated interface like `SearchStrategyInterface` or similar). This store also allows retrieving only a specific number of items if required. This store doesn't use `initialize` as everything is stored in an array. PS: An optimized approach would be to use `fpow`, but it requires PHP `8.4`, so ... Well, maybe for later 😄 Commits ------- 734e2ba [Store] Add InMemory
2 parents 8b5514a + 734e2ba commit 54708b0

File tree

4 files changed

+337
-0
lines changed

4 files changed

+337
-0
lines changed
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Agent\Toolbox\AgentProcessor;
14+
use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
15+
use Symfony\AI\Agent\Toolbox\Toolbox;
16+
use Symfony\AI\Platform\Bridge\OpenAI\Embeddings;
17+
use Symfony\AI\Platform\Bridge\OpenAI\GPT;
18+
use Symfony\AI\Platform\Bridge\OpenAI\PlatformFactory;
19+
use Symfony\AI\Platform\Message\Message;
20+
use Symfony\AI\Platform\Message\MessageBag;
21+
use Symfony\AI\Store\Document\Metadata;
22+
use Symfony\AI\Store\Document\TextDocument;
23+
use Symfony\AI\Store\Document\Vectorizer;
24+
use Symfony\AI\Store\Indexer;
25+
use Symfony\AI\Store\InMemoryStore;
26+
use Symfony\Component\Dotenv\Dotenv;
27+
use Symfony\Component\Uid\Uuid;
28+
29+
require_once dirname(__DIR__).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__).'/.env');

// the OpenAI platform is created from this key further down, so stop early when it is missing
if (!isset($_SERVER['OPENAI_API_KEY'])) {
    echo 'Please set OPENAI_API_KEY environment variable.'.\PHP_EOL;
    exit(1);
}

// initialize the store
$store = new InMemoryStore();

// our data
$movies = [
    ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'director' => 'Christopher Nolan'],
    ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'director' => 'The Wachowskis'],
    ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'director' => 'Francis Ford Coppola'],
];

// turn every movie into a text document; the raw movie array is attached as metadata
$documents = [];
foreach ($movies as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// create embeddings for documents and index them into the store
$platform = PlatformFactory::create($_SERVER['OPENAI_API_KEY']);
$vectorizer = new Vectorizer($platform, $embeddings = new Embeddings());
$indexer = new Indexer($vectorizer, $store);
$indexer->index($documents);

$model = new GPT(GPT::GPT_4O_MINI);

// expose the store to the agent through the similarity search tool
$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = Toolbox::create($similaritySearch);
$processor = new AgentProcessor($toolbox);
$agent = new Agent($platform, $model, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$response = $agent->call($messages);

echo $response->getContent().\PHP_EOL;

src/store/doc/index.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ You can find more advanced usage in combination with an Agent using the store fo
4141
* `Similarity Search with MongoDB (RAG)`_
4242
* `Similarity Search with Pinecone (RAG)`_
4343
* `Similarity Search with Meilisearch (RAG)`_
44+
* `Similarity Search with memory storage (RAG)`_
4445

4546
Supported Stores
4647
----------------
@@ -52,6 +53,7 @@ Supported Stores
5253
* `Pinecone`_ (requires `probots-io/pinecone-php` as additional dependency)
5354
* `Postgres`_ (requires `ext-pdo`)
5455
* `Meilisearch`_
56+
* `InMemory`_
5557

5658
.. note::
5759

@@ -89,11 +91,13 @@ This leads to a store implementing two methods::
8991
.. _`Similarity Search with MongoDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/mongodb-similarity-search.php
9092
.. _`Similarity Search with Pinecone (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/pinecone-similarity-search.php
9193
.. _`Similarity Search with Meilisearch (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/meilisearch-similarity-search.php
94+
.. _`Similarity Search with memory storage (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/memory-similarity-search.php
9295
.. _`Azure AI Search`: https://azure.microsoft.com/products/ai-services/ai-search
9396
.. _`Chroma`: https://www.trychroma.com/
9497
.. _`MariaDB`: https://mariadb.org/projects/mariadb-vector/
9598
.. _`MongoDB Atlas`: https://www.mongodb.com/atlas
9699
.. _`Pinecone`: https://www.pinecone.io/
97100
.. _`Postgres`: https://www.postgresql.org/about/news/pgvector-070-released-2852/
98101
.. _`Meilisearch`: https://www.meilisearch.com/
102+
.. _`InMemory`: https://www.php.net/manual/en/language.types.array.php
99103
.. _`GitHub`: https://github.com/symfony/ai/issues/16

src/store/src/InMemoryStore.php

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store;
13+
14+
use Symfony\AI\Platform\Vector\Vector;
15+
use Symfony\AI\Store\Document\VectorDocument;
16+
use Symfony\AI\Store\Exception\InvalidArgumentException;
17+
18+
/**
 * Vector store that keeps every document in a plain PHP array.
 *
 * Each query compares the given vector against all stored documents using the
 * strategy selected at construction time and returns them closest match first.
 *
 * @author Guillaume Loulier <[email protected]>
 */
final class InMemoryStore implements VectorStoreInterface
{
    public const COSINE_SIMILARITY = 'cosine';
    public const ANGULAR_DISTANCE = 'angular';
    public const EUCLIDEAN_DISTANCE = 'euclidean';
    public const MANHATTAN_DISTANCE = 'manhattan';
    public const CHEBYSHEV_DISTANCE = 'chebyshev';

    /**
     * @var VectorDocument[]
     */
    private array $documents = [];

    /**
     * @param string $similarity One of the strategy constants of this class; an unsupported
     *                           value is only rejected when query() is called
     */
    public function __construct(
        private readonly string $similarity = self::COSINE_SIMILARITY,
    ) {
    }

    /**
     * Appends the given documents to the in-memory list.
     */
    public function add(VectorDocument ...$documents): void
    {
        array_push($this->documents, ...$documents);
    }

    /**
     * Returns the stored documents ordered from the closest to the farthest match.
     *
     * @param array{
     *     maxItems?: positive-int
     * } $options If maxItems is provided, only the top N results will be returned
     * @param float|null $minScore Currently not used by this store
     *
     * @return VectorDocument[]
     *
     * @throws InvalidArgumentException When the configured strategy is not supported
     */
    public function query(Vector $vector, array $options = [], ?float $minScore = null): array
    {
        $strategy = match ($this->similarity) {
            // A similarity grows as vectors get closer while every other strategy shrinks,
            // so cosine is ranked through its distance form to share the ascending sort below.
            self::COSINE_SIMILARITY => $this->cosineDistance(...),
            self::ANGULAR_DISTANCE => $this->angularDistance(...),
            self::EUCLIDEAN_DISTANCE => $this->euclideanDistance(...),
            self::MANHATTAN_DISTANCE => $this->manhattanDistance(...),
            self::CHEBYSHEV_DISTANCE => $this->chebyshevDistance(...),
            default => throw new InvalidArgumentException(\sprintf('Unsupported similarity strategy "%s"', $this->similarity)),
        };

        $currentEmbeddings = array_map(
            static fn (VectorDocument $vectorDocument): array => [
                'distance' => $strategy($vectorDocument, $vector),
                'document' => $vectorDocument,
            ],
            $this->documents,
        );

        // smallest distance first, i.e. best match first
        usort(
            $currentEmbeddings,
            static fn (array $embedding, array $nextEmbedding): int => $embedding['distance'] <=> $nextEmbedding['distance'],
        );

        if (\array_key_exists('maxItems', $options) && $options['maxItems'] < \count($currentEmbeddings)) {
            $currentEmbeddings = \array_slice($currentEmbeddings, 0, $options['maxItems']);
        }

        return array_map(
            static fn (array $embedding): VectorDocument => $embedding['document'],
            $currentEmbeddings,
        );
    }

    /**
     * Cosine of the angle between both vectors, within [-1, 1].
     *
     * Returns 0.0 when either vector has a zero magnitude, as the angle is undefined
     * in that case and the plain division would yield NAN.
     */
    private function cosineSimilarity(VectorDocument $embedding, Vector $against): float
    {
        $currentEmbeddingVectors = $embedding->vector->getData();
        $againstVectors = $against->getData();

        $dotProduct = array_sum(array_map(
            static fn (float $a, float $b): float => $a * $b,
            $currentEmbeddingVectors,
            $againstVectors,
        ));

        $magnitudes = $this->magnitude($currentEmbeddingVectors) * $this->magnitude($againstVectors);

        if (0.0 === $magnitudes) {
            return 0.0;
        }

        // Rounding can push the ratio slightly outside [-1, 1], which would make acos() return NAN.
        return max(-1.0, min(1.0, $dotProduct / $magnitudes));
    }

    /**
     * Cosine similarity expressed as a distance: 0.0 for the same direction, 2.0 for opposite ones.
     */
    private function cosineDistance(VectorDocument $embedding, Vector $against): float
    {
        return 1.0 - $this->cosineSimilarity($embedding, $against);
    }

    /**
     * Angle between both vectors divided by pi, so the result lies within [0, 1].
     */
    private function angularDistance(VectorDocument $embedding, Vector $against): float
    {
        $cosineSimilarity = $this->cosineSimilarity($embedding, $against);

        return fdiv(acos($cosineSimilarity), \M_PI);
    }

    /**
     * Square root of the summed squared component differences.
     */
    private function euclideanDistance(VectorDocument $embedding, Vector $against): float
    {
        return sqrt(array_sum(array_map(
            static fn (float $a, float $b): float => ($a - $b) ** 2,
            $embedding->vector->getData(),
            $against->getData(),
        )));
    }

    /**
     * Sum of the absolute component differences.
     */
    private function manhattanDistance(VectorDocument $embedding, Vector $against): float
    {
        return array_sum(array_map(
            static fn (float $a, float $b): float => abs($a - $b),
            $embedding->vector->getData(),
            $against->getData(),
        ));
    }

    /**
     * Largest absolute component difference; 0.0 when there is no component to compare.
     */
    private function chebyshevDistance(VectorDocument $embedding, Vector $against): float
    {
        $differences = array_map(
            static fn (float $currentValue, float $againstValue): float => abs($currentValue - $againstValue),
            $embedding->vector->getData(),
            $against->getData(),
        );

        return array_reduce(
            array: $differences,
            callback: static fn (float $value, float $current): float => max($value, $current),
            initial: 0.0,
        );
    }

    /**
     * Euclidean length of a vector given as a list of components.
     *
     * @param float[] $values
     */
    private function magnitude(array $values): float
    {
        return sqrt(array_sum(array_map(
            static fn (float $value): float => $value ** 2,
            $values,
        )));
    }
}

src/store/tests/InMemoryStoreTest.php

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Tests;
13+
14+
use PHPUnit\Framework\Attributes\CoversClass;
15+
use PHPUnit\Framework\TestCase;
16+
use Symfony\AI\Platform\Vector\Vector;
17+
use Symfony\AI\Store\Document\VectorDocument;
18+
use Symfony\AI\Store\InMemoryStore;
19+
use Symfony\Component\Uid\Uuid;
20+
21+
#[CoversClass(InMemoryStore::class)]
final class InMemoryStoreTest extends TestCase
{
    public function testStoreCanSearchUsingCosineSimilarity(): void
    {
        $store = new InMemoryStore();
        $needle = new Vector([0.0, 0.1, 0.6]);

        $store->add(...self::documents([0.1, 0.1, 0.5], [0.7, -0.3, 0.0], [0.3, 0.7, 0.1]));
        self::assertCount(3, $store->query($needle));

        // adding the same vectors again must grow the store, documents are not deduplicated
        $store->add(...self::documents([0.1, 0.1, 0.5], [0.7, -0.3, 0.0], [0.3, 0.7, 0.1]));
        self::assertCount(6, $store->query($needle));
    }

    public function testStoreCanSearchUsingCosineSimilarityWithMaxItems(): void
    {
        $store = new InMemoryStore();
        $store->add(...self::documents([0.1, 0.1, 0.5], [0.7, -0.3, 0.0], [0.3, 0.7, 0.1]));

        $matches = $store->query(new Vector([0.0, 0.1, 0.6]), ['maxItems' => 1]);

        self::assertCount(1, $matches);
    }

    public function testStoreCanSearchUsingAngularDistance(): void
    {
        $this->assertNearestDocumentComesFirst(InMemoryStore::ANGULAR_DISTANCE, [1.0, 2.0, 3.0], [1.0, 5.0, 7.0]);
    }

    public function testStoreCanSearchUsingEuclideanDistance(): void
    {
        // the expected winner is deliberately inserted last here
        $this->assertNearestDocumentComesFirst(InMemoryStore::EUCLIDEAN_DISTANCE, [1.0, 5.0, 7.0], [1.0, 2.0, 3.0]);
    }

    public function testStoreCanSearchUsingManhattanDistance(): void
    {
        $this->assertNearestDocumentComesFirst(InMemoryStore::MANHATTAN_DISTANCE, [1.0, 2.0, 3.0], [1.0, 5.0, 7.0]);
    }

    public function testStoreCanSearchUsingChebyshevDistance(): void
    {
        $this->assertNearestDocumentComesFirst(InMemoryStore::CHEBYSHEV_DISTANCE, [1.0, 2.0, 3.0], [1.0, 5.0, 7.0]);
    }

    /**
     * Fills a store using the given strategy with two documents, queries it with
     * [1.2, 2.3, 3.4] and checks that [1.0, 2.0, 3.0] is returned first.
     *
     * @param float[] ...$vectors Vector data of the documents, in insertion order
     */
    private function assertNearestDocumentComesFirst(string $strategy, array ...$vectors): void
    {
        $store = new InMemoryStore($strategy);
        $store->add(...self::documents(...$vectors));

        $result = $store->query(new Vector([1.2, 2.3, 3.4]));

        self::assertCount(2, $result);
        self::assertSame([1.0, 2.0, 3.0], $result[0]->vector->getData());
    }

    /**
     * Wraps each list of floats in a VectorDocument carrying a random UUID.
     *
     * @param float[] ...$vectors
     *
     * @return VectorDocument[]
     */
    private static function documents(array ...$vectors): array
    {
        return array_map(
            static fn (array $data): VectorDocument => new VectorDocument(Uuid::v4(), new Vector($data)),
            $vectors,
        );
    }
}

0 commit comments

Comments
 (0)