Skip to content

Commit aee7a35

Browse files
committed
bug #137 [Store][Meilisearch] Remove id, score and vectors from metadata (chr-hertel)
This PR was merged into the main branch. Discussion ---------- [Store][Meilisearch] Remove id, score and vectors from metadata | Q | A | ------------- | --- | Bug fix? | yes | New feature? | no | Docs? | no | Issues | | License | MIT Metadata should only include additional but relevant information. It is handed over by the SimilaritySearch to the LLM, and too much unnecessary information hurts the context size and token consumption. On top of that, the score was not set. Commits ------- 0ace629 Remove id, score and vectors from metadata
2 parents bd106a0 + 0ace629 commit aee7a35

File tree

3 files changed

+71
-30
lines changed

3 files changed

+71
-30
lines changed

examples/store/memory-similarity-search.php

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
use Symfony\AI\Agent\Toolbox\AgentProcessor;
1414
use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
1515
use Symfony\AI\Agent\Toolbox\Toolbox;
16+
use Symfony\AI\Fixtures\Movies;
1617
use Symfony\AI\Platform\Bridge\OpenAI\Embeddings;
1718
use Symfony\AI\Platform\Bridge\OpenAI\GPT;
1819
use Symfony\AI\Platform\Bridge\OpenAI\PlatformFactory;
@@ -37,15 +38,8 @@
3738
// initialize the store
3839
$store = new InMemoryStore();
3940

40-
// our data
41-
$movies = [
42-
['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'director' => 'Christopher Nolan'],
43-
['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'director' => 'The Wachowskis'],
44-
['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'director' => 'Francis Ford Coppola'],
45-
];
46-
4741
// create embeddings and documents
48-
foreach ($movies as $i => $movie) {
42+
foreach (Movies::all() as $i => $movie) {
4943
$documents[] = new TextDocument(
5044
id: Uuid::v4(),
5145
content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],

src/store/src/Bridge/Meilisearch/Store.php

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,13 @@ private function convertToIndexableArray(VectorDocument $document): array
123123
*/
124124
private function convertToVectorDocument(array $data): VectorDocument
125125
{
126-
return new VectorDocument(
127-
id: Uuid::fromString($data['id']),
128-
vector: !\array_key_exists($this->vectorFieldName, $data) || null === $data[$this->vectorFieldName]
129-
? new NullVector()
130-
: new Vector($data[$this->vectorFieldName][$this->embedder]['embeddings']),
131-
metadata: new Metadata($data),
132-
);
126+
$id = $data['id'] ?? throw new InvalidArgumentException('Missing "id" field in the document data');
127+
$vector = !\array_key_exists($this->vectorFieldName, $data) || null === $data[$this->vectorFieldName]
128+
? new NullVector() : new Vector($data[$this->vectorFieldName][$this->embedder]['embeddings']);
129+
$score = $data['_rankingScore'] ?? null;
130+
131+
unset($data['id'], $data[$this->vectorFieldName], $data['_rankingScore']);
132+
133+
return new VectorDocument(Uuid::fromString($id), $vector, new Metadata($data), $score);
133134
}
134135
}

src/store/tests/Bridge/Meilisearch/StoreTest.php

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
use Symfony\AI\Store\Document\VectorDocument;
1919
use Symfony\Component\HttpClient\Exception\ClientException;
2020
use Symfony\Component\HttpClient\MockHttpClient;
21-
use Symfony\Component\HttpClient\Response\MockResponse;
21+
use Symfony\Component\HttpClient\Response\JsonMockResponse;
2222
use Symfony\Component\Uid\Uuid;
2323

2424
#[CoversClass(Store::class)]
@@ -27,12 +27,12 @@ final class StoreTest extends TestCase
2727
public function testStoreCannotInitializeOnInvalidResponse(): void
2828
{
2929
$httpClient = new MockHttpClient([
30-
new MockResponse(json_encode([
30+
new JsonMockResponse([
3131
'message' => 'error',
3232
'code' => 'index_creation_failed',
3333
'type' => 'invalid_request',
3434
'link' => 'https://docs.meilisearch.com/errors#index_creation_failed',
35-
]), [
35+
], [
3636
'http_code' => 400,
3737
]),
3838
], 'http://localhost:7700');
@@ -53,22 +53,22 @@ public function testStoreCannotInitializeOnInvalidResponse(): void
5353
public function testStoreCanInitialize(): void
5454
{
5555
$httpClient = new MockHttpClient([
56-
new MockResponse(json_encode([
56+
new JsonMockResponse([
5757
'taskUid' => 1,
5858
'indexUid' => 'test',
5959
'status' => 'enqueued',
6060
'type' => 'indexCreation',
6161
'enqueuedAt' => '2025-01-01T00:00:00Z',
62-
]), [
62+
], [
6363
'http_code' => 202,
6464
]),
65-
new MockResponse(json_encode([
65+
new JsonMockResponse([
6666
'taskUid' => 2,
6767
'indexUid' => 'test',
6868
'status' => 'enqueued',
6969
'type' => 'indexUpdate',
7070
'enqueuedAt' => '2025-01-01T01:00:00Z',
71-
]), [
71+
], [
7272
'http_code' => 202,
7373
]),
7474
], 'http://localhost:7700');
@@ -88,12 +88,12 @@ public function testStoreCanInitialize(): void
8888
public function testStoreCannotAddOnInvalidResponse(): void
8989
{
9090
$httpClient = new MockHttpClient([
91-
new MockResponse(json_encode([
91+
new JsonMockResponse([
9292
'message' => 'error',
9393
'code' => 'invalid_document_fields',
9494
'type' => 'invalid_request',
9595
'link' => 'https://docs.meilisearch.com/errors#invalid_document_fields',
96-
]), [
96+
], [
9797
'http_code' => 400,
9898
]),
9999
], 'http://localhost:7700');
@@ -114,13 +114,13 @@ public function testStoreCannotAddOnInvalidResponse(): void
114114
public function testStoreCanAdd(): void
115115
{
116116
$httpClient = new MockHttpClient([
117-
new MockResponse(json_encode([
117+
new JsonMockResponse([
118118
'taskUid' => 1,
119119
'indexUid' => 'test',
120120
'status' => 'enqueued',
121121
'type' => 'documentAdditionOrUpdate',
122122
'enqueuedAt' => '2025-01-01T00:00:00Z',
123-
]), [
123+
], [
124124
'http_code' => 202,
125125
]),
126126
], 'http://localhost:7700');
@@ -140,12 +140,12 @@ public function testStoreCanAdd(): void
140140
public function testStoreCannotQueryOnInvalidResponse(): void
141141
{
142142
$httpClient = new MockHttpClient([
143-
new MockResponse(json_encode([
143+
new JsonMockResponse([
144144
'message' => 'error',
145145
'code' => 'invalid_search_hybrid_query',
146146
'type' => 'invalid_request',
147147
'link' => 'https://docs.meilisearch.com/errors#invalid_search_hybrid_query',
148-
]), [
148+
], [
149149
'http_code' => 400,
150150
]),
151151
], 'http://localhost:7700');
@@ -166,7 +166,7 @@ public function testStoreCannotQueryOnInvalidResponse(): void
166166
public function testStoreCanQuery(): void
167167
{
168168
$httpClient = new MockHttpClient([
169-
new MockResponse(json_encode([
169+
new JsonMockResponse([
170170
'hits' => [
171171
[
172172
'id' => Uuid::v4()->toRfc4122(),
@@ -176,6 +176,7 @@ public function testStoreCanQuery(): void
176176
'regenerate' => false,
177177
],
178178
],
179+
'_rankingScore' => 0.95,
179180
],
180181
[
181182
'id' => Uuid::v4()->toRfc4122(),
@@ -185,9 +186,10 @@ public function testStoreCanQuery(): void
185186
'regenerate' => false,
186187
],
187188
],
189+
'_rankingScore' => 0.85,
188190
],
189191
],
190-
]), [
192+
], [
191193
'http_code' => 200,
192194
]),
193195
], 'http://localhost:7700');
@@ -204,5 +206,49 @@ public function testStoreCanQuery(): void
204206

205207
self::assertSame(1, $httpClient->getRequestsCount());
206208
self::assertCount(2, $vectors);
209+
self::assertInstanceOf(VectorDocument::class, $vectors[0]);
210+
self::assertInstanceOf(VectorDocument::class, $vectors[1]);
211+
self::assertSame(0.95, $vectors[0]->score);
212+
self::assertSame(0.85, $vectors[1]->score);
213+
}
214+
215+
public function testMetadataWithoutIDRankingandVector(): void
216+
{
217+
$httpClient = new MockHttpClient([
218+
new JsonMockResponse([
219+
'hits' => [
220+
[
221+
'id' => Uuid::v4()->toRfc4122(),
222+
'title' => 'The Matrix',
223+
'description' => 'A science fiction action film.',
224+
'_vectors' => [
225+
'default' => [
226+
'embeddings' => [0.1, 0.2, 0.3],
227+
'regenerate' => false,
228+
],
229+
],
230+
'_rankingScore' => 0.95,
231+
],
232+
],
233+
], [
234+
'http_code' => 200,
235+
]),
236+
], 'http://localhost:7700');
237+
238+
$store = new Store(
239+
$httpClient,
240+
'http://localhost:7700',
241+
'test',
242+
'test',
243+
embeddingsDimension: 3,
244+
);
245+
246+
$vectors = $store->query(new Vector([0.1, 0.2, 0.3]));
247+
$expected = [
248+
'title' => 'The Matrix',
249+
'description' => 'A science fiction action film.',
250+
];
251+
252+
self::assertSame($expected, $vectors[0]->metadata->getArrayCopy());
207253
}
208254
}

0 commit comments

Comments
 (0)