Skip to content

Commit e1bf21e

Browse files
committed
feature #481 [Store] Refactor VectorizerInterface to separate string and document vectorization (OskarStark)
This PR was squashed before being merged into the main branch.

Discussion
----------

[Store] Refactor `VectorizerInterface` to separate string and document vectorization

| Q             | A
| ------------- | ---
| Bug fix?      | no
| New feature?  | yes
| Docs?         | no
| Issues        | --
| License       | MIT

Commits
-------

1bd8cc4 [Store] Refactor `VectorizerInterface` to separate string and document vectorization
2 parents 3021464 + 1bd8cc4 commit e1bf21e

File tree

10 files changed

+165
-79
lines changed

10 files changed

+165
-79
lines changed

demo/config/packages/ai.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,4 @@ services:
7474
# $apiKey: '%env(SERP_API_KEY)%'
7575
Symfony\AI\Agent\Toolbox\Tool\Wikipedia: ~
7676
Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch:
77-
$model: '@ai.indexer.default.model'
77+
$vectorizer: '@ai.vectorizer.openai_embeddings'
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
13+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
14+
use Symfony\AI\Store\Document\TextDocument;
15+
use Symfony\AI\Store\Document\VectorDocument;
16+
use Symfony\AI\Store\Document\Vectorizer;
17+
use Symfony\Component\Uid\Uuid;
18+
19+
require_once dirname(__DIR__).'/bootstrap.php';
20+
21+
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
22+
$embeddings = new Embeddings(Embeddings::TEXT_3_LARGE);
23+
24+
$textDocuments = [
25+
new TextDocument(Uuid::v4(), 'Hello World'),
26+
new TextDocument(Uuid::v4(), 'Lorem ipsum dolor sit amet'),
27+
new TextDocument(Uuid::v4(), 'PHP Hypertext Preprocessor'),
28+
];
29+
30+
$vectorizer = new Vectorizer($platform, $embeddings);
31+
$vectorDocuments = $vectorizer->vectorizeTextDocuments($textDocuments);
32+
33+
dump(array_map(fn (VectorDocument $document) => $document->vector->getDimensions(), $vectorDocuments));

examples/document/vectorizing.php

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,21 @@
1111

1212
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
1313
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
14-
use Symfony\AI\Store\Document\TextDocument;
15-
use Symfony\AI\Store\Document\VectorDocument;
1614
use Symfony\AI\Store\Document\Vectorizer;
17-
use Symfony\Component\Uid\Uuid;
1815

1916
require_once dirname(__DIR__).'/bootstrap.php';
2017

2118
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
2219
$embeddings = new Embeddings(Embeddings::TEXT_3_LARGE);
2320

24-
$textDocuments = [
25-
new TextDocument(Uuid::v4(), 'Hello World'),
26-
new TextDocument(Uuid::v4(), 'Lorem ipsum dolor sit amet'),
27-
new TextDocument(Uuid::v4(), 'PHP Hypertext Preprocessor'),
28-
];
29-
3021
$vectorizer = new Vectorizer($platform, $embeddings);
31-
$vectorDocuments = $vectorizer->vectorizeDocuments($textDocuments);
3222

33-
dump(array_map(fn (VectorDocument $document) => $document->vector->getDimensions(), $vectorDocuments));
23+
$string = 'Hello World';
24+
$vector = $vectorizer->vectorize($string);
25+
26+
printf(
27+
"String: %s\nVector dimensions: %d\nFirst 5 values: [%s]\n",
28+
$string,
29+
$vector->getDimensions(),
30+
implode(', ', array_map(fn ($val) => number_format($val, 6), array_slice($vector->getData(), 0, 5)))
31+
);

src/agent/src/Toolbox/Tool/SimilaritySearch.php

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@
1212
namespace Symfony\AI\Agent\Toolbox\Tool;
1313

1414
use Symfony\AI\Agent\Toolbox\Attribute\AsTool;
15-
use Symfony\AI\Platform\Model;
16-
use Symfony\AI\Platform\PlatformInterface;
1715
use Symfony\AI\Store\Document\VectorDocument;
16+
use Symfony\AI\Store\Document\VectorizerInterface;
1817
use Symfony\AI\Store\StoreInterface;
1918

2019
/**
@@ -29,8 +28,7 @@ final class SimilaritySearch
2928
public array $usedDocuments = [];
3029

3130
public function __construct(
32-
private readonly PlatformInterface $platform,
33-
private readonly Model $model,
31+
private readonly VectorizerInterface $vectorizer,
3432
private readonly StoreInterface $store,
3533
) {
3634
}
@@ -40,8 +38,8 @@ public function __construct(
4038
*/
4139
public function __invoke(string $searchTerm): string
4240
{
43-
$vectors = $this->platform->invoke($this->model, $searchTerm)->asVectors();
44-
$this->usedDocuments = $this->store->query($vectors[0]);
41+
$vector = $this->vectorizer->vectorize($searchTerm);
42+
$this->usedDocuments = $this->store->query($vector);
4543

4644
if ([] === $this->usedDocuments) {
4745
return 'No results found';

src/agent/tests/Toolbox/Tool/SimilaritySearchTest.php

Lines changed: 19 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,10 @@
1414
use PHPUnit\Framework\Attributes\CoversClass;
1515
use PHPUnit\Framework\TestCase;
1616
use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
17-
use Symfony\AI\Platform\Model;
18-
use Symfony\AI\Platform\PlatformInterface;
19-
use Symfony\AI\Platform\Result\RawResultInterface;
20-
use Symfony\AI\Platform\Result\ResultPromise;
21-
use Symfony\AI\Platform\Result\VectorResult;
2217
use Symfony\AI\Platform\Vector\Vector;
2318
use Symfony\AI\Store\Document\Metadata;
2419
use Symfony\AI\Store\Document\VectorDocument;
20+
use Symfony\AI\Store\Document\VectorizerInterface;
2521
use Symfony\AI\Store\StoreInterface;
2622
use Symfony\Component\Uid\Uuid;
2723

@@ -44,27 +40,19 @@ public function testSearchWithResults()
4440
new Metadata(['title' => 'Document 2', 'content' => 'Second document content']),
4541
);
4642

47-
$rawResult = $this->createMock(RawResultInterface::class);
48-
$vectorResult = new VectorResult($vector);
49-
$resultPromise = new ResultPromise(
50-
fn () => $vectorResult,
51-
$rawResult
52-
);
53-
54-
$platform = $this->createMock(PlatformInterface::class);
55-
$platform->expects($this->once())
56-
->method('invoke')
57-
->with($this->isInstanceOf(Model::class), $searchTerm)
58-
->willReturn($resultPromise);
43+
$vectorizer = $this->createMock(VectorizerInterface::class);
44+
$vectorizer->expects($this->once())
45+
->method('vectorize')
46+
->with($searchTerm)
47+
->willReturn($vector);
5948

6049
$store = $this->createMock(StoreInterface::class);
6150
$store->expects($this->once())
6251
->method('query')
6352
->with($vector)
6453
->willReturn([$document1, $document2]);
6554

66-
$model = new Model('test-model');
67-
$similaritySearch = new SimilaritySearch($platform, $model, $store);
55+
$similaritySearch = new SimilaritySearch($vectorizer, $store);
6856

6957
$result = $similaritySearch($searchTerm);
7058

@@ -77,27 +65,19 @@ public function testSearchWithoutResults()
7765
$searchTerm = 'find nothing';
7866
$vector = new Vector([0.1, 0.2, 0.3]);
7967

80-
$rawResult = $this->createMock(RawResultInterface::class);
81-
$vectorResult = new VectorResult($vector);
82-
$resultPromise = new ResultPromise(
83-
fn () => $vectorResult,
84-
$rawResult
85-
);
86-
87-
$platform = $this->createMock(PlatformInterface::class);
88-
$platform->expects($this->once())
89-
->method('invoke')
90-
->with($this->isInstanceOf(Model::class), $searchTerm)
91-
->willReturn($resultPromise);
68+
$vectorizer = $this->createMock(VectorizerInterface::class);
69+
$vectorizer->expects($this->once())
70+
->method('vectorize')
71+
->with($searchTerm)
72+
->willReturn($vector);
9273

9374
$store = $this->createMock(StoreInterface::class);
9475
$store->expects($this->once())
9576
->method('query')
9677
->with($vector)
9778
->willReturn([]);
9879

99-
$model = new Model('test-model');
100-
$similaritySearch = new SimilaritySearch($platform, $model, $store);
80+
$similaritySearch = new SimilaritySearch($vectorizer, $store);
10181

10282
$result = $similaritySearch($searchTerm);
10383

@@ -116,27 +96,19 @@ public function testSearchWithSingleResult()
11696
new Metadata(['title' => 'Single Document', 'description' => 'Only one match']),
11797
);
11898

119-
$rawResult = $this->createMock(RawResultInterface::class);
120-
$vectorResult = new VectorResult($vector);
121-
$resultPromise = new ResultPromise(
122-
fn () => $vectorResult,
123-
$rawResult
124-
);
125-
126-
$platform = $this->createMock(PlatformInterface::class);
127-
$platform->expects($this->once())
128-
->method('invoke')
129-
->with($this->isInstanceOf(Model::class), $searchTerm)
130-
->willReturn($resultPromise);
99+
$vectorizer = $this->createMock(VectorizerInterface::class);
100+
$vectorizer->expects($this->once())
101+
->method('vectorize')
102+
->with($searchTerm)
103+
->willReturn($vector);
131104

132105
$store = $this->createMock(StoreInterface::class);
133106
$store->expects($this->once())
134107
->method('query')
135108
->with($vector)
136109
->willReturn([$document]);
137110

138-
$model = new Model('test-model');
139-
$similaritySearch = new SimilaritySearch($platform, $model, $store);
111+
$similaritySearch = new SimilaritySearch($vectorizer, $store);
140112

141113
$result = $similaritySearch($searchTerm);
142114

src/ai-bundle/config/options.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@
373373
->end()
374374
->end()
375375
->arrayNode('vectorizer')
376+
->info('Vectorizers for converting strings to Vector objects and transforming TextDocument arrays to VectorDocument arrays')
376377
->useAttributeAsKey('name')
377378
->arrayPrototype()
378379
->children()

src/store/src/Document/Vectorizer.php

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
use Symfony\AI\Platform\Capability;
1717
use Symfony\AI\Platform\Model;
1818
use Symfony\AI\Platform\PlatformInterface;
19+
use Symfony\AI\Platform\Vector\Vector;
20+
use Symfony\AI\Store\Exception\RuntimeException;
1921

2022
final readonly class Vectorizer implements VectorizerInterface
2123
{
@@ -26,7 +28,7 @@ public function __construct(
2628
) {
2729
}
2830

29-
public function vectorize(array $documents): array
31+
public function vectorizeTextDocuments(array $documents): array
3032
{
3133
$documentCount = \count($documents);
3234
$this->logger->info('Starting vectorization process', ['document_count' => $documentCount]);
@@ -64,4 +66,18 @@ public function vectorize(array $documents): array
6466

6567
return $vectorDocuments;
6668
}
69+
70+
public function vectorize(string $string): Vector
71+
{
72+
$this->logger->debug('Vectorizing string', ['string' => $string]);
73+
74+
$result = $this->platform->invoke($this->model, $string);
75+
$vectors = $result->asVectors();
76+
77+
if (!isset($vectors[0])) {
78+
throw new RuntimeException('No vector returned for string vectorization.');
79+
}
80+
81+
return $vectors[0];
82+
}
6783
}

src/store/src/Document/VectorizerInterface.php

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@
1111

1212
namespace Symfony\AI\Store\Document;
1313

14+
use Symfony\AI\Platform\Vector\Vector;
15+
1416
/**
15-
* Interface for converting a collection of TextDocuments into VectorDocuments.
17+
* Interface for converting a collection of TextDocuments into VectorDocuments
18+
* and for vectorizing individual strings.
1619
*
1720
* @author Oskar Stark <oskarstark@googlemail.com>
1821
*/
@@ -23,5 +26,10 @@ interface VectorizerInterface
2326
*
2427
* @return VectorDocument[]
2528
*/
26-
public function vectorize(array $documents): array;
29+
public function vectorizeTextDocuments(array $documents): array;
30+
31+
/**
32+
* Vectorizes a single string into a Vector.
33+
*/
34+
public function vectorize(string $string): Vector;
2735
}

src/store/src/Indexer.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,13 @@ public function index(TextDocument|iterable $documents, int $chunkSize = 50): vo
4545
++$counter;
4646

4747
if ($chunkSize === \count($chunk)) {
48-
$this->store->add(...$this->vectorizer->vectorize($chunk));
48+
$this->store->add(...$this->vectorizer->vectorizeTextDocuments($chunk));
4949
$chunk = [];
5050
}
5151
}
5252

5353
if (\count($chunk) > 0) {
54-
$this->store->add(...$this->vectorizer->vectorize($chunk));
54+
$this->store->add(...$this->vectorizer->vectorizeTextDocuments($chunk));
5555
}
5656

5757
$this->logger->debug(0 === $counter ? 'No documents to index' : \sprintf('Indexed %d documents', $counter));

0 commit comments

Comments
 (0)