Skip to content

Commit 7acc22f

Browse files
committed
Rework state handling of indexer
1 parent 5b89765 commit 7acc22f

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+410
-225
lines changed

examples/indexer/index-file-loader.php

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
*/
1111

1212
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
13+
use Symfony\AI\Store\Document\Loader;
1314
use Symfony\AI\Store\Document\Loader\TextFileLoader;
1415
use Symfony\AI\Store\Document\Transformer\TextReplaceTransformer;
1516
use Symfony\AI\Store\Document\Transformer\TextSplitTransformer;
@@ -23,21 +24,22 @@
2324
$store = new InMemoryStore();
2425
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small');
2526
$indexer = new Indexer(
26-
loader: new TextFileLoader(),
27+
loader: new Loader([new TextFileLoader()]),
2728
vectorizer: $vectorizer,
2829
store: $store,
29-
source: [
30-
dirname(__DIR__, 2).'/fixtures/movies/gladiator.md',
31-
dirname(__DIR__, 2).'/fixtures/movies/inception.md',
32-
dirname(__DIR__, 2).'/fixtures/movies/jurassic-park.md',
33-
],
3430
transformers: [
3531
new TextReplaceTransformer(search: '## Plot', replace: '## Synopsis'),
3632
new TextSplitTransformer(chunkSize: 500, overlap: 100),
3733
],
3834
);
3935

40-
$indexer->index();
36+
$sources = TextFileLoader::createSource([
37+
dirname(__DIR__, 2).'/fixtures/movies/gladiator.md',
38+
dirname(__DIR__, 2).'/fixtures/movies/inception.md',
39+
dirname(__DIR__, 2).'/fixtures/movies/jurassic-park.md',
40+
]);
41+
42+
$indexer->index($sources);
4143

4244
$vector = $vectorizer->vectorize('Roman gladiator revenge');
4345
$results = $store->query($vector);

examples/indexer/index-inmemory-loader.php

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,10 @@
1010
*/
1111

1212
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
13-
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
13+
use Symfony\AI\Store\Document\Loader;
14+
use Symfony\AI\Store\Document\Loader\DocumentCollectionLoader;
1415
use Symfony\AI\Store\Document\Metadata;
16+
use Symfony\AI\Store\Document\Source\DocumentCollection;
1517
use Symfony\AI\Store\Document\TextDocument;
1618
use Symfony\AI\Store\Document\Transformer\TextSplitTransformer;
1719
use Symfony\AI\Store\Document\Vectorizer;
@@ -25,7 +27,7 @@
2527
$store = new InMemoryStore();
2628
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small');
2729

28-
$documents = [
30+
$sources = new DocumentCollection([
2931
new TextDocument(
3032
Uuid::v4(),
3133
'Artificial Intelligence is transforming the way we work and live. Machine learning algorithms can now process vast amounts of data and make predictions with remarkable accuracy.',
@@ -36,19 +38,18 @@
3638
'Climate change is one of the most pressing challenges of our time. Renewable energy sources like solar and wind power are becoming increasingly important for a sustainable future.',
3739
new Metadata(['title' => 'Climate Action'])
3840
),
39-
];
41+
]);
4042

4143
$indexer = new Indexer(
42-
loader: new InMemoryLoader($documents),
44+
loader: new Loader([new DocumentCollectionLoader()]),
4345
vectorizer: $vectorizer,
4446
store: $store,
45-
source: null,
4647
transformers: [
4748
new TextSplitTransformer(chunkSize: 100, overlap: 20),
4849
],
4950
);
5051

51-
$indexer->index();
52+
$indexer->index($sources);
5253

5354
$vector = $vectorizer->vectorize('machine learning artificial intelligence');
5455
$results = $store->query($vector);

examples/indexer/index-rss-loader.php

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
*/
1111

1212
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
13+
use Symfony\AI\Store\Document\Loader;
1314
use Symfony\AI\Store\Document\Loader\RssFeedLoader;
1415
use Symfony\AI\Store\Document\Transformer\TextSplitTransformer;
1516
use Symfony\AI\Store\Document\Vectorizer;
@@ -23,19 +24,20 @@
2324
$store = new InMemoryStore();
2425
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small');
2526
$indexer = new Indexer(
26-
loader: new RssFeedLoader(HttpClient::create()),
27+
loader: new Loader([new RssFeedLoader(HttpClient::create())]),
2728
vectorizer: $vectorizer,
2829
store: $store,
29-
source: [
30-
'https://feeds.feedburner.com/symfony/blog',
31-
'https://www.tagesschau.de/index~rss2.xml',
32-
],
3330
transformers: [
3431
new TextSplitTransformer(chunkSize: 500, overlap: 100),
3532
],
3633
);
3734

38-
$indexer->index();
35+
$sources = RssFeedLoader::createSource([
36+
'https://feeds.feedburner.com/symfony/blog',
37+
'https://www.tagesschau.de/index~rss2.xml',
38+
]);
39+
40+
$indexer->index($sources);
3941

4042
$vector = $vectorizer->vectorize('Week of Symfony');
4143
$results = $store->query($vector);

examples/indexer/index-with-filters.php

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,10 @@
1111

1212
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
1313
use Symfony\AI\Store\Document\Filter\TextContainsFilter;
14-
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
14+
use Symfony\AI\Store\Document\Loader;
15+
use Symfony\AI\Store\Document\Loader\DocumentCollectionLoader;
1516
use Symfony\AI\Store\Document\Metadata;
17+
use Symfony\AI\Store\Document\Source\DocumentCollection;
1618
use Symfony\AI\Store\Document\TextDocument;
1719
use Symfony\AI\Store\Document\Transformer\TextTrimTransformer;
1820
use Symfony\AI\Store\Document\Vectorizer;
@@ -27,7 +29,7 @@
2729
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small');
2830

2931
// Sample documents with some unwanted content
30-
$documents = [
32+
$documents = new DocumentCollection([
3133
new TextDocument(
3234
Uuid::v4(),
3335
'Artificial Intelligence is transforming the way we work and live. Machine learning algorithms can now process vast amounts of data and make predictions with remarkable accuracy.',
@@ -48,7 +50,7 @@
4850
'Climate change is one of the most pressing challenges of our time. Renewable energy sources like solar and wind power are becoming increasingly important for a sustainable future.',
4951
new Metadata(['title' => 'Climate Action', 'category' => 'environment'])
5052
),
51-
];
53+
]);
5254

5355
// Create filters to remove unwanted content
5456
$filters = [
@@ -57,17 +59,16 @@
5759
];
5860

5961
$indexer = new Indexer(
60-
loader: new InMemoryLoader($documents),
62+
loader: new Loader([new DocumentCollectionLoader()]),
6163
vectorizer: $vectorizer,
6264
store: $store,
63-
source: null,
6465
filters: $filters,
6566
transformers: [
6667
new TextTrimTransformer(),
6768
],
6869
);
6970

70-
$indexer->index();
71+
$indexer->index($documents);
7172

7273
$vector = $vectorizer->vectorize('technology artificial intelligence');
7374
$results = $store->query($vector);
@@ -81,7 +82,7 @@
8182
}
8283

8384
echo "=== Results Summary ===\n";
84-
echo sprintf("Original documents: %d\n", count($documents));
85-
echo sprintf("Documents after filtering: %d\n", count($results));
86-
echo sprintf("Filtered out: %d documents\n", count($documents) - count($results));
85+
echo sprintf("Original documents: %d\n", count($documents->getDocuments()));
86+
echo sprintf("Documents after filtering: %d\n", $i + 1);
87+
echo sprintf("Filtered out: %d documents\n", count($documents->getDocuments()) - ($i + 1));
8788
echo "\nThe 'Week of Symfony' newsletter and SPAM advertisement were successfully filtered out!\n";

examples/memory/mariadb.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
use Symfony\AI\Platform\Message\Message;
1919
use Symfony\AI\Platform\Message\MessageBag;
2020
use Symfony\AI\Store\Bridge\MariaDb\Store;
21-
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
21+
use Symfony\AI\Store\Document\Loader\DocumentCollectionLoader;
2222
use Symfony\AI\Store\Document\Metadata;
2323
use Symfony\AI\Store\Document\TextDocument;
2424
use Symfony\AI\Store\Document\Vectorizer;
@@ -57,7 +57,7 @@
5757
// create embeddings for documents as preparation of the chain memory
5858
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
5959
$vectorizer = new Vectorizer($platform, $embeddings = 'text-embedding-3-small');
60-
$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger());
60+
$indexer = new Indexer(new DocumentCollectionLoader($documents), $vectorizer, $store, logger: logger());
6161
$indexer->index($documents);
6262

6363
// Execute a chat call that is utilizing the memory

examples/ollama/rag.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
use Symfony\AI\Platform\Bridge\Ollama\PlatformFactory;
1818
use Symfony\AI\Platform\Message\Message;
1919
use Symfony\AI\Platform\Message\MessageBag;
20-
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
20+
use Symfony\AI\Store\Document\Loader\DocumentCollectionLoader;
2121
use Symfony\AI\Store\Document\Metadata;
2222
use Symfony\AI\Store\Document\TextDocument;
2323
use Symfony\AI\Store\Document\Vectorizer;
@@ -43,7 +43,7 @@
4343
// create embeddings for documents
4444
$platform = PlatformFactory::create(env('OLLAMA_HOST_URL'), http_client());
4545
$vectorizer = new Vectorizer($platform, env('OLLAMA_EMBEDDINGS'), logger());
46-
$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger());
46+
$indexer = new Indexer(new DocumentCollectionLoader($documents), $vectorizer, $store, logger: logger());
4747
$indexer->index($documents);
4848

4949
$similaritySearch = new SimilaritySearch($vectorizer, $store);

examples/rag/cache.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
use Symfony\AI\Platform\Message\Message;
1919
use Symfony\AI\Platform\Message\MessageBag;
2020
use Symfony\AI\Store\Bridge\Cache\Store as CacheStore;
21-
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
21+
use Symfony\AI\Store\Document\Loader\DocumentCollectionLoader;
2222
use Symfony\AI\Store\Document\Metadata;
2323
use Symfony\AI\Store\Document\TextDocument;
2424
use Symfony\AI\Store\Document\Vectorizer;
@@ -44,7 +44,7 @@
4444
// create embeddings for documents
4545
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
4646
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger());
47-
$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger());
47+
$indexer = new Indexer(new DocumentCollectionLoader($documents), $vectorizer, $store, logger: logger());
4848
$indexer->index($documents);
4949

5050
$similaritySearch = new SimilaritySearch($vectorizer, $store);

examples/rag/chromadb.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
use Symfony\AI\Platform\Message\Message;
2020
use Symfony\AI\Platform\Message\MessageBag;
2121
use Symfony\AI\Store\Bridge\ChromaDb\Store;
22-
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
22+
use Symfony\AI\Store\Document\Loader\DocumentCollectionLoader;
2323
use Symfony\AI\Store\Document\Metadata;
2424
use Symfony\AI\Store\Document\TextDocument;
2525
use Symfony\AI\Store\Document\Vectorizer;
@@ -51,7 +51,7 @@
5151
// create embeddings for documents
5252
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
5353
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger());
54-
$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger());
54+
$indexer = new Indexer(new DocumentCollectionLoader($documents), $vectorizer, $store, logger: logger());
5555
$indexer->index($documents);
5656

5757
$similaritySearch = new SimilaritySearch($vectorizer, $store);

examples/rag/clickhouse.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
use Symfony\AI\Platform\Message\Message;
1919
use Symfony\AI\Platform\Message\MessageBag;
2020
use Symfony\AI\Store\Bridge\ClickHouse\Store;
21-
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
21+
use Symfony\AI\Store\Document\Loader\DocumentCollectionLoader;
2222
use Symfony\AI\Store\Document\Metadata;
2323
use Symfony\AI\Store\Document\TextDocument;
2424
use Symfony\AI\Store\Document\Vectorizer;
@@ -51,7 +51,7 @@
5151
// create embeddings for documents
5252
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
5353
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger());
54-
$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger());
54+
$indexer = new Indexer(new DocumentCollectionLoader($documents), $vectorizer, $store, logger: logger());
5555
$indexer->index($documents);
5656

5757
$similaritySearch = new SimilaritySearch($vectorizer, $store);

examples/rag/cloudflare.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
use Symfony\AI\Platform\Message\Message;
1919
use Symfony\AI\Platform\Message\MessageBag;
2020
use Symfony\AI\Store\Bridge\Cloudflare\Store;
21-
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
21+
use Symfony\AI\Store\Document\Loader\DocumentCollectionLoader;
2222
use Symfony\AI\Store\Document\Metadata;
2323
use Symfony\AI\Store\Document\TextDocument;
2424
use Symfony\AI\Store\Document\Vectorizer;
@@ -51,7 +51,7 @@
5151
// create embeddings for documents (keep in mind that upserting vectors is asynchronous)
5252
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
5353
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger());
54-
$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger());
54+
$indexer = new Indexer(new DocumentCollectionLoader($documents), $vectorizer, $store, logger: logger());
5555
$indexer->index($documents);
5656

5757
$similaritySearch = new SimilaritySearch($vectorizer, $store);

0 commit comments

Comments
 (0)