Skip to content

Commit 3387c64

Browse files
committed
[AI Bundle][Store] Add FilterInterface and filters support to Indexer
1 parent e34c11f commit 3387c64

File tree

11 files changed

+839
-14
lines changed

11 files changed

+839
-14
lines changed

.github/workflows/integration-tests.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,16 @@ jobs:
9696
- name: Install demo dependencies
9797
uses: ramsey/composer-install@v3
9898
with:
99+
composer-options: "--no-scripts"
99100
working-directory: demo
100101

101102
- name: Link demo
102103
working-directory: demo
103104
run: ../link
104105

106+
- run: composer run-script auto-scripts --no-interaction
107+
working-directory: demo
108+
105109
- name: Run demo tests
106110
working-directory: demo
107111
run: vendor/bin/phpunit

demo/config/packages/ai.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ ai:
6262
blog:
6363
loader: 'Symfony\AI\Store\Document\Loader\RssFeedLoader'
6464
source: 'https://feeds.feedburner.com/symfony/blog'
65+
filters:
66+
- 'app.filter.week_of_symfony'
6567
transformers:
6668
- 'Symfony\AI\Store\Document\Transformer\TextTrimTransformer'
6769
vectorizer: 'ai.vectorizer.openai'
@@ -83,3 +85,9 @@ services:
8385

8486
Symfony\AI\Store\Document\Loader\RssFeedLoader: ~
8587
Symfony\AI\Store\Document\Transformer\TextTrimTransformer: ~
88+
89+
app.filter.week_of_symfony:
90+
class: 'Symfony\AI\Store\Document\Filter\TextContainsFilter'
91+
arguments:
92+
$needle: 'Week of Symfony'
93+
$caseSensitive: false
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
13+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
14+
use Symfony\AI\Store\Bridge\Local\InMemoryStore;
15+
use Symfony\AI\Store\Document\Filter\TextContainsFilter;
16+
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
17+
use Symfony\AI\Store\Document\Metadata;
18+
use Symfony\AI\Store\Document\TextDocument;
19+
use Symfony\AI\Store\Document\Transformer\TextTrimTransformer;
20+
use Symfony\AI\Store\Document\Vectorizer;
21+
use Symfony\AI\Store\Indexer;
22+
use Symfony\Component\Uid\Uuid;
23+
24+
require_once dirname(__DIR__).'/bootstrap.php';
25+
26+
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
27+
$store = new InMemoryStore();
28+
$vectorizer = new Vectorizer($platform, new Embeddings('text-embedding-3-small'));
29+
30+
// Sample documents with some unwanted content
31+
$documents = [
32+
new TextDocument(
33+
Uuid::v4(),
34+
'Artificial Intelligence is transforming the way we work and live. Machine learning algorithms can now process vast amounts of data and make predictions with remarkable accuracy.',
35+
new Metadata(['title' => 'AI Revolution', 'category' => 'technology'])
36+
),
37+
new TextDocument(
38+
Uuid::v4(),
39+
'Week of Symfony - This week we released several new features including improved performance and better documentation.',
40+
new Metadata(['title' => 'Weekly Newsletter', 'category' => 'newsletter'])
41+
),
42+
new TextDocument(
43+
Uuid::v4(),
44+
'SPAM: Buy cheap products now! Limited time offer on all electronics. Click here to save 90% on your purchase!',
45+
new Metadata(['title' => 'Advertisement', 'category' => 'spam'])
46+
),
47+
new TextDocument(
48+
Uuid::v4(),
49+
'Climate change is one of the most pressing challenges of our time. Renewable energy sources like solar and wind power are becoming increasingly important for a sustainable future.',
50+
new Metadata(['title' => 'Climate Action', 'category' => 'environment'])
51+
),
52+
];
53+
54+
// Create filters to remove unwanted content
55+
$filters = [
56+
new TextContainsFilter('Week of Symfony', caseSensitive: false),
57+
new TextContainsFilter('SPAM:', caseSensitive: true),
58+
];
59+
60+
$indexer = new Indexer(
61+
loader: new InMemoryLoader($documents),
62+
vectorizer: $vectorizer,
63+
store: $store,
64+
source: null,
65+
filters: $filters,
66+
transformers: [
67+
new TextTrimTransformer(),
68+
],
69+
);
70+
71+
$indexer->index();
72+
73+
$vector = $vectorizer->vectorize('technology artificial intelligence');
74+
$results = $store->query($vector);
75+
76+
foreach ($results as $i => $document) {
77+
$title = $document->metadata['title'] ?? 'Unknown';
78+
$category = $document->metadata['category'] ?? 'Unknown';
79+
echo sprintf("%d. %s [%s]\n", $i + 1, $title, $category);
80+
echo sprintf(" Content: %s\n", substr($document->metadata->getText() ?? 'No content', 0, 80).'...');
81+
echo sprintf(" ID: %s\n\n", substr($document->id, 0, 8).'...');
82+
}
83+
84+
echo "=== Results Summary ===\n";
85+
echo sprintf("Original documents: %d\n", count($documents));
86+
echo sprintf("Documents after filtering: %d\n", count($results));
87+
echo sprintf("Filtered out: %d documents\n", count($documents) - count($results));
88+
echo "\nThe 'Week of Symfony' newsletter and SPAM advertisement were successfully filtered out!\n";

src/ai-bundle/config/options.php

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,12 @@
557557
->scalarPrototype()->end()
558558
->defaultValue([])
559559
->end()
560-
->stringNode('vectorizer')
560+
->arrayNode('filters')
561+
->info('Array of filter service names')
562+
->scalarPrototype()->end()
563+
->defaultValue([])
564+
->end()
565+
->scalarNode('vectorizer')
561566
->info('Service name of vectorizer')
562567
->defaultValue(VectorizerInterface::class)
563568
->end()

src/ai-bundle/src/AiBundle.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1130,15 +1130,21 @@ private function processVectorizerConfig(string $name, array $config, ContainerB
11301130
private function processIndexerConfig(int|string $name, array $config, ContainerBuilder $container): void
11311131
{
11321132
$transformers = [];
1133-
foreach ($config['transformers'] ?? [] as $transformer) {
1133+
foreach ($config['transformers'] as $transformer) {
11341134
$transformers[] = new Reference($transformer);
11351135
}
11361136

1137+
$filters = [];
1138+
foreach ($config['filters'] as $filter) {
1139+
$filters[] = new Reference($filter);
1140+
}
1141+
11371142
$definition = new Definition(Indexer::class, [
11381143
new Reference($config['loader']),
11391144
new Reference($config['vectorizer']),
11401145
new Reference($config['store']),
11411146
$config['source'],
1147+
$filters,
11421148
$transformers,
11431149
new Reference('logger', ContainerInterface::IGNORE_ON_INVALID_REFERENCE),
11441150
]);

0 commit comments

Comments
 (0)