|
| 1 | +<?php |
| 2 | + |
| 3 | +/* |
| 4 | + * This file is part of the Symfony package. |
| 5 | + * |
| 6 | + * (c) Fabien Potencier <[email protected]> |
| 7 | + * |
| 8 | + * For the full copyright and license information, please view the LICENSE |
| 9 | + * file that was distributed with this source code. |
| 10 | + */ |
| 11 | + |
| 12 | +use Symfony\AI\Platform\Bridge\OpenAi\Embeddings; |
| 13 | +use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory; |
| 14 | +use Symfony\AI\Store\Bridge\Local\InMemoryStore; |
| 15 | +use Symfony\AI\Store\Document\Filter\TextContainsFilter; |
| 16 | +use Symfony\AI\Store\Document\Loader\InMemoryLoader; |
| 17 | +use Symfony\AI\Store\Document\Metadata; |
| 18 | +use Symfony\AI\Store\Document\TextDocument; |
| 19 | +use Symfony\AI\Store\Document\Transformer\TextTrimTransformer; |
| 20 | +use Symfony\AI\Store\Document\Vectorizer; |
| 21 | +use Symfony\AI\Store\Indexer; |
| 22 | +use Symfony\Component\Uid\Uuid; |
| 23 | + |
require_once dirname(__DIR__).'/bootstrap.php';

// The store does not depend on the platform, so it is created up front.
$store = new InMemoryStore();

// env() and http_client() are not defined in this file; presumably they come
// from the bootstrap file required above.
$apiKey = env('OPENAI_API_KEY');
$platform = PlatformFactory::create($apiKey, http_client());

// The vectorizer pairs the platform with the embedding model it should use.
$embeddingModel = new Embeddings('text-embedding-3-small');
$vectorizer = new Vectorizer($platform, $embeddingModel);
| 29 | + |
// Sample documents with some unwanted content. The raw data lives in a plain
// table; each row is turned into a TextDocument with a fresh UUID and its
// title/category stored as metadata.
$samples = [
    [
        'title' => 'AI Revolution',
        'category' => 'technology',
        'content' => 'Artificial Intelligence is transforming the way we work and live. Machine learning algorithms can now process vast amounts of data and make predictions with remarkable accuracy.',
    ],
    [
        'title' => 'Weekly Newsletter',
        'category' => 'newsletter',
        'content' => 'Week of Symfony - This week we released several new features including improved performance and better documentation.',
    ],
    [
        'title' => 'Advertisement',
        'category' => 'spam',
        'content' => 'SPAM: Buy cheap products now! Limited time offer on all electronics. Click here to save 90% on your purchase!',
    ],
    [
        'title' => 'Climate Action',
        'category' => 'environment',
        'content' => 'Climate change is one of the most pressing challenges of our time. Renewable energy sources like solar and wind power are becoming increasingly important for a sustainable future.',
    ],
];

$documents = array_map(
    static fn (array $sample): TextDocument => new TextDocument(
        Uuid::v4(),
        $sample['content'],
        new Metadata(['title' => $sample['title'], 'category' => $sample['category']]),
    ),
    $samples,
);
| 53 | + |
// Filters for the unwanted content: one case-insensitive match on the
// newsletter heading, one case-sensitive match on the "SPAM:" prefix.
$filters = [
    new TextContainsFilter('Week of Symfony', caseSensitive: false),
    new TextContainsFilter('SPAM:', caseSensitive: true),
];

// The loader hands the sample documents to the indexer; the only transformer
// applied is the text-trim transformer.
$loader = new InMemoryLoader($documents);
$transformers = [new TextTrimTransformer()];

$indexer = new Indexer(
    loader: $loader,
    vectorizer: $vectorizer,
    store: $store,
    source: null,
    filters: $filters,
    transformers: $transformers,
);

// Run the indexing pipeline configured above.
$indexer->index();
| 72 | + |
// Vectorize the search phrase with the same vectorizer used for indexing and
// query the store with the resulting vector.
$vector = $vectorizer->vectorize('technology artificial intelligence');
$results = $store->query($vector);

// Shortens $text to at most $limit characters. The ellipsis is appended only
// when something was actually cut off, and the multibyte-aware functions keep
// a UTF-8 character from being split in the middle.
$excerpt = static function (string $text, int $limit): string {
    return mb_strlen($text) > $limit ? mb_substr($text, 0, $limit).'...' : $text;
};

foreach ($results as $i => $document) {
    $title = $document->metadata['title'] ?? 'Unknown';
    $category = $document->metadata['category'] ?? 'Unknown';
    printf("%d. %s [%s]\n", $i + 1, $title, $category);
    printf(" Content: %s\n", $excerpt($document->metadata->getText() ?? 'No content', 80));
    // The id is cast to string explicitly before it is shortened for display.
    printf(" ID: %s\n\n", $excerpt((string) $document->id, 8));
}

// Compare how many documents were supplied with how many the query returned.
$originalCount = count($documents);
$remainingCount = count($results);

echo "=== Results Summary ===\n";
printf("Original documents: %d\n", $originalCount);
printf("Documents after filtering: %d\n", $remainingCount);
printf("Filtered out: %d documents\n", $originalCount - $remainingCount);
echo "\nThe 'Week of Symfony' newsletter and SPAM advertisement were successfully filtered out!\n";
0 commit comments