forked from doctrine/mongodb-odm
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathVectorSearchTest.php
More file actions
126 lines (102 loc) · 4.87 KB
/
VectorSearchTest.php
File metadata and controls
126 lines (102 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
<?php
declare(strict_types=1);
namespace Doctrine\ODM\MongoDB\Tests\Functional;
use Doctrine\ODM\MongoDB\Tests\BaseTestCase;
use Doctrine\ODM\MongoDB\Types\Type;
use Documents\VectorEmbedding;
use MongoDB\BSON\Binary;
use MongoDB\Driver\WriteConcern;
use PHPUnit\Framework\Attributes\Group;
use PHPUnit\Framework\Attributes\RequiresPhpExtension;
use ReflectionProperty;
/**
 * Functional tests for the vector search aggregation stage, run against the
 * VectorEmbedding test document. Tagged with the "atlas" group.
 */
#[Group('atlas')]
class VectorSearchTest extends BaseTestCase
{
    /**
     * Inserts three documents, builds the search indexes and checks both an
     * unfiltered and a filtered vector search.
     *
     * This method is also invoked directly by
     * testAtlasVectorSearchWithBinaryType() after the class metadata has been
     * re-mapped, so it must not assume the default field types.
     */
    public function testAtlasVectorSearch(): void
    {
        $schemaManager = $this->dm->getSchemaManager();

        // Create the collection only; the search indexes are created further
        // down, once the data has been inserted.
        $schemaManager->createDocumentCollection(VectorEmbedding::class);

        // Insert some test documents with vector embeddings
        $doc1              = new VectorEmbedding();
        $doc1->vectorFloat = [1.0, 2.0, 3.0];
        $doc1->vectorInt   = [1, 2, 3];
        $doc1->filterField = 'active';

        $doc2              = new VectorEmbedding();
        $doc2->vectorFloat = [4.0, 5.0, 6.0];
        $doc2->vectorInt   = [4, 5, 6];
        $doc2->filterField = 'inactive';

        $doc3              = new VectorEmbedding();
        $doc3->vectorFloat = [1.5, 2.5, 3.5];
        $doc3->vectorInt   = [2, 3, 4];
        $doc3->filterField = 'active';

        $this->dm->persist($doc1);
        $this->dm->persist($doc2);
        $this->dm->persist($doc3);

        // Write with majority concern to ensure data is visible for search
        $this->dm->flush(['writeConcern' => new WriteConcern(WriteConcern::MAJORITY)]);

        // Index must be created after data insertion, so the index status is not immediately "READY"
        $schemaManager->createDocumentSearchIndexes(VectorEmbedding::class);

        // Wait for the search index to be ready (Atlas Local needs time to build the index)
        $schemaManager->waitForSearchIndexes([VectorEmbedding::class]);

        // Sanity check with a plain query: every document round-trips with
        // three-element array vectors.
        $results = $this->dm->createQueryBuilder(VectorEmbedding::class)->getQuery()->toArray();

        $this->assertCount(3, $results, 'All documents should be present in the collection');

        foreach ($results as $result) {
            $this->assertInstanceOf(VectorEmbedding::class, $result);
            $this->assertIsArray($result->vectorFloat);
            $this->assertCount(3, $result->vectorFloat);
            $this->assertIsArray($result->vectorInt);
            $this->assertCount(3, $result->vectorInt);
        }

        // Unfiltered search on the float vector. Results are not hydrated, so
        // each one is an array carrying the score added by the $set stage.
        $results = $this->dm->createAggregationBuilder(VectorEmbedding::class)
            ->vectorSearch()
                ->index('default')
                ->queryVector([1.1, 2.1, 3.1])
                ->path('vectorFloat')
                ->numCandidates(10)
                ->limit(5)
            ->set()
                ->field('score')
                ->expression(['$meta' => 'vectorSearchScore'])
            ->getAggregation()->execute()->toArray();

        $this->assertCount(3, $results);

        foreach ($results as $result) {
            $this->assertIsArray($result);
            $this->assertIsFloat($result['score'], 'Result should have a score');
        }

        // Test with filter. The builder is assigned inline so that the same
        // instance can supply the match expression passed to filter().
        $results = ($builder = $this->dm->createAggregationBuilder(VectorEmbedding::class))
            ->hydrate(VectorEmbedding::class)
            ->vectorSearch()
                ->index('vector_int')
                ->queryVector([1, 1, 3])
                ->path('vectorInt')
                ->numCandidates(10)
                ->limit(5)
                ->filter($builder->matchExpr()->field('filterField')->equals('active'))
            ->getAggregation()->execute()->toArray();

        // Only $doc1 and $doc3 carry filterField = 'active'.
        $this->assertCount(2, $results);

        foreach ($results as $result) {
            $this->assertInstanceOf(VectorEmbedding::class, $result);
            // Strict comparison: the field must be exactly the string 'active'.
            $this->assertSame('active', $result->filterField, 'Filtered results should only contain active documents');
        }
    }

    /**
     * Re-runs the vector search scenario with both vector fields re-mapped to
     * the binary vector types, then checks the raw stored values are Binary.
     */
    #[RequiresPhpExtension('mongodb', '>= 2.2')]
    public function testAtlasVectorSearchWithBinaryType(): void
    {
        $cm = $this->dm->getClassMetadata(VectorEmbedding::class);

        // Work on a copy of the mappings and write it back through reflection.
        $fieldMappings                        = $cm->fieldMappings;
        $fieldMappings['vectorFloat']['type'] = Type::VECTOR_FLOAT32;
        $fieldMappings['vectorInt']['type']   = Type::VECTOR_INT8;

        // The instantiation is parenthesised because chaining a method call
        // directly on `new Foo(...)` only parses on PHP 8.4 and later.
        (new ReflectionProperty($cm, 'fieldMappings'))->setValue($cm, $fieldMappings);

        // Change the collection name to avoid conflicts with asynchronous index building
        $cm->collection .= '_binary_type';

        // Run the full scenario again, this time against the re-mapped metadata.
        $this->testAtlasVectorSearch();

        // Ensure that the vectors are stored as binary vectors. The raw
        // document is read by its database field names.
        $doc = $this->dm->getDocumentCollection(VectorEmbedding::class)->findOne(['filterField' => 'active']);

        $this->assertIsArray($doc);
        $this->assertInstanceOf(Binary::class, $doc['vectorInt']);
        $this->assertInstanceOf(Binary::class, $doc['db_vector_float']);
    }
}