Skip to content

Commit ab49e85

Browse files
committed
Add comprehensive unit tests for DistanceCalculator
- Test all distance strategies (cosine, angular, euclidean, manhattan, chebyshev)
- Test with various vector dimensions and configurations
- Test maxItems limiting functionality
- Test edge cases (empty arrays, single documents, negative values)
- Achieve full test coverage for the DistanceCalculator class
1 parent 148413f commit ab49e85

File tree

1 file changed

+302
-0
lines changed

1 file changed

+302
-0
lines changed
Lines changed: 302 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,302 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Tests\Bridge\Local;
13+
14+
use PHPUnit\Framework\Attributes\CoversClass;
15+
use PHPUnit\Framework\Attributes\DataProvider;
16+
use PHPUnit\Framework\Attributes\TestDox;
17+
use PHPUnit\Framework\Attributes\UsesClass;
18+
use PHPUnit\Framework\TestCase;
19+
use Symfony\AI\Platform\Vector\Vector;
20+
use Symfony\AI\Store\Bridge\Local\DistanceCalculator;
21+
use Symfony\AI\Store\Bridge\Local\DistanceStrategy;
22+
use Symfony\AI\Store\Document\Metadata;
23+
use Symfony\AI\Store\Document\VectorDocument;
24+
use Symfony\Component\Uid\Uuid;
25+
26+
#[CoversClass(DistanceCalculator::class)]
#[UsesClass(VectorDocument::class)]
#[UsesClass(Vector::class)]
#[UsesClass(Metadata::class)]
#[UsesClass(DistanceStrategy::class)]
final class DistanceCalculatorTest extends TestCase
{
    /**
     * Builds one document per entry of $documentVectors (tagging each with its
     * original index in the metadata), ranks them against $queryVector and
     * checks that the ranked indexes match $expectedOrder.
     *
     * @param array<list<float>> $documentVectors
     * @param list<float>        $queryVector
     * @param list<int>          $expectedOrder
     */
    #[TestDox('Calculates distances correctly using $strategy strategy')]
    #[DataProvider('provideDistanceStrategyTestCases')]
    public function testCalculateWithDifferentStrategies(
        DistanceStrategy $strategy,
        array $documentVectors,
        array $queryVector,
        array $expectedOrder,
    ) {
        $calculator = new DistanceCalculator($strategy);

        $documents = [];
        foreach ($documentVectors as $index => $vector) {
            $documents[] = new VectorDocument(
                Uuid::v4(),
                new Vector($vector),
                new Metadata(['index' => $index])
            );
        }

        $result = $calculator->calculate($documents, new Vector($queryVector));

        // Check that results are ordered correctly
        $this->assertCount(\count($expectedOrder), $result);

        foreach ($expectedOrder as $position => $expectedIndex) {
            $metadata = $result[$position]->metadata;
            $this->assertSame($expectedIndex, $metadata['index']);
        }
    }

    /**
     * Yields one data set per distance strategy over a shared fixture.
     *
     * The fixture is chosen so that every strategy produces a strictly
     * increasing distance for the indexes 0, 3, 1, 2 - no two documents tie,
     * so the expected order does not depend on how ties are resolved.
     *
     * @return \Generator<string, array{DistanceStrategy, array<list<float>>, list<float>, list<int>}>
     */
    public static function provideDistanceStrategyTestCases(): \Generator
    {
        // Approximate distances to the query [1.0, 0.0, 0.0]:
        //
        //   index | cosine | angular | euclidean | manhattan | chebyshev
        //   ------+--------+---------+-----------+-----------+----------
        //     0   | 0.000  | 0.000   | 0.000     | 0.000     | 0.000
        //     3   | 0.500  | 0.333   | 1.000     | 1.707     | 0.707
        //     1   | 1.000  | 0.500   | 1.414     | 2.000     | 1.000
        //     2   | 1.707  | 0.750   | 2.236     | 3.000     | 2.000
        $vectors = [
            [1.0, 0.0, 0.0], // Index 0: identical to the query
            [0.0, 1.0, 0.0], // Index 1: orthogonal to the query
            [-1.0, 0.0, 1.0], // Index 2: points away from the query
            [0.5, 0.5, 0.707], // Index 3: mixed vector, roughly unit length
        ];

        $queryVector = [1.0, 0.0, 0.0];

        $strategies = [
            'cosine distance' => DistanceStrategy::COSINE_DISTANCE,
            'angular distance' => DistanceStrategy::ANGULAR_DISTANCE,
            'euclidean distance' => DistanceStrategy::EUCLIDEAN_DISTANCE,
            'manhattan distance' => DistanceStrategy::MANHATTAN_DISTANCE,
            'chebyshev distance' => DistanceStrategy::CHEBYSHEV_DISTANCE,
        ];

        foreach ($strategies as $label => $strategy) {
            yield $label => [$strategy, $vectors, $queryVector, [0, 3, 1, 2]];
        }
    }

    #[TestDox('Limits results to specified maximum items')]
    public function testCalculateWithMaxItems()
    {
        $calculator = new DistanceCalculator(DistanceStrategy::EUCLIDEAN_DISTANCE);

        $documents = [
            new VectorDocument(Uuid::v4(), new Vector([0.0, 0.0]), new Metadata(['id' => 'a'])),
            new VectorDocument(Uuid::v4(), new Vector([1.0, 0.0]), new Metadata(['id' => 'b'])),
            new VectorDocument(Uuid::v4(), new Vector([0.0, 1.0]), new Metadata(['id' => 'c'])),
            new VectorDocument(Uuid::v4(), new Vector([1.0, 1.0]), new Metadata(['id' => 'd'])),
            new VectorDocument(Uuid::v4(), new Vector([0.5, 0.5]), new Metadata(['id' => 'e'])),
        ];

        $queryVector = new Vector([0.0, 0.0]);

        // Request only top 3 results
        $result = $calculator->calculate($documents, $queryVector, 3);

        $this->assertCount(3, $result);

        // Distances from [0.0, 0.0]:
        // a: [0.0, 0.0] -> 0.0
        // b: [1.0, 0.0] -> 1.0
        // c: [0.0, 1.0] -> 1.0
        // d: [1.0, 1.0] -> sqrt(2) ≈ 1.414
        // e: [0.5, 0.5] -> sqrt(0.5) ≈ 0.707
        $ids = array_map(static fn (VectorDocument $doc): string => $doc->metadata['id'], $result);

        // a is closest, then e.
        $this->assertSame(['a', 'e'], \array_slice($ids, 0, 2));
        // b and c are equidistant, so either is a valid third result; pinning
        // one of them would couple the test to the tie-breaking of the sort.
        $this->assertContains($ids[2], ['b', 'c']);
    }

    #[TestDox('Calculates cosine distance correctly for parallel vectors')]
    public function testCosineDistanceCalculation()
    {
        $calculator = new DistanceCalculator(DistanceStrategy::COSINE_DISTANCE);

        // Two documents parallel to the query (cosine distance 0, regardless
        // of magnitude) and one pointing the opposite way (cosine distance 2).
        $doc1 = new VectorDocument(Uuid::v4(), new Vector([1.0, 2.0, 3.0]));
        $doc2 = new VectorDocument(Uuid::v4(), new Vector([2.0, 4.0, 6.0])); // Parallel to doc1
        $opposite = new VectorDocument(Uuid::v4(), new Vector([-1.0, -2.0, -3.0]));

        $queryVector = new Vector([1.0, 2.0, 3.0]);

        // The opposite document is passed first so that an implementation
        // that leaves the input order untouched fails the test.
        $result = $calculator->calculate([$opposite, $doc1, $doc2], $queryVector);

        $this->assertCount(3, $result);

        // Both parallel documents rank ahead of the opposite one; their
        // relative order is a tie and intentionally not asserted.
        $closest = \array_slice($result, 0, 2);
        $this->assertContains($doc1, $closest);
        $this->assertContains($doc2, $closest);
        $this->assertSame($opposite, $result[2]);
    }

    #[TestDox('Calculates angular distance correctly for orthogonal vectors')]
    public function testAngularDistanceCalculation()
    {
        $calculator = new DistanceCalculator(DistanceStrategy::ANGULAR_DISTANCE);

        // Orthogonal vectors should have angular distance of 0.5 (90 degrees / 180 degrees)
        $orthogonalDoc = new VectorDocument(Uuid::v4(), new Vector([0.0, 1.0]));
        $parallelDoc = new VectorDocument(Uuid::v4(), new Vector([2.0, 0.0]));

        $queryVector = new Vector([1.0, 0.0]);

        $result = $calculator->calculate([$orthogonalDoc, $parallelDoc], $queryVector);

        // Parallel vector should be first (smaller angular distance)
        $this->assertSame($parallelDoc, $result[0]);
        $this->assertSame($orthogonalDoc, $result[1]);
    }

    #[TestDox('Calculates Chebyshev distance using maximum absolute difference')]
    public function testChebyshevDistanceCalculation()
    {
        $calculator = new DistanceCalculator(DistanceStrategy::CHEBYSHEV_DISTANCE);

        $doc1 = new VectorDocument(Uuid::v4(), new Vector([1.0, 2.0, 3.0]));
        $doc2 = new VectorDocument(Uuid::v4(), new Vector([1.5, 2.5, 3.5]));
        $doc3 = new VectorDocument(Uuid::v4(), new Vector([4.0, 2.0, 3.0]));

        $queryVector = new Vector([1.0, 2.0, 3.0]);

        $result = $calculator->calculate([$doc1, $doc2, $doc3], $queryVector);

        // doc1 should be first (distance 0), doc2 second (max diff 0.5), doc3 last (max diff 3.0)
        $this->assertSame($doc1, $result[0]);
        $this->assertSame($doc2, $result[1]);
        $this->assertSame($doc3, $result[2]);
    }

    #[TestDox('Returns empty array when no documents are provided')]
    public function testEmptyDocumentsArray()
    {
        $calculator = new DistanceCalculator();

        $result = $calculator->calculate([], new Vector([1.0, 2.0, 3.0]));

        $this->assertSame([], $result);
    }

    #[TestDox('Returns single document when only one is provided')]
    public function testSingleDocument()
    {
        $calculator = new DistanceCalculator();

        $doc = new VectorDocument(Uuid::v4(), new Vector([1.0, 2.0, 3.0]));

        // A non-zero query is used on purpose: a zero-magnitude vector has no
        // direction, so a direction-based distance to it is undefined.
        $result = $calculator->calculate([$doc], new Vector([3.0, 2.0, 1.0]));

        $this->assertCount(1, $result);
        $this->assertSame($doc, $result[0]);
    }

    #[TestDox('Handles high-dimensional vectors correctly')]
    public function testHighDimensionalVectors()
    {
        $calculator = new DistanceCalculator(DistanceStrategy::EUCLIDEAN_DISTANCE);

        // Create high-dimensional vectors (100 dimensions)
        $dimensions = 100;
        $vector1 = array_fill(0, $dimensions, 0.1);
        $vector2 = array_fill(0, $dimensions, 0.2);

        $doc1 = new VectorDocument(Uuid::v4(), new Vector($vector1));
        $doc2 = new VectorDocument(Uuid::v4(), new Vector($vector2));

        // 0.12 is clearly closer to 0.1 (per-component gap 0.02) than to 0.2
        // (gap 0.08). A midpoint such as 0.15 would be a mathematical tie that
        // only floating-point rounding could break.
        $queryVector = new Vector(array_fill(0, $dimensions, 0.12));

        // doc2 is passed first so the expected order differs from the input order.
        $result = $calculator->calculate([$doc2, $doc1], $queryVector);

        $this->assertSame($doc1, $result[0]);
        $this->assertSame($doc2, $result[1]);
    }

    #[TestDox('Handles negative vector components correctly')]
    public function testNegativeVectorComponents()
    {
        $calculator = new DistanceCalculator(DistanceStrategy::EUCLIDEAN_DISTANCE);

        $doc1 = new VectorDocument(Uuid::v4(), new Vector([-1.0, -2.0, -3.0]));
        $doc2 = new VectorDocument(Uuid::v4(), new Vector([1.0, 2.0, 3.0]));
        $doc3 = new VectorDocument(Uuid::v4(), new Vector([0.0, 0.0, 0.0]));

        $queryVector = new Vector([-1.0, -2.0, -3.0]);

        $result = $calculator->calculate([$doc1, $doc2, $doc3], $queryVector);

        // doc1 is identical to the query (distance 0), doc3 is the origin
        // (sqrt(14) ≈ 3.742) and doc2 is the mirrored vector (2 * sqrt(14) ≈ 7.483).
        $this->assertSame($doc1, $result[0]);
        $this->assertSame($doc3, $result[1]);
        $this->assertSame($doc2, $result[2]);
    }

    #[TestDox('Returns all documents when maxItems exceeds document count')]
    public function testMaxItemsGreaterThanDocumentCount()
    {
        $calculator = new DistanceCalculator();

        $doc1 = new VectorDocument(Uuid::v4(), new Vector([1.0, 0.0]));
        $doc2 = new VectorDocument(Uuid::v4(), new Vector([0.0, 1.0]));

        $result = $calculator->calculate([$doc1, $doc2], new Vector([1.0, 0.0]), 10);

        // Should return all documents even though maxItems is 10
        $this->assertCount(2, $result);
        // The oversized limit must not disturb the ranking either: doc1
        // matches the query exactly, doc2 is orthogonal to it.
        $this->assertSame($doc1, $result[0]);
        $this->assertSame($doc2, $result[1]);
    }

    #[TestDox('Calculates Manhattan distance correctly with mixed positive and negative values')]
    public function testManhattanDistanceWithMixedSigns()
    {
        $calculator = new DistanceCalculator(DistanceStrategy::MANHATTAN_DISTANCE);

        $doc1 = new VectorDocument(Uuid::v4(), new Vector([1.0, -1.0, 2.0]));
        $doc2 = new VectorDocument(Uuid::v4(), new Vector([-1.0, 1.0, -2.0]));
        $doc3 = new VectorDocument(Uuid::v4(), new Vector([0.5, -0.5, 1.0]));

        $queryVector = new Vector([0.0, 0.0, 0.0]);

        $result = $calculator->calculate([$doc1, $doc2, $doc3], $queryVector);

        // doc3 has smallest Manhattan distance (2.0), then doc1 and doc2 (both 4.0).
        // The order of doc1 and doc2 is a tie and intentionally not asserted.
        $this->assertCount(3, $result);
        $this->assertSame($doc3, $result[0]);
    }

    #[TestDox('Uses cosine distance as default strategy')]
    public function testDefaultStrategyIsCosineDistance()
    {
        // No strategy argument: the default is what is under test.
        $calculator = new DistanceCalculator();

        // Fixture where the cosine ranking is the reverse of the Euclidean one:
        //  - $nearbyButAngled is close to the query in space (Euclidean 1.0)
        //    but 45 degrees off in direction (cosine distance ≈ 0.293);
        //  - $distantButParallel is far away (Euclidean 99.0) but points in
        //    exactly the same direction (cosine distance 0).
        $nearbyButAngled = new VectorDocument(Uuid::v4(), new Vector([1.0, 1.0, 0.0]));
        $distantButParallel = new VectorDocument(Uuid::v4(), new Vector([100.0, 0.0, 0.0]));

        $queryVector = new Vector([1.0, 0.0, 0.0]);

        $result = $calculator->calculate([$nearbyButAngled, $distantButParallel], $queryVector);

        // Only a direction-based default ranks the parallel document first.
        $this->assertCount(2, $result);
        $this->assertSame($distantButParallel, $result[0]);
        $this->assertSame($nearbyButAngled, $result[1]);
    }
}

0 commit comments

Comments
 (0)