Skip to content

Commit f5e69ac

Browse files
authored
fix FP16 tests precision overflow [MOD-13429] (#884)
* Update ARM EC2 instance type and scale FP16 test values to prevent overflow
* Remove outdated SQ8 design document to streamline documentation
* Add test_override method template to FP16Test class
* Fix FP16 overflow by scaling vector indices in GenerateAndAddVector
* Update scale factor and tolerance in FP16 tests to prevent overflow
1 parent bdcbf80 commit f5e69ac

File tree

3 files changed

+31
-18
lines changed

3 files changed

+31
-18
lines changed

.github/workflows/arm.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
2525
# Ubuntu 22.04 region AMI for ARM
2626
ec2-image-id: ami-062b37d89f25c958f
27-
ec2-instance-type: t4g.medium
27+
ec2-instance-type: r8g.xlarge
2828
subnet-id: ${{ secrets.AWS_EC2_SUBNET_ID }}
2929
security-group-id: ${{ secrets.AWS_EC2_SG_ID }}
3030
ec2-volume-size: 30

tests/unit/test_fp16.cpp

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -502,36 +502,42 @@ template <typename params_t>
502502
void FP16Test::test_override(params_t params) {
503503
size_t n = 100;
504504
size_t new_n = 250;
505+
// Scale factor to avoid FP16 overflow. FP16 max value is 65504, and L2² = dim × diff².
506+
// With scale=0.1 and max diff=250: L2² = 4 × (250×0.1)² = 2500 < 65504.
507+
constexpr float scale = 0.1f;
505508
SetUp(params);
506509

507510
// Insert n vectors.
508511
for (size_t i = 0; i < n; i++) {
509-
ASSERT_EQ(GenerateAndAddVector(i, i), 1);
512+
ASSERT_EQ(GenerateAndAddVector(i, i * scale), 1);
510513
}
511514
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
512515

513516
// Override n vectors, the first 100 will be overwritten (deleted first).
514517
for (size_t i = 0; i < n; i++) {
515-
ASSERT_EQ(GenerateAndAddVector(i, i), 0);
518+
ASSERT_EQ(GenerateAndAddVector(i, i * scale), 0);
516519
}
517520

518521
// Add up to new_n vectors.
519522
for (size_t i = n; i < new_n; i++) {
520-
ASSERT_EQ(GenerateAndAddVector(i, i), 1);
523+
ASSERT_EQ(GenerateAndAddVector(i, i * scale), 1);
521524
}
522525

523526
float16 query[dim];
524-
GenerateVector(query, new_n);
527+
GenerateVector(query, new_n * scale);
525528

526-
// Vectors values equals their id, so we expect the larger the id the closest it will be to the
527-
// query.
529+
// Vector values equal their id (scaled), so we expect that the larger the id, the closer it will
530+
// be to the query.
528531
auto verify_res = [&](size_t id, double score, size_t index) {
529532
ASSERT_EQ(id, new_n - 1 - index) << "id: " << id << " score: " << score;
530-
float16 a = vecsim_types::FP32_to_FP16(new_n);
531-
float16 b = vecsim_types::FP32_to_FP16(id);
533+
float16 a = vecsim_types::FP32_to_FP16(new_n * scale);
534+
float16 b = vecsim_types::FP32_to_FP16(id * scale);
532535
float diff = vecsim_types::FP16_to_FP32(a) - vecsim_types::FP16_to_FP32(b);
533536
float exp_score = 4 * diff * diff;
534-
ASSERT_EQ(score, exp_score) << "id: " << id << " score: " << score;
537+
// Use tolerance-based comparison due to FP16 precision loss in SVE accumulation.
538+
// FP16 has ~3 decimal digits of precision, so we allow ~0.2% relative tolerance (with an absolute floor of 1.0).
539+
float tolerance = std::max(1.0f, std::abs(exp_score) * 0.002f);
540+
ASSERT_NEAR(score, exp_score, tolerance) << "id: " << id << " score: " << score;
535541
};
536542
runTopKSearchTest(index, query, 300, verify_res);
537543
}
@@ -666,18 +672,21 @@ TEST_F(FP16TieredTest, GetDistanceIPTest) {
666672
template <typename params_t>
667673
void FP16Test::test_batch_iterator_basic(params_t params) {
668674
size_t n = params.initialCapacity;
675+
// Scale factor to avoid FP16 overflow. FP16 max value is 65504, and L2² = dim × diff².
676+
// With scale=0.1 and max diff=250: L2² = 4 × (250×0.1)² = 2500 < 65504.
677+
constexpr float scale = 0.1f;
669678
SetUp(params);
670679

671-
// For every i, add the vector (i,i,i,i) under the label i.
680+
// For every i, add the vector (i*scale, i*scale, i*scale, i*scale) under the label i.
672681
for (size_t i = 0; i < n; i++) {
673-
ASSERT_EQ(GenerateAndAddVector(i, i), 1);
682+
ASSERT_EQ(GenerateAndAddVector(i, i * scale), 1);
674683
}
675684

676685
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
677686

678-
// Query for (n,n,n,n) vector (recall that n-1 is the largest id in te index).
687+
// Query for (n*scale, n*scale, n*scale, n*scale) vector (recall that n-1 is the largest id).
679688
float16 query[dim];
680-
GenerateVector(query, n);
689+
GenerateVector(query, n * scale);
681690

682691
VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(index, query, nullptr);
683692
size_t iteration_num = 0;

tests/unit/test_svs_fp16.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2556,6 +2556,9 @@ class FP16SVSTieredIndexTest : public FP16SVSTest<index_type_t> {
25562556
}
25572557
constexpr size_t d = 4;
25582558
constexpr size_t n = 1000;
2559+
// Scale factor to avoid FP16 overflow. FP16 max is ~65504, and L2 distance is
2560+
// d * (query - vec)^2. With scale=0.1 and n=1000, max distance is 4 * (1000*0.1)^2 = 40000 < 65504.
2561+
constexpr float scale = 0.1f;
25592562

25602563
size_t per_label = is_multi ? 10 : 1;
25612564
size_t n_labels = n / per_label;
@@ -2581,16 +2584,17 @@ class FP16SVSTieredIndexTest : public FP16SVSTest<index_type_t> {
25812584
auto *svs = tiered_index->GetBackendIndex();
25822585
auto *flat = tiered_index->GetFlatIndex();
25832586

2584-
// For every i, add the vector (i,i,i,i) under the label i.
2587+
// For every i, add the vector (i*scale, i*scale, ...) under the label i.
2588+
// We scale down to avoid FP16 overflow in distance calculations.
25852589
for (size_t i = 0; i < n; i++) {
25862590
auto cur = decider(i, n) ? svs : flat;
2587-
this->GenerateAndAddVector(cur, d, i % n_labels, i);
2591+
this->GenerateAndAddVector(cur, d, i % n_labels, i * scale);
25882592
}
25892593
ASSERT_EQ(VecSimIndex_IndexSize(tiered_index), n) << decider_name;
25902594

2591-
// Query for (n,n,n,n) vector (recall that n-1 is the largest id in te index).
2595+
// Query for (n*scale, n*scale, ...) vector.
25922596
float16 query[d];
2593-
this->GenerateVector(query, d, n);
2597+
this->GenerateVector(query, d, n * scale);
25942598

25952599
VecSimBatchIterator *batchIterator =
25962600
VecSimBatchIterator_New(tiered_index, query, nullptr);

0 commit comments

Comments
 (0)