@@ -2830,3 +2830,218 @@ TEST_P(SQ8_SQ8_Precomputed_SpacesOptimizationTest, SQ8_SQ8_Precomputed_CosineTes
28302830// Test suite covers dimensions 64-128 to exercise AVX512 SIMD paths
28312831INSTANTIATE_TEST_SUITE_P (SQ8_SQ8_PrecomputedOptFuncs, SQ8_SQ8_Precomputed_SpacesOptimizationTest,
28322832 testing::Range (64UL , 64 * 2UL + 1 ));
2833+
2834+ // Additional test suite for smaller dimensions (1-63) to test residual handling
2835+ INSTANTIATE_TEST_SUITE_P (SQ8_SQ8_PrecomputedOptFuncs_SmallDim,
2836+ SQ8_SQ8_Precomputed_SpacesOptimizationTest,
2837+ testing::Values (1UL , 7UL , 15UL , 16UL , 31UL , 32UL , 33UL , 48UL , 63UL ));
2838+
2839+ // Test suite for larger dimensions to stress-test the implementation
2840+ INSTANTIATE_TEST_SUITE_P (SQ8_SQ8_PrecomputedOptFuncs_LargeDim,
2841+ SQ8_SQ8_Precomputed_SpacesOptimizationTest,
2842+ testing::Values (256UL , 512UL , 768UL , 1024UL , 1536UL ));
2843+
2844+ #ifdef OPT_AVX512_F_BW_VL_VNNI
2845+ // Test self-distance: distance to itself should be 0 for cosine (normalized vectors)
2846+ TEST (SQ8_SQ8_Precomputed_EdgeCases, SelfDistanceCosine) {
2847+ auto optimization = getCpuOptimizationFeatures ();
2848+ if (!(optimization.avx512f && optimization.avx512bw && optimization.avx512vnni )) {
2849+ GTEST_SKIP () << " AVX512 VNNI not available" ;
2850+ }
2851+
2852+ size_t dim = 128 ;
2853+ std::vector<float > v_orig (dim);
2854+
2855+ // Create a normalized vector
2856+ std::mt19937 rng (42 );
2857+ std::uniform_real_distribution<float > dist (-1 .0f , 1 .0f );
2858+ float norm = 0 .0f ;
2859+ for (size_t i = 0 ; i < dim; i++) {
2860+ v_orig[i] = dist (rng);
2861+ norm += v_orig[i] * v_orig[i];
2862+ }
2863+ norm = std::sqrt (norm);
2864+ for (size_t i = 0 ; i < dim; i++) {
2865+ v_orig[i] /= norm;
2866+ }
2867+
2868+ auto v_quantized = CreateSQ8QuantizedVectorWithSumNorm (v_orig.data (), dim);
2869+
2870+ auto precomputed_func =
2871+ spaces::Choose_SQ8_SQ8_Precomputed_Cosine_implementation_AVX512F_BW_VL_VNNI (dim);
2872+ float self_distance = precomputed_func (v_quantized.data (), v_quantized.data (), dim);
2873+
2874+ // Self-distance for cosine should be close to 0
2875+ ASSERT_NEAR (self_distance, 0 .0f , 0 .02f ) << " Self-distance should be ~0 for cosine" ;
2876+ }
2877+
2878+ // Test symmetry: dist(v1, v2) == dist(v2, v1)
2879+ TEST (SQ8_SQ8_Precomputed_EdgeCases, SymmetryTest) {
2880+ auto optimization = getCpuOptimizationFeatures ();
2881+ if (!(optimization.avx512f && optimization.avx512bw && optimization.avx512vnni )) {
2882+ GTEST_SKIP () << " AVX512 VNNI not available" ;
2883+ }
2884+
2885+ size_t dim = 128 ;
2886+ std::vector<float > v1_orig (dim), v2_orig (dim);
2887+
2888+ std::mt19937 rng (123 );
2889+ std::uniform_real_distribution<float > dist (-1 .0f , 1 .0f );
2890+ for (size_t i = 0 ; i < dim; i++) {
2891+ v1_orig[i] = dist (rng);
2892+ v2_orig[i] = dist (rng);
2893+ }
2894+
2895+ auto v1_quantized = CreateSQ8QuantizedVectorWithSumNorm (v1_orig.data (), dim);
2896+ auto v2_quantized = CreateSQ8QuantizedVectorWithSumNorm (v2_orig.data (), dim);
2897+
2898+ auto ip_func = spaces::Choose_SQ8_SQ8_Precomputed_IP_implementation_AVX512F_BW_VL_VNNI (dim);
2899+ auto cosine_func =
2900+ spaces::Choose_SQ8_SQ8_Precomputed_Cosine_implementation_AVX512F_BW_VL_VNNI (dim);
2901+
2902+ float ip_12 = ip_func (v1_quantized.data (), v2_quantized.data (), dim);
2903+ float ip_21 = ip_func (v2_quantized.data (), v1_quantized.data (), dim);
2904+ ASSERT_NEAR (ip_12, ip_21, 1e-6f ) << " IP should be symmetric" ;
2905+
2906+ float cos_12 = cosine_func (v1_quantized.data (), v2_quantized.data (), dim);
2907+ float cos_21 = cosine_func (v2_quantized.data (), v1_quantized.data (), dim);
2908+ ASSERT_NEAR (cos_12, cos_21, 1e-6f ) << " Cosine should be symmetric" ;
2909+ }
2910+
2911+ // Test with zero vector
2912+ TEST (SQ8_SQ8_Precomputed_EdgeCases, ZeroVectorTest) {
2913+ auto optimization = getCpuOptimizationFeatures ();
2914+ if (!(optimization.avx512f && optimization.avx512bw && optimization.avx512vnni )) {
2915+ GTEST_SKIP () << " AVX512 VNNI not available" ;
2916+ }
2917+
2918+ size_t dim = 128 ;
2919+ std::vector<float > v_zero (dim, 0 .0f );
2920+ std::vector<float > v_nonzero (dim);
2921+
2922+ std::mt19937 rng (456 );
2923+ std::uniform_real_distribution<float > dist (-1 .0f , 1 .0f );
2924+ for (size_t i = 0 ; i < dim; i++) {
2925+ v_nonzero[i] = dist (rng);
2926+ }
2927+
2928+ auto v_zero_quantized = CreateSQ8QuantizedVectorWithSumNorm (v_zero.data (), dim);
2929+ auto v_nonzero_quantized = CreateSQ8QuantizedVectorWithSumNorm (v_nonzero.data (), dim);
2930+
2931+ // Get baseline from original implementation
2932+ auto orig_func = spaces::Choose_SQ8_SQ8_IP_implementation_AVX512F_BW_VL_VNNI (dim);
2933+ auto orig_v_zero = populate_float_vec_to_sq8 (v_zero.data (), dim);
2934+ auto orig_v_nonzero = populate_float_vec_to_sq8 (v_nonzero.data (), dim);
2935+ float baseline = orig_func (orig_v_zero.data (), orig_v_nonzero.data (), dim);
2936+
2937+ auto ip_func = spaces::Choose_SQ8_SQ8_Precomputed_IP_implementation_AVX512F_BW_VL_VNNI (dim);
2938+ float result = ip_func (v_zero_quantized.data (), v_nonzero_quantized.data (), dim);
2939+
2940+ ASSERT_NEAR (result, baseline, 0 .01f ) << " Zero vector IP should match baseline" ;
2941+ }
2942+
2943+ // Test with constant vector (all same values)
2944+ TEST (SQ8_SQ8_Precomputed_EdgeCases, ConstantVectorTest) {
2945+ auto optimization = getCpuOptimizationFeatures ();
2946+ if (!(optimization.avx512f && optimization.avx512bw && optimization.avx512vnni )) {
2947+ GTEST_SKIP () << " AVX512 VNNI not available" ;
2948+ }
2949+
2950+ size_t dim = 128 ;
2951+ std::vector<float > v_const (dim, 0 .5f );
2952+ std::vector<float > v_random (dim);
2953+
2954+ std::mt19937 rng (789 );
2955+ std::uniform_real_distribution<float > dist (-1 .0f , 1 .0f );
2956+ for (size_t i = 0 ; i < dim; i++) {
2957+ v_random[i] = dist (rng);
2958+ }
2959+
2960+ auto v_const_quantized = CreateSQ8QuantizedVectorWithSumNorm (v_const.data (), dim);
2961+ auto v_random_quantized = CreateSQ8QuantizedVectorWithSumNorm (v_random.data (), dim);
2962+
2963+ // Get baseline from original implementation
2964+ auto orig_func = spaces::Choose_SQ8_SQ8_IP_implementation_AVX512F_BW_VL_VNNI (dim);
2965+ auto orig_v_const = populate_float_vec_to_sq8 (v_const.data (), dim);
2966+ auto orig_v_random = populate_float_vec_to_sq8 (v_random.data (), dim);
2967+ float baseline = orig_func (orig_v_const.data (), orig_v_random.data (), dim);
2968+
2969+ auto ip_func = spaces::Choose_SQ8_SQ8_Precomputed_IP_implementation_AVX512F_BW_VL_VNNI (dim);
2970+ float result = ip_func (v_const_quantized.data (), v_random_quantized.data (), dim);
2971+
2972+ ASSERT_NEAR (result, baseline, 0 .01f ) << " Constant vector IP should match baseline" ;
2973+ }
2974+
2975+ // Test with extreme values (-1 and 1 only)
2976+ TEST (SQ8_SQ8_Precomputed_EdgeCases, ExtremeValuesTest) {
2977+ auto optimization = getCpuOptimizationFeatures ();
2978+ if (!(optimization.avx512f && optimization.avx512bw && optimization.avx512vnni )) {
2979+ GTEST_SKIP () << " AVX512 VNNI not available" ;
2980+ }
2981+
2982+ size_t dim = 128 ;
2983+ std::vector<float > v1 (dim), v2 (dim);
2984+
2985+ // Alternating extreme values
2986+ for (size_t i = 0 ; i < dim; i++) {
2987+ v1[i] = (i % 2 == 0 ) ? 1 .0f : -1 .0f ;
2988+ v2[i] = (i % 3 == 0 ) ? 1 .0f : -1 .0f ;
2989+ }
2990+
2991+ auto v1_quantized = CreateSQ8QuantizedVectorWithSumNorm (v1.data (), dim);
2992+ auto v2_quantized = CreateSQ8QuantizedVectorWithSumNorm (v2.data (), dim);
2993+
2994+ // Get baseline from original implementation
2995+ auto orig_func = spaces::Choose_SQ8_SQ8_IP_implementation_AVX512F_BW_VL_VNNI (dim);
2996+ auto orig_v1 = populate_float_vec_to_sq8 (v1.data (), dim);
2997+ auto orig_v2 = populate_float_vec_to_sq8 (v2.data (), dim);
2998+ float baseline = orig_func (orig_v1.data (), orig_v2.data (), dim);
2999+
3000+ auto ip_func = spaces::Choose_SQ8_SQ8_Precomputed_IP_implementation_AVX512F_BW_VL_VNNI (dim);
3001+ float result = ip_func (v1_quantized.data (), v2_quantized.data (), dim);
3002+
3003+ ASSERT_NEAR (result, baseline, 0 .01f ) << " Extreme values IP should match baseline" ;
3004+ }
3005+
3006+ // Test accuracy across multiple random vector pairs
3007+ TEST (SQ8_SQ8_Precomputed_EdgeCases, AccuracyStressTest) {
3008+ auto optimization = getCpuOptimizationFeatures ();
3009+ if (!(optimization.avx512f && optimization.avx512bw && optimization.avx512vnni )) {
3010+ GTEST_SKIP () << " AVX512 VNNI not available" ;
3011+ }
3012+
3013+ size_t dim = 256 ;
3014+ const int num_iterations = 100 ;
3015+ std::mt19937 rng (999 );
3016+ std::uniform_real_distribution<float > dist (-10 .0f , 10 .0f );
3017+
3018+ auto orig_ip_func = spaces::Choose_SQ8_SQ8_IP_implementation_AVX512F_BW_VL_VNNI (dim);
3019+ auto precomputed_ip_func =
3020+ spaces::Choose_SQ8_SQ8_Precomputed_IP_implementation_AVX512F_BW_VL_VNNI (dim);
3021+
3022+ float max_error = 0 .0f ;
3023+ for (int iter = 0 ; iter < num_iterations; iter++) {
3024+ std::vector<float > v1 (dim), v2 (dim);
3025+ for (size_t i = 0 ; i < dim; i++) {
3026+ v1[i] = dist (rng);
3027+ v2[i] = dist (rng);
3028+ }
3029+
3030+ auto orig_v1 = populate_float_vec_to_sq8 (v1.data (), dim);
3031+ auto orig_v2 = populate_float_vec_to_sq8 (v2.data (), dim);
3032+ float baseline = orig_ip_func (orig_v1.data (), orig_v2.data (), dim);
3033+
3034+ auto precomp_v1 = CreateSQ8QuantizedVectorWithSumNorm (v1.data (), dim);
3035+ auto precomp_v2 = CreateSQ8QuantizedVectorWithSumNorm (v2.data (), dim);
3036+ float result = precomputed_ip_func (precomp_v1.data (), precomp_v2.data (), dim);
3037+
3038+ float error = std::abs (result - baseline);
3039+ max_error = std::max (max_error, error);
3040+
3041+ ASSERT_NEAR (result, baseline, 0 .01f ) << " Iteration " << iter << " failed" ;
3042+ }
3043+
3044+ // Log max error for informational purposes
3045+ ASSERT_LT (max_error, 0 .01f ) << " Max error across all iterations: " << max_error;
3046+ }
3047+ #endif
0 commit comments