removed foldrescalevec

DiamonDinoia · DiamonDinoia · commit f830ba3779d5 · 2024-06-22T19:30:05.000-04:00
diff --git a/devel/foldrescale.cpp b/devel/foldrescale.cpp
@@ -73,18 +73,6 @@ inline __attribute__((always_inline)) FLT foldRescale03(FLT x, BIGINT N) {
   return result * fN;
 }
 
-#ifdef __AVX2__
-
-inline __attribute__((always_inline)) __m256d foldRescaleVec(__m256d x, BIGINT N) {
-  __m256d result;
-  __m256d fN                = _mm256_set1_pd(FLT(N));
-  static const __m256d x2pi = _mm256_set1_pd(FLT(M_1_2PI));
-  static const __m256d half = _mm256_set1_pd(FLT(0.5));
-  result                    = _mm256_fmadd_pd(x, x2pi, half);
-  result                    = _mm256_sub_pd(result, _mm256_floor_pd(result));
-  return _mm256_mul_pd(result, fN);
-}
-#endif
 
 static std::mt19937_64 gen;
 static std::uniform_real_distribution<> dis(-10, 10);
@@ -197,21 +185,6 @@ static void BM_FoldRescale05N(benchmark::State &state) {
   }
 }
 
-#ifdef __AVX2__
-static void BM_FoldRescaleVec(benchmark::State &state) {
-  for (auto _ : state) {
-    // Generate 4 floating point numbers
-    double x1 = dis(gen);
-    double x2 = dis(gen);
-    double x3 = dis(gen);
-    double x4 = dis(gen);
-    // Pack them into an AVX vector
-    __m256d x = _mm256_set_pd(x1, x2, x3, x4);
-    // Call the foldRescaleVec function
-    benchmark::DoNotOptimize(foldRescaleVec(x, N));
-  }
-}
-#endif
 
 BENCHMARK(BM_BASELINE)->Iterations(10000000);
 BENCHMARK(BM_FoldRescaleMacro)->Iterations(1000000);
@@ -221,9 +194,6 @@ BENCHMARK(BM_FoldRescale02)->Iterations(1000000);
 BENCHMARK(BM_FoldRescale03)->Iterations(10000000);
 BENCHMARK(BM_FoldRescale04)->Iterations(1000000);
 BENCHMARK(BM_FoldRescale05)->Iterations(1000000);
-#ifdef __AVX2__
-BENCHMARK(BM_FoldRescaleVec)->Iterations(1000000 / 4);
-#endif
 BENCHMARK(BM_FoldRescaleMacroN)->Iterations(1000000);
 BENCHMARK(BM_FoldRescale00N)->Iterations(1000000);
 BENCHMARK(BM_FoldRescale01N)->Iterations(1000000);
@@ -232,33 +202,6 @@ BENCHMARK(BM_FoldRescale03N)->Iterations(1000000);
 BENCHMARK(BM_FoldRescale04N)->Iterations(1000000);
 BENCHMARK(BM_FoldRescale05N)->Iterations(1000000);
 
-#ifdef __AVX2__
-void testFoldRescaleVec_avx256_vs_foldRescale00() {
-  // Generate 4 floating point numbers
-  double x1 = dis(gen);
-  double x2 = dis(gen);
-  double x3 = dis(gen);
-  double x4 = dis(gen);
-
-  // Pack them into an AVX vector
-  __m256d xVec = _mm256_set_pd(x1, x2, x3, x4);
-
-  // Call the foldRescaleVec function
-  __m256d resultVec = foldRescaleVec(xVec, N);
-
-  // Extract the results from the AVX vector
-
-  for (int i = 0; i < 4; ++i) {
-    double result00 = foldRescale03<true>(xVec[i], N);
-    if (std::abs(1 - result00 / resultVec[i]) > 1e-14) {
-      std::cout << "input: " << xVec[i] << " result00: " << result00
-                << " result256: " << resultVec[i] << std::endl;
-      throw std::runtime_error("foldRescaleVec is not equivalent to foldRescale00");
-    }
-  }
-}
-#endif
-
 void testFoldRescaleFunctions() {
   for (bool p : {true}) {
     for (int i = 0; i < 1024; ++i) { // Run the test 1000 times
@@ -341,9 +284,6 @@ int main(int argc, char **argv) {
   std::cout << "Seed: " << seed << "\n";
   gen.seed(seed);
   testFoldRescaleFunctions();
-#ifdef __AVX2__
-  testFoldRescaleVec_avx256_vs_foldRescale00();
-#endif
   ::benchmark::Initialize(&argc, argv);
   BaselineSubtractingReporter reporter;
   ::benchmark::RunSpecifiedBenchmarks(&reporter);