@@ -73,18 +73,6 @@ inline __attribute__((always_inline)) FLT foldRescale03(FLT x, BIGINT N) {
7373 return result * fN ;
7474}
7575
#ifdef __AVX2__

/**
 * Vectorised fold-and-rescale for four doubles at once.
 *
 * For every lane computes t = x * M_1_2PI + 0.5, keeps the fractional part
 * t - floor(t), and scales it by N.
 *
 * @param x four input values packed in a 256-bit register
 * @param N scale factor applied to the fractional part
 * @return  the four rescaled values, lane for lane
 */
inline __attribute__((always_inline)) __m256d foldRescaleVec(__m256d x, BIGINT N) {
  // The lanes are always double, so broadcast the constants as double.
  // Routing them through FLT would truncate them whenever FLT is a
  // single-precision type (assumption: FLT may be float -- defined elsewhere).
  // Plain locals are used instead of function-local statics so that no
  // static-initialisation guard ends up on this always-inline hot path.
  const __m256d fN   = _mm256_set1_pd(static_cast<double>(N));
  const __m256d x2pi = _mm256_set1_pd(static_cast<double>(M_1_2PI));
  const __m256d half = _mm256_set1_pd(0.5);

#ifdef __FMA__
  __m256d result = _mm256_fmadd_pd(x, x2pi, half);
#else
  // AVX2 does not imply FMA; fall back to separate multiply and add so the
  // function still compiles when only AVX2 is enabled.
  __m256d result = _mm256_add_pd(_mm256_mul_pd(x, x2pi), half);
#endif

  result = _mm256_sub_pd(result, _mm256_floor_pd(result));
  return _mm256_mul_pd(result, fN);
}
#endif
8876
8977static std::mt19937_64 gen;
9078static std::uniform_real_distribution<> dis (-10 , 10 );
@@ -197,21 +185,6 @@ static void BM_FoldRescale05N(benchmark::State &state) {
197185 }
198186}
199187
#ifdef __AVX2__
// Benchmark body for foldRescaleVec: every iteration draws four random
// doubles, packs them into one 256-bit vector and folds them in a single call.
static void BM_FoldRescaleVec(benchmark::State &state) {
  for (auto _ : state) {
    // Braced initialisers are evaluated left to right, so the four draws
    // happen in a fixed order.
    const double sample[4] = {dis(gen), dis(gen), dis(gen), dis(gen)};
    const __m256d packed =
        _mm256_set_pd(sample[0], sample[1], sample[2], sample[3]);
    // Keep the result observable so the call is not optimised away.
    benchmark::DoNotOptimize(foldRescaleVec(packed, N));
  }
}
#endif
215188
216189BENCHMARK (BM_BASELINE)->Iterations(10000000 );
217190BENCHMARK (BM_FoldRescaleMacro)->Iterations(1000000 );
@@ -221,9 +194,6 @@ BENCHMARK(BM_FoldRescale02)->Iterations(1000000);
221194BENCHMARK (BM_FoldRescale03)->Iterations(10000000 );
222195BENCHMARK (BM_FoldRescale04)->Iterations(1000000 );
223196BENCHMARK (BM_FoldRescale05)->Iterations(1000000 );
224- #ifdef __AVX2__
225- BENCHMARK (BM_FoldRescaleVec)->Iterations(1000000 / 4 );
226- #endif
227197BENCHMARK (BM_FoldRescaleMacroN)->Iterations(1000000 );
228198BENCHMARK (BM_FoldRescale00N)->Iterations(1000000 );
229199BENCHMARK (BM_FoldRescale01N)->Iterations(1000000 );
@@ -232,33 +202,6 @@ BENCHMARK(BM_FoldRescale03N)->Iterations(1000000);
232202BENCHMARK (BM_FoldRescale04N)->Iterations(1000000 );
233203BENCHMARK (BM_FoldRescale05N)->Iterations(1000000 );
234204
#ifdef __AVX2__
/**
 * Cross-checks the AVX kernel against the scalar reference.
 *
 * Draws four random inputs, folds them with foldRescaleVec in one call and
 * compares every lane with foldRescale03<true> applied to the same input.
 *
 * @throws std::runtime_error if any lane differs from the scalar result by
 *         more than 1e-14 relative to the vector result.
 */
void testFoldRescaleVec_avx256_vs_foldRescale00() {
  // Draw the four samples one statement at a time to keep the draw order fixed.
  const double x1 = dis(gen);
  const double x2 = dis(gen);
  const double x3 = dis(gen);
  const double x4 = dis(gen);

  const __m256d xVec      = _mm256_set_pd(x1, x2, x3, x4);
  const __m256d resultVec = foldRescaleVec(xVec, N);

  // Spill both vectors to plain arrays instead of subscripting __m256d,
  // which only works through the GNU vector extension.
  double input[4];
  double vectorised[4];
  _mm256_storeu_pd(input, xVec);
  _mm256_storeu_pd(vectorised, resultVec);

  for (int i = 0; i < 4; ++i) {
    const double reference = foldRescale03<true>(input[i], N);
    // Same relative criterion as |1 - reference / vectorised| > 1e-14, but
    // multiplied through: a zero lane can no longer turn the check into a
    // NaN comparison that silently passes.
    const double error = std::abs(reference - vectorised[i]);
    if (error > 1e-14 * std::abs(vectorised[i])) {
      std::cout << " input: " << input[i] << " result03: " << reference
                << " result256: " << vectorised[i] << std::endl;
      // The reference implementation is foldRescale03, so name it correctly.
      throw std::runtime_error("foldRescaleVec is not equivalent to foldRescale03");
    }
  }
}
#endif
261-
262205void testFoldRescaleFunctions () {
263206 for (bool p : {true }) {
264207 for (int i = 0 ; i < 1024 ; ++i) { // Run the test 1000 times
@@ -341,9 +284,6 @@ int main(int argc, char **argv) {
341284 std::cout << " Seed: " << seed << " \n " ;
342285 gen.seed (seed);
343286 testFoldRescaleFunctions ();
344- #ifdef __AVX2__
345- testFoldRescaleVec_avx256_vs_foldRescale00 ();
346- #endif
347287 ::benchmark::Initialize (&argc, argv);
348288 BaselineSubtractingReporter reporter;
349289 ::benchmark::RunSpecifiedBenchmarks (&reporter);
0 commit comments