@@ -117,20 +117,20 @@ void pointerRAJA( benchmark::State & state )
117117 kernels.pointer ();
118118}
119119
120- INDEX_TYPE const SERIAL_N = (2 << 7 ) + 73 ;
121- INDEX_TYPE const SERIAL_L = (2 << 7 ) - 71 ;
122- INDEX_TYPE const SERIAL_M = (2 << 7 ) - 3 ;
120+ INDEX_TYPE const SERIAL_N = (2 << 8 ) + 73 ;
121+ INDEX_TYPE const SERIAL_L = (2 << 8 ) - 71 ;
122+ INDEX_TYPE const SERIAL_M = (2 << 8 ) - 3 ;
123123
124124#if defined(LVARRAY_USE_OPENMP)
125- INDEX_TYPE const OMP_N = SERIAL_N;
126- INDEX_TYPE const OMP_L = SERIAL_L;
127- INDEX_TYPE const OMP_M = SERIAL_M;
125+ INDEX_TYPE const OMP_N = 4 * SERIAL_N;
126+ INDEX_TYPE const OMP_L = 4 * SERIAL_L;
127+ INDEX_TYPE const OMP_M = 4 * SERIAL_M;
128128#endif
129129
130130#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
131- INDEX_TYPE const CUDA_N = SERIAL_N;
132- INDEX_TYPE const CUDA_L = SERIAL_L;
133- INDEX_TYPE const CUDA_M = SERIAL_M;
131+ INDEX_TYPE const CUDA_N = 16 * SERIAL_N;
132+ INDEX_TYPE const CUDA_L = 16 * SERIAL_L;
133+ INDEX_TYPE const CUDA_M = 16 * SERIAL_M;
134134#endif
135135
136136void registerBenchmarks ()
@@ -170,12 +170,12 @@ void registerBenchmarks()
170170 std::make_tuple ( SERIAL_N, SERIAL_L, SERIAL_M, RAJA::PERM_IJ {}, serialPolicy {} )
171171 , std::make_tuple ( SERIAL_N, SERIAL_L, SERIAL_M, RAJA::PERM_JI {}, serialPolicy {} )
172172 #if defined(LVARRAY_USE_OPENMP)
173- , std::make_tuple ( OMP_N, SERIAL_L , OMP_M, RAJA::PERM_IJ {}, parallelHostPolicy {} )
174- , std::make_tuple ( OMP_N, SERIAL_L , OMP_M, RAJA::PERM_JI {}, parallelHostPolicy {} )
173+ , std::make_tuple ( OMP_N, OMP_L , OMP_M, RAJA::PERM_IJ {}, parallelHostPolicy {} )
174+ , std::make_tuple ( OMP_N, OMP_L , OMP_M, RAJA::PERM_JI {}, parallelHostPolicy {} )
175175 #endif
176176 #if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
177- , std::make_tuple ( CUDA_N, SERIAL_L , CUDA_M, RAJA::PERM_IJ {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
178- , std::make_tuple ( CUDA_N, SERIAL_L , CUDA_M, RAJA::PERM_JI {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
177+ , std::make_tuple ( CUDA_N, CUDA_L , CUDA_M, RAJA::PERM_IJ {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
178+ , std::make_tuple ( CUDA_N, CUDA_L , CUDA_M, RAJA::PERM_JI {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
179179 #endif
180180 );
181181}
0 commit comments