Skip to content

Commit f27562a

Browse files
committed
Fixed some bugs in the benchmarks.
1 parent f39b7c9 commit f27562a

File tree

3 files changed: 19 additions and 17 deletions.

benchmarks/benchmarkMatrixMatrix.cpp

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -117,20 +117,20 @@ void pointerRAJA( benchmark::State & state )
117117
kernels.pointer();
118118
}
119119

120-
INDEX_TYPE const SERIAL_N = (2 << 7) + 73;
121-
INDEX_TYPE const SERIAL_L = (2 << 7) - 71;
122-
INDEX_TYPE const SERIAL_M = (2 << 7) - 3;
120+
INDEX_TYPE const SERIAL_N = (2 << 8) + 73;
121+
INDEX_TYPE const SERIAL_L = (2 << 8) - 71;
122+
INDEX_TYPE const SERIAL_M = (2 << 8) - 3;
123123

124124
#if defined(LVARRAY_USE_OPENMP)
125-
INDEX_TYPE const OMP_N = SERIAL_N;
126-
INDEX_TYPE const OMP_L = SERIAL_L;
127-
INDEX_TYPE const OMP_M = SERIAL_M;
125+
INDEX_TYPE const OMP_N = 4 * SERIAL_N;
126+
INDEX_TYPE const OMP_L = 4 * SERIAL_L;
127+
INDEX_TYPE const OMP_M = 4 * SERIAL_M;
128128
#endif
129129

130130
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
131-
INDEX_TYPE const CUDA_N = SERIAL_N;
132-
INDEX_TYPE const CUDA_L = SERIAL_L;
133-
INDEX_TYPE const CUDA_M = SERIAL_M;
131+
INDEX_TYPE const CUDA_N = 16 * SERIAL_N;
132+
INDEX_TYPE const CUDA_L = 16 * SERIAL_L;
133+
INDEX_TYPE const CUDA_M = 16 * SERIAL_M;
134134
#endif
135135

136136
void registerBenchmarks()
@@ -170,12 +170,12 @@ void registerBenchmarks()
170170
std::make_tuple( SERIAL_N, SERIAL_L, SERIAL_M, RAJA::PERM_IJ {}, serialPolicy {} )
171171
, std::make_tuple( SERIAL_N, SERIAL_L, SERIAL_M, RAJA::PERM_JI {}, serialPolicy {} )
172172
#if defined(LVARRAY_USE_OPENMP)
173-
, std::make_tuple( OMP_N, SERIAL_L, OMP_M, RAJA::PERM_IJ {}, parallelHostPolicy {} )
174-
, std::make_tuple( OMP_N, SERIAL_L, OMP_M, RAJA::PERM_JI {}, parallelHostPolicy {} )
173+
, std::make_tuple( OMP_N, OMP_L, OMP_M, RAJA::PERM_IJ {}, parallelHostPolicy {} )
174+
, std::make_tuple( OMP_N, OMP_L, OMP_M, RAJA::PERM_JI {}, parallelHostPolicy {} )
175175
#endif
176176
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
177-
, std::make_tuple( CUDA_N, SERIAL_L, CUDA_M, RAJA::PERM_IJ {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
178-
, std::make_tuple( CUDA_N, SERIAL_L, CUDA_M, RAJA::PERM_JI {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
177+
, std::make_tuple( CUDA_N, CUDA_L, CUDA_M, RAJA::PERM_IJ {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
178+
, std::make_tuple( CUDA_N, CUDA_L, CUDA_M, RAJA::PERM_JI {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
179179
#endif
180180
);
181181
}

benchmarks/benchmarkMatrixMatrixKernels.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ class MatrixMatrixNative
5252
{
5353
VALUE_TYPE const result = reduce( m_c ) / INDEX_TYPE( m_state.iterations() );
5454
registerResult( m_results, { m_a.size( 0 ), m_a.size( 1 ), m_b.size( 1 ) }, result, m_callingFunction );
55-
m_state.counters[ "OPS "] = ::benchmark::Counter( 2 * m_a.size() * m_b.size(
55+
m_state.counters[ "OPS" ] = ::benchmark::Counter( 2 * m_a.size() * m_b.size(
5656
1 ), ::benchmark::Counter::kIsIterationInvariantRate, ::benchmark::Counter::OneK::kIs1000 );
5757
}
5858

@@ -170,7 +170,7 @@ class MatrixMatrixRAJA : public MatrixMatrixNative< PERMUTATION >
170170
}
171171

172172
~MatrixMatrixRAJA()
173-
{ this->m_c.move( RAJAHelper< POLICY >::space, false ); }
173+
{ this->m_c.move( MemorySpace::CPU, false ); }
174174

175175
void fortranView() const
176176
{

benchmarks/compare.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -170,10 +170,12 @@ def aggregateAndPrint( results ):
170170
resultNames.append( name )
171171

172172
value = group[ name ] / group[ "baseline" ]
173-
if value > 1:
173+
if value > 1.05:
174174
colors[ groupName ][ name ] = style.GREEN
175-
else:
175+
elif value < 0.95:
176176
colors[ groupName ][ name ] = style.RED
177+
else:
178+
colors[ groupName ][ name ] = style.RESET
177179

178180
group[ name ] = "{:.4}x".format( value )
179181

0 commit comments

Comments
 (0)