Skip to content

Commit cf0fa17

Browse files
author
Balint Joo
committed
Added performance tester: test/test_dslash_sycl_vperf.cpp
1 parent e476941 commit cf0fa17

File tree

3 files changed

+163
-919
lines changed

3 files changed

+163
-919
lines changed

test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,5 @@ package_add_test(test_vneighbor_table_sycl test_vneighbor_table_sycl.cpp)
5252
package_add_test(test_qdp_utils_sycl test_qdp_utils_sycl.cpp)
5353
package_add_test(test_vspinproj_sycl test_vspinproj_sycl.cpp)
5454
package_add_test(test_dslash_sycl test_dslash_sycl.cpp)
55+
package_add_test(test_dslash_sycl_vperf test_dslash_sycl_vperf.cpp)
5556
# package_add_test(test_vneighbor_table_sycl_broken test_vneighbor_table_sycl_broken.cpp)

test/test_dslash_sycl_vperf.cpp

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
#include "sycl_dslash_config.h"
2+
#include "gtest/gtest.h"
3+
#include "test_env.h"
4+
#include "qdpxx_utils.h"
5+
#include "dslashm_w.h"
6+
7+
#include "lattice/constants.h"
8+
#include "lattice/lattice_info.h"
9+
#include "utils/print_utils.h"
10+
11+
#include "sycl_dslash_config.h" // Build options
12+
#include "dslash/dslash_defaults.h" // Default layouts
13+
#include "dslash/dslash_vnode.h"
14+
#include "dslash/sycl_vtypes.h" // Vector type s
15+
#include "dslash/sycl_qdp_vutils.h" // Utils
16+
#include "dslash/dslash_vectype_sycl.h"
17+
#include "dslash/sycl_vdslash.h"
18+
19+
20+
using namespace MG;
21+
using namespace MGTesting;
22+
using namespace QDP;
23+
24+
// Typed-test fixture for the Dslash timing test; it adds no state of its own.
// The test body reads the vector length from the type parameter's ::value.
template<typename T>
25+
class TimeVDslash : public ::testing::Test{};
26+
27+
#ifdef MG_FORTRANLIKE_COMPLEX
28+
#if 0
29+
using test_types = ::testing::Types<
30+
std::integral_constant<int,1>,
31+
std::integral_constant<int,2>,
32+
std::integral_constant<int,4>,
33+
std::integral_constant<int,8> >;
34+
#else
35+
36+
using test_types = ::testing::Types<
37+
std::integral_constant<int,1>,
38+
std::integral_constant<int,4> >; // length 8 for AVX2
39+
#endif
40+
#else
41+
42+
#if 0
43+
using test_types = ::testing::Types<
44+
std::integral_constant<int,1>,
45+
std::integral_constant<int,2>,
46+
std::integral_constant<int,4>,
47+
std::integral_constant<int,8>,
48+
std::integral_constant<int,16> >;
49+
#endif
50+
// Test only the scalar (non-vectorized) case and AVX2 (vector length 8) for now.
51+
using test_types = ::testing::Types<
52+
std::integral_constant<int,1>,
53+
std::integral_constant<int,8> >; // length 8 for AVX2
54+
55+
#endif
57+
58+
TYPED_TEST_CASE(TimeVDslash, test_types);
59+
60+
// Timing benchmark for SyCLVDslash.
//
// For the vector length carried by TypeParam, builds random gauge and spinor
// fields on an 8x8x8x8 lattice, applies the operator once as a warm-up, uses a
// single timed application to choose an iteration count that targets roughly
// 5 seconds of work, and then logs GFLOPS and effective bandwidth for each of
// 5 timed repetitions. The test makes no correctness assertions.
TYPED_TEST(TimeVDslash, DslashTime)
{
  // Vector length
  static constexpr int V = TypeParam::value;

  IndexArray latdims={{8,8,8,8}};

  initQDPXXLattice(latdims);
  multi1d<LatticeColorMatrix> gauge_in(n_dim);
  for(int mu=0; mu < n_dim; ++mu) {
    gaussian(gauge_in[mu]);
    reunit(gauge_in[mu]);
  }

  LatticeFermion psi_in=zero;
  gaussian(psi_in);

  LatticeInfo info(latdims,4,3,NodeInfo());

  using VN = VNode<MGComplex<REAL32>,V>;
  using SpinorType = SyCLCBFineVSpinor<MGComplex<REAL32>,VN,4>;
  using FullGaugeType = SyCLFineVGaugeField<MGComplex<REAL32>,VN>;
  using GaugeType = SyCLCBFineVGaugeFieldDoubleCopy<MGComplex<REAL32>,VN>;

  SpinorType sycl_spinor_even(info,EVEN);
  SpinorType sycl_spinor_odd(info,ODD);
  FullGaugeType sycl_gauge(info);

  // Import Gauge Field
  QDPGaugeFieldToSyCLVGaugeField(gauge_in, sycl_gauge);

  // Double Store Gauge field. This benchmark is always even cb.
  GaugeType gauge_even(info,EVEN);

  // Import gets the rear neighbors, and permutes them if needed
  import(gauge_even, sycl_gauge(EVEN), sycl_gauge(ODD));

  // Import spinor
  QDPLatticeFermionToSyCLCBVSpinor(psi_in, sycl_spinor_even);

  SyCLVDslash<VN, MGComplex<float>,MGComplex<float> > D(sycl_spinor_even.GetInfo());

  // V is a signed int, so it is printed with %d.
  MasterLog(INFO, "Running timing for VectorLength=%d", V);
  int isign=1;
  MasterLog(INFO, "isign=%d First run (JIT-ing)", isign);
  {
    D(sycl_spinor_even,gauge_even,sycl_spinor_odd,isign);
  }

  // Default iteration count; kept only if the calibration timing is unusable.
  int iters=100;
  MasterLog(INFO, "Calibrating");
  {
    double start_time = omp_get_wtime();
    {
      D(sycl_spinor_even,gauge_even,sycl_spinor_odd,isign);
    } // original author's note: all queues finish here (not verifiable from this file)
    double end_time = omp_get_wtime();
    double time_per_iteration = end_time - start_time;
    MasterLog(INFO, "One application=%16.8e (sec)", time_per_iteration);

    // Aim for about 5 seconds per repetition. A zero (or NaN) measurement would
    // make the quotient infinite, and converting an out-of-range double to int
    // is undefined behaviour, so the value is validated and capped first.
    if ( time_per_iteration > 0.0 ) {
      const double max_iters = 1.0e9; // safely below INT_MAX
      double wanted_iters = 5.0 / time_per_iteration;
      if ( wanted_iters > max_iters ) wanted_iters = max_iters;
      iters = static_cast<int>( wanted_iters );
    }

    // Do at least one lousy iteration
    if ( iters < 1 ) iters = 1;
    MasterLog(INFO, "Setting Timing iters=%d",iters);
  }

  // Cost model; none of it changes between repetitions, so compute it once.
  // Sites on one checkerboard of the lattice.
  const double num_sites = static_cast<double>((latdims[0]/2)*latdims[1]*latdims[2]*latdims[3]);
  // Per site: 8 blocks of 4*3*2 reals plus 8 blocks of 3*3*2 reals are counted as
  // read (presumably the neighbour spinors and gauge links), and one block of
  // 4*3*2 reals is counted as written.
  const double bytes_in = static_cast<double>((8*4*3*2*sizeof(REAL32)+8*3*3*2*sizeof(REAL32))*num_sites*iters);
  const double bytes_out = static_cast<double>(4*3*2*sizeof(REAL32)*num_sites*iters);
  // With rfo = 1.0 every output byte is counted a second time in the RFO=1 figure.
  const double rfo = 1.0;
  const double rfo_bytes_out = (1.0 + rfo)*bytes_out;
  // 1320 floating point operations are assumed per site.
  const double flops = static_cast<double>(1320.0*num_sites*iters);

  for(int rep=0; rep < 5; ++rep ) {

    // Time it.
    double start_time = omp_get_wtime();
    for(int i=0; i < iters; ++i) {
      D(sycl_spinor_even,gauge_even,sycl_spinor_odd,isign);
    }
    double end_time = omp_get_wtime();
    double time_taken = end_time - start_time;

    MasterLog(INFO,"isign=%d Performance: %lf GFLOPS", isign, flops/(time_taken*1.0e9));
    MasterLog(INFO,"isign=%d Effective BW (RFO=0): %lf GB/sec",isign, (bytes_in+bytes_out)/(time_taken*1.0e9));
    MasterLog(INFO,"isign=%d Effective BW (RFO=1): %lf GB/sec", isign, (bytes_in+rfo_bytes_out)/(time_taken*1.0e9));

    MasterLog(INFO,"");
  } // rep
}
162+

0 commit comments

Comments
 (0)