11
22// /
3- // / Copyright (c) 2013 , Intel Corporation
3+ // / Copyright (c) 2020 , Intel Corporation
44// /
55// / Redistribution and use in source and binary forms, with or without
66// / modification, are permitted provided that the following conditions
4646// / The output consists of diagnostics to make sure the
4747// / algorithm worked, and of timing statistics.
4848// /
49- // / FUNCTIONS CALLED:
50- // /
51- // / Other than standard C functions, the following functions are used in
52- // / this program:
53- // / wtime()
54- // /
5549// / HISTORY: - Written by Rob Van der Wijngaart, February 2009.
5650// / - RvdW: Removed unrolling pragmas for clarity;
5751// / added constant to array "in" at end of each iteration to force
6054// /
6155// ////////////////////////////////////////////////////////////////////
6256
57+ #include " prk_sycl.h"
6358#include " prk_util.h"
64- #include " stencil_seq .hpp"
59+ #include " stencil_sycl .hpp"
6560
// Fallback stencil: reports that the requested stencil does not exist.
// main() initialises `stencil` with this function and only overwrites it when
// a case of the radius switch matches; with the grid branch compiled out by
// `#if 0`, a grid request also leaves this fallback in place.
// The body prints instructions for generating the missing stencil and adding
// it to the driver's case-switch, then terminates via std::abort() on the
// removed side of this diff and prk::Abort() on the added side.
// On the added side the parameters (q, n, in, out) are not read by the body;
// presumably they exist only so the signature matches the generated stencils
// -- TODO confirm against stencil_sycl.hpp.
66- void nothing (const int n, const int t, prk::vector<double > & in, prk::vector<double > & out)
61+ template <typename T>
62+ void nothing (sycl::queue & q, const size_t n, const T * in, T * out)
6763{
6864 std::cout << " You are trying to use a stencil that does not exist.\n " ;
6965 std::cout << " Please generate the new stencil using the code generator\n " ;
7066 std::cout << " and add it to the case-switch in the driver." << std::endl;
71- // n will never be zero - this is to silence compiler warnings.
72- if (n==0 || t==0 ) std::cout << in.size () << out.size () << std::endl;
73- std::abort ();
67+ prk::Abort ();
7468}
7569
7670int main (int argc, char * argv[])
7771{
7872 std::cout << " Parallel Research Kernels version " << PRKVERSION << std::endl;
79- std::cout << " C++11 Stencil execution on 2D grid" << std::endl;
73+ std::cout << " C++11/DPC++ Stencil execution on 2D grid" << std::endl;
8074
8175 // ////////////////////////////////////////////////////////////////////
8276 // Process and test input parameters
8377 // ////////////////////////////////////////////////////////////////////
8478
85- int iterations, n, radius, tile_size;
79+ int iterations;
80+ size_t n, tile_size;
8681 bool star = true ;
82+ size_t radius = 2 ;
8783 try {
8884 if (argc < 3 ) {
8985 throw " Usage: <# iterations> <array dimension> [<tile_size> <star/grid> <radius>]" ;
@@ -139,16 +135,18 @@ int main(int argc, char* argv[])
139135 std::cout << " Type of stencil = " << (star ? " star" : " grid" ) << std::endl;
140136 std::cout << " Radius of stencil = " << radius << std::endl;
141137
142- auto stencil = nothing;
138+ auto stencil = nothing< double > ;
143139 if (star) {
144140 switch (radius) {
145- case 1 : stencil = star1; break ;
146- case 2 : stencil = star2; break ;
147- case 3 : stencil = star3; break ;
148- case 4 : stencil = star4; break ;
149- case 5 : stencil = star5; break ;
141+ case 1 : stencil = star1< double > ; break ;
142+ case 2 : stencil = star2< double > ; break ;
143+ case 3 : stencil = star3< double > ; break ;
144+ case 4 : stencil = star4< double > ; break ;
145+ case 5 : stencil = star5< double > ; break ;
150146 }
151- } else {
147+ }
148+ #if 0
149+ else {
152150 switch (radius) {
153151 case 1: stencil = grid1; break;
154152 case 2: stencil = grid2; break;
@@ -157,39 +155,63 @@ int main(int argc, char* argv[])
157155 case 5: stencil = grid5; break;
158156 }
159157 }
158+ #endif
159+
160+ sycl::queue q (sycl::default_selector{});
161+ prk::SYCL::print_device_platform (q);
160162
161163 // ////////////////////////////////////////////////////////////////////
162164 // Allocate space and perform the computation
163165 // ////////////////////////////////////////////////////////////////////
164166
165- auto stencil_time = 0.0 ;
166-
167- prk::vector<double > in (n*n);
168- prk::vector<double > out (n*n);
169-
170- {
171- for (int it=0 ; it<n; it+=tile_size) {
172- for (int jt=0 ; jt<n; jt+=tile_size) {
173- for (int i=it; i<std::min (n,it+tile_size); i++) {
174- PRAGMA_SIMD
175- for (int j=jt; j<std::min (n,jt+tile_size); j++) {
176- in[i*n+j] = static_cast <double >(i+j);
177- out[i*n+j] = 0.0 ;
178- }
179- }
180- }
181- }
167+ double stencil_time (0 );
182168
183- for (int iter = 0 ; iter<=iterations; iter++) {
169+ prk::vector<double > h_in (n*n, 0 );
170+ prk::vector<double > h_out (n*n, 0 );
184171
185- if (iter==1 ) stencil_time = prk::wtime ();
186- // Apply the stencil operator
187- stencil (n, tile_size, in, out);
188- // Add constant to solution to force refresh of neighbor data, if any
189- std::transform (in.begin (), in.end (), in.begin (), [](double c) { return c+=1.0 ; });
190- }
191- stencil_time = prk::wtime () - stencil_time;
172+ const size_t bytes = n * n * sizeof (double );
173+
174+ double * d_in = syclx::malloc_device<double >(n*n, q);
175+ double * d_out = syclx::malloc_device<double >(n*n, q);
176+ q.wait ();
177+
178+ q.submit ([&](sycl::handler& h) {
179+ h.parallel_for (sycl::range<2 > {n,n}, [=] (sycl::item<2 > it) {
180+ const auto i = it[0 ];
181+ const auto j = it[1 ];
182+ d_in[i*n+j] = static_cast <double >(i+j);
183+ d_out[i*n+j] = static_cast <double >(0 );
184+ });
185+ });
186+ q.wait ();
187+
188+ for (int iter = 0 ; iter<=iterations; iter++) {
189+
190+ if (iter==1 ) stencil_time = prk::wtime ();
191+
192+ // Apply the stencil operator
193+ stencil (q, n, d_in, d_out);
194+ q.wait ();
195+
196+ // Add constant to solution to force refresh of neighbor data, if any
197+ q.submit ([&](sycl::handler& h) {
198+ h.parallel_for (sycl::range<2 > {n,n}, [=] (sycl::item<2 > it) {
199+ const auto i = it[0 ];
200+ const auto j = it[1 ];
201+ d_in[i*n+j] += static_cast <double >(1 );
202+ });
203+ });
204+ q.wait ();
192205 }
206+ stencil_time = prk::wtime () - stencil_time;
207+
208+ q.memcpy (&(h_in[0 ]), d_in, bytes);
209+ q.memcpy (&(h_out[0 ]), d_out, bytes);
210+ q.wait ();
211+
212+ syclx::free (d_in, q);
213+ syclx::free (d_out,q);
214+ q.wait ();
193215
194216 // ////////////////////////////////////////////////////////////////////
195217 // Analyze and output results.
@@ -200,7 +222,7 @@ int main(int argc, char* argv[])
200222 double norm = 0.0 ;
201223 for (int i=radius; i<n-radius; i++) {
202224 for (int j=radius; j<n-radius; j++) {
203- norm += prk::abs (out [i*n+j]);
225+ norm += prk::abs (h_out [i*n+j]);
204226 }
205227 }
206228 norm /= active_points;
@@ -211,6 +233,11 @@ int main(int argc, char* argv[])
211233 if (prk::abs (norm-reference_norm) > epsilon) {
212234 std::cout << " ERROR: L1 norm = " << norm
213235 << " Reference L1 norm = " << reference_norm << std::endl;
236+ for (int i=0 ; i<n; i++) {
237+ for (int j=0 ; j<n; j++) {
238+ std::cerr << i << " ," << j << " = " << h_in[i*n+j] <<" , " << h_out[i*n+j] << " \n " ;
239+ }
240+ }
214241 return 1 ;
215242 } else {
216243 std::cout << " Solution validates" << std::endl;
0 commit comments