Skip to content

Commit 691938c

Browse files
author
Jeff Hammond
committed
some fixes - GPU broken
1 parent add9e23 commit 691938c

File tree

1 file changed

+74
-47
lines changed

1 file changed

+74
-47
lines changed

Cxx11/stencil-dpcpp.cc

Lines changed: 74 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11

22
///
3-
/// Copyright (c) 2013, Intel Corporation
3+
/// Copyright (c) 2020, Intel Corporation
44
///
55
/// Redistribution and use in source and binary forms, with or without
66
/// modification, are permitted provided that the following conditions
@@ -46,12 +46,6 @@
4646
/// The output consists of diagnostics to make sure the
4747
/// algorithm worked, and of timing statistics.
4848
///
49-
/// FUNCTIONS CALLED:
50-
///
51-
/// Other than standard C functions, the following functions are used in
52-
/// this program:
53-
/// wtime()
54-
///
5549
/// HISTORY: - Written by Rob Van der Wijngaart, February 2009.
5650
/// - RvdW: Removed unrolling pragmas for clarity;
5751
/// added constant to array "in" at end of each iteration to force
@@ -60,30 +54,32 @@
6054
///
6155
//////////////////////////////////////////////////////////////////////
6256

57+
#include "prk_sycl.h"
6358
#include "prk_util.h"
64-
#include "stencil_seq.hpp"
59+
#include "stencil_sycl.hpp"
6560

66-
void nothing(const int n, const int t, prk::vector<double> & in, prk::vector<double> & out)
61+
template <typename T>
62+
void nothing(sycl::queue & q, const size_t n, const T * in, T * out)
6763
{
6864
std::cout << "You are trying to use a stencil that does not exist.\n";
6965
std::cout << "Please generate the new stencil using the code generator\n";
7066
std::cout << "and add it to the case-switch in the driver." << std::endl;
71-
// n will never be zero - this is to silence compiler warnings.
72-
if (n==0 || t==0) std::cout << in.size() << out.size() << std::endl;
73-
std::abort();
67+
prk::Abort();
7468
}
7569

7670
int main(int argc, char* argv[])
7771
{
7872
std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl;
79-
std::cout << "C++11 Stencil execution on 2D grid" << std::endl;
73+
std::cout << "C++11/DPC++ Stencil execution on 2D grid" << std::endl;
8074

8175
//////////////////////////////////////////////////////////////////////
8276
// Process and test input parameters
8377
//////////////////////////////////////////////////////////////////////
8478

85-
int iterations, n, radius, tile_size;
79+
int iterations;
80+
size_t n, tile_size;
8681
bool star = true;
82+
size_t radius = 2;
8783
try {
8884
if (argc < 3) {
8985
throw "Usage: <# iterations> <array dimension> [<tile_size> <star/grid> <radius>]";
@@ -139,16 +135,18 @@ int main(int argc, char* argv[])
139135
std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl;
140136
std::cout << "Radius of stencil = " << radius << std::endl;
141137

142-
auto stencil = nothing;
138+
auto stencil = nothing<double>;
143139
if (star) {
144140
switch (radius) {
145-
case 1: stencil = star1; break;
146-
case 2: stencil = star2; break;
147-
case 3: stencil = star3; break;
148-
case 4: stencil = star4; break;
149-
case 5: stencil = star5; break;
141+
case 1: stencil = star1<double>; break;
142+
case 2: stencil = star2<double>; break;
143+
case 3: stencil = star3<double>; break;
144+
case 4: stencil = star4<double>; break;
145+
case 5: stencil = star5<double>; break;
150146
}
151-
} else {
147+
}
148+
#if 0
149+
else {
152150
switch (radius) {
153151
case 1: stencil = grid1; break;
154152
case 2: stencil = grid2; break;
@@ -157,39 +155,63 @@ int main(int argc, char* argv[])
157155
case 5: stencil = grid5; break;
158156
}
159157
}
158+
#endif
159+
160+
sycl::queue q(sycl::default_selector{});
161+
prk::SYCL::print_device_platform(q);
160162

161163
//////////////////////////////////////////////////////////////////////
162164
// Allocate space and perform the computation
163165
//////////////////////////////////////////////////////////////////////
164166

165-
auto stencil_time = 0.0;
166-
167-
prk::vector<double> in(n*n);
168-
prk::vector<double> out(n*n);
169-
170-
{
171-
for (int it=0; it<n; it+=tile_size) {
172-
for (int jt=0; jt<n; jt+=tile_size) {
173-
for (int i=it; i<std::min(n,it+tile_size); i++) {
174-
PRAGMA_SIMD
175-
for (int j=jt; j<std::min(n,jt+tile_size); j++) {
176-
in[i*n+j] = static_cast<double>(i+j);
177-
out[i*n+j] = 0.0;
178-
}
179-
}
180-
}
181-
}
167+
double stencil_time(0);
182168

183-
for (int iter = 0; iter<=iterations; iter++) {
169+
prk::vector<double> h_in(n*n, 0);
170+
prk::vector<double> h_out(n*n, 0);
184171

185-
if (iter==1) stencil_time = prk::wtime();
186-
// Apply the stencil operator
187-
stencil(n, tile_size, in, out);
188-
// Add constant to solution to force refresh of neighbor data, if any
189-
std::transform(in.begin(), in.end(), in.begin(), [](double c) { return c+=1.0; });
190-
}
191-
stencil_time = prk::wtime() - stencil_time;
172+
const size_t bytes = n * n * sizeof(double);
173+
174+
double * d_in = syclx::malloc_device<double>(n*n, q);
175+
double * d_out = syclx::malloc_device<double>(n*n, q);
176+
q.wait();
177+
178+
q.submit([&](sycl::handler& h) {
179+
h.parallel_for(sycl::range<2> {n,n}, [=] (sycl::item<2> it) {
180+
const auto i = it[0];
181+
const auto j = it[1];
182+
d_in[i*n+j] = static_cast<double>(i+j);
183+
d_out[i*n+j] = static_cast<double>(0);
184+
});
185+
});
186+
q.wait();
187+
188+
for (int iter = 0; iter<=iterations; iter++) {
189+
190+
if (iter==1) stencil_time = prk::wtime();
191+
192+
// Apply the stencil operator
193+
stencil(q, n, d_in, d_out);
194+
q.wait();
195+
196+
// Add constant to solution to force refresh of neighbor data, if any
197+
q.submit([&](sycl::handler& h) {
198+
h.parallel_for(sycl::range<2> {n,n}, [=] (sycl::item<2> it) {
199+
const auto i = it[0];
200+
const auto j = it[1];
201+
d_in[i*n+j] += static_cast<double>(1);
202+
});
203+
});
204+
q.wait();
192205
}
206+
stencil_time = prk::wtime() - stencil_time;
207+
208+
q.memcpy(&(h_in[0]), d_in, bytes);
209+
q.memcpy(&(h_out[0]), d_out, bytes);
210+
q.wait();
211+
212+
syclx::free(d_in, q);
213+
syclx::free(d_out,q);
214+
q.wait();
193215

194216
//////////////////////////////////////////////////////////////////////
195217
// Analyze and output results.
@@ -200,7 +222,7 @@ int main(int argc, char* argv[])
200222
double norm = 0.0;
201223
for (int i=radius; i<n-radius; i++) {
202224
for (int j=radius; j<n-radius; j++) {
203-
norm += prk::abs(out[i*n+j]);
225+
norm += prk::abs(h_out[i*n+j]);
204226
}
205227
}
206228
norm /= active_points;
@@ -211,6 +233,11 @@ int main(int argc, char* argv[])
211233
if (prk::abs(norm-reference_norm) > epsilon) {
212234
std::cout << "ERROR: L1 norm = " << norm
213235
<< " Reference L1 norm = " << reference_norm << std::endl;
236+
for (int i=0; i<n; i++) {
237+
for (int j=0; j<n; j++) {
238+
std::cerr << i << "," << j << " = " << h_in[i*n+j] <<", " << h_out[i*n+j] << "\n";
239+
}
240+
}
214241
return 1;
215242
} else {
216243
std::cout << "Solution validates" << std::endl;

0 commit comments

Comments
 (0)