Skip to content

Commit 9018d44

Browse files
committed
Revert "Removing explicit copy instructions"
This reverts commit 5d231a8.
1 parent b9b738e commit 9018d44

File tree

1 file changed

+32
-13
lines changed

1 file changed

+32
-13
lines changed

stencil/stencil.hpp

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -165,14 +165,21 @@ class StencilExecutor
165165
uindex_t grid_height = orig_buffer.get_range()[1];
166166
cl::sycl::buffer<T, 2> in_buffer(cl::sycl::range<2>(max_grid_width, max_grid_height));
167167

168-
auto orig_buffer_ac = orig_buffer.template get_access<cl::sycl::access::mode::read>();
169-
auto in_buffer_ac = in_buffer.template get_access<cl::sycl::access::mode::discard_write>();
170-
171-
for (uindex_t c = 0; c < grid_width; c++)
168+
if (grid_width == max_grid_width && grid_height == max_grid_height)
169+
{
170+
in_buffer = orig_buffer;
171+
}
172+
else
172173
{
173-
for (uindex_t r = 0; r < grid_height; r++)
174+
auto orig_buffer_ac = orig_buffer.template get_access<cl::sycl::access::mode::read>();
175+
auto in_buffer_ac = in_buffer.template get_access<cl::sycl::access::mode::discard_write>();
176+
177+
for (uindex_t c = 0; c < grid_width; c++)
174178
{
175-
in_buffer_ac[c][r] = orig_buffer_ac[c][r];
179+
for (uindex_t r = 0; r < grid_height; r++)
180+
{
181+
in_buffer_ac[c][r] = orig_buffer_ac[c][r];
182+
}
176183
}
177184
}
178185

@@ -204,8 +211,7 @@ class StencilExecutor
204211
*
205212
* The new half-buffer is either the upper or the lower half of the given full buffer, depending on `half`: if `half` is 0, the upper half of the buffer is used; if `half` is 1, the lower half of the buffer is used.
206213
*/
207-
BufferEventGroup(cl::sycl::buffer<T, 2> buffer, uindex_t half) : events(), sync_buffers(), half_buffer(buffer, cl::sycl::id<2>(half * n_blocks / 2, 0), cl::sycl::range<2>(n_blocks / 2, block_size))
208-
{
214+
BufferEventGroup(cl::sycl::buffer<T, 2> buffer, uindex_t half) : events(), sync_buffers(), half_buffer(buffer, cl::sycl::id<2>(half * n_blocks / 2, 0), cl::sycl::range<2>(n_blocks / 2, block_size)) {
209215
assert(half == 1 || half == 0);
210216
}
211217

@@ -296,14 +302,27 @@ class StencilExecutor
296302

297303
out_buffer = out_buffer.template reinterpret<T, 2>(cl::sycl::range<2>(max_grid_width, max_grid_height));
298304

299-
auto out_buffer_ac = out_buffer.template get_access<cl::sycl::access::mode::read>();
300-
auto orig_buffer_ac = orig_buffer.template get_access<cl::sycl::access::mode::discard_write>();
305+
if (grid_width == max_grid_width && grid_height == max_grid_height)
306+
{
307+
queue.submit([&](cl::sycl::handler &cgh) {
308+
auto out_buffer_ac = out_buffer.template get_access<cl::sycl::access::mode::read>(cgh);
309+
auto orig_buffer_ac = orig_buffer.template get_access<cl::sycl::access::mode::discard_write>(cgh);
301310

302-
for (uindex_t c = 0; c < grid_width; c++)
311+
cgh.copy(out_buffer_ac, orig_buffer_ac);
312+
})
313+
.wait();
314+
}
315+
else
303316
{
304-
for (uindex_t r = 0; r < grid_height; r++)
317+
auto out_buffer_ac = out_buffer.template get_access<cl::sycl::access::mode::read>();
318+
auto orig_buffer_ac = orig_buffer.template get_access<cl::sycl::access::mode::discard_write>();
319+
320+
for (uindex_t c = 0; c < grid_width; c++)
305321
{
306-
orig_buffer_ac[c][r] = out_buffer_ac[c][r];
322+
for (uindex_t r = 0; r < grid_height; r++)
323+
{
324+
orig_buffer_ac[c][r] = out_buffer_ac[c][r];
325+
}
307326
}
308327
}
309328
}

0 commit comments

Comments
 (0)