Skip to content

Commit f0d3ab6

Browse files
authored
Merge pull request #36 from oneapi-src/sf_outofbound
fix index out of bound in sobel filter
2 parents 8f6380c + 3c685f0 commit f0d3ab6

File tree

3 files changed

+29 / -8 lines changed

3 files changed

+29 / -8 lines changed

sobel_filter/CUDA/src/sobelfilter.cu

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,8 +94,11 @@ void computeGradient(
9494

9595
float gradient_x, gradient_y;
9696

97-
int index = blockDim.x * blockIdx.x + threadIdx.x + cols + 1;
97+
int row = blockIdx.y * blockDim.y + threadIdx.y;
98+
int col = blockIdx.x * blockDim.x + threadIdx.x;
99+
if (row >= rows - 2 || col >= cols - 2) return;
98100

101+
int index = row * cols + col + cols + 1;
99102
int index_row_above = index - cols;
100103
int index_row_below = index + cols;
101104

@@ -228,7 +231,11 @@ int main(int argc, const char* argv[])
228231
#endif
229232

230233
//Step 3 Gradient strength and direction
231-
computeGradient<<<(rows * cols) / BLOCK_SIZE, BLOCK_SIZE>>>(d_input, d_gradient, rows, cols);
234+
constexpr int blockDim_x = 64;
235+
constexpr int blockDim_y = 2;
236+
dim3 block(blockDim_x, blockDim_y);
237+
dim3 grid((cols - 2 + blockDim_x - 1) / blockDim_x, (rows - 2 + blockDim_y - 1) / blockDim_y);
238+
computeGradient<<<grid, block>>>(d_input, d_gradient, rows, cols);
232239
CUDA_CHECK( cudaDeviceSynchronize() );
233240

234241
#ifdef DEBUG_TIME

sobel_filter/HIP/src/sobelfilter.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,8 +94,11 @@ void computeGradient(
9494

9595
float gradient_x, gradient_y;
9696

97-
int index = blockDim.x * blockIdx.x + threadIdx.x + cols + 1;
97+
int row = blockIdx.y * blockDim.y + threadIdx.y;
98+
int col = blockIdx.x * blockDim.x + threadIdx.x;
99+
if (row >= rows - 2 || col >= cols - 2) return;
98100

101+
int index = row * cols + col + cols + 1;
99102
int index_row_above = index - cols;
100103
int index_row_below = index + cols;
101104

@@ -228,7 +231,11 @@ int main(int argc, const char* argv[])
228231
#endif
229232

230233
//Step 3 Gradient strength and direction
231-
hipLaunchKernelGGL(computeGradient, (rows * cols) / BLOCK_SIZE, BLOCK_SIZE, 0, 0, d_input, d_gradient, rows, cols);
234+
constexpr int blockDim_x = 64;
235+
constexpr int blockDim_y = 2;
236+
dim3 block(blockDim_x, blockDim_y);
237+
dim3 grid((cols - 2 + blockDim_x - 1) / blockDim_x, (rows - 2 + blockDim_y - 1) / blockDim_y);
238+
hipLaunchKernelGGL(computeGradient, grid, block, 0, 0, d_input, d_gradient, rows, cols);
232239
HIP_CHECK( hipDeviceSynchronize() );
233240

234241
#ifdef DEBUG_TIME

sobel_filter/SYCL/src/sobelfilter.cpp

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -81,15 +81,18 @@ void computeGradient(
8181
unsigned char* output,
8282
int rows,
8383
int cols,
84-
sycl::nd_item<1> item)
84+
sycl::nd_item<3> item)
8585
{
8686
float gradientx[3][3] = {{-1.f, 0.f, 1.f}, {-2.f, 0.f, 2.f}, {-1.f, 0.f, 1.f}};
8787
float gradienty[3][3] = {{-1.f, -2.f, -1.f}, { 0.f, 0.f, 0.f}, { 1.f, 2.f, 1.f}};
8888

8989
float gradient_x, gradient_y;
9090

91-
int index = item.get_global_id(0) + cols + 1;
91+
int row = item.get_group(1) * item.get_local_range(1) + item.get_local_id(1);
92+
int col = item.get_group(2) * item.get_local_range(2) + item.get_local_id(2);
93+
if (row >= rows - 2 || col >= cols - 2) return;
9294

95+
int index = row * cols + col + cols + 1;
9396
int index_row_above = index - cols;
9497
int index_row_below = index + cols;
9598

@@ -224,9 +227,13 @@ int main(int argc, const char* argv[])
224227
#endif
225228

226229
//Step 3 Gradient strength and direction
230+
constexpr int blockDim_x = 64;
231+
constexpr int blockDim_y = 2;
232+
sycl::range block(1, blockDim_y, blockDim_x);
233+
sycl::range grid(1, (rows - 2 + blockDim_y - 1) / blockDim_y, (cols - 2 + blockDim_x - 1) / blockDim_x);
227234
qsf.parallel_for(
228-
sycl::nd_range<1>(((rows * cols) / BLOCK_SIZE) * BLOCK_SIZE, BLOCK_SIZE),
229-
[=](sycl::nd_item<1> item) {
235+
sycl::nd_range<3>(grid * block, block),
236+
[=](sycl::nd_item<3> item) {
230237
computeGradient(d_input, d_gradient, rows, cols, item);
231238
}
232239
);

0 commit comments

Comments
 (0)