Skip to content

Commit 86d148f

Browse files
author
Y
committed
Fix SYCL failure on AMD Backend
1 parent a64a6b8 commit 86d148f

File tree

2 files changed

+36
-39
lines changed

2 files changed

+36
-39
lines changed

cudaSift/HIP/CMakeLists.txt

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ set(SOURCES
7272
include_directories(
7373
${CMAKE_SOURCE_DIR}/../common/
7474
${CMAKE_CURRENT_SOURCE_DIR}
75+
${HIP_INCLUDE_DIRS}
7576
)
7677

7778
# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH general and WL specific)
@@ -97,12 +98,5 @@ if(DEVICE_TIMER)
9798
add_compile_options(-DDEVICE_TIMER)
9899
endif()
99100

100-
set(HIP_SEPARABLE_COMPILATION ON)
101-
set(MY_TARGET_NAME ${PROJECT_NAME})
102-
set(MY_HIPCC_OPTIONS)
103-
set(MY_NVCC_OPTIONS)
104-
set(CMAKE_HIP_ARCHITECTURES OFF)
105-
106-
set_source_files_properties(${cuda_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
107-
hip_add_executable(${MY_TARGET_NAME} ${SOURCES} ${MY_HIPCC_OPTIONS} NVCC_OPTIONS ${MY_NVCC_OPTIONS})
101+
add_executable(cudasift ${SOURCES})
108102
target_link_libraries(cudasift stdc++ stdc++fs ${OpenCV_LIBS})

cudaSift/SYCL/cudaSiftD.dp.cpp

Lines changed: 34 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -332,17 +332,17 @@ void ExtractSiftDescriptorsCONSTNew(
332332
if (y >= 2)
333333
{ // Upper left
334334
float grad2 = iverf * grad1;
335-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
335+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
336336
buffer + p1, iangf * grad2);
337-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
337+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
338338
buffer + p2, angf * grad2);
339339
}
340340
if (y <= 13)
341341
{ // Lower left
342342
float grad2 = verf * grad1;
343-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
343+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
344344
buffer + p1 + 32, iangf * grad2);
345-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
345+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
346346
buffer + p2 + 32, angf * grad2);
347347
}
348348
}
@@ -352,17 +352,17 @@ void ExtractSiftDescriptorsCONSTNew(
352352
if (y >= 2)
353353
{ // Upper right
354354
float grad2 = iverf * grad1;
355-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
355+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
356356
buffer + p1 + 8, iangf * grad2);
357-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
357+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
358358
buffer + p2 + 8, angf * grad2);
359359
}
360360
if (y <= 13)
361361
{ // Lower right
362362
float grad2 = verf * grad1;
363-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
363+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
364364
buffer + p1 + 40, iangf * grad2);
365-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
365+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
366366
buffer + p2 + 40, angf * grad2);
367367
}
368368
}
@@ -452,17 +452,17 @@ void ExtractSiftDescriptor(rawImg_data texObj,
452452
if (y >= 2)
453453
{ // Upper left
454454
float grad2 = iverf * grad1;
455-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
455+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
456456
buffer + p1, iangf * grad2);
457-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
457+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
458458
buffer + p2, angf * grad2);
459459
}
460460
if (y <= 13)
461461
{ // Lower left
462462
float grad2 = verf * grad1;
463-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
463+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
464464
buffer + p1 + 32, iangf * grad2);
465-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
465+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
466466
buffer + p2 + 32, angf * grad2);
467467
}
468468
}
@@ -472,17 +472,17 @@ void ExtractSiftDescriptor(rawImg_data texObj,
472472
if (y >= 2)
473473
{ // Upper right
474474
float grad2 = iverf * grad1;
475-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
475+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
476476
buffer + p1 + 8, iangf * grad2);
477-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
477+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
478478
buffer + p2 + 8, angf * grad2);
479479
}
480480
if (y <= 13)
481481
{ // Lower right
482482
float grad2 = verf * grad1;
483-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
483+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
484484
buffer + p1 + 40, iangf * grad2);
485-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
485+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
486486
buffer + p2 + 40, angf * grad2);
487487
}
488488
}
@@ -621,7 +621,7 @@ void ComputeOrientationsCONSTNew(float *image, int w, int p, int h, SiftPoint *d
621621
(int)((LEN / 2) * sycl::atan2(dy, dx) / 3.1416f + (LEN / 2) + 0.5f) %
622622
LEN;
623623
float grad = sycl::sqrt(dx * dx + dy * dy);
624-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
624+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
625625
&hist[LEN + bin], grad * gaussx[x] * gaussy[y]);
626626
}
627627
item_ct1.barrier(sycl::access::fence_space::local_space);
@@ -673,11 +673,12 @@ void ComputeOrientationsCONSTNew(float *image, int w, int p, int h, SiftPoint *d
673673
float val1 = hist[LEN + ((i2 + 1) % LEN)];
674674
float val2 = hist[LEN + ((i2 + LEN - 1) % LEN)];
675675
float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
676-
// unsigned int idx = infra::atomic_fetch_compare_inc(
677-
// &d_PointCounter[2 * octave + 1], (unsigned int)0x7fffffff);
678-
unsigned int idx =
679-
infra::atomic_fetch_add<unsigned int, sycl::access::address_space::generic_space>(
680-
&d_PointCounter[2 * octave + 1], 2) / 2;
676+
// unsigned int idx = infra::atomic_fetch_compare_inc(
677+
// &d_PointCounter[2 * octave + 1], (unsigned int)0x7fffffff);
678+
unsigned int idx =
679+
infra::atomic_fetch_add<unsigned int, sycl::access::address_space::generic_space>(
680+
&d_PointCounter[2 * octave + 1], 2) /
681+
2;
681682
if (idx < d_MaxNumPoints)
682683
{
683684
d_Sift[idx].xpos = d_Sift[bx].xpos;
@@ -734,7 +735,7 @@ void ComputeOrientationsCONST(rawImg_data texObj,
734735
if (bin > 31)
735736
bin = 0;
736737
float grad = sycl::sqrt(dx * dx + dy * dy);
737-
infra::atomic_fetch_add<sycl::access::address_space::local_space>(
738+
infra::atomic_fetch_add<float, sycl::access::address_space::local_space>(
738739
&hist[bin], grad * gauss[xd] * gauss[yd]);
739740
}
740741

@@ -789,11 +790,12 @@ void ComputeOrientationsCONST(rawImg_data texObj,
789790
float val1 = hist[32 + ((i2 + 1) & 31)];
790791
float val2 = hist[32 + ((i2 + 31) & 31)];
791792
float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
792-
// unsigned int idx = infra::atomic_fetch_compare_inc(
793-
// &d_PointCounter[2 * octave + 1], (unsigned int)0x7fffffff);
794-
unsigned int idx =
793+
// unsigned int idx = infra::atomic_fetch_compare_inc(
794+
// &d_PointCounter[2 * octave + 1], (unsigned int)0x7fffffff);
795+
unsigned int idx =
795796
infra::atomic_fetch_add<unsigned int, sycl::access::address_space::generic_space>(
796-
&d_PointCounter[2 * octave + 1], 2) / 2;
797+
&d_PointCounter[2 * octave + 1], 2) /
798+
2;
797799

798800
if (idx < d_MaxNumPoints)
799801
{
@@ -979,11 +981,12 @@ void FindPointsMultiNew(float *d_Data0, SiftPoint *d_Sift, int width, int pitch,
979981
sycl::atomic<unsigned int>(
980982
sycl::global_ptr<unsigned int>(&d_PointCounter[2 * octave + 0]))
981983
.fetch_max(d_PointCounter[2 * octave - 1]);
982-
// unsigned int idx = infra::atomic_fetch_compare_inc(
983-
// &d_PointCounter[2 * octave + 0], (unsigned int)0x7fffffff);
984-
unsigned int idx =
984+
// unsigned int idx = infra::atomic_fetch_compare_inc(
985+
// &d_PointCounter[2 * octave + 0], (unsigned int)0x7fffffff);
986+
unsigned int idx =
985987
infra::atomic_fetch_add<unsigned int, sycl::access::address_space::generic_space>(
986-
&d_PointCounter[2 * octave + 0], 2) / 2;
988+
&d_PointCounter[2 * octave + 0], 2) /
989+
2;
987990

988991
idx = (idx >= maxPts ? maxPts - 1 : idx);
989992
d_Sift[idx].xpos = xpos + pdx;

0 commit comments

Comments
 (0)