Skip to content

Commit 5626bb7

Browse files
author
jgtong
authored
Merge pull request #12 from oneapi-src/reverse_time_migration/code_update
[reverse_time_migration] Code Updates
2 parents b45af7b + b6b8993 commit 5626bb7

File tree

7 files changed

+139
-87
lines changed

7 files changed

+139
-87
lines changed

reverse_time_migration/CMakeLists.txt

Lines changed: 88 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -29,29 +29,59 @@ option(USE_DPC "Use DPC++ technology" OFF)
2929
option(USE_CUDA "Use CUDA" OFF)
3030
option(USE_HIP "Use AMD" OFF)
3131

32-
option(USE_INTEL_CPU "Build AOT for Intel CPU" OFF)
3332
option(GPU_AOT "Build AOT for Intel GPU" OFF)
3433
option(USE_NVIDIA_BACKEND "Use DPC++ technology with NVIDIA backend" OFF)
35-
option(USE_AMDHIP_BACKEND "Use DPC++ technology with AMD HIP backend" OFF)
34+
option(USE_AMD_BACKEND "Use DPC++ technology with AMD HIP backend" OFF)
3635
option(ENABLE_GPU_TIMINGS "Show GPU timings at end of execution" OFF)
3736

38-
set(USE_DEFAULT_FLAGS ON)
39-
set(INTEL_GPU_CXX_FLAGS "-O3 -fsycl " )
40-
set(NVIDIA_GPU_CXX_FLAGS "-O3 -fsycl " ) # For DPC++ NVIDIA_BACKEND
41-
set(AMD_GPU_CXX_FLAGS "-O3 -fsycl " )
42-
set(NVCC_GPU_CXX_FLAGS "-O3 ") # For NVCC
43-
set(ROCM_CXX_FLAGS "-O3 ") # For AMDHIP
37+
### SYCL RELATED FLAGS START HERE
38+
set(DEF_INTEL_GENERAL_CXX_FLAGS " -O2 -fsycl -ffast-math ")
39+
set(DEF_NVIDIA_GENERAL_CXX_FLAGS " -O3 -fsycl -ffast-math ")
40+
set(DEF_AMD_GENERAL_CXX_FLAGS " -O3 -fsycl -ffast-math ")
41+
42+
set(DEF_INTEL_WL_CXX_FLAGS " ")
43+
set(DEF_NVIDIA_WL_CXX_FLAGS " ")
44+
set(DEF_AMD_WL_CXX_FLAGS " ")
45+
46+
set(DEF_INTEL_CXX_FLAGS "${DEF_INTEL_GENERAL_CXX_FLAGS} ${DEF_INTEL_WL_CXX_FLAGS}")
47+
set(DEF_NVIDIA_CXX_FLAGS "${DEF_NVIDIA_GENERAL_CXX_FLAGS} ${DEFT_NVIDIA_WL_CXX_FLAGS}")
48+
set(DEF_AMD_CXX_FLAGS "${DEF_AMD_GENERAL_CXX_FLAGS} ${DEF_AMD_WL_CXX_FLAGS}")
49+
50+
if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
51+
message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
52+
elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
53+
message(STATUS "Using DEFAULT compilation flags")
54+
set(INTEL_GPU_CXX_FLAGS "${DEF_INTEL_GENERAL_CXX_FLAGS} ${DEF_INTEL_WL_CXX_FLAGS}")
55+
set(NVIDIA_GPU_CXX_FLAGS "${DEF_NVIDIA_GENERAL_CXX_FLAGS} ${DEF_NVIDIA_WL_CXX_FLAGS}")
56+
set(AMD_GPU_CXX_FLAGS "${DEF_AMD_GENERAL_CXX_FLAGS} ${DEF_AMD_WL_CXX_FLAGS}")
57+
elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
58+
message(STATUS "OVERRIDING GENERAL compilation flags")
59+
set(INTEL_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_INTEL_WL_CXX_FLAGS}")
60+
set(NVIDIA_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_NVIDIA_WL_CXX_FLAGS}")
61+
set(AMD_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_AMD_WL_CXX_FLAGS}")
62+
elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
63+
message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags")
64+
set(INTEL_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
65+
set(NVIDIA_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
66+
set(AMD_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
67+
endif()
68+
#END OF SYCL RELATED FLAGS
4469

4570

46-
message(STATUS "Use DPC++ ${USE_DPC} and Use CUDA ${USE_CUDA} and Use HIP ${USE_HIP}")
71+
### NVCC RELATED FLAGS START HERE
72+
set(NVCC_WL_CXX_FLAGS "")
73+
set(NVCC_DEF_CXX_FLAGS "-O3 ") # For NVCC
74+
set(NVCC_DEF_COMBINED_FLAGS "${NVCC_WL_CXX_FLAGS} ${NVCC_DEF_CXX_FLAGS}")
75+
### END OF NVCC RELATED FLAGS
4776

48-
if("${CMAKE_CXX_FLAGS}" STREQUAL "")
49-
message(STATUS "Using DEFAULT compilation flags for the application")
50-
string(APPEND CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}") # Default flags for NV backend
51-
else()
52-
message(STATUS "OVERRIDING compilation flags")
53-
set(USE_DEFAULT_FLAGS OFF)
54-
endif()
77+
78+
### ROCM RELATED FLAGS START HERE
79+
set(ROCM_WL_CXX_FLAGS "")
80+
set(ROCM_DEF_CXX_FLAGS "-O3 -D__HIP_PLATFORM_AMD__ ")
81+
set(ROCM_DEF_COMBINED_FLAGS "${ROCM_WL_CXX_FLAGS} ${ROCM_DEF_CXX_FLAGS}")
82+
### END OF ROCM RELATED FLAGS
83+
84+
message(STATUS "Use DPC++ ${USE_DPC} and Use CUDA ${USE_CUDA} and Use HIP ${USE_HIP}")
5585

5686
if(ENABLE_GPU_TIMINGS)
5787
message(STATUS "GPU Timings will be displayed")
@@ -63,40 +93,64 @@ if (NOT USE_DPC AND NOT USE_CUDA AND NOT USE_HIP)
6393
elseif (USE_DPC AND USE_CUDA AND USE_HIP OR USE_DPC AND USE_CUDA OR USE_CUDA AND USE_HIP)
6494
message(FATAL_ERROR "Please specify only one technology using the config.sh script")
6595
elseif (USE_DPC)
96+
message(STATUS "Compiling for DPC++")
6697
if(GPU_AOT)
98+
set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}") # Default flags for Intel backend
6799
if( (${GPU_AOT} STREQUAL "pvc") OR (${GPU_AOT} STREQUAL "PVC") )
68100
message(STATUS "Enabling Intel GPU AOT compilation for ${GPU_AOT}")
69101
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=spir64_gen -Xs \"-device 0x0bd5 -revision_id 0x2f\" ")
70102
else()
71103
message(STATUS "Using custom AOT compilation flag ${GPU_AOT}")
72104
string(APPEND CMAKE_CXX_FLAGS " ${GPU_AOT} ") # User should be aware of advanced AOT compilation flags
73105
endif()
74-
elseif(USE_INTEL_CPU)
75-
message(STATUS "Compiling for Intel CPU")
76-
string(APPEND CMAKE_CXX_FLAGS " -ffast-math -mprefer-vector-width=512 -mfma -fsycl-targets=spir64_x86_64--linux \"-device avx512\" ")
77106
elseif(USE_NVIDIA_BACKEND)
78107
message(STATUS "Enabling NVIDIA backend")
79-
if(USE_DEFAULT_FLAGS)
80-
set(CMAKE_CXX_FLAGS "${NVIDIA_GPU_CXX_FLAGS}") # Default flags for NV backend
81-
endif()
82-
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda ") # -O3 will be used, even though -O2 was set earlier
108+
set(CMAKE_CXX_FLAGS "${NVIDIA_GPU_CXX_FLAGS}")
109+
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda ")
83110
if(USE_SM)
84111
message(STATUS "Building for SM_${USE_SM} architecture")
85112
string(APPEND CMAKE_CXX_FLAGS " -Xsycl-target-backend --cuda-gpu-arch=sm_${USE_SM}")
86113
endif()
87-
elseif(USE_AMDHIP_BACKEND)
88-
message(STATUS "Enabling AMD HIP backend for ${USE_HIP_BACKEND} AMD architecture")
89-
if(USE_DEFAULT_FLAGS)
90-
set(CMAKE_CXX_FLAGS "${AMD_GPU_CXX_FLAGS}")
114+
elseif(USE_AMD_BACKEND)
115+
if ("${USE_AMD_ARCH}" STREQUAL "")
116+
message(FATAL_ERROR "Must specify AMD arch, e.g., -DUSE_AMD_ARCH=gfx90a")
91117
endif()
92-
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${USE_AMDHIP_BACKEND} ")
93-
add_compile_options(-DUSE_AMDHIP_BACKEND)
118+
message(STATUS "Enabling AMD backend for ${USE_AMD_ARCH} AMD architecture")
119+
set(CMAKE_CXX_FLAGS "${AMD_GPU_CXX_FLAGS}")
120+
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${USE_AMD_ARCH} ")
121+
add_compile_options(-DUSE_AMD_BACKEND)
122+
else()
123+
message(STATUS "Enabling INTEL backend")
124+
set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}") # Default flags for Intel backend
94125
endif()
95126
elseif (USE_CUDA)
96-
set(CMAKE_CXX_FLAGS "${NVCC_GPU_CXX_FLAGS}")
127+
message(STATUS "Compiling for NVCC")
128+
if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
129+
message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
130+
elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
131+
message(STATUS "Using DEFAULT compilation flags")
132+
set(CMAKE_CXX_FLAGS "${NVCC_DEF_COMBINED_FLAGS}")
133+
elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
134+
message(STATUS "OVERRIDING GENERAL compilation flags")
135+
set(CMAKE_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}")
136+
string(APPEND CMAKE_CXX_FLAGS ${NVCC_WL_CXX_FLAGS})
137+
elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
138+
message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags")
139+
endif()
97140
add_compile_options(-DENABLE_CUDA_LOGGING)
98141
elseif (USE_HIP)
99-
set(CMAKE_CXX_FLAGS "${ROCM_CXX_FLAGS}")
142+
if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
143+
message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
144+
elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
145+
message(STATUS "Using DEFAULT compilation flags")
146+
set(CMAKE_CXX_FLAGS "${ROCM_DEF_COMBINED_FLAGS}")
147+
elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
148+
message(STATUS "OVERRIDING GENERAL compilation flags")
149+
set(CMAKE_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}")
150+
string(APPEND CMAKE_CXX_FLAGS ${ROCM_WL_CXX_FLAGS})
151+
elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
152+
message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags")
153+
endif()
100154
add_compile_options(-DENABLE_HIP_LOGGING)
101155
endif ()
102156

@@ -224,8 +278,6 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libs/SeismicOperations)
224278
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libs/Thoth)
225279
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
226280

227-
message(STATUS "Compilation flags set to: ${CMAKE_CXX_FLAGS}")
228-
#message(STATUS "Flags : ${CMAKE_CXX_FLAGS}")
229281
##############################################################################
230282
# EXECUTABLES
231283
##############################################################################
@@ -264,3 +316,6 @@ if (BUILD_TESTS)
264316
COMMAND seismic-toolbox-tests
265317
)
266318
endif ()
319+
320+
321+
message(STATUS "Compilation flags set to: ${CMAKE_CXX_FLAGS}")

reverse_time_migration/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@ make -j ### Engine binary is only needed
3636
Note: Be sure that `-DUSE_DPC=ON`, `-DUSE_NVIDIA_BACKEND=YES`, `-DUSE_OpenCV=ON` are set. Other flags should be `OFF`
3737
To compile for 8.0 or 9.0 compute capability, please use `-DUSE_SM=80` or `-DUSE_SM=90` respectively
3838

39-
## To build on AMDHIP-BACKEND:
39+
## To build on AMD-BACKEND:
4040

4141
```
42-
CC=/path/to/intel/llvm/clang CXX=/path/to/intel/llvm/clang++ cmake -DCMAKE_BUILD_TYPE=NOMODE -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DUSE_DPC=ON -DUSE_NVIDIA_BACKEND=OFF -DUSE_AMDHIP_BACKEND=SPECIFY_AMD_GPU_ARCHITECTURE_HERE -DGPU_AOT= -DUSE_CUDA=OFF -DUSE_SM= -DUSE_OpenCV=ON -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DDATA_PATH=data -DWRITE_PATH=results -DUSE_INTEL= -DCOMPRESSION=NO -DCOMPRESSION_PATH=. -DUSE_MPI=OFF -H. -B./bin
42+
CC=/path/to/intel/llvm/clang CXX=/path/to/intel/llvm/clang++ cmake -DCMAKE_BUILD_TYPE=NOMODE -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DUSE_DPC=ON -DUSE_NVIDIA_BACKEND=OFF -DUSE_AMD_BACKEND=SPECIFY_AMD_GPU_ARCHITECTURE_HERE -DGPU_AOT= -DUSE_CUDA=OFF -DUSE_SM= -DUSE_OpenCV=ON -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DDATA_PATH=data -DWRITE_PATH=results -DUSE_INTEL= -DCOMPRESSION=NO -DCOMPRESSION_PATH=. -DUSE_MPI=OFF -H. -B./bin
4343
cd bin
4444
make Engine -j ### Engine binary is only needed
4545
```
46-
Note: Be sure that `-DUSE_DPC=ON`, `-DUSE_AMDHIP_BACKEND=[SPECIFY AMD GPU ARCHITECTURE HERE]`, `-DUSE_OpenCV=ON` are set. The AMD gpu architecture we tested are `gfx900` (Vega-FE) and `gfx908` (MI100)
46+
Note: Be sure that `-DUSE_DPC=ON`, `-DUSE_AMD_BACKEND=[SPECIFY AMD GPU ARCHITECTURE HERE]`, `-DUSE_OpenCV=ON` are set. The AMD gpu architecture we tested are `gfx900` (Vega-FE) and `gfx908` (MI100)
4747

4848
## Build for NVCC Compiler:
4949

reverse_time_migration/libs/SeismicOperations/src/components/concrete/oneapi/boundary-managers/extensions/ZeroExtension.cpp

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -65,46 +65,44 @@ void ZeroExtension::VelocityExtensionHelper(float *property_array,
6565
});
6666
});
6767
}
68+
6869
/*!putting zero values for velocities at the boundaries for X and with all Y
6970
* and Z */
71+
7072
OneAPIBackend::GetInstance()->GetDeviceQueue()->submit([&](handler &cgh) {
71-
auto global_range =
72-
range<3>(boundary_length, end_y - start_y, end_z - start_z);
73-
auto local_range = range<3>(1, 1, 1);
74-
auto global_nd_range = nd_range<3>(global_range, local_range);
73+
auto global_range = range<2>(boundary_length, end_z - start_z);
74+
auto local_range = range<2>(1, 1);
75+
auto global_nd_range = nd_range<2>(global_range, local_range);
7576

7677
cgh.parallel_for<class Zero_velocity_extension_X>(
77-
global_nd_range, [=](nd_item<3> it) {
78+
global_nd_range, [=](nd_item<2> it) {
7879
int column = it.get_global_id(0);
79-
int depth = it.get_global_id(1) + start_y;
8080
int row = it.get_global_id(2) + start_z;
8181

8282
/*!for values from y = HALF_LENGTH TO y = HALF_LENGTH +BOUND_LENGTH*/
83-
property_array[depth * nz_nx + row * nx + column + start_x] = 0;
83+
property_array[row * nx + column + start_x] = 0;
8484
/*!for values from y = ny-HALF_LENGTH TO y =
8585
* ny-HALF_LENGTH-BOUND_LENGTH*/
86-
property_array[depth * nz_nx + row * nx + (end_x - 1 - column)] = 0;
86+
property_array[row * nx + (end_x - 1 - column)] = 0;
8787
});
8888
});
8989
/*!putting zero values for velocities at the boundaries for z and with all x
9090
* and y */
9191
OneAPIBackend::GetInstance()->GetDeviceQueue()->submit([&](handler &cgh) {
92-
auto global_range =
93-
range<3>(end_x - start_x, end_y - start_y, boundary_length);
94-
auto local_range = range<3>(1, 1, 1);
95-
auto global_nd_range = nd_range<3>(global_range, local_range);
92+
auto global_range = range<2>(end_x - start_x, boundary_length);
93+
auto local_range = range<2>(1, 1);
94+
auto global_nd_range = nd_range<2>(global_range, local_range);
9695

9796
cgh.parallel_for<class Zero_velocity_extension_Z>(
98-
global_nd_range, [=](nd_item<3> it) {
97+
global_nd_range, [=](nd_item<2> it) {
9998
int column = it.get_global_id(0) + start_x;
100-
int depth = it.get_global_id(1) + start_y;
10199
int row = it.get_global_id(2);
102100

103101
/*!for values from y = HALF_LENGTH TO y = HALF_LENGTH +BOUND_LENGTH*/
104-
property_array[depth * nz_nx + (start_z + row) * nx + column] = 0;
102+
property_array[(start_z + row) * nx + column] = 0;
105103
/*!for values from y = ny-HALF_LENGTH TO y =
106104
* ny-HALF_LENGTH-BOUND_LENGTH*/
107-
property_array[depth * nz_nx + (end_z - 1 - row) * nx + column] = 0;
105+
property_array[(end_z - 1 - row) * nx + column] = 0;
108106
});
109107
});
110108
}

reverse_time_migration/libs/SeismicOperations/src/components/concrete/oneapi/migration-accommodators/CrossCorrelationKernel.cpp

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -63,27 +63,26 @@ void CrossCorrelationKernel::Correlation(GridBox *apGridBox) {
6363
if (_IS_2D) {
6464
y_offset = 0;
6565
}
66-
66+
6767
float *source = apGridBox->Get(WAVE | GB_PRSS | CURR | DIR_Z)->GetNativePointer();
6868
float *receiver = mpGridBox->Get(WAVE | GB_PRSS | CURR | DIR_Z)->GetNativePointer();
6969

7070
OneAPIBackend::GetInstance()->GetDeviceQueue()->submit([&](handler &cgh) {
7171

72-
auto global_range = range<3>(compute_ny, compute_nz, compute_nx);
73-
auto local_range = range<3>(block_y, block_z, block_x);
74-
auto starting_offset = id<3>(y_offset, half_length, half_length);
75-
auto global_nd_range = nd_range<3>(global_range,
72+
auto global_range = range<2>(compute_nz, compute_nx);
73+
auto local_range = range<2>(block_z, block_x);
74+
auto starting_offset = id<2>(half_length, half_length);
75+
auto global_nd_range = nd_range<2>(global_range,
7676
local_range);
7777
///starting_offset);
7878

7979
float *output_buffer = mpShotCorrelation->GetNativePointer();
8080
float *src_buffer = mpSourceIllumination->GetNativePointer();
8181
float *dest_buffer = mpReceiverIllumination->GetNativePointer();
82-
cgh.parallel_for(global_nd_range, [=](nd_item<3> it) {
82+
cgh.parallel_for(global_nd_range, [=](nd_item<2> it) {
8383

84-
int idx = (it.get_global_id(0) * wnz * wnx) + (starting_offset[0] * wnx * wnz) +
85-
(it.get_global_id(1) * wnx ) + (starting_offset[1] * wnx) +
86-
it.get_global_id(2) + starting_offset[2];
84+
int idx = (it.get_global_id(0) * wnx ) + (starting_offset[0] * wnx) +
85+
it.get_global_id(1) + starting_offset[1];
8786

8887
output_buffer[idx] += source[idx] * receiver[idx];
8988

@@ -111,7 +110,9 @@ void CrossCorrelationKernel::Stack() {
111110

112111
size_t sizeTotal = nx * nz * ny;
113112
OneAPIBackend::GetInstance()->GetDeviceQueue()->submit([&](handler &cgh) {
114-
auto global_range = range<3>(orig_x, orig_z, orig_y);
113+
auto global_range = range<2>(orig_z, orig_x);
114+
auto local_range = sycl::range<2>(1, 1);
115+
auto global_nd_range = sycl::nd_range<2>(global_range, local_range);
115116
int wsx = mpGridBox->GetWindowStart(X_AXIS);
116117
int wsz = mpGridBox->GetWindowStart(Z_AXIS);
117118
int wsy = mpGridBox->GetWindowStart(Y_AXIS);
@@ -123,16 +124,16 @@ void CrossCorrelationKernel::Stack() {
123124
float *cor_rcv = mpReceiverIllumination->GetNativePointer();
124125
if (mCompensationType == NO_COMPENSATION) {
125126
cgh.parallel_for(
126-
global_range, [=](id<3> idx) {
127-
uint offset_window = idx[0] + idx[1] * wnx + idx[2] * wnx * wnz;
128-
uint offset = idx[0] + idx[1] * nx + idx[2] * nx * nz;
127+
global_nd_range, [=](sycl::nd_item<2> it) {
128+
uint offset_window = it.get_global_id(0) * wnx + it.get_global_id(1);
129+
uint offset = it.get_global_id(0) * nx + it.get_global_id(1);
129130
stack_buf[offset] += cor_buf[offset_window];
130131
});
131132
} else {
132133
cgh.parallel_for(
133-
global_range, [=](id<3> idx) {
134-
uint offset_window = idx[0] + idx[1] * wnx + idx[2] * wnx * wnz;
135-
uint offset = idx[0] + idx[1] * nx + idx[2] * nx * nz;
134+
global_nd_range, [=](sycl::id<2> idx) {
135+
uint offset_window = idx[0] + idx[1] * wnx;
136+
uint offset = idx[0] + idx[1] * nx;
136137
stack_buf[offset] += cor_buf[offset_window];
137138
stack_src[offset] += cor_src[offset_window];
138139
stack_rcv[offset] += cor_rcv[offset_window];

0 commit comments

Comments
 (0)