Skip to content

Commit 75f6820

Browse files
authored
Merge pull request #22 from oneapi-src/tsne/code_update/2309
[tsne] code update + cmake changes
2 parents c4e15f6 + 3bfe45e commit 75f6820

File tree

10 files changed

+99
-44
lines changed

10 files changed

+99
-44
lines changed

tsne/CUDA/CMakeLists.txt

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,25 @@ set(CMAKE_CXX_EXTENSIONS OFF)
3838

3939
option(USE_SM "Build for specific SM" OFF)
4040

41-
# CMAKE_CXX_FLAGS
42-
if("${CMAKE_CXX_FLAGS}" STREQUAL "")
41+
set(DEF_WL_CXX_FLAGS " ")
42+
set(DEF_GENERAL_CXX_FLAGS " -O3 -ffast-math ")
43+
set(DEF_COMBINED_CXX_FLAGS "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}")
44+
45+
# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH the general and the workload-specific (WL) flags)
46+
# -DOVERRIDE_GENERAL_CXX_FLAGS=" -blah -blah " overrides the general flags only (and not the workload specific flags)
47+
# passing in both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS is not allowed, in order to prevent ambiguity
48+
49+
if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
50+
message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
51+
elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
4352
message(STATUS "Using DEFAULT compilation flags")
44-
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} " -O3 -ffast-math ")
45-
else()
46-
message(STATUS "OVERRIDING DEFAULT compilation flags")
53+
set(CMAKE_CXX_FLAGS "${DEF_COMBINED_CXX_FLAGS}")
54+
elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
55+
message(STATUS "OVERRIDING GENERAL compilation flags")
56+
set(CMAKE_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}")
57+
string(APPEND CMAKE_CXX_FLAGS ${DEF_WL_CXX_FLAGS})
58+
elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
59+
message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags")
4760
endif()
4861

4962
# CUDA_NVCC_FLAGS

tsne/CUDA/src/kernels/nbodyfft.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ void DFT2D1gpu(float* din, thrust::complex<float>* dout, int num_rows, int num_c
300300
thrust::complex<float> sum, twiddle;
301301
angle = -2.0f * PI * fdividef((float)i, (float)num_cols);
302302
sum = 0.0f;
303+
#pragma unroll
303304
for (int k = 0; k < num_cols; ++k) {
304305
// sincosf(angle * k, &sinf, &cosf);
305306
// twiddle = thrust::complex<float>(cosf, sinf);
@@ -324,6 +325,7 @@ void DFT2D2gpu(thrust::complex<float>* din, thrust::complex<float>* dout, int nu
324325
thrust::complex<float> sum, twiddle;
325326
angle = -2.0f * PI * fdividef((float)i, (float)num_cols);
326327
sum = 0.0f;
328+
#pragma unroll
327329
for (int k = 0; k < num_cols; ++k) {
328330
// sincosf(angle * k, &sinf, &cosf);
329331
// twiddle = thrust::complex<float>(cosf, sinf);
@@ -348,6 +350,7 @@ void iDFT2D1gpu(thrust::complex<float>* din, thrust::complex<float>* dout, int n
348350
thrust::complex<float> sum, twiddle;
349351
angle = 2.0f * PI * fdividef((float)i, (float)num_cols);
350352
sum = 0.0f;
353+
#pragma unroll
351354
for (int k = 0; k < num_cols; ++k) {
352355
// sincosf(angle * k, &sinf, &cosf);
353356
// twiddle = thrust::complex<float>(cosf, sinf);
@@ -376,6 +379,7 @@ void iDFT2D2gpu(thrust::complex<float>* din, float* dout, int num_rows, int num_
376379
thrust::complex<float> twiddle;
377380
angle = 2.0f * PI * fdividef((float)i, (float)num_cols);
378381
sum = 0.0f;
382+
#pragma unroll
379383
for (int k = 0; k < num_cols; ++k) {
380384
// sincosf(angle * k, &sinf, &cosf);
381385
// twiddle = thrust::complex<float>(cosf, sinf);

tsne/CUDA/src/kernels/perplexity_search.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,12 @@ void tsnecuda::SearchPerplexity(
153153

154154
// compute entropy of current row
155155
row_sum = tsnecuda::utils::ReduceSum(handle, pij, num_neighbors, num_points, 0);
156-
GpuErrorCheck(cudaDeviceSynchronize());
156+
// GpuErrorCheck(cudaDeviceSynchronize());
157157

158158
// compute negative entropy
159159
thrust::transform(pij.begin(), pij.end(), entropy.begin(), tsnecuda::utils::FunctionalEntropy());
160160
neg_entropy = tsnecuda::utils::ReduceAlpha(handle, entropy, num_neighbors, num_points, -1.0f, 0);
161-
GpuErrorCheck(cudaDeviceSynchronize());
161+
// GpuErrorCheck(cudaDeviceSynchronize());
162162

163163
// binary search for beta
164164
PerplexitySearchKernel<<<NBLOCKS2, BLOCKSIZE2>>>(

tsne/CUDA/src/utils/reduce_utils.cu

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ thrust::device_vector<float> tsnecuda::utils::ReduceAlpha(
5757
thrust::raw_pointer_cast(d_matrix.data()), N,
5858
thrust::raw_pointer_cast(ones.data()), 1, &kBeta,
5959
thrust::raw_pointer_cast(means.data()), 1));
60+
GpuErrorCheck(cudaDeviceSynchronize());
6061
return means;
6162
} else if (axis == 1) {
6263
thrust::device_vector<float> ones(M, 1.f);
@@ -67,6 +68,7 @@ thrust::device_vector<float> tsnecuda::utils::ReduceAlpha(
6768
thrust::raw_pointer_cast(d_matrix.data()), N,
6869
thrust::raw_pointer_cast(ones.data()), 1, &kBeta,
6970
thrust::raw_pointer_cast(means.data()), 1));
71+
GpuErrorCheck(cudaDeviceSynchronize());
7072
return means;
7173
} else {
7274
throw std::runtime_error("Axis must be 0 or 1.");

tsne/HIP/CMakeLists.txt

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,25 @@ set(CMAKE_MODULE_PATH "${ROCM_PATH}/hip/cmake" ${CMAKE_MODULE_PATH})
4747
set(HIP_INCLUDE_DIRS "${ROCM_PATH}/include" ${HIP_INCLUDE_DIRS})
4848
set(HIP_LIBRARIES "${ROCM_PATH}/lib" ${HIP_LIBRARIES})
4949

50-
# CMAKE_CXX_FLAGS
51-
if("${CMAKE_CXX_FLAGS}" STREQUAL "")
52-
message(STATUS "Using DEFAULT compilation flags for the application")
53-
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} " -O3 -std=c++17 -ffast-math -D__HIP_PLATFORM_AMD__ ")
54-
else()
55-
message(STATUS "OVERRIDING compilation flags")
50+
set(DEF_WL_CXX_FLAGS " -D__HIP_PLATFORM_AMD__ ")
51+
set(DEF_GENERAL_CXX_FLAGS " -O3 -std=c++17 -ffast-math ")
52+
set(DEF_COMBINED_CXX_FLAGS "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}")
53+
54+
# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH the general and the workload-specific (WL) flags)
55+
# -DOVERRIDE_GENERAL_CXX_FLAGS=" -blah -blah " overrides the general flags only (and not the workload specific flags)
56+
# passing in both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS is not allowed, in order to prevent ambiguity
57+
58+
if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
59+
message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
60+
elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
61+
message(STATUS "Using DEFAULT compilation flags")
62+
set(CMAKE_CXX_FLAGS "${DEF_COMBINED_CXX_FLAGS}")
63+
elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
64+
message(STATUS "OVERRIDING GENERAL compilation flags")
65+
set(CMAKE_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}")
66+
string(APPEND CMAKE_CXX_FLAGS ${DEF_WL_CXX_FLAGS})
67+
elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
68+
message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags")
5669
endif()
5770

5871
find_package(HIP REQUIRED)

tsne/HIP/src/kernels/nbodyfft.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ void DFT2D1gpu(float* din, thrust::complex<float>* dout, int num_rows, int num_c
309309
thrust::complex<float> sum, twiddle;
310310
angle = -2.0f * PI * fdividef((float)i, (float)num_cols);
311311
sum = 0.0f;
312+
#pragma unroll
312313
for (int k = 0; k < num_cols; ++k) {
313314
// sincosf(angle * k, &sinf, &cosf);
314315
// twiddle = thrust::complex<float>(cosf, sinf);
@@ -333,6 +334,7 @@ void DFT2D2gpu(thrust::complex<float>* din, thrust::complex<float>* dout, int nu
333334
thrust::complex<float> sum, twiddle;
334335
angle = -2.0f * PI * fdividef((float)i, (float)num_cols);
335336
sum = 0.0f;
337+
#pragma unroll
336338
for (int k = 0; k < num_cols; ++k) {
337339
// sincosf(angle * k, &sinf, &cosf);
338340
// twiddle = thrust::complex<float>(cosf, sinf);
@@ -357,6 +359,7 @@ void iDFT2D1gpu(thrust::complex<float>* din, thrust::complex<float>* dout, int n
357359
thrust::complex<float> sum, twiddle;
358360
angle = 2.0f * PI * fdividef((float)i, (float)num_cols);
359361
sum = 0.0f;
362+
#pragma unroll
360363
for (int k = 0; k < num_cols; ++k) {
361364
// sincosf(angle * k, &sinf, &cosf);
362365
// twiddle = thrust::complex<float>(cosf, sinf);
@@ -385,6 +388,7 @@ void iDFT2D2gpu(thrust::complex<float>* din, float* dout, int num_rows, int num_
385388
thrust::complex<float> twiddle;
386389
angle = 2.0f * PI * fdividef((float)i, (float)num_cols);
387390
sum = 0.0f;
391+
#pragma unroll
388392
for (int k = 0; k < num_cols; ++k) {
389393
// sincosf(angle * k, &sinf, &cosf);
390394
// twiddle = thrust::complex<float>(cosf, sinf);

tsne/HIP/src/kernels/perplexity_search.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,12 @@ void tsnecuda::SearchPerplexity(
154154

155155
// compute entropy of current row
156156
row_sum = tsnecuda::utils::ReduceSum(handle, pij, num_neighbors, num_points, 0);
157-
GpuErrorCheck(hipDeviceSynchronize());
157+
// GpuErrorCheck(hipDeviceSynchronize());
158158

159159
// compute negative entropy
160160
thrust::transform(pij.begin(), pij.end(), entropy.begin(), tsnecuda::utils::FunctionalEntropy());
161161
neg_entropy = tsnecuda::utils::ReduceAlpha(handle, entropy, num_neighbors, num_points, -1.0f, 0);
162-
GpuErrorCheck(hipDeviceSynchronize());
162+
// GpuErrorCheck(hipDeviceSynchronize());
163163

164164
// binary search for beta
165165
hipLaunchKernelGGL(PerplexitySearchKernel, NBLOCKS2, BLOCKSIZE2, 0, 0,

tsne/HIP/src/utils/reduce_utils.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ thrust::device_vector<float> tsnecuda::utils::ReduceAlpha(
5757
thrust::raw_pointer_cast(d_matrix.data()), N,
5858
thrust::raw_pointer_cast(ones.data()), 1, &kBeta,
5959
thrust::raw_pointer_cast(means.data()), 1));
60+
GpuErrorCheck(hipDeviceSynchronize());
6061
return means;
6162
} else if (axis == 1) {
6263
thrust::device_vector<float> ones(M, 1.f);
@@ -67,6 +68,7 @@ thrust::device_vector<float> tsnecuda::utils::ReduceAlpha(
6768
thrust::raw_pointer_cast(d_matrix.data()), N,
6869
thrust::raw_pointer_cast(ones.data()), 1, &kBeta,
6970
thrust::raw_pointer_cast(means.data()), 1));
71+
GpuErrorCheck(hipDeviceSynchronize());
7072
return means;
7173
} else {
7274
throw std::runtime_error("Axis must be 0 or 1.");

tsne/README.md

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,12 @@
22

33
tsne implements the [FIt-SNE algorithm](https://github.com/KlugerLab/FIt-SNE) for various GPU architectures (the original CUDA source code is from [here](https://github.com/CannyLab/tsne-cuda)).
44

5-
## Cloning
6-
7-
To clone
8-
9-
```
10-
git clone https://github.com/oneapi-src/Velocity-Bench.git
11-
```
12-
135
## Supported versions
146

157
- CUDA: The original code was obtained from [here](https://github.com/CannyLab/tsne-cuda)
168
- SYCL: The CUDA code was migrated using Intel DPCT, and then the resulting code was modified to remove the dpct headers.
179
- HIP: Created from the CUDA version using the hipify-perl script.
1810

19-
# Current Version:
20-
- Initial release of the workload
2111

2212
# Build Instructions
2313

tsne/SYCL/CMakeLists.txt

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -40,31 +40,48 @@ option(ENABLE_KERNEL_PROFILING "Build using kernel profiling" OFF)
4040
option(GPU_AOT "Build AOT for Intel GPU" OFF)
4141
option(USE_NVIDIA_BACKEND "Build for NVIDIA backend" OFF)
4242
option(USE_AMDHIP_BACKEND "Build for AMD HIP backend" OFF)
43+
option(USE_SM "Build for specific SM" OFF)
4344

4445
if(ENABLE_KERNEL_PROFILING)
4546
message("-- Enabling kernel profiling")
4647
add_compile_options(-DENABLE_KERNEL_PROFILING)
4748
endif()
4849

49-
set(INTEL_GPU_CXX_FLAGS " -O2 -std=c++17 -fsycl -ffast-math -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare -Wno-unknown-pragmas -Wno-unused-local-typedef ")
50-
set(NVIDIA_GPU_CXX_FLAGS " -O3 -std=c++17 -fsycl -ffast-math -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare -Wno-unknown-pragmas -Wno-unused-local-typedef ")
51-
set(AMD_GPU_CXX_FLAGS " -O3 -std=c++17 -fsycl -ffast-math -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare -Wno-unknown-pragmas -Wno-unused-local-typedef ")
50+
set(DEF_INTEL_WL_CXX_FLAGS " ")
51+
set(DEF_NVIDIA_WL_CXX_FLAGS " ")
52+
set(DEF_AMD_WL_CXX_FLAGS " ")
5253

53-
set(USE_DEFAULT_FLAGS ON)
54-
if("${CMAKE_CXX_FLAGS}" STREQUAL "")
54+
set(DEF_INTEL_GENERAL_CXX_FLAGS " -O2 -std=c++17 -fsycl -ffast-math -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare -Wno-unknown-pragmas -Wno-unused-local-typedef ")
55+
set(DEF_NVIDIA_GENERAL_CXX_FLAGS " -O3 -std=c++17 -fsycl -ffast-math -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare -Wno-unknown-pragmas -Wno-unused-local-typedef ")
56+
set(DEF_AMD_GENERAL_CXX_FLAGS " -O3 -std=c++17 -fsycl -ffast-math -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare -Wno-unknown-pragmas -Wno-unused-local-typedef ")
57+
58+
# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH the general and the workload-specific (WL) flags)
59+
# -DOVERRIDE_GENERAL_CXX_FLAGS=" -blah -blah " overrides the general flags only (and not the workload specific flags)
60+
# passing in both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS is not allowed, in order to prevent ambiguity
61+
62+
if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
63+
message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
64+
elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
5565
message(STATUS "Using DEFAULT compilation flags")
56-
else()
57-
message(STATUS "OVERRIDING DEFAULT compilation flags")
58-
set(USE_DEFAULT_FLAGS OFF)
66+
set(INTEL_GPU_CXX_FLAGS "${DEF_INTEL_GENERAL_CXX_FLAGS} ${DEF_INTEL_WL_CXX_FLAGS}")
67+
set(NVIDIA_GPU_CXX_FLAGS "${DEF_NVIDIA_GENERAL_CXX_FLAGS} ${DEF_NVIDIA_WL_CXX_FLAGS}")
68+
set(AMD_GPU_CXX_FLAGS "${DEF_AMD_GENERAL_CXX_FLAGS} ${DEF_AMD_WL_CXX_FLAGS}")
69+
elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
70+
message(STATUS "OVERRIDING GENERAL compilation flags")
71+
set(INTEL_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_INTEL_WL_CXX_FLAGS}")
72+
set(NVIDIA_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_NVIDIA_WL_CXX_FLAGS}")
73+
set(AMD_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_AMD_WL_CXX_FLAGS}")
74+
elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
75+
message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags")
76+
set(INTEL_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
77+
set(NVIDIA_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
78+
set(AMD_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
5979
endif()
6080

61-
# JIT compilation
6281
if(GPU_AOT)
6382
message(STATUS "Enabling INTEL backend")
64-
if(USE_DEFAULT_FLAGS)
65-
set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}") # Default flags for Intel backend
66-
endif()
67-
if( (${GPU_AOT} STREQUAL "pvc") OR (${GPU_AOT} STREQUAL "PVC") )
83+
set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}")
84+
if((${GPU_AOT} STREQUAL "pvc") OR (${GPU_AOT} STREQUAL "PVC"))
6885
message(STATUS "Enabling Intel GPU AOT compilation for ${GPU_AOT}")
6986
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=spir64_gen -Xs \"-device 0x0bd5 -revision_id 0x2f\" ")
7087
else()
@@ -73,18 +90,28 @@ if(GPU_AOT)
7390
endif()
7491
elseif(USE_NVIDIA_BACKEND)
7592
message(STATUS "Enabling NVIDIA backend")
76-
if(USE_DEFAULT_FLAGS)
77-
set(CMAKE_CXX_FLAGS "${NVIDIA_GPU_CXX_FLAGS}") # Default flags for NV backend
93+
set(CMAKE_CXX_FLAGS "${NVIDIA_GPU_CXX_FLAGS}")
94+
if(USE_SM)
95+
message("-- Building for SM_${USE_SM} compatibility")
96+
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_${USE_SM} ")
97+
else()
98+
message("-- Building for SM_80 compatibility (DEFAULT)")
99+
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 ")
78100
endif()
79-
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda ") # -O3 will be used, even though -O2 was set earlier
101+
string(APPEND CMAKE_CXX_FLAGS " -DUSE_NVIDIA_BACKEND")
80102
elseif(USE_AMDHIP_BACKEND)
81103
message(STATUS "Enabling AMD HIP backend for ${USE_AMDHIP_BACKEND} AMD architecture")
82-
if(USE_DEFAULT_FLAGS)
83-
set(CMAKE_CXX_FLAGS "${AMD_GPU_CXX_FLAGS}") # Default flags for AMD backend (gfx908 for MI100)
84-
endif()
104+
set(CMAKE_CXX_FLAGS "${AMD_GPU_CXX_FLAGS}")
85105
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${USE_AMDHIP_BACKEND} ")
106+
string(APPEND CMAKE_CXX_FLAGS " -DUSE_AMDHIP_BACKEND")
107+
else()
108+
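# JIT case: none of GPU_AOT, USE_NVIDIA_BACKEND or USE_AMDHIP_BACKEND was requested, so the Intel flags are used without an AOT target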
109+
message(STATUS "Enabling INTEL backend")
110+
set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}")
86111
endif()
87112

113+
message(STATUS "CXX Compilation flags set to: ${CMAKE_CXX_FLAGS}")
114+
88115
if(GPU_AOT)
89116
set(MKL_LINK static)
90117
set(MKL_THREADING sequential)

0 commit comments

Comments
 (0)