Skip to content

Commit 08adee2

Browse files
s-Nicknormallytangent
authored andcommitted
[DFT][Examples] Make the oneMKL DFT example consistent with the other domains (#518)
1 parent 16d3d4b commit 08adee2

File tree

5 files changed

+95
-173
lines changed

5 files changed

+95
-173
lines changed

examples/README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,14 @@ oneAPI Math Kernel Library (oneMKL) Interfaces offers examples with the followin
33
- blas: level3/gemm_usm
44
- rng: uniform_usm
55
- lapack: getrs_usm
6-
- dft: complex_fwd_buffer, real_fwd_usm
6+
- dft: complex_fwd_usm, real_fwd_usm
77
- sparse_blas: sparse_gemv_usm
88

99
Each routine has one run-time dispatching example and one compile-time dispatching example (which uses both mklcpu and cuda backends), located in `example/<$domain>/run_time_dispatching` and `example/<$domain>/compile_time_dispatching` subfolders, respectively.
1010

1111
To build examples, use cmake build option `-DBUILD_EXAMPLES=true`.
1212
Compile_time_dispatching will be built if `-DBUILD_EXAMPLES=true` and cuda backend is enabled, because the compile-time dispatching example runs on both mklcpu and cuda backends.
1313
Run_time_dispatching will be built if `-DBUILD_EXAMPLES=true` and `-DBUILD_SHARED_LIBS=true`.
14-
All DFT examples require the mklgpu backend to be enabled.
1514

1615
The example executable naming convention follows `example_<$domain>_<$routine>_<$backend>` for compile-time dispatching examples
1716
or `example_<$domain>_<$routine>` for run-time dispatching examples.

examples/dft/compile_time_dispatching/CMakeLists.txt

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,31 +18,35 @@
1818
#===============================================================================
1919

2020
#Build object from all sources
21-
set(DFTI_CT_BACKENDS "")
22-
23-
if(ENABLE_MKLGPU_BACKEND)
24-
list(APPEND DFTI_CT_BACKENDS "mklgpu")
25-
endif()
26-
27-
if(ENABLE_MKLCPU_BACKEND)
28-
list(APPEND DFTI_CT_BACKENDS "mklcpu")
21+
set(DFT_CT_SOURCES "")
22+
if (ENABLE_MKLCPU_BACKEND AND ENABLE_CUFFT_BACKEND)
23+
list(APPEND DFT_CT_SOURCES "complex_fwd_usm_mklcpu_cufft")
2924
endif()
3025

3126
include(WarningsUtils)
3227

33-
foreach(dfti_backend ${DFTI_CT_BACKENDS})
34-
set(EXAMPLE_NAME example_dft_complex_fwd_buffer_${dfti_backend})
35-
add_executable(${EXAMPLE_NAME} complex_fwd_buffer_${dfti_backend}.cpp)
28+
foreach(dft_ct_source ${DFT_CT_SOURCES})
29+
set(EXAMPLE_NAME example_${domain}_${dft_ct_source})
30+
add_executable(${EXAMPLE_NAME} ${dft_ct_source}.cpp)
3631
target_include_directories(${EXAMPLE_NAME}
3732
PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
3833
PUBLIC ${PROJECT_SOURCE_DIR}/include
3934
PUBLIC ${CMAKE_BINARY_DIR}/bin
4035
)
4136

42-
add_dependencies(${EXAMPLE_NAME} onemkl_dft_${dfti_backend})
43-
target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_dft_${dfti_backend} onemkl_warnings)
37+
if(domain STREQUAL "dft" AND ENABLE_MKLCPU_BACKEND AND ENABLE_CUFFT_BACKEND)
38+
add_dependencies(${EXAMPLE_NAME} onemkl_${domain}_mklcpu onemkl_${domain}_cufft)
39+
list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_mklcpu onemkl_${domain}_cufft)
40+
endif()
41+
42+
target_link_libraries(${EXAMPLE_NAME} PUBLIC
43+
${ONEMKL_LIBRARIES_${domain}}
44+
ONEMKL::SYCL::SYCL
45+
onemkl_warnings
46+
)
4447

4548
# Register example as ctest
46-
add_test(NAME dft/EXAMPLE/CT/complex_fwd_buffer_${dfti_backend} COMMAND ${EXAMPLE_NAME})
47-
endforeach(dfti_backend)
49+
add_test(NAME dft/EXAMPLE/CT/${dft_ct_source} COMMAND ${EXAMPLE_NAME})
50+
51+
endforeach(dft_ct_source)
4852

examples/dft/compile_time_dispatching/complex_fwd_buffer_mklcpu.cpp

Lines changed: 0 additions & 132 deletions
This file was deleted.

examples/dft/compile_time_dispatching/complex_fwd_buffer_mklgpu.cpp renamed to examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp

Lines changed: 71 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2023 Intel Corporation
2+
* Copyright 2024 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -27,10 +27,26 @@
2727
#include <CL/sycl.hpp>
2828
#endif
2929
#include "oneapi/mkl.hpp"
30+
#include <complex>
3031

31-
void run_example(const sycl::device& gpu_device) {
32+
void run_example(const sycl::device& cpu_device, const sycl::device& gpu_device) {
3233
constexpr std::size_t N = 10;
3334

35+
// Catch asynchronous exceptions for cpu
36+
auto cpu_error_handler = [&](sycl::exception_list exceptions) {
37+
for (auto const& e : exceptions) {
38+
try {
39+
std::rethrow_exception(e);
40+
}
41+
catch (sycl::exception const& e) {
42+
// Handle not dft related exceptions that happened during asynchronous call
43+
std::cerr << "Caught asynchronous SYCL exception on CPU device during execution:"
44+
<< std::endl;
45+
std::cerr << "\t" << e.what() << std::endl;
46+
}
47+
}
48+
std::exit(2);
49+
};
3450
// Catch asynchronous exceptions for gpu
3551
auto gpu_error_handler = [&](sycl::exception_list exceptions) {
3652
for (auto const& e : exceptions) {
@@ -39,17 +55,30 @@ void run_example(const sycl::device& gpu_device) {
3955
}
4056
catch (sycl::exception const& e) {
4157
// Handle not dft related exceptions that happened during asynchronous call
42-
std::cerr << "Caught asynchronous SYCL exception:" << std::endl;
58+
std::cerr << "Caught asynchronous SYCL exception on GPU device during execution:"
59+
<< std::endl;
4360
std::cerr << "\t" << e.what() << std::endl;
4461
}
4562
}
4663
std::exit(2);
4764
};
4865

66+
// Preparation CPU device and GPU device
67+
sycl::queue cpu_queue(cpu_device, cpu_error_handler);
4968
sycl::queue gpu_queue(gpu_device, gpu_error_handler);
5069

51-
std::vector<std::complex<float>> input_data(N);
52-
std::vector<std::complex<float>> output_data(N);
70+
// allocate on CPU device and GPU device
71+
auto cpu_input_data = sycl::malloc_shared<std::complex<float>>(N, cpu_queue);
72+
auto cpu_output_data = sycl::malloc_shared<std::complex<float>>(N, cpu_queue);
73+
74+
auto gpu_input_data = sycl::malloc_shared<std::complex<float>>(N, gpu_queue);
75+
auto gpu_output_data = sycl::malloc_shared<std::complex<float>>(N, gpu_queue);
76+
77+
// Initialize input data
78+
for (std::size_t i = 0; i < N; ++i) {
79+
cpu_input_data[i] = { static_cast<float>(i), static_cast<float>(-i) };
80+
gpu_input_data[i] = { static_cast<float>(i), static_cast<float>(-i) };
81+
}
5382

5483
// enabling
5584
// 1. create descriptors
@@ -63,16 +92,27 @@ void run_example(const sycl::device& gpu_device) {
6392
desc.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS,
6493
static_cast<std::int64_t>(1));
6594

66-
// 3. commit_descriptor (compile_time MKLGPU)
67-
desc.commit(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklgpu>{ gpu_queue });
95+
// 3a. commit_descriptor (compile_time MKLCPU)
96+
desc.commit(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue });
6897

69-
// 4. compute_forward / compute_backward (MKLGPU)
70-
{
71-
sycl::buffer<std::complex<float>> input_buffer(input_data.data(), sycl::range<1>(N));
72-
sycl::buffer<std::complex<float>> output_buffer(output_data.data(), sycl::range<1>(N));
73-
oneapi::mkl::dft::compute_forward<decltype(desc), std::complex<float>, std::complex<float>>(
74-
desc, input_buffer, output_buffer);
75-
}
98+
// 4a. compute_forward / compute_backward (MKLCPU)
99+
oneapi::mkl::dft::compute_forward<decltype(desc), std::complex<float>, std::complex<float>>(
100+
desc, cpu_input_data, cpu_output_data);
101+
102+
// 3b. commit_descriptor (compile_time cuFFT)
103+
desc.commit(oneapi::mkl::backend_selector<oneapi::mkl::backend::cufft>{ gpu_queue });
104+
105+
// 4b. compute_forward / compute_backward (cuFFT)
106+
oneapi::mkl::dft::compute_forward<decltype(desc), std::complex<float>, std::complex<float>>(
107+
desc, gpu_input_data, gpu_output_data);
108+
109+
cpu_queue.wait_and_throw();
110+
gpu_queue.wait_and_throw();
111+
112+
sycl::free(cpu_input_data, cpu_queue);
113+
sycl::free(gpu_input_data, gpu_queue);
114+
sycl::free(cpu_output_data, cpu_queue);
115+
sycl::free(gpu_output_data, gpu_queue);
76116
}
77117

78118
//
@@ -81,18 +121,16 @@ void run_example(const sycl::device& gpu_device) {
81121
void print_example_banner() {
82122
std::cout << "\n"
83123
"########################################################################\n"
84-
"# Complex out-of-place forward transform for Buffer API's example:\n"
124+
"# Complex out-of-place forward transform for USM API's example:\n"
85125
"#\n"
86126
"# Using APIs:\n"
87127
"# Compile-time dispatch API\n"
88-
"# Buffer forward complex out-of-place\n"
128+
"# USM forward complex out-of-place\n"
89129
"#\n"
90130
"# Using single precision (float) data type\n"
91131
"#\n"
92-
"# For Intel GPU with Intel MKLGPU backend.\n"
132+
"# Running on both Intel CPU and NVIDIA GPU devices.\n"
93133
"#\n"
94-
"# The environment variable ONEAPI_DEVICE_SELECTOR can be used to specify\n"
95-
"# available devices\n"
96134
"########################################################################\n"
97135
<< std::endl;
98136
}
@@ -104,15 +142,25 @@ int main(int /*argc*/, char** /*argv*/) {
104142
print_example_banner();
105143

106144
try {
145+
sycl::device cpu_device((sycl::cpu_selector_v));
107146
sycl::device gpu_device((sycl::gpu_selector_v));
108-
std::cout << "Running DFT Complex forward out-of-place buffer example" << std::endl;
109-
std::cout << "Using compile-time dispatch API with MKLGPU." << std::endl;
147+
148+
unsigned int vendor_id = gpu_device.get_info<sycl::info::device::vendor_id>();
149+
if (vendor_id != NVIDIA_ID) {
150+
std::cerr << "FAILED: NVIDIA GPU device not found" << std::endl;
151+
return 1;
152+
}
153+
154+
std::cout << "Running DFT Complex forward out-of-place usm example" << std::endl;
155+
std::cout << "Using compile-time dispatch API with MKLCPU and cuFFT." << std::endl;
110156
std::cout << "Running with single precision real data type on:" << std::endl;
157+
std::cout << "\tCPU device: " << cpu_device.get_info<sycl::info::device::name>()
158+
<< std::endl;
111159
std::cout << "\tGPU device :" << gpu_device.get_info<sycl::info::device::name>()
112160
<< std::endl;
113161

114-
run_example(gpu_device);
115-
std::cout << "DFT Complex USM example ran OK on MKLGPU" << std::endl;
162+
run_example(cpu_device, gpu_device);
163+
std::cout << "DFT Complex USM example ran OK on MKLCPU and CUFFT" << std::endl;
116164
}
117165
catch (sycl::exception const& e) {
118166
// Handle not dft related exceptions that happened during synchronous call

examples/dft/run_time_dispatching/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,16 @@ set(DFT_RT_SOURCES "")
2727
# If users build more than one backend (i.e. mklcpu and mklgpu, or mklcpu and CUDA), they may need to
2828
# overwrite ONEAPI_DEVICE_SELECTOR in their environment to run on the desired backend
2929
set(DEVICE_FILTERS "")
30-
if(ENABLE_MKLGPU_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_ROCFFT_BACKEND OR ENABLE_PORTFFT_BACKEND)
30+
if(ENABLE_MKLGPU_BACKEND OR ENABLE_MKLCPU_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_ROCFFT_BACKEND OR ENABLE_PORTFFT_BACKEND)
3131
list(APPEND DFT_RT_SOURCES "real_fwd_usm")
3232
endif()
3333

3434
if(ENABLE_MKLGPU_BACKEND)
3535
list(APPEND DEVICE_FILTERS "level_zero:gpu")
3636
endif()
37+
if(ENABLE_MKLCPU_BACKEND)
38+
list(APPEND DEVICE_FILTERS "opencl:cpu")
39+
endif()
3740
if(ENABLE_PORTFFT_BACKEND)
3841
list(APPEND DEVICE_FILTERS "*:gpu")
3942
endif()

0 commit comments

Comments
 (0)