Skip to content

Commit fed066f

Browse files
committed
Sync GPU Optimization Guide examples
1 parent 5be6adc commit fed066f

File tree

128 files changed

+383
-1760
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+383
-1760
lines changed
Lines changed: 106 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,30 @@
1-
cmake_minimum_required(VERSION 3.21)
1+
cmake_minimum_required(VERSION 3.25 FATAL_ERROR)
22
option(BUILD_FORTRAN_EXAMPLES "Whether to build fortran examples" ON)
33
set(CMAKE_C_COMPILER icx)
44
set(CMAKE_CXX_COMPILER icpx)
5-
set(_languages C CXX)
6-
75
if (BUILD_FORTRAN_EXAMPLES)
8-
set(_languages ${_languages} Fortran)
96
set(CMAKE_Fortran_COMPILER ifx)
107
endif()
118

12-
enable_testing()
13-
14-
project(GPUOptGuide
15-
LANGUAGES ${_languages}
16-
DESCRIPTION "Code examples from Intel GPU Optimization guide")
17-
9+
project(GPUOptGuide)
1810
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
1911

12+
include(CheckLanguage)
13+
enable_testing()
14+
2015
find_package(IntelSYCL REQUIRED)
2116

17+
# Enable Fortran only when the BUILD_FORTRAN_EXAMPLES option is ON; the name must match the option() declaration exactly, otherwise this guard is always false.
if (BUILD_FORTRAN_EXAMPLES)
18+
check_language(Fortran)
19+
if(CMAKE_Fortran_COMPILER)
20+
enable_language(Fortran)
21+
else()
22+
message(FATAL_ERROR "No Fortran support detected, but Fortran tests were requested. Install oneAPI HPC Toolkit.")
23+
endif()
24+
endif()
25+
2226
set(MKL_THREADING tbb_thread)
23-
set(MKL_INTERFACE "ilp64")
2427
set(DPCPP_COMPILER ON)
25-
26-
set(MKL_VERSION_2024 FALSE)
27-
find_package(MKL QUIET)
28-
if(MKL_FOUND)
29-
if(MKL_VERSION VERSION_GREATER_EQUAL "2024.0.0")
30-
set(MKL_VERSION_2024 TRUE)
31-
endif()
32-
endif()
3328
find_package(MKL REQUIRED)
3429

3530
string(CONCAT WARNING_CXX_FLAGS_STR
@@ -44,116 +39,155 @@ string(CONCAT WARNING_CXX_FLAGS_STR
4439
string(REPLACE " " ";" COMMON_CXX_FLAGS "${WARNING_CXX_FLAGS_STR}")
4540

4641
function(add_example_with_mkl name)
47-
set(_sources ${name}.cpp)
48-
add_executable(${name} ${_sources})
49-
add_sycl_to_target(TARGET ${name} SOURCES ${_sources})
50-
target_compile_options(${name} PRIVATE ${COMMON_CXX_FLAGS})
51-
if (MKL_VERSION_2024)
52-
target_link_libraries(${name} PUBLIC MKL::MKL_SYCL)
42+
cmake_parse_arguments(FUNC_SRC "" "" "SOURCES" ${ARGN})
43+
if (FUNC_SRC_SOURCES)
44+
set(_src ${FUNC_SRC_SOURCES})
5345
else()
54-
target_link_libraries(${name} PUBLIC MKL::MKL_DPCPP)
46+
set(_src ${name}.cpp)
5547
endif()
48+
add_executable(${name} ${_src})
49+
add_sycl_to_target(TARGET ${name} SOURCES ${_src})
50+
target_compile_options(${name} PRIVATE ${COMMON_CXX_FLAGS})
51+
target_compile_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -qmkl)
52+
target_link_libraries(${name} PRIVATE MKL::MKL_DPCPP)
53+
target_link_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -qmkl -lOpenCL)
5654
# Forward only the arguments left over after cmake_parse_arguments(); raw ARGN would also pass the SOURCES keyword and its file list to the test executable.
add_test(NAME ${name} COMMAND ${name} ${FUNC_SRC_UNPARSED_ARGUMENTS})
5755
endfunction(add_example_with_mkl)
5856

5957
function(add_fortran_example_with_mkl name)
6058
if(CMAKE_Fortran_COMPILER)
61-
set(_sources ${name}.f)
62-
add_executable(${name} ${_sources})
63-
add_sycl_to_target(TARGET ${name} SOURCES ${_sources})
59+
set(_src ${name}.f)
60+
add_executable(${name} ${_src})
61+
add_sycl_to_target(TARGET ${name} SOURCES ${_src})
6462
target_compile_options(${name} PRIVATE -warn all)
65-
target_compile_options(${name} PRIVATE -fpp -free -DMKL_ILP64 -i8)
63+
target_compile_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -qmkl -fpp -free)
6664
set_target_properties(${name} PROPERTIES LINKER_LANGUAGE Fortran)
67-
if (MKL_VERSION_2024)
68-
target_link_libraries(${name} PUBLIC MKL::MKL_SYCL)
69-
else()
70-
target_link_libraries(${name} PUBLIC MKL::MKL_DPCPP)
71-
endif()
65+
target_link_libraries(${name} PUBLIC MKL::MKL_DPCPP)
66+
target_link_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -qmkl -lOpenCL)
7267
add_test(NAME ${name} COMMAND ${name} ${ARGN})
7368
endif()
7469
endfunction(add_fortran_example_with_mkl)
7570

71+
function(add_fortran_example_with_mkl_i8 name)
72+
if(CMAKE_Fortran_COMPILER)
73+
set(_src ${name}.f)
74+
add_executable(${name} ${_src})
75+
add_sycl_to_target(TARGET ${name} SOURCES ${_src})
76+
target_compile_options(${name} PRIVATE -warn all)
77+
target_compile_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -qmkl -fpp -free -DMKL_ILP64 -i8)
78+
set_target_properties(${name} PROPERTIES LINKER_LANGUAGE Fortran)
79+
target_link_libraries(${name} PUBLIC MKL::MKL_DPCPP)
80+
target_link_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -qmkl -lOpenCL)
81+
add_test(NAME ${name} COMMAND ${name} ${ARGN})
82+
endif()
83+
endfunction(add_fortran_example_with_mkl_i8)
84+
7685
function(add_example name)
77-
set(_sources ${name}.cpp)
78-
add_executable(${name} ${_sources})
79-
add_sycl_to_target(TARGET ${name} SOURCES ${_sources})
86+
cmake_parse_arguments(FUNC_SRC "" "" "SOURCES" ${ARGN})
87+
if (FUNC_SRC_SOURCES)
88+
set(_src ${FUNC_SRC_SOURCES})
89+
else()
90+
set(_src ${name}.cpp)
91+
endif()
92+
add_executable(${name} ${_src})
93+
add_sycl_to_target(TARGET ${name} SOURCES ${_src})
8094
target_compile_options(${name} PRIVATE ${COMMON_CXX_FLAGS})
8195
target_link_options(${name} PRIVATE -fsycl-device-code-split=per_kernel)
8296
# Forward only the arguments left over after cmake_parse_arguments(); raw ARGN would also pass the SOURCES keyword and its file list to the test executable.
add_test(NAME ${name} COMMAND ${name} ${FUNC_SRC_UNPARSED_ARGUMENTS})
8397
endfunction(add_example)
8498

8599
function(add_openmp_example name)
86-
set(_sources ${name}.cpp)
87-
add_executable(${name} ${_sources})
100+
set(_src ${name}.cpp)
101+
add_executable(${name} ${_src})
102+
add_sycl_to_target(TARGET ${name} SOURCES ${_src})
88103
target_compile_options(${name} PRIVATE ${COMMON_CXX_FLAGS})
89104
add_test(NAME ${name} COMMAND ${name} ${ARGN})
90105
endfunction(add_openmp_example)
91106

92107
function(add_fortran_example name)
93108
if(CMAKE_Fortran_COMPILER)
94-
set(_sources ${name}.f90)
95-
add_executable(${name} ${_sources})
96-
add_sycl_to_target(TARGET ${name} SOURCES ${_sources})
109+
set(_src ${name}.f90)
110+
add_executable(${name} ${_src})
111+
add_sycl_to_target(TARGET ${name} SOURCES ${_src})
97112
target_compile_options(${name} PRIVATE -warn all)
113+
target_compile_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64)
98114
set_target_properties(${name} PROPERTIES LINKER_LANGUAGE Fortran)
115+
target_link_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64)
99116
add_test(NAME ${name} COMMAND ${name} ${ARGN})
100117
endif()
101118
endfunction(add_fortran_example)
102119

103120
function(add_fixed_fortran_example name)
104121
if(CMAKE_Fortran_COMPILER)
105-
set(_sources ${name}.f)
106-
add_executable(${name} ${_sources})
122+
set(_src ${name}.f)
123+
add_executable(${name} ${_src})
124+
add_sycl_to_target(TARGET ${name} SOURCES ${_src})
107125
target_compile_options(${name} PRIVATE -warn all)
126+
target_compile_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64)
108127
set_target_properties(${name} PROPERTIES LINKER_LANGUAGE Fortran)
# Pass the OpenMP offload flags to the link step as well, matching the compile options used for this target.
target_link_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64)
109128
add_test(NAME ${name} COMMAND ${name} ${ARGN})
110129
endif()
111130
endfunction(add_fixed_fortran_example)
112131

113132
function(add_mpi_example name)
114133
if(MPI_FOUND)
115-
set(_sources ${name}.cpp)
116-
add_executable(${name} ${_sources})
117-
add_sycl_to_target(TARGET ${name} SOURCES ${_sources})
134+
set(_src ${name}.cpp)
135+
add_executable(${name} ${_src})
136+
add_sycl_to_target(TARGET ${name} SOURCES ${_src})
137+
target_compile_options(${name} PRIVATE -O3 -fiopenmp -fopenmp-targets=spir64)
138+
target_link_options(${name} PRIVATE -O3 -fiopenmp -fopenmp-targets=spir64)
118139
target_link_libraries(${name} PRIVATE MPI::MPI_CXX)
119140
add_test(NAME ${name} COMMAND ${name} ${ARGN})
120141
endif()
121142
endfunction(add_mpi_example)
122143

144+
function(add_example_with_mkl_mpi name)
145+
if(MPI_FOUND)
146+
set(_src ${name}.cpp)
147+
add_executable(${name} ${_src})
148+
add_sycl_to_target(TARGET ${name} SOURCES ${_src})
149+
target_compile_options(${name} PRIVATE ${COMMON_CXX_FLAGS})
150+
if(NOT MKL_ROOT)
151+
set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL")
152+
endif(NOT MKL_ROOT)
153+
# Use the MKL_ROOT CMake variable populated just above (falls back to the MKLROOT environment variable); quote whole flags so no literal quote characters reach the compiler.
target_compile_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -fsycl -DMKL_LP64 "-I${MKL_ROOT}/include")
154+
target_link_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -fsycl "-L${MKL_ROOT}/lib" -lmkl_sycl_blas -lmkl_intel_lp64 -lmkl_tbb_thread -lmkl_core -lsycl -lpthread -lm -ldl)
155+
target_link_libraries(${name} PRIVATE MPI::MPI_CXX)
156+
add_test(NAME ${name} COMMAND ${name} ${ARGN})
157+
endif()
158+
endfunction(add_example_with_mkl_mpi)
159+
123160
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
124161

125162
add_subdirectory(atomics)
126-
add_subdirectory(matrix)
163+
add_subdirectory(buffer-accessors)
164+
add_subdirectory(buffers)
165+
add_subdirectory(composite-explicit-scaling)
166+
add_subdirectory(conditionals)
127167
add_subdirectory(exec-model)
128-
add_subdirectory(explicit-scaling)
168+
add_subdirectory(flat)
169+
add_subdirectory(fp-computations)
170+
add_subdirectory(grf-mode-selection)
129171
add_subdirectory(io-kernel)
130172
add_subdirectory(jitting)
131173
add_subdirectory(kernels)
132-
add_subdirectory(memory-movement)
133-
add_subdirectory(restrict)
134-
add_subdirectory(slm)
135-
add_subdirectory(usm)
136-
add_subdirectory(sub-group)
137-
add_subdirectory(buffers)
138-
add_subdirectory(buffer-accessors)
139-
add_subdirectory(reduction)
140-
add_subdirectory(conditionals)
174+
add_subdirectory(libraries-fcorr)
141175
add_subdirectory(libraries-kernel)
142176
add_subdirectory(libraries-stdlib)
143-
add_subdirectory(libraries-fcorr)
144-
add_subdirectory(multiple-queue-submission)
177+
add_subdirectory(matrix)
178+
add_subdirectory(memory-movement)
179+
add_subdirectory(MPI)
145180
add_subdirectory(multiple-devices)
146181
add_subdirectory(multiple-kernel-execution)
147-
add_subdirectory(work-group-size)
148-
add_subdirectory(registers)
182+
add_subdirectory(multiple-queue-submission)
149183
add_subdirectory(OpenMP)
150184
add_subdirectory(optimize-data-transfers)
151-
add_subdirectory(MPI)
152-
add_subdirectory(grf-mode-selection)
153-
add_subdirectory(fp-computations)
154-
add_subdirectory(host-device-memory)
155-
add_subdirectory(joint-matrix)
156-
add_subdirectory(local-global-sync)
157-
#add_subdirectory(memory-sharing-with-media)
158-
add_subdirectory(redundant-queues)
159-
add_subdirectory(implicit-scaling)
185+
add_subdirectory(porting-registers)
186+
add_subdirectory(prefetch)
187+
add_subdirectory(reduction)
188+
add_subdirectory(registers)
189+
add_subdirectory(restrict)
190+
add_subdirectory(slm)
191+
add_subdirectory(sub-group)
192+
add_subdirectory(usm)
193+
add_subdirectory(work-group-size)
Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
11
add_mpi_example(omp_mpich)
2-
target_compile_options(omp_mpich PRIVATE -fiopenmp)
3-
target_link_options(omp_mpich PRIVATE -fiopenmp)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
find_package(MPI)
22

33
add_subdirectory(01_omp_mpich)
4+
add_subdirectory(02_omp_mpi_onemkl_dgemm)

Publications/GPU-Opt-Guide/OpenMP/21_omp_target_alloc/test_target_map_f.f90

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ program main
5555

5656
nstream_time = omp_get_wtime()
5757
do iter = 1, iterations
58-
!$omp target teams distribute parallel do map(to: A, B) map(tofrom: C)
58+
!$omp target teams distribute parallel do &
59+
!$omp map(to: A, B) map(tofrom: C)
5960
do i = 1, length
6061
C(i) = C(i) + A(i) + scalar * B(i)
6162
end do

Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ add_example_with_mkl(dgemm_example_02)
66
add_example_with_mkl(dgemm_example_03)
77
add_example_with_mkl(dgemm_batch_example_01)
88
add_example_with_mkl(dgemm_batch_example_02)
9-
add_fortran_example_with_mkl(dgemm_dispatch_f)
9+
add_fortran_example_with_mkl_i8(dgemm_dispatch_f)

Publications/GPU-Opt-Guide/OpenMP/22_mkl_dispatch/sample.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
"mkdir ../../build",
3535
"cd ../../build",
3636
"cmake ..",
37-
"make dgemm_target_variant_dispatch_c dgemm_dispatch_c dgemm_example_01 dgemm_example_02 dgemm_example_03 dgemm_batch_example_01 dgemm_batch_example_02 dgemm_dispatch_f",
37+
"make dgemm_dispatch_c dgemm_example_01 dgemm_example_02 dgemm_example_03 dgemm_batch_example_01 dgemm_batch_example_02 dgemm_dispatch_f",
3838
"make clean"
3939
]
4040
}

Publications/GPU-Opt-Guide/OpenMP/22_mkl_pad/dgemm_pad_c_01.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,10 @@ int main(int argc, char **argv) {
193193
"beta = %f, iterations = %d, verify? = %d\n",
194194
HA, WA, WB, ldA, ldB, ldC, alpha, beta, niter, verify);
195195

196-
double start_t, end_t, tot_t = 0.0, best_t = DBL_MAX;
196+
#if defined(USE_MKL)
197+
double start_t, end_t;
198+
#endif
199+
double tot_t = 0.0, best_t = DBL_MAX;
197200

198201
/*ALLOCATE HOST ARRAYS*/
199202
FLOAT *A = (FLOAT *)MALLOC(ldA * WA * sizeof(FLOAT));

Publications/GPU-Opt-Guide/OpenMP/25_fortran_example/test-HostMem-DeviceMem-Map-UpdateTo.f

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ end subroutine init
2828
subroutine init_d (c, m, k, n)
2929
implicit none
3030
real :: c(m, n)
31-
integer m, n, i, j
31+
integer m, k, n, i, j
3232

3333
!$omp target teams distribute parallel do
3434
do i = 1, m
@@ -74,7 +74,7 @@ program main
7474
beta = 1.0
7575
total = 0.0
7676
call init (a, b, m, k, n)
77-
call init_d (c, m, n)
77+
call init_d (c, m, k, n)
7878

7979
! Compute sgemm on the device.
8080

Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c/nbody_c.cpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,14 @@ int main() {
9999
}
100100

101101
#pragma omp target
102-
{}
102+
{
103+
}
103104

104-
#pragma omp target enter data map(alloc \
105-
: a [0:ARRAYLEN1], b [0:ARRAYLEN2], \
106-
c [0:ARRAYLEN1])
107-
#pragma omp target enter data map(alloc : d [0:CACHE_CLEAN_SIZE])
105+
#pragma omp target enter data map(alloc : a[0 : ARRAYLEN1], b[0 : ARRAYLEN2], \
106+
c[0 : ARRAYLEN1])
107+
#pragma omp target enter data map(alloc : d[0 : CACHE_CLEAN_SIZE])
108108

109-
#pragma omp target update to(a [0:ARRAYLEN1], b [0:ARRAYLEN2])
109+
#pragma omp target update to(a[0 : ARRAYLEN1], b[0 : ARRAYLEN2])
110110

111111
double t1, t2, elapsed_s = 0.0;
112112
for (int i = 0; i < ITERATIONS; ++i) {
@@ -119,7 +119,7 @@ int main() {
119119
elapsed_s += (t2 - t1);
120120
}
121121

122-
#pragma omp target update from(c [0:ARRAYLEN1])
122+
#pragma omp target update from(c[0 : ARRAYLEN1])
123123

124124
double sum = 0.0f;
125125
for (int i = 0; i < ARRAYLEN1; ++i)
@@ -136,10 +136,9 @@ int main() {
136136

137137
printf("\nTotal time = %8.1f milliseconds\n", (elapsed_s * 1000));
138138

139-
#pragma omp target exit data map(delete \
140-
: a [0:ARRAYLEN1], b [0:ARRAYLEN2], \
141-
c [0:ARRAYLEN1])
142-
#pragma omp target exit data map(delete : d [0:CACHE_CLEAN_SIZE])
139+
#pragma omp target exit data map(delete : a[0 : ARRAYLEN1], b[0 : ARRAYLEN2], \
140+
c[0 : ARRAYLEN1])
141+
#pragma omp target exit data map(delete : d[0 : CACHE_CLEAN_SIZE])
143142

144143
delete[] a;
145144
delete[] b;

0 commit comments

Comments
 (0)