Skip to content
This repository was archived by the owner on Sep 22, 2025. It is now read-only.

Commit a007da2

Browse files
author
kc432959
committed
More logs and bring back benchmark cmake
1 parent b89c131 commit a007da2

File tree

3 files changed

+149
-133
lines changed

3 files changed

+149
-133
lines changed
Lines changed: 133 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -1,133 +1,133 @@
1-
## SPDX-FileCopyrightText: Intel Corporation
2-
##
3-
## SPDX-License-Identifier: BSD-3-Clause
4-
#
5-
#set(CMAKE_INCLUDE_CURRENT_DIR ON)
6-
#
7-
## cmake-format: off
8-
#add_executable(
9-
# mp-bench
10-
# mp-bench.cpp
11-
# ../common/distributed_vector.cpp
12-
# ../common/dot_product.cpp
13-
# ../common/inclusive_exclusive_scan.cpp
14-
# ../common/sort.cpp
15-
# ../common/stream.cpp
16-
# streammp.cpp
17-
# rooted.cpp
18-
# stencil_1d.cpp
19-
# stencil_2d.cpp
20-
# chunk.cpp
21-
# # mdspan.cpp
22-
# mpi.cpp)
23-
## cmake-format: on
24-
#
25-
## disabled with SYCL due to DRA-135
26-
#if(NOT ENABLE_SYCL)
27-
# target_sources(mp-bench PRIVATE mdspan.cpp)
28-
#endif()
29-
#
30-
#if(ENABLE_SYCL)
31-
# target_sources(mp-bench PRIVATE fft3d.cpp)
32-
#endif()
33-
#
34-
#if(NOT ENABLE_CUDA)
35-
# # does not compile in CUDA because: black_scholes.cpp uses std::log
36-
# # shallow_water, wave_equation uses uses exp
37-
# target_sources(mp-bench PRIVATE ../common/black_scholes.cpp shallow_water.cpp
38-
# wave_equation.cpp)
39-
#endif()
40-
#
41-
## mp-quick-bench is for development. By reducing the number of source files, it
42-
## builds much faster. Change the source files to match what you need to test. It
43-
## is OK to commit changes to the source file list.
44-
#add_executable(mp-quick-bench mp-bench.cpp ../common/distributed_vector.cpp)
45-
#
46-
#foreach(mp-bench-exec IN ITEMS mp-bench mp-quick-bench)
47-
# target_compile_definitions(${mp-bench-exec} PRIVATE BENCH_MP)
48-
# target_link_libraries(${mp-bench-exec} benchmark::benchmark cxxopts DR::mpi)
49-
# if(ENABLE_ISHMEM)
50-
# target_link_ishmem(${mp-bench-exec})
51-
# endif()
52-
# if(ENABLE_SYCL)
53-
# target_link_libraries(${mp-bench-exec} MKL::MKL_DPCPP)
54-
# endif()
55-
#endforeach()
56-
#
57-
#if(ENABLE_SYCL)
58-
# # target_sources(mp-quick-bench PRIVATE fft3d.cpp)
59-
#endif()
60-
#
61-
#cmake_path(GET MPI_CXX_ADDITIONAL_INCLUDE_DIRS FILENAME MPI_IMPL)
62-
#
63-
## debug mp-bench is too slow
64-
#if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT MPI_IMPL STREQUAL "openmpi")
65-
# # MPI_Win_create fails for communicator with size 1 30000 is minimum because
66-
# # of static column size for stencil2D disable DPL benchmarks because we get
67-
# # intermittent fails with: ONEAPI_DEVICE_SELECTOR=opencl:cpu mpirun -n 1
68-
# # ./mp-bench --vector-size 30000 --rows 100 --columns 100 --check
69-
# add_mp_ctest(
70-
# NAME mp-bench TIMEOUT 200 TARGS --vector-size 30000 --rows 100 --columns
71-
# 100 --check --benchmark_filter=-FFT3D.*)
72-
# if(ENABLE_SYCL)
73-
# add_mp_ctest(
74-
# NAME mp-bench TIMEOUT 200 SYCL TARGS --vector-size 30000 --rows 100
75-
# --columns 100 --check --benchmark_filter=-.*DPL.*)
76-
# endif()
77-
#endif()
78-
#
79-
#add_executable(wave_equation wave_equation.cpp)
80-
#target_link_libraries(wave_equation cxxopts DR::mpi)
81-
#target_compile_definitions(wave_equation PRIVATE STANDALONE_BENCHMARK)
82-
#add_mp_ctest(NAME wave_equation)
83-
#add_executable(wave_equation_wide wave_equation_wide.cpp)
84-
#target_link_libraries(wave_equation_wide cxxopts DR::mpi)
85-
#target_compile_definitions(wave_equation_wide PRIVATE STANDALONE_BENCHMARK)
86-
#add_mp_ctest(NAME wave_equation_wide)
87-
## add_mp_ctest(TEST_NAME wave_equation_fused NAME wave_equation TARGS -f) #
88-
## DRA-92
89-
#if(ENABLE_SYCL)
90-
# add_mp_ctest(
91-
# TEST_NAME wave_equation-sycl NAME wave_equation TIMEOUT 1000 NPROC 8 SYCL)
92-
# add_mp_ctest(
93-
# TEST_NAME wave_equation-sycl-benchmark NAME wave_equation TIMEOUT 1000 NPROC 8 SYCL TARGS -t)
94-
# add_mp_ctest(
95-
# TEST_NAME wave_equation_fused-sycl NAME wave_equation TIMEOUT 1000 NPROC 2 SYCL TARGS -f)
96-
# add_mp_ctest(
97-
# TEST_NAME wave_equation_wide-sycl NAME wave_equation_wide TIMEOUT 1000 NPROC 8 SYCL)
98-
# foreach(redundancy RANGE 1 8)
99-
# add_mp_ctest(
100-
# TEST_NAME wave_equation_wide-sycl-benchmark-${redundancy} NAME wave_equation_wide TIMEOUT 1000 NPROC 8 SYCL TARGS -t 100 -r ${redundancy})
101-
# endforeach()
102-
# add_mp_ctest(
103-
# TEST_NAME wave_equation_wide-sycl-gpu NAME wave_equation_wide TIMEOUT 1000 NPROC 8 SYCL TARGS --device-memory)
104-
# foreach(redundancy RANGE 1 8)
105-
# add_mp_ctest(
106-
# TEST_NAME wave_equation_wide-sycl-gpu-benchmark-${redundancy} NAME wave_equation_wide TIMEOUT 1000 NPROC 8 SYCL TARGS --device-memory -t 100 -r ${redundancy})
107-
# endforeach()
108-
#endif()
109-
#
110-
#add_executable(shallow_water shallow_water.cpp)
111-
#target_link_libraries(shallow_water cxxopts DR::mpi)
112-
#target_compile_definitions(shallow_water PRIVATE STANDALONE_BENCHMARK)
113-
#
114-
## issue DRA-23 add_mp_ctest(TEST_NAME shallow_water NAME shallow_water NPROC 1)
115-
## add_mp_ctest( TEST_NAME shallow_water_fused NAME shallow_water NPROC 1 TARGS
116-
## -f)
117-
#
118-
#if(ENABLE_SYCL)
119-
# if(CMAKE_BUILD_TYPE STREQUAL "Release")
120-
# # too long (or hangs?) in debug
121-
#
122-
# add_mp_ctest(
123-
# TEST_NAME shallow_water-sycl NAME shallow_water NPROC 2 SYCL)
124-
# add_mp_ctest(
125-
# TEST_NAME shallow_water_fused-sycl NAME shallow_water NPROC 2 SYCL TARGS
126-
# -f)
127-
# endif()
128-
#
129-
# add_executable(fft3d_mp fft3d.cpp)
130-
# target_link_libraries(fft3d_mp cxxopts DR::mpi MKL::MKL_DPCPP)
131-
# target_compile_definitions(fft3d_mp PRIVATE STANDALONE_BENCHMARK)
132-
# add_mp_ctest(TEST_NAME fft3d_mp NAME fft3d_mp NPROC 2)
133-
#endif()
1+
# SPDX-FileCopyrightText: Intel Corporation
2+
#
3+
# SPDX-License-Identifier: BSD-3-Clause
4+
5+
set(CMAKE_INCLUDE_CURRENT_DIR ON)
6+
7+
# cmake-format: off
8+
add_executable(
9+
mp-bench
10+
mp-bench.cpp
11+
../common/distributed_vector.cpp
12+
../common/dot_product.cpp
13+
../common/inclusive_exclusive_scan.cpp
14+
../common/sort.cpp
15+
../common/stream.cpp
16+
streammp.cpp
17+
rooted.cpp
18+
stencil_1d.cpp
19+
stencil_2d.cpp
20+
chunk.cpp
21+
# mdspan.cpp
22+
mpi.cpp)
23+
# cmake-format: on
24+
25+
# disabled with SYCL due to DRA-135
26+
if(NOT ENABLE_SYCL)
27+
target_sources(mp-bench PRIVATE mdspan.cpp)
28+
endif()
29+
30+
if(ENABLE_SYCL)
31+
target_sources(mp-bench PRIVATE fft3d.cpp)
32+
endif()
33+
34+
if(NOT ENABLE_CUDA)
35+
# does not compile in CUDA because: black_scholes.cpp uses std::log
36+
# shallow_water, wave_equation use exp
37+
target_sources(mp-bench PRIVATE ../common/black_scholes.cpp shallow_water.cpp
38+
wave_equation.cpp)
39+
endif()
40+
41+
# mp-quick-bench is for development. By reducing the number of source files, it
42+
# builds much faster. Change the source files to match what you need to test. It
43+
# is OK to commit changes to the source file list.
44+
add_executable(mp-quick-bench mp-bench.cpp ../common/distributed_vector.cpp)
45+
46+
foreach(mp-bench-exec IN ITEMS mp-bench mp-quick-bench)
47+
target_compile_definitions(${mp-bench-exec} PRIVATE BENCH_MP)
48+
target_link_libraries(${mp-bench-exec} benchmark::benchmark cxxopts DR::mpi)
49+
if(ENABLE_ISHMEM)
50+
target_link_ishmem(${mp-bench-exec})
51+
endif()
52+
if(ENABLE_SYCL)
53+
target_link_libraries(${mp-bench-exec} MKL::MKL_DPCPP)
54+
endif()
55+
endforeach()
56+
57+
if(ENABLE_SYCL)
58+
# target_sources(mp-quick-bench PRIVATE fft3d.cpp)
59+
endif()
60+
61+
cmake_path(GET MPI_CXX_ADDITIONAL_INCLUDE_DIRS FILENAME MPI_IMPL)
62+
63+
# debug mp-bench is too slow
64+
if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT MPI_IMPL STREQUAL "openmpi")
65+
# MPI_Win_create fails for communicator with size 1. 30000 is minimum because
66+
# of static column size for stencil2D. Disable DPL benchmarks because we get
67+
# intermittent fails with: ONEAPI_DEVICE_SELECTOR=opencl:cpu mpirun -n 1
68+
# ./mp-bench --vector-size 30000 --rows 100 --columns 100 --check
69+
add_mp_ctest(
70+
NAME mp-bench TIMEOUT 200 TARGS --vector-size 30000 --rows 100 --columns
71+
100 --check --benchmark_filter=-FFT3D.*)
72+
if(ENABLE_SYCL)
73+
add_mp_ctest(
74+
NAME mp-bench TIMEOUT 200 SYCL TARGS --vector-size 30000 --rows 100
75+
--columns 100 --check --benchmark_filter=-.*DPL.*)
76+
endif()
77+
endif()
78+
79+
add_executable(wave_equation wave_equation.cpp)
80+
target_link_libraries(wave_equation cxxopts DR::mpi)
81+
target_compile_definitions(wave_equation PRIVATE STANDALONE_BENCHMARK)
82+
add_mp_ctest(NAME wave_equation)
83+
add_executable(wave_equation_wide wave_equation_wide.cpp)
84+
target_link_libraries(wave_equation_wide cxxopts DR::mpi)
85+
target_compile_definitions(wave_equation_wide PRIVATE STANDALONE_BENCHMARK)
86+
add_mp_ctest(NAME wave_equation_wide)
87+
# add_mp_ctest(TEST_NAME wave_equation_fused NAME wave_equation TARGS -f) #
88+
# DRA-92
89+
if(ENABLE_SYCL)
90+
add_mp_ctest(
91+
TEST_NAME wave_equation-sycl NAME wave_equation TIMEOUT 1000 NPROC 8 SYCL)
92+
add_mp_ctest(
93+
TEST_NAME wave_equation-sycl-benchmark NAME wave_equation TIMEOUT 1000 NPROC 8 SYCL TARGS -t)
94+
add_mp_ctest(
95+
TEST_NAME wave_equation_fused-sycl NAME wave_equation TIMEOUT 1000 NPROC 2 SYCL TARGS -f)
96+
add_mp_ctest(
97+
TEST_NAME wave_equation_wide-sycl NAME wave_equation_wide TIMEOUT 1000 NPROC 8 SYCL)
98+
foreach(redundancy RANGE 1 8)
99+
add_mp_ctest(
100+
TEST_NAME wave_equation_wide-sycl-benchmark-${redundancy} NAME wave_equation_wide TIMEOUT 1000 NPROC 8 SYCL TARGS -t 100 -r ${redundancy})
101+
endforeach()
102+
add_mp_ctest(
103+
TEST_NAME wave_equation_wide-sycl-gpu NAME wave_equation_wide TIMEOUT 1000 NPROC 8 SYCL TARGS --device-memory)
104+
foreach(redundancy RANGE 1 8)
105+
add_mp_ctest(
106+
TEST_NAME wave_equation_wide-sycl-gpu-benchmark-${redundancy} NAME wave_equation_wide TIMEOUT 1000 NPROC 8 SYCL TARGS --device-memory -t 100 -r ${redundancy})
107+
endforeach()
108+
endif()
109+
110+
add_executable(shallow_water shallow_water.cpp)
111+
target_link_libraries(shallow_water cxxopts DR::mpi)
112+
target_compile_definitions(shallow_water PRIVATE STANDALONE_BENCHMARK)
113+
114+
# issue DRA-23 add_mp_ctest(TEST_NAME shallow_water NAME shallow_water NPROC 1)
115+
# add_mp_ctest( TEST_NAME shallow_water_fused NAME shallow_water NPROC 1 TARGS
116+
# -f)
117+
118+
if(ENABLE_SYCL)
119+
if(CMAKE_BUILD_TYPE STREQUAL "Release")
120+
# too long (or hangs?) in debug
121+
122+
add_mp_ctest(
123+
TEST_NAME shallow_water-sycl NAME shallow_water NPROC 2 SYCL)
124+
add_mp_ctest(
125+
TEST_NAME shallow_water_fused-sycl NAME shallow_water NPROC 2 SYCL TARGS
126+
-f)
127+
endif()
128+
129+
add_executable(fft3d_mp fft3d.cpp)
130+
target_link_libraries(fft3d_mp cxxopts DR::mpi MKL::MKL_DPCPP)
131+
target_compile_definitions(fft3d_mp PRIVATE STANDALONE_BENCHMARK)
132+
add_mp_ctest(TEST_NAME fft3d_mp NAME fft3d_mp NPROC 2)
133+
endif()

benchmarks/gbench/mp/wave_equation_wide.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,13 @@ int run(
140140
std::cout << "Redundancy " << redundancy << std::endl;
141141
}
142142

143+
std::cout << "before e\n";
143144
// state variables
144145
// water elevation at T points
145146
Array e({nx + 1, ny}, dist);
147+
std::cout << "after e\n";
146148
dr::mp::fill(e, 0.0);
149+
std::cout << "after fill e\n";
147150
// x velocity at U points
148151
Array u({nx + 1, ny}, dist);
149152
dr::mp::fill(u, 0.0);
@@ -165,12 +168,17 @@ int run(
165168
Array dudt({nx + 1, ny}, dist);
166169
Array dvdt({nx + 1, ny + 1}, dist);
167170

171+
std::cout << "After all arrays\n";
172+
168173
dr::mp::fill(dedt, 0);
169174
dr::mp::fill(dudt, 0);
170175
dr::mp::fill(dvdt, 0);
176+
std::cout << "After fill\n";
177+
171178
dr::mp::halo(dedt).exchange();
172179
dr::mp::halo(dudt).exchange();
173180
dr::mp::halo(dvdt).exchange();
181+
std::cout << "After first exchange\n";
174182

175183
auto init_op = [xmin, ymin, grid](auto index, auto v) {
176184
auto &[o] = v;
@@ -184,6 +192,7 @@ int run(
184192
}
185193
};
186194
dr::mp::for_each(init_op, e);
195+
std::cout << "After mp::for_each\n";
187196

188197
auto add = [](auto ops) { return ops.first + ops.second; };
189198
auto max = [](double x, double y) { return std::max(x, y); };
@@ -225,6 +234,7 @@ int run(
225234
};
226235

227236
for (std::size_t i = 0; i < nt + 1; i++) {
237+
std::cout << "i = " << i << "\n";
228238
t = static_cast<double>(i) * dt;
229239

230240
if (t >= next_t_export - 1e-8) {

include/dr/mp/algorithms/for_each.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,16 @@ void stencil_for_each_extended_1(auto op, stencil_index_type<1> begin,
9797
};
9898
if (mp::use_sycl()) {
9999
#ifdef SYCL_LANGUAGE_VERSION
100+
std::cout << "do parallel_for 1d - sycl\n";
100101
dr::__detail::parallel_for(dr::mp::sycl_queue(),
101102
sycl::range<1>(distance[0]), do_point)
102103
.wait();
103104
#else
105+
std::cout << "do parallel_for 1d - sycl failed\n";
104106
assert(false);
105107
#endif
106108
} else {
109+
std::cout << "do parallel_for 1d - no sycl\n";
107110
for (std::size_t i = 0; i < distance[0]; i++) {
108111
do_point(i);
109112
}
@@ -147,14 +150,17 @@ void stencil_for_each_extended_2(auto op, stencil_index_type<2> &begin,
147150
};
148151
if (mp::use_sycl()) {
149152
#ifdef SYCL_LANGUAGE_VERSION
153+
std::cout << "do parallel_for 2d - sycl\n";
150154
dr::__detail::parallel_for(dr::mp::sycl_queue(),
151155
sycl::range<2>(distance[0], distance[1]),
152156
do_point)
153157
.wait();
154158
#else
159+
std::cout << "do parallel_for 2d - sycl fail\n";
155160
assert(false);
156161
#endif
157162
} else {
163+
std::cout << "do parallel_for 2d - no sycl\n";
158164
for (std::size_t i = 0; i < distance[0]; i++) {
159165
for (std::size_t j = 0; j < distance[1]; j++) {
160166
do_point(stencil_index_type<2>{i, j});

0 commit comments

Comments
 (0)