Skip to content
This repository was archived by the owner on Sep 22, 2025. It is now read-only.

Commit 3d410d2

Browse files
author
kc432959
committed
Implemented game_of_life
1 parent a007da2 commit 3d410d2

File tree

3 files changed

+237
-6
lines changed

3 files changed

+237
-6
lines changed

benchmarks/gbench/mp/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ if(ENABLE_SYCL)
107107
endforeach()
108108
endif()
109109

110+
add_executable(game_of_life game_of_life.cpp)
111+
target_link_libraries(game_of_life cxxopts DR::mpi)
112+
target_compile_definitions(game_of_life PRIVATE STANDALONE_BENCHMARK)
113+
110114
add_executable(shallow_water shallow_water.cpp)
111115
target_link_libraries(shallow_water cxxopts DR::mpi)
112116
target_compile_definitions(shallow_water PRIVATE STANDALONE_BENCHMARK)
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
// SPDX-FileCopyrightText: Intel Corporation
2+
//
3+
// SPDX-License-Identifier: BSD-3-Clause
4+
5+
#include "cxxopts.hpp"
6+
#include "dr/mp.hpp"
7+
#include "mpi.h"
8+
#include <chrono>
9+
#include <memory>
10+
#include <iomanip>
11+
12+
#ifdef STANDALONE_BENCHMARK
13+
14+
// Process-wide MPI state for the standalone build. All three are assigned in
// main() right after MPI_Init: `comm` is the communicator in use, `comm_rank`
// and `comm_size` are this process's rank and the process count within it.
MPI_Comm comm;
int comm_rank{};
int comm_size{};
17+
18+
#else
19+
20+
#include "../common/dr_bench.hpp"
21+
22+
#endif
23+
24+
namespace GameOfLife {
25+
26+
using T = int;
27+
using Array = dr::mp::distributed_mdarray<T, 2>;
28+
29+
/// Seeds the n x n grid `out` with the initial pattern and zeros elsewhere.
///
/// The pattern occupies rows/columns 1..3:
///
///     1 0 0
///     0 1 1
///     1 1 0
///
/// Live cells that would fall outside the grid (n < 4) are dropped instead of
/// being written out of bounds.
///
/// @param n   Edge length of the square grid; `out` is expected to hold n * n
///            elements.
/// @param out Distributed grid that receives the seed, filled through
///            dr::mp::copy from a row-major host buffer.
void init(std::size_t n, Array& out) {
  // (row, column) coordinates of the live cells of the pattern above.
  constexpr std::size_t live_cells[][2] = {
      {1, 1}, {2, 2}, {2, 3}, {3, 1}, {3, 2}};

  // Row-major host copy of the grid, all cells dead by default.
  std::vector<int> local(n * n, 0);
  for (const auto &cell : live_cells) {
    const std::size_t row = cell[0];
    const std::size_t col = cell[1];
    // Guard against small grids: the pattern needs indices up to 3.
    if (row < n && col < n) {
      local[row * n + col] = 1;
    }
  }
  dr::mp::copy(local.begin(), local.end(), out.begin());
}
49+
50+
void run(std::size_t n, std::size_t redundancy, std::size_t steps, bool debug) {
51+
if (comm_rank == 0) {
52+
std::cout << "Using backend: dr" << std::endl;
53+
std::cout << "Grid size: " << n << " x " << n << std::endl;
54+
std::cout << "Time steps:" << steps << std::endl;
55+
std::cout << "Redundancy " << redundancy << std::endl;
56+
std::cout << std::endl;
57+
}
58+
59+
// construct grid
60+
auto dist = dr::mp::distribution().halo(1).redundancy(redundancy);
61+
Array array({n, n}, dist);
62+
Array array_out({n, n}, dist);
63+
dr::mp::fill(array, 0);
64+
dr::mp::fill(array_out, 0);
65+
66+
init(n, array);
67+
68+
// execute one calculation for one cell in game of life
69+
auto calculate = [](auto stencils) {
70+
auto [x, x_out] = stencils;
71+
// because below we calculate the sum of all 9 cells,
72+
// but we want the output only of 8 neighbourhs, subtract the value of self.
73+
int live_neighbours = -x(0, 0);
74+
for (int i = -1; i <= 1; i++) {
75+
for (int j = -1; j <= 1; j++) {
76+
live_neighbours += x(i, j); // alive == 1, dead == 0, so simple addition works
77+
}
78+
}
79+
80+
if (x(0, 0) == 1) { // self if alive
81+
if (live_neighbours == 2 || live_neighbours == 3) {
82+
x_out(0, 0) = 1;
83+
} else {
84+
x_out(0, 0) = 0;
85+
}
86+
}
87+
else { // self is dead
88+
if (live_neighbours == 3) {
89+
x_out(0, 0) = 1;
90+
} else {
91+
x_out(0, 0) = 0;
92+
}
93+
}
94+
};
95+
96+
// assign values of second array to first array
97+
auto assign = [](auto stencils) {
98+
auto [x, x_out] = stencils;
99+
x(0, 0) = x_out(0, 0);
100+
};
101+
102+
auto tic = std::chrono::steady_clock::now();
103+
104+
auto print = [n](const auto &v) {
105+
std::vector<int> local(n * n);
106+
copy(v, local.begin());
107+
if (comm_rank == 0) {
108+
for (int i = 0; i < n; i++) {
109+
for (int j = 0; j < n; j++) {
110+
std::cout << local[i * n + j] << " ";
111+
}
112+
std::cout << "\n";
113+
}
114+
}
115+
};
116+
117+
for (std::size_t i = 0; i < steps; i++) {
118+
if (comm_rank == 0) {
119+
std::cout << "Step " << i << "\n";
120+
}
121+
// step
122+
stencil_for_each_extended<2>(calculate, {1, 1}, {1, 1}, array, array_out);
123+
stencil_for_each_extended<2>(assign, {0, 0}, {0, 0}, array, array_out);
124+
// phase with communication - once after (redundancy - 1) steps without communication
125+
if ((i + 1) % redundancy == 0) {
126+
if (comm_rank == 0) {
127+
std::cout << "Exchange\n";
128+
}
129+
array.halo().exchange();
130+
// Array_out is a temporary, no need to exchange it
131+
}
132+
if (debug) {
133+
if (comm_rank == 0) {
134+
std::cout << "Array " << i << ":\n";
135+
}
136+
print(array);
137+
if (comm_rank == 0) {
138+
std::cout << "\n";
139+
}
140+
}
141+
}
142+
143+
auto toc = std::chrono::steady_clock::now();
144+
std::chrono::duration<double> duration = toc - tic;
145+
if (comm_rank == 0) {
146+
double t_cpu = duration.count();
147+
double t_step = t_cpu / static_cast<double>(steps);
148+
std::cout << "Duration: " << std::setprecision(3) << t_cpu << " s" << std::endl;
149+
std::cout << "Time per step: " << std::setprecision(2) << t_step * 1000 << " ms" << std::endl;
150+
}
151+
}
152+
153+
} // namespace GameOfLife
154+
155+
#ifdef STANDALONE_BENCHMARK
156+
157+
int main(int argc, char *argv[]) {
158+
159+
MPI_Init(&argc, &argv);
160+
comm = MPI_COMM_WORLD;
161+
MPI_Comm_rank(comm, &comm_rank);
162+
MPI_Comm_size(comm, &comm_size);
163+
164+
cxxopts::Options options_spec(argv[0], "game of life");
165+
// clang-format off
166+
options_spec.add_options()
167+
("n,size", "Grid size", cxxopts::value<std::size_t>()->default_value("128"))
168+
("t,steps", "Run a fixed number of time steps.", cxxopts::value<std::size_t>()->default_value("100"))
169+
("r,redundancy", "Set outer-grid redundancy parameter.", cxxopts::value<std::size_t>()->default_value("2"))
170+
("sycl", "Execute on SYCL device")
171+
("l,log", "enable logging")
172+
("d,debug", "enable debug logging")
173+
("logprefix", "appended .RANK.log", cxxopts::value<std::string>()->default_value("dr"))
174+
("device-memory", "Use device memory")
175+
("h,help", "Print help");
176+
// clang-format on
177+
178+
cxxopts::ParseResult options;
179+
try {
180+
options = options_spec.parse(argc, argv);
181+
} catch (const cxxopts::OptionParseException &e) {
182+
std::cout << options_spec.help() << "\n";
183+
exit(1);
184+
}
185+
186+
std::unique_ptr<std::ofstream> logfile;
187+
if (options.count("log")) {
188+
logfile =
189+
std::make_unique<std::ofstream>(options["logprefix"].as<std::string>() +
190+
fmt::format(".{}.log", comm_rank));
191+
dr::drlog.set_file(*logfile);
192+
}
193+
194+
if (options.count("sycl")) {
195+
#ifdef SYCL_LANGUAGE_VERSION
196+
sycl::queue q = dr::mp::select_queue();
197+
std::cout << "Run on: "
198+
<< q.get_device().get_info<sycl::info::device::name>() << "\n";
199+
dr::mp::init(q, options.count("device-memory") ? sycl::usm::alloc::device
200+
: sycl::usm::alloc::shared);
201+
#else
202+
std::cout << "Sycl support requires icpx\n";
203+
exit(1);
204+
#endif
205+
} else {
206+
if (comm_rank == 0) {
207+
std::cout << "Run on: CPU\n";
208+
}
209+
dr::mp::init();
210+
}
211+
212+
std::size_t n = options["n"].as<std::size_t>();
213+
std::size_t redundancy = options["r"].as<std::size_t>();
214+
std::size_t steps = options["t"].as<std::size_t>();
215+
216+
bool debug = false;
217+
if (options.count("debug")) {
218+
debug = true;
219+
}
220+
221+
GameOfLife::run(n, redundancy, steps, debug);
222+
dr::mp::finalize();
223+
MPI_Finalize();
224+
return 0;
225+
}
226+
227+
#else
228+
229+
static void GameOfLife_DR(benchmark::State &state) {}
230+
231+
DR_BENCHMARK(GameOfLife_DR);
232+
233+
#endif

include/dr/mp/algorithms/for_each.hpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,16 +97,13 @@ void stencil_for_each_extended_1(auto op, stencil_index_type<1> begin,
9797
};
9898
if (mp::use_sycl()) {
9999
#ifdef SYCL_LANGUAGE_VERSION
100-
std::cout << "do parallel_for 1d - sycl\n";
101100
dr::__detail::parallel_for(dr::mp::sycl_queue(),
102101
sycl::range<1>(distance[0]), do_point)
103102
.wait();
104103
#else
105-
std::cout << "do parallel_for 1d - sycl failed\n";
106104
assert(false);
107105
#endif
108106
} else {
109-
std::cout << "do parallel_for 1d - no sycl\n";
110107
for (std::size_t i = 0; i < distance[0]; i++) {
111108
do_point(i);
112109
}
@@ -150,17 +147,14 @@ void stencil_for_each_extended_2(auto op, stencil_index_type<2> &begin,
150147
};
151148
if (mp::use_sycl()) {
152149
#ifdef SYCL_LANGUAGE_VERSION
153-
std::cout << "do parallel_for 2d - sycl\n";
154150
dr::__detail::parallel_for(dr::mp::sycl_queue(),
155151
sycl::range<2>(distance[0], distance[1]),
156152
do_point)
157153
.wait();
158154
#else
159-
std::cout << "do parallel_for 2d - sycl fail\n";
160155
assert(false);
161156
#endif
162157
} else {
163-
std::cout << "do parallel_for 2d - no sycl\n";
164158
for (std::size_t i = 0; i < distance[0]; i++) {
165159
for (std::size_t j = 0; j < distance[1]; j++) {
166160
do_point(stencil_index_type<2>{i, j});

0 commit comments

Comments
 (0)