Skip to content
This repository was archived by the owner on Sep 22, 2025. It is now read-only.

Commit 00e1320

Browse files
author
kc432959
committed
Fixed game of life
1 parent 287fcf7 commit 00e1320

File tree

3 files changed

+125
-85
lines changed

3 files changed

+125
-85
lines changed

benchmarks/gbench/mp/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,6 @@ if(ENABLE_SYCL)
107107
endforeach()
108108
endif()
109109

110-
add_executable(game_of_life game_of_life.cpp)
111-
target_link_libraries(game_of_life cxxopts DR::mpi)
112-
target_compile_definitions(game_of_life PRIVATE STANDALONE_BENCHMARK)
113-
114110
add_executable(shallow_water shallow_water.cpp)
115111
target_link_libraries(shallow_water cxxopts DR::mpi)
116112
target_compile_definitions(shallow_water PRIVATE STANDALONE_BENCHMARK)

test/gtest/mp/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ foreach(test-exec IN ITEMS mp-tests mp-tests-3 mp-quick-test mp-quick-test-3-onl
8181
"${CMAKE_COMMAND} -E time")
8282
endforeach()
8383

84+
# Game of life
85+
add_executable(game_of_life game_of_life.cpp)
86+
if(ENABLE_ISHMEM)
87+
target_link_ishmem(game_of_life)
88+
endif()
89+
target_link_libraries(game_of_life cxxopts DR::mpi)
90+
8491
# tests without --sycl flag will fail on IshmemBackend TODO: make them be
8592
# running somehow if ENABLE_ISHMEM will be default CI config
8693
if(NOT ENABLE_ISHMEM)

benchmarks/gbench/mp/game_of_life.cpp renamed to test/gtest/mp/game_of_life.cpp

Lines changed: 118 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,48 @@
55
#include "cxxopts.hpp"
66
#include "dr/mp.hpp"
77
#include "mpi.h"
8+
9+
/// Free-function shorthand that forwards to dr::mp::barrier().
inline void barrier() {
  dr::mp::barrier();
}
10+
/// Free-function shorthand that forwards to dr::mp::fence().
inline void fence() {
  dr::mp::fence();
}
11+
/// Calls the `fence()` member of whatever object is passed in.
///
/// @param obj Any object exposing a callable `fence()` member.
inline void fence_on(auto &&obj) {
  obj.fence();
}
12+
813
#include <chrono>
914
#include <memory>
1015
#include <iomanip>
1116

12-
#ifdef STANDALONE_BENCHMARK
17+
//
18+
19+
struct MPI_data {
20+
MPI_Comm comm;
21+
int rank;
22+
int size;
1323

14-
MPI_Comm comm;
15-
int comm_rank;
16-
int comm_size;
24+
bool host() {
25+
return rank == 0;
26+
}
27+
};
1728

18-
#else
29+
// File-local MPI state of this process; its fields are filled in by init_MPI().
static MPI_data mpi_data;
1930

20-
#include "../common/dr_bench.hpp"
31+
/// Command-line configuration of a game-of-life run, as produced by
/// parse_options().
///
/// All scalar members carry default initializers so that a default-constructed
/// Options never holds indeterminate values.
struct Options {
  std::size_t size = 0;        // grid edge length (-n / --size)
  std::size_t steps = 0;       // number of time steps (-t / --steps)
  std::size_t redundancy = 1;  // -r / --redundancy; non-zero because the run
                               // loop uses it as a modulus
  bool debug = false;          // -d / --debug

  // Owns the log stream when logging is enabled; empty otherwise.
  std::unique_ptr<std::ofstream> logfile;

  bool sycl = false;           // --sycl
  bool device_memory = false;  // --device-memory
};
2342

2443
namespace GameOfLife {
2544

2645
using T = int;
2746
using Array = dr::mp::distributed_mdarray<T, 2>;
2847

2948
void init(std::size_t n, Array& out) {
30-
std::vector<std::vector<int>> in(n, std::vector<int>(n, 0));
49+
std::vector<std::vector<int>> in(4, std::vector<int>(4, 0));
3150
/*
3251
1 0 0
3352
0 1 1
@@ -39,16 +58,16 @@ void init(std::size_t n, Array& out) {
3958
in[3][1] = 1; in[3][2] = 1; in[3][3] = 0;
4059
// clang-format on
4160
std::vector<int> local(n * n);
42-
for (int i = 0; i < n; i++) {
43-
for (int j = 0; j < n; j++) {
61+
for (int i = 0; i < 4; i++) {
62+
for (int j = 0; j < 4; j++) {
4463
local[i * n + j] = in[i][j];
4564
}
4665
}
4766
dr::mp::copy(local.begin(), local.end(), out.begin());
4867
}
4968

5069
void run(std::size_t n, std::size_t redundancy, std::size_t steps, bool debug) {
51-
if (comm_rank == 0) {
70+
if (mpi_data.host()) {
5271
std::cout << "Using backend: dr" << std::endl;
5372
std::cout << "Grid size: " << n << " x " << n << std::endl;
5473
std::cout << "Time steps:" << steps << std::endl;
@@ -61,7 +80,6 @@ void run(std::size_t n, std::size_t redundancy, std::size_t steps, bool debug) {
6180
Array array({n, n}, dist);
6281
Array array_out({n, n}, dist);
6382
dr::mp::fill(array, 0);
64-
dr::mp::fill(array_out, 0);
6583

6684
init(n, array);
6785

@@ -99,80 +117,93 @@ void run(std::size_t n, std::size_t redundancy, std::size_t steps, bool debug) {
99117
x(0, 0) = x_out(0, 0);
100118
};
101119

102-
auto tic = std::chrono::steady_clock::now();
103-
104120
auto print = [n](const auto &v) {
105121
std::vector<int> local(n * n);
106-
copy(v, local.begin());
107-
if (comm_rank == 0) {
122+
dr::mp::copy(0, v, local.begin());
123+
if (mpi_data.host()) {
108124
for (int i = 0; i < n; i++) {
109125
for (int j = 0; j < n; j++) {
110-
std::cout << local[i * n + j] << " ";
126+
fmt::print("{}", local[i * n + j] == 1 ? '#' : '.');
111127
}
112-
std::cout << "\n";
128+
fmt::print("\n");
113129
}
114130
}
115131
};
116132

117-
for (std::size_t i = 0; i < steps; i++) {
118-
if (comm_rank == 0) {
119-
std::cout << "Step " << i << "\n";
133+
auto tic = std::chrono::steady_clock::now();
134+
for (std::size_t i = 0, next_treshold = 0; i < steps; i++) {
135+
if (i >= next_treshold && mpi_data.host()) {
136+
// Divide in floating point so that round() acts on steps / 100 itself;
// the integer division used before truncated first, making round() a no-op.
next_treshold += round(static_cast<double>(steps) / 100);
137+
double percent = round(static_cast<double>(i) * 100 / static_cast<double>(steps));
138+
fmt::print("Steps done {}% ({} of {} steps)\n", percent, i, steps);
120139
}
140+
121141
// step
122142
stencil_for_each_extended<2>(calculate, {1, 1}, {1, 1}, array, array_out);
123143
stencil_for_each_extended<2>(assign, {0, 0}, {0, 0}, array, array_out);
144+
124145
// phase with communication - once after (redundancy - 1) steps without communication
125146
if ((i + 1) % redundancy == 0) {
126-
if (comm_rank == 0) {
127-
std::cout << "Exchange\n";
147+
if (debug && mpi_data.host()) {
148+
fmt::print("Exchange at step {}\n", i);
128149
}
129150
array.halo().exchange();
130151
// Array_out is a temporary, no need to exchange it
131152
}
153+
154+
// debug print
132155
if (debug) {
133-
if (comm_rank == 0) {
134-
std::cout << "Array " << i << ":\n";
156+
if (mpi_data.host()) {
157+
fmt::print("Array {}:\n", i);
135158
}
159+
// print needs synchronization across the MPI boundary (dr::mp::copy), so every rank has to execute it
136160
print(array);
137-
if (comm_rank == 0) {
138-
std::cout << "\n";
139-
}
140161
}
141162
}
142-
143163
auto toc = std::chrono::steady_clock::now();
164+
144165
std::chrono::duration<double> duration = toc - tic;
145-
if (comm_rank == 0) {
166+
167+
if (mpi_data.host()) {
146168
double t_cpu = duration.count();
147169
double t_step = t_cpu / static_cast<double>(steps);
148-
std::cout << "Duration: " << std::setprecision(3) << t_cpu << " s" << std::endl;
149-
std::cout << "Time per step: " << std::setprecision(2) << t_step * 1000 << " ms" << std::endl;
170+
171+
fmt::print("Steps done 100% ({} of {} steps)\n", steps, steps);
172+
fmt::print("Duration {} s\n", t_cpu);
173+
fmt::print("Time per step {} ms\n", t_step * 1000);
150174
}
151175
}
152176

153177
} // namespace GameOfLife
154178

155-
#ifdef STANDALONE_BENCHMARK
156-
157-
int main(int argc, char *argv[]) {
179+
// Initialization functions
158180

181+
void init_MPI(int argc, char *argv[]) {
159182
MPI_Init(&argc, &argv);
160-
comm = MPI_COMM_WORLD;
161-
MPI_Comm_rank(comm, &comm_rank);
162-
MPI_Comm_size(comm, &comm_size);
183+
mpi_data.comm = MPI_COMM_WORLD;
184+
MPI_Comm_rank(mpi_data.comm, &mpi_data.rank);
185+
MPI_Comm_size(mpi_data.comm, &mpi_data.size);
186+
187+
dr::drlog.debug("MPI: rank = {}, size = {}\n", mpi_data.rank, mpi_data.size);
188+
}
189+
190+
Options parse_options(int argc, char *argv[]) {
191+
Options out;
163192

164193
cxxopts::Options options_spec(argv[0], "game of life");
194+
165195
// clang-format off
166196
options_spec.add_options()
167-
("n,size", "Grid size", cxxopts::value<std::size_t>()->default_value("128"))
168-
("t,steps", "Run a fixed number of time steps.", cxxopts::value<std::size_t>()->default_value("100"))
169-
("r,redundancy", "Set outer-grid redundancy parameter.", cxxopts::value<std::size_t>()->default_value("2"))
170-
("sycl", "Execute on SYCL device")
171-
("l,log", "enable logging")
172-
("d,debug", "enable debug logging")
197+
("drhelp", "Print help")
198+
("log", "Enable logging")
173199
("logprefix", "appended .RANK.log", cxxopts::value<std::string>()->default_value("dr"))
200+
("log-filter", "Filter the log", cxxopts::value<std::vector<std::string>>())
174201
("device-memory", "Use device memory")
175-
("h,help", "Print help");
202+
("sycl", "Execute on SYCL device")
203+
("d,debug", "enable debug logging")
204+
("n,size", "Grid size", cxxopts::value<std::size_t>()->default_value("128"))
205+
("t,steps", "Run a fixed number of time steps.", cxxopts::value<std::size_t>()->default_value("100"))
206+
("r,redundancy", "Set outer-grid redundancy parameter.", cxxopts::value<std::size_t>()->default_value("2"));
176207
// clang-format on
177208

178209
cxxopts::ParseResult options;
@@ -183,51 +214,57 @@ int main(int argc, char *argv[]) {
183214
exit(1);
184215
}
185216

186-
std::unique_ptr<std::ofstream> logfile;
187-
if (options.count("log")) {
188-
logfile =
189-
std::make_unique<std::ofstream>(options["logprefix"].as<std::string>() +
190-
fmt::format(".{}.log", comm_rank));
191-
dr::drlog.set_file(*logfile);
217+
out.sycl = options.count("sycl") != 0;
218+
// Read the "device-memory" flag; this previously tested "debug", so --debug
// switched on device memory and --device-memory had no effect.
out.device_memory = options.count("device-memory") != 0;
219+
220+
if (options.count("drhelp")) {
221+
std::cout << options_spec.help() << "\n";
222+
exit(0);
192223
}
193224

194-
if (options.count("sycl")) {
195-
#ifdef SYCL_LANGUAGE_VERSION
196-
sycl::queue q = dr::mp::select_queue();
197-
std::cout << "Run on: "
198-
<< q.get_device().get_info<sycl::info::device::name>() << "\n";
199-
dr::mp::init(q, options.count("device-memory") ? sycl::usm::alloc::device
200-
: sycl::usm::alloc::shared);
201-
#else
202-
std::cout << "Sycl support requires icpx\n";
203-
exit(1);
204-
#endif
205-
} else {
206-
if (comm_rank == 0) {
207-
std::cout << "Run on: CPU\n";
225+
if (options.count("log")) {
226+
out.logfile.reset(new std::ofstream(options["logprefix"].as<std::string>() +
227+
fmt::format(".{}.log", mpi_data.rank)));
228+
dr::drlog.set_file(*out.logfile);
229+
if (options.count("log-filter")) {
230+
dr::drlog.filter(options["log-filter"].as<std::vector<std::string>>());
208231
}
209-
dr::mp::init();
210232
}
211233

212-
std::size_t n = options["n"].as<std::size_t>();
213-
std::size_t redundancy = options["r"].as<std::size_t>();
214-
std::size_t steps = options["t"].as<std::size_t>();
234+
out.size = options["n"].as<std::size_t>();
235+
out.redundancy = options["r"].as<std::size_t>();
236+
out.steps = options["t"].as<std::size_t>();
215237

216-
bool debug = false;
217-
if (options.count("debug")) {
218-
debug = true;
219-
}
238+
out.debug = options.count("debug") != 0;
220239

221-
GameOfLife::run(n, redundancy, steps, debug);
222-
dr::mp::finalize();
223-
MPI_Finalize();
224-
return 0;
240+
return out;
241+
}
242+
243+
void dr_init(const Options& options) {
244+
#ifdef SYCL_LANGUAGE_VERSION
245+
if (options.sycl) {
246+
sycl::queue q;
247+
fmt::print("Running on sycl device: {}, memory: {}\n", q.get_device().get_info<sycl::info::device::name>(), options.device_memory ? "devive" : "shared");
248+
dr::mp::init(q, options.device_memory ? sycl::usm::alloc::device
249+
: sycl::usm::alloc::shared);
250+
return;
251+
}
252+
#endif
253+
fmt::print("Running on CPU\n");
254+
dr::mp::init();
225255
}
226256

227-
#else
257+
// Main loop
228258

229-
static void GameOfLife_DR(benchmark::State &state) {}
259+
int main(int argc, char *argv[]) {
260+
init_MPI(argc, argv);
261+
Options options = parse_options(argc, argv);
262+
dr_init(options);
230263

231-
DR_BENCHMARK(GameOfLife_DR);
264+
GameOfLife::run(options.size, options.redundancy, options.steps, options.debug);
232265

233-
#endif
266+
dr::mp::finalize();
267+
MPI_Finalize();
268+
269+
return 0;
270+
}

0 commit comments

Comments
 (0)