55#include  " cxxopts.hpp" 
66#include  " dr/mp.hpp" 
77#include  " mpi.h" 
8+ 
9+ inline  void  barrier () { dr::mp::barrier (); }
10+ inline  void  fence () { dr::mp::fence (); }
/// Calls `fence()` on the given object.
///
/// Written as an explicit template (equivalent to the abbreviated
/// `auto &&` form) so it also compiles before C++20.
///
/// @param obj any object with a callable `fence()` member; taken by
///            forwarding reference so both lvalues and temporaries bind.
template <typename T> inline void fence_on(T &&obj) { obj.fence(); }
12+ 
813#include  < chrono> 
914#include  < memory> 
1015#include  < iomanip> 
1116
12- #ifdef  STANDALONE_BENCHMARK
17+ // 
18+ 
19+ struct  MPI_data  {
20+   MPI_Comm comm;
21+   int  rank;
22+   int  size;
1323
14- MPI_Comm comm;
15- int  comm_rank;
16- int  comm_size;
24+   bool  host () {
25+     return  rank == 0 ;
26+   }
27+ };
1728
18- # else 
29+ static  MPI_data mpi_data; 
1930
20- #include  " ../common/dr_bench.hpp" 
/// Run-time configuration produced by parse_options().
///
/// Every member has a default so a default-constructed `Options` is in a
/// defined state; the numeric defaults mirror the command-line defaults
/// declared in parse_options() ("128", "100", "2").
struct Options {
  std::size_t size = 128;     ///< grid edge length (option "n,size")
  std::size_t steps = 100;    ///< number of time steps (option "t,steps")
  std::size_t redundancy = 2; ///< halo exchange interval (option "r,redundancy")
  bool debug = false;         ///< enable debug output (option "d,debug")

  /// Log sink; null unless logging was requested. Owned here so the stream
  /// stays alive for as long as the Options object does.
  std::unique_ptr<std::ofstream> logfile;

  bool sycl = false;          ///< run on a SYCL device (option "sycl")
  bool device_memory = false; ///< use device instead of shared USM memory
};
2342
2443namespace  GameOfLife  {
2544
2645using  T = int ;
2746using  Array = dr::mp::distributed_mdarray<T, 2 >;
2847
// Seeds the grid: builds a 4x4 table `in` of 0/1 cells, writes it into the
// top-left corner of a row-major n*n buffer `local`, then copies that buffer
// into `out` with dr::mp::copy.
//
// n   - edge length used for the row stride and size of `local`
// out - destination distributed array
//
// NOTE(review): this span is a rendered diff ('-' = removed line, '+' = added
// line) and the hunk header below hides part of the body, including the end of
// the block comment that starts after the `in` declaration.
// NOTE(review): the added loops run to a fixed 4 while `local` holds n * n
// elements, so `local[i * n + j]` indexes past the end whenever n < 4 -
// confirm callers guarantee n >= 4 or add a guard.
2948void  init (std::size_t  n, Array& out) {
30-   std::vector<std::vector<int >> in (n , std::vector<int >(n , 0 ));
49+   std::vector<std::vector<int >> in (4 , std::vector<int >(4 , 0 ));
3150  /* 
3251    1 0 0 
3352    0 1 1 
@@ -39,16 +58,16 @@ void init(std::size_t n, Array& out) {
3958  in[3 ][1 ] = 1 ; in[3 ][2 ] = 1 ; in[3 ][3 ] = 0 ;
4059  //  clang-format on
4160  std::vector<int > local (n * n);
42-   for  (int  i = 0 ; i < n ; i++) {
43-     for  (int  j = 0 ; j < n ; j++) {
61+   for  (int  i = 0 ; i < 4 ; i++) {
62+     for  (int  j = 0 ; j < 4 ; j++) {
4463      local[i * n + j] = in[i][j];
4564    }
4665  }
4766  dr::mp::copy (local.begin (), local.end (), out.begin ());
4867}
4968
// Runs the simulation and reports timing.
//
// Visible flow: the host rank prints the configuration; two n x n arrays
// (`array`, `array_out`) are created; `array` is zero-filled and seeded via
// init(); each step applies the `calculate` and `assign` stencils; a halo
// exchange on `array` happens whenever (i + 1) % redundancy == 0; with `debug`
// set the grid is printed after every step; finally the host rank prints the
// total duration and the time per step.
//
// n          - grid edge length
// redundancy - number of steps between halo exchanges
// steps      - number of time steps to run
// debug      - print exchange notices and the grid after each step
//
// NOTE(review): this span is a rendered diff ('-' = removed line, '+' = added
// line); the two hunk headers below hide parts of the body (among them the
// definitions of `dist`, `calculate` and `assign`) and several statements are
// cut short.
5069void  run (std::size_t  n, std::size_t  redundancy, std::size_t  steps, bool  debug) {
51-   if  (comm_rank ==  0 ) {
70+   if  (mpi_data. host () ) {
5271    std::cout << " Using backend: dr" 
5372    std::cout << " Grid size: " "  x " 
5473    std::cout << " Time steps:" 
@@ -61,7 +80,6 @@ void run(std::size_t n, std::size_t redundancy, std::size_t steps, bool debug) {
6180  Array array ({n, n}, dist);
6281  Array array_out ({n, n}, dist);
6382  dr::mp::fill (array, 0 );
64-   dr::mp::fill (array_out, 0 );
6583
6684  init (n, array);
6785
@@ -99,80 +117,93 @@ void run(std::size_t n, std::size_t redundancy, std::size_t steps, bool debug) {
99117      x (0 , 0 ) = x_out (0 , 0 );
100118    };
101119
102-   auto  tic = std::chrono::steady_clock::now ();
103- 
  // Gathers `v` into a local n * n buffer and, on the host rank only, prints
  // it row by row: '#' for cells equal to 1, '.' for everything else.
104120  auto  print = [n](const  auto  &v) {
105121      std::vector<int > local (n * n);
106-       copy (v, local.begin ());
107-       if  (comm_rank ==  0 ) {
      // NOTE(review): the call below is garbled in this rendering; it
      // presumably reads dr::mp::copy(0, v, local.begin()) - confirm.
122+       dr::mp:: copy0 ,  v, local.begin ());
123+       if  (mpi_data. host () ) {
108124        for  (int  i = 0 ; i < n; i++) {
109125          for  (int  j = 0 ; j < n; j++) {
110-             std::cout <<  local[i * n + j] <<  "   " 
126+             fmt::print ( " {} " ,  local[i * n + j] ==  1  ?  ' # '  :  ' . ' ) ;
111127          }
112-           std::cout <<  " \n " 
128+           fmt::print ( " \n " ) ;
113129        }
114130      }
115131    };
116132
117-   for  (std::size_t  i = 0 ; i < steps; i++) {
118-     if  (comm_rank == 0 ) {
119-       std::cout << " Step " " \n " 
  // Timing starts here, so it covers the whole step loop including progress
  // and debug output.
133+   auto  tic = std::chrono::steady_clock::now ();
134+   for  (std::size_t  i = 0 , next_treshold = 0 ; i < steps; i++) {
135+     if  (i >= next_treshold && mpi_data.host ()) {
      // NOTE(review): `steps / 100` is an integer division performed before
      // the cast, so for steps < 100 the increment is 0, `next_treshold`
      // stays at 0 and this progress branch runs on every step - confirm
      // whether a minimum increment of 1 was intended.
136+       next_treshold += round (static_cast <double >(steps / 100 ));
137+       double  percent = round (static_cast <double >(i) * 100  / static_cast <double >(steps));
138+       fmt::print (" Steps done {}% ({} of {} steps)\n " 
120139    }
140+ 
121141    //  step
122142    stencil_for_each_extended<2 >(calculate, {1 , 1 }, {1 , 1 }, array, array_out);
123143    stencil_for_each_extended<2 >(assign, {0 , 0 }, {0 , 0 }, array, array_out);
144+ 
124145    //  phase with communication - once after (redundancy - 1) steps without communication
    // NOTE(review): redundancy == 0 makes the modulo below a division by
    // zero; nothing visible here rejects that value.
125146    if  ((i + 1 ) % redundancy == 0 ) {
126-       if  (comm_rank ==  0 ) {
127-         std::cout <<  " Exchange\n " 
147+       if  (debug && mpi_data. host () ) {
148+         fmt::print ( " Exchange at step {} \n " , i) ;
128149      }
129150      array.halo ().exchange ();
130151      //  Array_out is a temporary, no need to exchange it
131152    }
153+ 
154+     //  debug print
132155    if  (debug) {
133-       if  (comm_rank ==  0 ) {
134-         std::cout <<  " Array "  << i <<  " :\n " 
156+       if  (mpi_data. host () ) {
157+         fmt::print ( " Array {} :\n " , i) ;
135158      }
159+       //  print needs a synchronization across the MPI boundary (dr::mp::copy), so each node has to execute it
136160      print (array);
137-       if  (comm_rank == 0 ) {
138-         std::cout << " \n " 
139-       }
140161    }
141162  }
142- 
143163  auto  toc = std::chrono::steady_clock::now ();
164+ 
144165  std::chrono::duration<double > duration = toc - tic;
145-   if  (comm_rank == 0 ) {
166+ 
167+   if  (mpi_data.host ()) {
146168    double  t_cpu = duration.count ();
    // Average wall-clock time per step, in seconds (scaled by 1000 to ms when printed).
147169    double  t_step = t_cpu / static_cast <double >(steps);
148-     std::cout << " Duration: " std::setprecision (3 ) << t_cpu << "  s" 
149-     std::cout << " Time per step: " std::setprecision (2 ) << t_step * 1000  << "  ms" 
170+ 
171+     fmt::print (" Steps done 100% ({} of {} steps)\n " 
172+     fmt::print (" Duration {} s\n " 
173+     fmt::print (" Time per step {} ms\n " 1000 );
150174  }
151175}
152176
153177} //  namespace GameOfLife
154178
155- #ifdef  STANDALONE_BENCHMARK
156- 
157- int  main (int  argc, char  *argv[]) {
179+ //  Initialization functions
158180
181+ void  init_MPI (int  argc, char  *argv[]) {
159182  MPI_Init (&argc, &argv);
160-   comm = MPI_COMM_WORLD;
161-   MPI_Comm_rank (comm, &comm_rank);
162-   MPI_Comm_size (comm, &comm_size);
183+   mpi_data.comm  = MPI_COMM_WORLD;
184+   MPI_Comm_rank (mpi_data.comm , &mpi_data.rank );
185+   MPI_Comm_size (mpi_data.comm , &mpi_data.size );
186+ 
187+   dr::drlog.debug (" MPI: rank = {}, size = {}\n " rank , mpi_data.size );
188+ }
189+ 
// Parses the command line into an Options value.
//
// Declares the accepted options on a cxxopts::Options spec, then fills `out`:
// the sycl / device_memory / debug flags from option counts, size / steps /
// redundancy from the "n" / "r" / "t" options, and - when "log" is given - a
// per-rank log file that is also installed as the dr::drlog sink.
//
// argc, argv - the program's command-line arguments
// returns    - the populated Options (owns the log stream, if any)
//
// NOTE(review): this span is a rendered diff ('-' = removed line, '+' = added
// line); the hunk header below hides the actual parse call and its error
// handling, and the option table entries are cut short.
190+ Options parse_options (int  argc, char  *argv[]) {
191+   Options out;
163192
164193  cxxopts::Options options_spec (argv[0 ], " game of life" 
194+ 
165195  //  clang-format off
166196  options_spec.add_options ()
167-     (" n,size" " Grid size" size_t >()->default_value (" 128" 
168-     (" t,steps" " Run a fixed number of time steps." size_t >()->default_value (" 100" 
169-     (" r,redundancy" " Set outer-grid redundancy parameter." size_t >()->default_value (" 2" 
170-     (" sycl" " Execute on SYCL device" 
171-     (" l,log" " enable logging" 
172-     (" d,debug" " enable debug logging" 
197+     (" drhelp" " Print help" 
198+     (" log" " Enable logging" 
173199    (" logprefix" " appended .RANK.log" default_value (" dr" 
200+     (" log-filter" " Filter the log" 
174201    (" device-memory" " Use device memory" 
175-     (" h,help" " Print help" 
202+     (" sycl" " Execute on SYCL device" 
203+     (" d,debug" " enable debug logging" 
204+     (" n,size" " Grid size" size_t >()->default_value (" 128" 
205+     (" t,steps" " Run a fixed number of time steps." size_t >()->default_value (" 100" 
206+     (" r,redundancy" " Set outer-grid redundancy parameter." size_t >()->default_value (" 2" 
176207  //  clang-format on
177208
178209  cxxopts::ParseResult options;
@@ -183,51 +214,57 @@ int main(int argc, char *argv[]) {
183214    exit (1 );
184215  }
185216
186-   std::unique_ptr<std::ofstream> logfile ;
187-   if  ( options.count (" log " )) { 
188-     logfile = 
189-         std::make_unique<std::ofstream> (options[ " logprefix " ]. as <std::string>() + 
190-                                          fmt::format ( " .{}.log " , comm_rank)) ;
191-     dr::drlog. set_file (*logfile );
217+   out. sycl  = options. count ( " sycl " ) !=  0 ;
  // NOTE(review): device_memory is derived from the "debug" flag here, although
  // a separate "device-memory" option is declared above (and the removed code
  // queried it) - this looks like a copy/paste slip; presumably it should
  // count "device-memory". Confirm and fix.
218+   out. device_memory  =  options.count (" debug " ) !=  0 ; 
219+ 
  // NOTE(review): the help path exits straight away; by the time main() calls
  // this function init_MPI() has already run, so no MPI_Finalize() happens
  // on this path - confirm that is acceptable.
220+   if   (options. count ( " drhelp " )) { 
221+     std::cout << options_spec. help () <<  " \n " 
222+     exit ( 0 );
192223  }
193224
194-   if  (options.count (" sycl" 
195- #ifdef  SYCL_LANGUAGE_VERSION
196-     sycl::queue q = dr::mp::select_queue ();
197-     std::cout << " Run on: " 
198-               << q.get_device ().get_info <sycl::info::device::name>() << " \n " 
199-     dr::mp::init (q, options.count (" device-memory" 
200-                                                    : sycl::usm::alloc::shared);
201- #else 
202-     std::cout << " Sycl support requires icpx\n " 
203-     exit (1 );
204- #endif 
205-   } else  {
206-     if  (comm_rank == 0 ) {
207-       std::cout << " Run on: CPU\n " 
  // Logging: one file per rank, named from the "logprefix" option plus a
  // ".{}.log" suffix. dr::drlog keeps using the stream owned by out.logfile,
  // so the returned Options has to stay alive while logging is in use.
225+   if  (options.count (" log" 
226+     out.logfile .reset (new  std::ofstream (options[" logprefix" as <std::string>() +
227+                                     fmt::format (" .{}.log" rank )));
228+     dr::drlog.set_file (*out.logfile );
229+     if  (options.count (" log-filter" 
230+       dr::drlog.filter (options[" log-filter" as <std::vector<std::string>>());
208231    }
209-     dr::mp::init ();
210232  }
211233
212-   std:: size_t  n  = options[" n" as <std::size_t >();
213-   std:: size_t   redundancy = options[" r" as <std::size_t >();
214-   std:: size_t   steps = options[" t" as <std::size_t >();
234+   out. size  = options[" n" as <std::size_t >();
235+   out. redundancy  = options[" r" as <std::size_t >();
236+   out. steps  = options[" t" as <std::size_t >();
215237
216-   bool  debug = false ;
217-   if  (options.count (" debug" 
218-     debug = true ;
219-   }
238+   out.debug  = options.count (" debug" 0 ;
220239
221-   GameOfLife::run (n, redundancy, steps, debug);
222-   dr::mp::finalize ();
223-   MPI_Finalize ();
224-   return  0 ;
240+   return  out;
241+ }
242+ 
243+ void  dr_init (const  Options& options) {
244+ #ifdef  SYCL_LANGUAGE_VERSION
245+   if  (options.sycl ) {
246+     sycl::queue q;
247+     fmt::print (" Running on sycl device: {}, memory: {}\n " get_device ().get_info <sycl::info::device::name>(), options.device_memory  ? " devive" " shared" 
248+     dr::mp::init (q, options.device_memory  ? sycl::usm::alloc::device
249+                                           : sycl::usm::alloc::shared);
250+     return ;
251+   }
252+ #endif 
253+   fmt::print (" Running on CPU\n " 
254+   dr::mp::init ();
225255}
226256
227- # else 
257+ //  Main loop 
228258
229- static  void  GameOfLife_DR (benchmark::State &state) {}
259+ int  main (int  argc, char  *argv[]) {
260+   init_MPI (argc, argv);
261+   Options options = parse_options (argc, argv);
262+   dr_init (options);
230263
231- DR_BENCHMARK (GameOfLife_DR );
264+    GameOfLife::run (options. size , options. redundancy , options. steps , options. debug );
232265
233- #endif 
266+   dr::mp::finalize ();
267+   MPI_Finalize ();
268+ 
269+   return  0 ;
270+ }
0 commit comments