@@ -14,8 +14,6 @@ inline void fence_on(auto &&obj) { obj.fence(); }
1414#include  < memory> 
1515#include  < iomanip> 
1616
17- // 
18- 
1917struct  MPI_data  {
2018  MPI_Comm comm;
2119  int  rank;
@@ -29,7 +27,8 @@ struct MPI_data {
2927static  MPI_data mpi_data;
3028
3129struct  Options  {
32-   std::size_t  size;
30+   std::size_t  width;
31+   std::size_t  height;
3332  std::size_t  steps;
3433  std::size_t  redundancy;
3534  bool  debug;
@@ -57,7 +56,7 @@ void init(std::size_t n, Array& out) {
5756  in[2 ][1 ] = 0 ; in[2 ][2 ] = 1 ; in[2 ][3 ] = 1 ;
5857  in[3 ][1 ] = 1 ; in[3 ][2 ] = 1 ; in[3 ][3 ] = 0 ;
5958  //  clang-format on
60-   std::vector<int > local (n * n );
59+   std::vector<int > local (n * 4 );
6160  for  (int  i = 0 ; i < 4 ; i++) {
6261    for  (int  j = 0 ; j < 4 ; j++) {
6362      local[i * n + j] = in[i][j];
@@ -66,22 +65,22 @@ void init(std::size_t n, Array& out) {
6665  dr::mp::copy (local.begin (), local.end (), out.begin ());
6766}
6867
69- void  run (std::size_t  n, std::size_t  redundancy, std::size_t  steps, bool  debug) {
68+ void  run (std::size_t  n, std::size_t  m, std:: size_t   redundancy, std::size_t  steps, bool  debug) {
7069  if  (mpi_data.host ()) {
7170    std::cout << " Using backend: dr" 
72-     std::cout << " Grid size: " "  x " n  << std::endl;
71+     std::cout << " Grid size: " "  x " m  << std::endl;
7372    std::cout << " Time steps:" 
7473    std::cout << " Redundancy " 
7574    std::cout << std::endl;
7675  }
7776
7877  //  construct grid
7978  auto  dist = dr::mp::distribution ().halo (1 ).redundancy (redundancy);
80-   Array array ({n, n }, dist);
81-   Array array_out ({n, n }, dist);
79+   Array array ({n, m }, dist);
80+   Array array_out ({n, m }, dist);
8281  dr::mp::fill (array, 0 );
8382
84-   init (n , array);
83+   init (m , array);
8584
8685  //  execute one calculation for one cell in game of life
8786  auto  calculate = [](auto  stencils) {
@@ -117,19 +116,22 @@ void run(std::size_t n, std::size_t redundancy, std::size_t steps, bool debug) {
117116      x (0 , 0 ) = x_out (0 , 0 );
118117    };
119118
// Debug helper: copies the distributed grid v into a host-side buffer via
// dr::mp::copy (first argument 0, presumably the root rank - verify) and, on
// the host rank only, prints the grid row by row. It appears to print # for
// cells equal to 1 and . otherwise, with a line break after each row.
// NOTE(review): the buffer below is still sized n * n, while the added lines
// iterate columns up to m and appear to index with a row stride of m. If m
// can exceed n this looks like an out-of-bounds access; the size probably
// should be n * m - TODO confirm.
// NOTE(review): i and j are int but are compared against the captured n and
// m, which the enclosing function takes as std::size_t (signed/unsigned mix).
120-   auto  print = [n](const  auto  &v) {
119+   auto  print = [n, m ](const  auto  &v) {
121120      std::vector<int > local (n * n);
122121      dr::mp::copy (0 , v, local.begin ());
123122      if  (mpi_data.host ()) {
124123        for  (int  i = 0 ; i < n; i++) {
125-           for  (int  j = 0 ; j < n ; j++) {
126-             fmt::print (" {}" n  + j] == 1  ? ' #' ' .' 
124+           for  (int  j = 0 ; j < m ; j++) {
125+             fmt::print (" {}" m  + j] == 1  ? ' #' ' .' 
127126          }
128127          fmt::print (" \n " 
129128        }
130129      }
131130    };
132131
// Accumulators for halo-exchange timing: total time spent in exchanges and
// the number of exchanges performed.
// NOTE(review): exchange_duration has no initializer. The default constructor
// of std::chrono::duration is defaulted, so a double rep is left
// default-initialized (indeterminate) here, yet the value is later
// accumulated with +=. Consider initializing it, e.g. with {} or
// std::chrono::duration<double>::zero() - TODO confirm and fix.
// NOTE(review): exchange_count stays 0 if no exchange runs; the later
// per-exchange average divides by it - verify that case is acceptable.
132+   std::chrono::duration<double > exchange_duration;
133+   std::size_t  exchange_count = 0 ;
134+ 
133135  auto  tic = std::chrono::steady_clock::now ();
134136  for  (std::size_t  i = 0 , next_treshold = 0 ; i < steps; i++) {
135137    if  (i >= next_treshold && mpi_data.host ()) {
@@ -147,7 +149,12 @@ void run(std::size_t n, std::size_t redundancy, std::size_t steps, bool debug) {
147149      if  (debug && mpi_data.host ()) {
148150        fmt::print (" Exchange at step {}\n " 
149151      }
152+       auto  exchange_tic = std::chrono::steady_clock::now ();
150153      array.halo ().exchange ();
154+       auto  exchange_toc = std::chrono::steady_clock::now ();
155+       exchange_duration += exchange_toc - exchange_tic;
156+       exchange_count++;
157+ 
151158      //  Array_out is a temporary, no need to exchange it
152159    }
153160
@@ -166,11 +173,14 @@ void run(std::size_t n, std::size_t redundancy, std::size_t steps, bool debug) {
166173
167174  if  (mpi_data.host ()) {
168175    double  t_cpu = duration.count ();
176+     double  t_exch = exchange_duration.count ();
169177    double  t_step = t_cpu / static_cast <double >(steps);
178+     double  t_exch_step = t_exch / static_cast <double >(exchange_count);
170179
171180    fmt::print (" Steps done 100% ({} of {} steps)\n " 
172-     fmt::print (" Duration {} s\n " 
181+     fmt::print (" Duration {} s, including exchange total time {} s \n " , t_exch );
173182    fmt::print (" Time per step {} ms\n " 1000 );
183+     fmt::print (" Time per exchange {} ms\n " 1000 );
174184  }
175185}
176186
@@ -201,7 +211,8 @@ Options parse_options(int argc, char *argv[]) {
201211    (" device-memory" " Use device memory" 
202212    (" sycl" " Execute on SYCL device" 
203213    (" d,debug" " enable debug logging" 
204-     (" n,size" " Grid size" size_t >()->default_value (" 128" 
214+     (" n,size" " Grid width" size_t >()->default_value (" 128" 
215+     (" m,height" " Grid height" size_t >()->default_value (" 128" 
205216    (" t,steps" " Run a fixed number of time steps." size_t >()->default_value (" 100" 
206217    (" r,redundancy" " Set outer-grid redundancy parameter." size_t >()->default_value (" 2" 
207218  //  clang-format on
@@ -215,7 +226,7 @@ Options parse_options(int argc, char *argv[]) {
215226  }
216227
217228  out.sycl  = options.count (" sycl" 0 ;
218-   out.device_memory  = options.count (" debug " 0 ;
229+   out.device_memory  = options.count (" device-memory " 0 ;
219230
220231  if  (options.count (" drhelp" 
221232    std::cout << options_spec.help () << " \n " 
@@ -231,7 +242,8 @@ Options parse_options(int argc, char *argv[]) {
231242    }
232243  }
233244
234-   out.size  = options[" n" as <std::size_t >();
245+   out.width  = options[" n" as <std::size_t >();
246+   out.height  = options.count (" m" 0  ? options[" m" as <std::size_t >() : out.width ;
235247  out.redundancy  = options[" r" as <std::size_t >();
236248  out.steps  = options[" t" as <std::size_t >();
237249
@@ -244,7 +256,7 @@ void dr_init(const Options& options) {
244256#ifdef  SYCL_LANGUAGE_VERSION
245257  if  (options.sycl ) {
246258    sycl::queue q;
247-     fmt::print (" Running on sycl device: {}, memory: {}\n " get_device ().get_info <sycl::info::device::name>(), options.device_memory  ? " devive " " shared" 
259+     fmt::print (" Running on sycl device: {}, memory: {}\n " get_device ().get_info <sycl::info::device::name>(), options.device_memory  ? " device " " shared" 
248260    dr::mp::init (q, options.device_memory  ? sycl::usm::alloc::device
249261                                          : sycl::usm::alloc::shared);
250262    return ;
@@ -261,7 +273,7 @@ int main(int argc, char *argv[]) {
261273  Options options = parse_options (argc, argv);
262274  dr_init (options);
263275
264-   GameOfLife::run (options.size , options.redundancy , options.steps , options.debug );
276+   GameOfLife::run (options.width , options. height , options.redundancy , options.steps , options.debug );
265277
266278  dr::mp::finalize ();
267279  MPI_Finalize ();
0 commit comments