Skip to content

Commit a81ae61

Browse files
committed
WIP: Support for ckpt action flag in rankSync
1 parent 517ee5b commit a81ae61

File tree

7 files changed

+109
-15
lines changed

7 files changed

+109
-15
lines changed

src/sst/core/checkpointAction.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,10 +257,10 @@ CheckpointAction::createCheckpoint(Simulation_impl* sim)
257257
SimTime_t
258258
CheckpointAction::check(SimTime_t current_time)
259259
{
260-
#if 0
260+
#if 1
261261
Simulation_impl* sim = Simulation_impl::getSimulation();
262262
sim->getSimulationOutput().output(
263-
"skk:T %d: checkpointAction.cc: check()\n", rank_.thread);
263+
"skk:R %d, T %d: checkpointAction.cc: check()\n", rank_.rank, rank_.thread);
264264
#endif
265265
// The if-logic is a little weird, but it's trying to minimize the
266266
// number of branches in the normal case of no checkpoint being

src/sst/core/sync/rankSyncParallelSkip.cc

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,12 @@ RankSyncParallelSkip::setShutdownFlags(bool enter_shutdown, Simulation_impl::Shu
173173
// enter_interactive_.load(), enter_shutdown_.load(), shutdown_mode_.load());
174174
}
175175

176+
/**
 * Raise the shared checkpoint-request flag.
 *
 * Only a true argument has any effect: it stores true into the static
 * atomic generate_ckpt_. A false argument leaves the flag untouched, so a
 * caller passing false cannot undo a request another caller already made.
 *
 * @param generate_ckpt true to request a checkpoint; false is ignored
 */
void
177+
RankSyncParallelSkip::setCkptFlag(bool generate_ckpt)
178+
{
179+
if (generate_ckpt)
180+
generate_ckpt_.store(true);
181+
}
176182

177183
void
178184
RankSyncParallelSkip::setFlags(bool enter_interactive, bool enter_shutdown, Simulation_impl::ShutdownMode_t shutdown_mode)
@@ -209,6 +215,12 @@ RankSyncParallelSkip::getShutdownFlags( bool& enter_shutdown, Simulation_impl::S
209215
// enter_interactive, enter_shutdown, shutdown_mode);
210216
}
211217

218+
/**
 * Report the current checkpoint-request flag.
 *
 * @param generate_ckpt out-parameter; receives the value loaded from the
 *                      static atomic generate_ckpt_
 */
void
219+
RankSyncParallelSkip::getCkptFlag(bool& generate_ckpt)
220+
{
221+
generate_ckpt = generate_ckpt_.load();
222+
}
223+
212224
void
213225
RankSyncParallelSkip::getFlags( bool& enter_interactive, bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode)
214226
{
@@ -226,6 +238,7 @@ RankSyncParallelSkip::clearFlags()
226238
enter_interactive_.store(false);
227239
enter_shutdown_.store(false);
228240
shutdown_mode_.store(0);
241+
generate_ckpt_.store(false);
229242

230243
//printf("Clear Flags: enter_interactive %d, enter_shutdown %d, shutdown_mode %d\n",
231244
// enter_interactive_, enter_shutdown_, shutdown_mode_);
@@ -285,6 +298,20 @@ RankSyncParallelSkip::shutdownExchange()
285298
#endif
286299
}
287300

301+
// Standalone exchange of only the checkpoint flag. Compiled out by the
// surrounding #if 0; exchange_master() carries generate_ckpt_ as the fourth
// element of its flags reduction instead.
#if 0
302+
void
303+
RankSyncParallelSkip::ckptExchange()
304+
{
305+
#ifdef SST_CONFIG_HAVE_MPI
306+
int32_t local_flags[1] = { static_cast<int32_t>(generate_ckpt_) };
307+
int32_t global_flags[1] = { 0 };
// MPI_MAX over 0/1 values: every rank receives 1 if any rank had the flag set.
308+
MPI_Allreduce(&local_flags, &global_flags, 1, MPI_INT32_T, MPI_MAX, MPI_COMM_WORLD);
309+
310+
generate_ckpt_ = global_flags[0];
311+
#endif
312+
}
313+
#endif
314+
288315
void
289316
RankSyncParallelSkip::exchange_slave(int thread)
290317
{
@@ -483,13 +510,15 @@ RankSyncParallelSkip::exchange_master(int UNUSED(thread))
483510
sig_usr_ = global_signals[1];
484511
sig_alrm_ = global_signals[2];
485512

486-
int32_t local_flags[3] = { static_cast<int32_t>(enter_interactive_), static_cast<int32_t>(enter_shutdown_), static_cast<int32_t>(shutdown_mode_) };
487-
int32_t global_flags[3] = { 0, 0, 0 };
488-
MPI_Allreduce(&local_flags, &global_flags, 3, MPI_INT32_T, MPI_MAX, MPI_COMM_WORLD);
513+
int32_t local_flags[4] = { static_cast<int32_t>(enter_interactive_), static_cast<int32_t>(enter_shutdown_),
514+
static_cast<int32_t>(shutdown_mode_), static_cast<int32_t>(generate_ckpt_) };
515+
int32_t global_flags[4] = { 0, 0, 0, 0 };
516+
MPI_Allreduce(&local_flags, &global_flags, 4, MPI_INT32_T, MPI_MAX, MPI_COMM_WORLD);
489517

490518
enter_interactive_ = global_flags[0];
491519
enter_shutdown_ = global_flags[1];
492520
shutdown_mode_ = global_flags[2];
521+
generate_ckpt_ = global_flags[3];
493522

494523
#endif
495524
}
@@ -616,5 +645,6 @@ int RankSyncParallelSkip::sig_alrm_(0);
616645
std::atomic<bool> RankSyncParallelSkip::enter_interactive_(false);
617646
std::atomic<bool> RankSyncParallelSkip::enter_shutdown_(false);
618647
std::atomic<unsigned> RankSyncParallelSkip::shutdown_mode_(0);
648+
std::atomic<bool> RankSyncParallelSkip::generate_ckpt_(false);
619649

620650
} // namespace SST

src/sst/core/sync/rankSyncParallelSkip.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,11 @@ class RankSyncParallelSkip : public RankSync
5555
/** Set interactive flags to exchange during sync */
5656
// SKK Separated enter_interactive from shutdown since they may be needed separately
5757
void setShutdownFlags(bool enter_shutdown, Simulation_impl::ShutdownMode_t shutdown_mode) override;
58+
void setCkptFlag(bool generate_ckpt) override;
5859
void setFlags(bool enter_interactive, bool enter_shutdown, Simulation_impl::ShutdownMode_t shutdown_mode) override;
5960
/** Return exchanged interactive flags after sync */
6061
void getShutdownFlags( bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode) override;
62+
void getCkptFlag(bool& generate_ckpt) override;
6163
void getFlags( bool& enter_interactive, bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode) override;
6264
/** Clear interactive flags before next run */
6365
void clearFlags() override;
@@ -152,6 +154,7 @@ class RankSyncParallelSkip : public RankSync
152154
static std::atomic<bool> enter_interactive_;
153155
static std::atomic<bool> enter_shutdown_;
154156
static std::atomic<unsigned> shutdown_mode_;
157+
static std::atomic<bool> generate_ckpt_;
155158
};
156159

157160
} // namespace SST

src/sst/core/sync/rankSyncSerialSkip.cc

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,12 @@ RankSyncSerialSkip::setShutdownFlags(bool enter_shutdown, Simulation_impl::Shutd
134134
// enter_interactive_.load(), enter_shutdown_.load(), shutdown_mode_.load());
135135
}
136136

137+
/**
 * Raise the shared checkpoint-request flag.
 *
 * A true argument stores true into the static atomic generate_ckpt_; a
 * false argument is ignored and leaves the flag as it was.
 *
 * @param generate_ckpt true to request a checkpoint; false is ignored
 */
void
138+
RankSyncSerialSkip::setCkptFlag(bool generate_ckpt)
139+
{
140+
if (generate_ckpt)
141+
generate_ckpt_.store(true);
142+
}
137143

138144
void
139145
RankSyncSerialSkip::setFlags(bool enter_interactive, bool enter_shutdown, Simulation_impl::ShutdownMode_t shutdown_mode)
@@ -170,13 +176,20 @@ RankSyncSerialSkip::getShutdownFlags( bool& enter_shutdown, Simulation_impl::Shu
170176
// enter_interactive, enter_shutdown, shutdown_mode);
171177
}
172178

179+
/**
 * Report the current checkpoint-request flag.
 *
 * @param generate_ckpt out-parameter; receives the value loaded from the
 *                      static atomic generate_ckpt_
 */
void
180+
RankSyncSerialSkip::getCkptFlag(bool& generate_ckpt)
181+
{
182+
generate_ckpt = generate_ckpt_.load();
183+
}
184+
173185
void
174186
RankSyncSerialSkip::getFlags( bool& enter_interactive, bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode)
175187
{
176188

177189
enter_interactive = enter_interactive_.load();
178190
getShutdownFlags( enter_shutdown, shutdown_mode);
179191

192+
180193
//printf("ExitthreadSync getFlags: \n input: enter_interactive %d, enter_shutdown %d, shutdown_mode %d \n",
181194
// enter_interactive, enter_shutdown, shutdown_mode);
182195
}
@@ -187,6 +200,7 @@ RankSyncSerialSkip::clearFlags()
187200
enter_interactive_.store(false);
188201
enter_shutdown_.store(false);
189202
shutdown_mode_.store(0);
203+
generate_ckpt_.store(false);
190204
}
191205

192206
uint64_t
@@ -231,6 +245,19 @@ RankSyncSerialSkip::shutdownExchange()
231245
shutdown_mode_ = global_flags[1];
232246
#endif
233247
}
248+
// Standalone exchange of only the checkpoint flag. Compiled out by the
// surrounding #if 0; exchange() carries generate_ckpt_ as the fourth
// element of its flags reduction instead.
#if 0
249+
void
250+
RankSyncSerialSkip::ckptExchange()
251+
{
252+
#ifdef SST_CONFIG_HAVE_MPI
253+
int32_t local_flags[1] = { static_cast<int32_t>(generate_ckpt_) };
254+
int32_t global_flags[1] = { 0 };
// MPI_MAX over 0/1 values: every rank receives 1 if any rank had the flag set.
255+
MPI_Allreduce(&local_flags, &global_flags, 1, MPI_INT32_T, MPI_MAX, MPI_COMM_WORLD);
256+
257+
generate_ckpt_ = global_flags[0];
258+
#endif
259+
}
260+
#endif
234261

235262
void
236263
RankSyncSerialSkip::exchange()
@@ -357,13 +384,15 @@ RankSyncSerialSkip::exchange()
357384
sig_usr_ = global_signals[1];
358385
sig_alrm_ = global_signals[2];
359386

360-
int32_t local_flags[3] = { static_cast<int32_t>(enter_interactive_), static_cast<int32_t>(enter_shutdown_), static_cast<int32_t>(shutdown_mode_) };
361-
int32_t global_flags[3] = { 0, 0, 0 };
362-
MPI_Allreduce(&local_flags, &global_flags, 3, MPI_INT32_T, MPI_MAX, MPI_COMM_WORLD);
387+
int32_t local_flags[4] = { static_cast<int32_t>(enter_interactive_), static_cast<int32_t>(enter_shutdown_),
388+
static_cast<int32_t>(shutdown_mode_), static_cast<int32_t>(generate_ckpt_) };
389+
int32_t global_flags[4] = { 0, 0, 0, 0 };
390+
MPI_Allreduce(&local_flags, &global_flags, 4, MPI_INT32_T, MPI_MAX, MPI_COMM_WORLD);
363391

364392
enter_interactive_ = global_flags[0];
365393
enter_shutdown_ = global_flags[1];
366394
shutdown_mode_ = global_flags[2];
395+
generate_ckpt_ = global_flags[3];
367396

368397
#endif
369398
}
@@ -466,5 +495,6 @@ int RankSyncSerialSkip::sig_alrm_(0);
466495
std::atomic<bool> RankSyncSerialSkip::enter_interactive_(false);
467496
std::atomic<bool> RankSyncSerialSkip::enter_shutdown_(false);
468497
std::atomic<unsigned> RankSyncSerialSkip::shutdown_mode_(0);
498+
std::atomic<bool> RankSyncSerialSkip::generate_ckpt_(false);
469499

470500
} // namespace SST

src/sst/core/sync/rankSyncSerialSkip.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,11 @@ class RankSyncSerialSkip : public RankSync
5353
/** Set interactive flags to exchange during sync */
5454
// SKK Separated enter_interactive from shutdown since they may be needed separately
5555
void setShutdownFlags(bool enter_shutdown, Simulation_impl::ShutdownMode_t shutdown_mode) override;
56+
void setCkptFlag(bool generate_ckpt) override;
5657
void setFlags(bool enter_interactive, bool enter_shutdown, Simulation_impl::ShutdownMode_t shutdown_mode) override;
5758
/** Return exchanged interactive flags after sync */
5859
void getShutdownFlags( bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode) override;
60+
void getCkptFlag(bool& generate_ckpt) override;
5961
void getFlags( bool& enter_interactive, bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode) override;
6062
/** Clear interactive flags before next run */
6163
void clearFlags() override;
@@ -102,6 +104,7 @@ class RankSyncSerialSkip : public RankSync
102104
static std::atomic<bool> enter_interactive_;
103105
static std::atomic<bool> enter_shutdown_;
104106
static std::atomic<unsigned> shutdown_mode_;
107+
static std::atomic<bool> generate_ckpt_;
105108
};
106109

107110
} // namespace SST

src/sst/core/sync/syncManager.cc

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,12 @@ class EmptyRankSync : public RankSync
134134
void setShutdownFlags(bool UNUSED(enter_shutdown),
135135
Simulation_impl::ShutdownMode_t UNUSED(shutdown_mode)) override {}
136136

137+
/** No-op: the argument is ignored and no checkpoint flag is recorded. */
void setCkptFlag(bool UNUSED(generate_ckpt)) override {}
137138
void setFlags(bool UNUSED(enter_interactive), bool UNUSED(enter_shutdown),
138139
Simulation_impl::ShutdownMode_t UNUSED(shutdown_mode)) override {}
139140

140141
void getShutdownFlags( bool& UNUSED(enter_shutdown), Simulation_impl::ShutdownMode_t& UNUSED(shutdown_mode)) override {}
141-
142+
/** No-op: generate_ckpt is not written, so the caller keeps whatever value it passed in. */
// NOTE(review): callers must initialize the out-parameter before calling — confirm all call sites do.
void getCkptFlag(bool& UNUSED(generate_ckpt)) override {}
142143
void getFlags( bool& UNUSED(enter_interactive), bool& UNUSED(enter_shutdown), Simulation_impl::ShutdownMode_t& UNUSED(shutdown_mode)) override {}
143144

144145
/** Clear interactive flags before next run */
@@ -410,13 +411,14 @@ SyncManager::exchangeLinkInfo()
410411
shutdown_mode = sim_->shutdown_mode_;
411412
}
412413

413-
// sim_->getSimFlags(enter_interactive, enter_shutdown, shutdown_mode) checkpoint?
414+
// sim_->getSimFlags(enter_interactive, enter_shutdown, shutdown_mode, checkpoint)
414415
/**
 * Collect this thread's local simulation flags into the out-parameters.
 *
 * enter_interactive is read from sim_, the shutdown pair comes from
 * getSimShutdownFlags(), and generate_ckpt is read from
 * checkpoint_->getCheckpoint().
 */
void
415-
SyncManager::getSimFlags(bool& enter_interactive, bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode) {
416+
SyncManager::getSimFlags(bool& enter_interactive, bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode, bool& generate_ckpt) {
416417

417418
// Get sim flags to exchange in threadSync
418419
enter_interactive = sim_->enter_interactive_;
419420
getSimShutdownFlags(enter_shutdown, shutdown_mode);
421+
generate_ckpt = checkpoint_->getCheckpoint();
420422
}
421423

422424
void
@@ -768,6 +770,7 @@ SyncManager::execute()
768770
bool enter_interactive;
769771
bool enter_shutdown;
770772
Simulation_impl::ShutdownMode_t shutdown_mode;
773+
bool generate_ckpt;
771774

772775

773776
SimTime_t next_checkpoint_time = MAX_SIMTIME_T;
@@ -795,12 +798,13 @@ SyncManager::execute()
795798
rankSync_->setSignals(sig_end, sig_usr, sig_alrm);
796799
}
797800
#if 1
798-
// Get interactive and shutdown flags
801+
// Get interactive, shutdown, and checkpoint flags
799802
printf("0: Rank%d, Thread%d: sim_- Flags: enter_interactive %d, enter_shutdown %d, shutdown_mode %d\n",
800803
rank_.rank, rank_.thread, sim_->enter_interactive_, sim_->enter_shutdown_, sim_->shutdown_mode_);
801-
getSimFlags(enter_interactive, enter_shutdown, shutdown_mode);
804+
getSimFlags(enter_interactive, enter_shutdown, shutdown_mode, generate_ckpt);
802805
#if 1
803806
rankSync_->setFlags(enter_interactive, enter_shutdown, shutdown_mode);
807+
rankSync_->setCkptFlag(generate_ckpt);
804808
printf("1: Rank%d, Thread%d: Flags: enter_interactive %d, enter_shutdown %d, shutdown_mode %d\n",
805809
rank_.rank, rank_.thread, enter_interactive, enter_shutdown, shutdown_mode);
806810

@@ -836,7 +840,29 @@ SyncManager::execute()
836840
}
837841

838842
// Generate checkpoint if needed
843+
#if 1
844+
rankSync_->getCkptFlag(generate_ckpt);
845+
if ( generate_ckpt ) {
846+
checkpoint_->setCheckpoint();
847+
}
839848
next_checkpoint_time = checkpoint_->check(getDeliveryTime());
849+
#else
850+
// Check local checkpoint generate flag and set shared generate if needed.
851+
if ( checkpoint_->getCheckpoint() == true ) {
852+
ckpt_generate_.store(1);
853+
}
854+
// Ensure everyone has written the mask before updating local generate_
855+
ic_barrier_.wait();
856+
printf("2.5: Rank%d, Thread%d: ckpt_generate_ %d\n",
857+
rank_.rank, rank_.thread, ckpt_generate_.load());
858+
if ( ckpt_generate_.load() ) {
859+
checkpoint_->setCheckpoint();
860+
}
861+
next_checkpoint_time = checkpoint_->check(getDeliveryTime());
862+
ckpt_generate_.store(0);
863+
864+
//next_checkpoint_time = checkpoint_->check(getDeliveryTime());
865+
#endif
840866

841867
#if 1
842868
rankSync_->getFlags(enter_interactive, enter_shutdown, shutdown_mode);
@@ -915,7 +941,7 @@ SyncManager::execute()
915941

916942
if (num_ranks_.rank == 1) {
917943
// Get local sim flags
918-
getSimFlags(enter_interactive, enter_shutdown, shutdown_mode);
944+
getSimFlags(enter_interactive, enter_shutdown, shutdown_mode, generate_ckpt);
919945
// Each thread atomically sets shared flags in threadSync
920946
threadSync_->setFlags(enter_interactive, enter_shutdown, shutdown_mode);
921947
}

src/sst/core/sync/syncManager.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,11 @@ class RankSync
6868

6969
/** Set interactive flags to exchange during sync */
7070
virtual void setShutdownFlags(bool enter_shutdown, Simulation_impl::ShutdownMode_t shutdown_mode) = 0;
71+
virtual void setCkptFlag(bool generate_ckpt) = 0;
7172
virtual void setFlags(bool enter_interactive, bool enter_shutdown, Simulation_impl::ShutdownMode_t shutdown_mode) = 0;
7273
/** Return exchanged interactive flags after sync */
7374
virtual void getShutdownFlags( bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode) = 0;
75+
virtual void getCkptFlag(bool& generate_ckpt) = 0;
7476
virtual void getFlags( bool& enter_interactive, bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode) = 0;
7577
/** Clear interactive flags before next run */
7678
virtual void clearFlags() = 0;
@@ -225,7 +227,7 @@ class SyncManager : public Action
225227
void computeNextInsert(SimTime_t next_checkpoint_time = MAX_SIMTIME_T);
226228
void setupSyncObjects();
227229
void getSimShutdownFlags(bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode);
228-
void getSimFlags(bool& enter_interactive, bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode);
230+
void getSimFlags(bool& enter_interactive, bool& enter_shutdown, Simulation_impl::ShutdownMode_t& shutdown_mode, bool& generate_ckpt);
229231
void partitionInfo();
230232
};
231233

0 commit comments

Comments
 (0)