2121#include " sst/core/stringize.h"
2222#include " sst/core/timeConverter.h"
2323
24- // #include <filesystem>
24+ #include < filesystem>
2525#include < sys/stat.h>
2626#include < unistd.h>
2727
2828namespace SST {
2929
30- CheckpointAction::CheckpointAction (
31- Config* UNUSED (cfg), RankInfo this_rank, Simulation_impl* sim, TimeConverter* period) :
namespace pvt {

/**
   Expands a checkpoint name format string into a concrete name.

   Recognized two-character sequences:
     %p - replaced by prefix
     %n - replaced by the decimal value of checkpoint_id
     %t - replaced by the decimal value of time

   Every other character is copied to the result unchanged. A '%'
   followed by any other character is dropped together with that
   character, and a lone '%' at the very end of the format is
   dropped as well.

   @param format Format string to expand
   @param prefix Text substituted for each %p
   @param checkpoint_id Value substituted for each %n
   @param time Value substituted for each %t
   @return The expanded name
 */
std::string
createNameFromFormat(const std::string& format, const std::string& prefix, uint64_t checkpoint_id, SimTime_t time)
{
    std::string ret;
    // Lower bound on the result size for the common case of a single %p
    ret.reserve(format.size() + prefix.size());

    // True when the previous character was an unconsumed '%'
    bool found_percent = false;
    for ( const char x : format ) {
        if ( found_percent ) {
            switch ( x ) {
            case 'p':
                ret += prefix;
                break;
            case 'n':
                ret += std::to_string(checkpoint_id);
                break;
            case 't':
                ret += std::to_string(time);
                break;
            default:
                // Should not happen since format string was already
                // checked, but if it does, just delete whole %
                // sequence (i.e. do nothing)
                break;
            }
            found_percent = false;
        }
        else if ( x == '%' ) {
            found_percent = true;
        }
        else {
            ret += x;
        }
    }
    return ret;
}

} // namespace pvt
68+
69+ CheckpointAction::CheckpointAction (Config* cfg, RankInfo this_rank, Simulation_impl* sim, TimeConverter* period) :
3270 Action (),
3371 rank_ (this_rank),
3472 period_ (period),
@@ -53,7 +91,37 @@ CheckpointAction::CheckpointAction(
5391 next_sim_time_ = MAX_SIMTIME_T;
5492 }
5593
56- if ( (0 == this_rank.rank ) ) { last_cpu_time_ = sst_get_cpu_time (); }
94+ // Parse the format string. It was checked by the Config object
95+ // to make sure there was no more than one directory separator (/)
96+ // and that no invalid % sequences were used.
97+ std::string format = cfg->checkpoint_name_format ();
98+ size_t split = format.find (" /" );
99+ if ( split == format.npos ) {
100+ dir_format_ = format;
101+ file_format_ = format;
102+ }
103+ else {
104+ dir_format_ = format.substr (0 , split);
105+ file_format_ = format.substr (split + 1 );
106+ }
107+
108+ if ( (0 == this_rank.rank ) ) {
109+ // Check to make sure that there is at least one of %n or %t to
110+ // make checkpoint filenames unique.
111+ bool unique = false ;
112+ if ( format.find (" %n" ) != format.npos ) unique = true ;
113+ if ( format.find (" %t" ) != format.npos ) unique = true ;
114+
115+ if ( !unique ) {
116+ sim->getSimulationOutput ().output (
117+ " WARNING: checkpoint-name-format does not include one of %%n or %%t, which means that all checkpoints "
118+ " will use the same filename and previous files will be overwritten [%s].\n " ,
119+ format.c_str ());
120+ }
121+
122+ last_cpu_time_ = sst_get_cpu_time ();
123+ }
124+
57125 // Set the priority to be the same as the SyncManager so that
58126 // checkpointing happens in the same place for both serial and
59127 // parallel runs. We will never have both a SyncManager and a
@@ -76,7 +144,7 @@ CheckpointAction::execute()
76144void
77145CheckpointAction::createCheckpoint (Simulation_impl* sim)
78146{
79- if ( 0 == rank_.rank ) {
147+ if ( 0 == rank_.rank && 0 == rank_. thread ) {
80148 const double now = sst_get_cpu_time ();
81149 sim->getSimulationOutput ().output (
82150 " # Simulation Checkpoint: Simulated Time %s (Real CPU time since last checkpoint %.5f seconds)\n " ,
@@ -87,36 +155,31 @@ CheckpointAction::createCheckpoint(Simulation_impl* sim)
87155
88156 // Need to create a directory for this checkpoint
89157 std::string prefix = sim->checkpoint_prefix_ ;
90- std::string basename = prefix + " _ " + std::to_string (checkpoint_id) + " _ " + std::to_string ( sim->currentSimCycle );
158+ std::string basename = pvt::createNameFromFormat (dir_format_, prefix, checkpoint_id, sim->currentSimCycle );
91159
92160 // Each thread builds the directory name locally (it is no longer a
93161 // shared static); make sure we barrier in the right places
94- static std::string directory;
162+ std::string directory = sim-> checkpoint_directory_ + " / " + basename ;
95163
96164 // Only thread 0 will participate in setup
97165 if ( rank_.thread == 0 ) {
98166 // Rank 0 will create the directory for this checkpoint
99167 if ( rank_.rank == 0 ) {
100- directory = Checkpointing::createUniqueDirectory (sim->checkpoint_directory_ + " /" + basename);
101- #ifdef SST_CONFIG_HAVE_MPI
102- Comms::broadcast (directory, 0 );
103- #endif
168+ directory = sim->checkpoint_directory_ + " /" + basename;
169+ std::filesystem::create_directory (directory);
104170 }
105- else {
106- // Get directory name (really just a barrier since each
107- // rank already knows the name and it shouldn't have to
108- // create a unique one)
109171#ifdef SST_CONFIG_HAVE_MPI
110- Comms::broadcast (directory, 0 );
172+ Comms::broadcast (directory, 0 );
111173#endif
112- }
113174 }
114- barrier.wait ();
115- if ( rank_.thread == 0 ) checkpoint_id++;
116-
175+ basename = pvt::createNameFromFormat (file_format_, prefix, checkpoint_id, sim->currentSimCycle );
117176 std::string filename =
118177 directory + " /" + basename + " _" + std::to_string (rank_.rank ) + " _" + std::to_string (rank_.thread ) + " .bin" ;
119178
179+ barrier.wait ();
180+
181+ if ( rank_.thread == 0 ) checkpoint_id++;
182+
120183 // Write out the checkpoints for the partitions
121184 sim->checkpoint (filename);
122185
@@ -243,55 +306,6 @@ doesDirectoryExist(const std::string& dirName, bool include_files)
243306 }
244307}
245308
246- /* *
247- Function to create a directory. We need this bacause
248- std::filesystem isn't fully supported until GCC9
249- */
250- bool
251- createDirectory (const std::string& dirName)
252- {
253- if ( mkdir (dirName.c_str (), 0755 ) == 0 ) {
254- return true ; // Directory created successfully
255- }
256- else {
257- return false ; // Failed to create directory
258- }
259- }
260-
261- std::string
262- createUniqueDirectory (const std::string basename)
263- {
264- std::string dirName = basename;
265-
266- // Check if the directory exists
267- // if ( std::filesystem::exists(dirName) ) {
268- if ( doesDirectoryExist (dirName, true ) ) {
269- // Append a unique random set of characters to the directory name
270- std::string newDirName;
271- int num = 0 ;
272- do {
273- ++num;
274- newDirName = dirName + " _" + std::to_string (num);
275- // } while ( std::filesystem::exists(newDirName) ); // Ensure the new directory name is unique
276- } while ( doesDirectoryExist (newDirName, true ) ); // Ensure the new directory name is unique
277-
278- dirName = newDirName;
279- }
280-
281- // Create the directory
282- // if ( !std::filesystem::create_directory(dirName) ) {
283- if ( !createDirectory (dirName) ) {
284- Simulation_impl::getSimulationOutput ().fatal (
285- CALL_INFO_LONG, 1 , " Failed to create directory: %s\n " , dirName.c_str ());
286- }
287- return dirName;
288- }
289-
290- void
291- removeDirectory (const std::string UNUSED (name))
292- {
293- // Implement when adding logic to keep only N checkpoints
294- }
295309
296310std::string
297311initializeCheckpointInfrastructure (Config* cfg, bool rt_can_ckpt, int myRank)
@@ -301,7 +315,11 @@ initializeCheckpointInfrastructure(Config* cfg, bool rt_can_ckpt, int myRank)
301315
302316 std::string checkpoint_dir_name = " " ;
303317
304- if ( myRank == 0 ) { checkpoint_dir_name = createUniqueDirectory (cfg->checkpoint_prefix ()); }
318+ if ( myRank == 0 ) {
319+ SST::Util::Filesystem& fs = Simulation_impl::getSimulation ()->filesystem ;
320+ checkpoint_dir_name = fs.createUniqueDirectory (cfg->checkpoint_prefix ());
321+ }
322+
305323#ifdef SST_CONFIG_HAVE_MPI
306324 // Broadcast the directory name
307325 Comms::broadcast (checkpoint_dir_name, 0 );
0 commit comments