Skip to content

Commit 84248f9

Browse files
Merge pull request #1270 from sstsimulator/devel
Automatically Merged using SST Master Branch Merger
2 parents 2155c79 + 6488a8c commit 84248f9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1227
-259
lines changed

config/sst_check_filesystem.m4

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
dnl SST_CHECK_FILESYSTEM
dnl
dnl Determines whether programs using std::filesystem need to be linked
dnl against the separate stdc++fs library and, if so, appends -lstdc++fs
dnl to LIBS.  Sets ac_cv_fs_stdlib to yes when the library was added and
dnl to no otherwise.
AC_DEFUN([SST_CHECK_FILESYSTEM],
[
  AC_LANG_SAVE
  AC_LANG([C++])
  # In older versions of gcc the implementation of std::filesystem is kept
  # in a separate stdc++fs library which has to be linked explicitly.
  AC_MSG_CHECKING([if std::filesystem requires linking stdc++fs])
  AC_LINK_IFELSE(
    [AC_LANG_SOURCE([
      #include <filesystem>
      int main() {
        std::filesystem::create_directory("/dev/null");
      }
    ])],
    [ac_cv_fs_stdlib=no],
    [ac_cv_fs_stdlib=yes]
  )
  if test "x$ac_cv_fs_stdlib" = xyes; then
    # The plain link failed.  Only keep -lstdc++fs if adding it actually
    # makes the test program link, so that an unrelated failure does not
    # leave a nonexistent library in LIBS and break every later link.
    sst_check_filesystem_save_LIBS="$LIBS"
    LIBS="$LIBS -lstdc++fs"
    AC_LINK_IFELSE(
      [AC_LANG_SOURCE([
        #include <filesystem>
        int main() {
          std::filesystem::create_directory("/dev/null");
        }
      ])],
      [AC_MSG_RESULT([yes])],
      [ac_cv_fs_stdlib=no
       LIBS="$sst_check_filesystem_save_LIBS"
       AC_MSG_RESULT([unknown])
       AC_MSG_WARN([std::filesystem test program failed to link both with and without -lstdc++fs; leaving LIBS unchanged])]
    )
  else
    AC_MSG_RESULT([no])
  fi
  AC_LANG_RESTORE

])

configure.ac

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ AM_INIT_AUTOMAKE([1.9.6 foreign dist-bzip2 subdir-objects no-define tar-pax])
1313
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
1414
m4_pattern_allow([LT_LIBEXT])
1515

16+
AH_TOP([
17+
#ifndef _SST_CONFIG_H_
18+
#define _SST_CONFIG_H_
19+
])
20+
AH_BOTTOM([
21+
#endif /* _SST_CONFIG_H_ */
22+
])
1623
AC_CONFIG_HEADERS([src/sst/core/sst_config.h])
1724

1825
# Lets check for the standard compilers and basic options
@@ -93,6 +100,8 @@ SST_ENABLE_CORE_PROFILE()
93100

94101
SST_CHECK_FPIC()
95102

103+
SST_CHECK_FILESYSTEM()
104+
96105
AC_DEFINE_UNQUOTED([SST_CPPFLAGS], ["$CPPFLAGS"], [Defines the CPPFLAGS used to build SST])
97106
AC_DEFINE_UNQUOTED([SST_CFLAGS], ["$CFLAGS"], [Defines the CFLAGS used to build SST])
98107
AC_DEFINE_UNQUOTED([SST_CXXFLAGS], ["$CXXFLAGS"], [Defines the CXXFLAGS used to build SST])

src/sst/core/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ sst_core_sources = \
177177
configBase.cc \
178178
configShared.cc \
179179
configGraph.cc \
180+
configGraphOutput.cc \
180181
cfgoutput/pythonConfigOutput.cc \
181182
cfgoutput/dotConfigOutput.cc \
182183
cfgoutput/xmlConfigOutput.cc \

src/sst/core/cfgoutput/jsonConfigOutput.cc

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,19 @@ struct StatPair
5353
SST::ConfigComponent const* comp;
5454
};
5555

56+
struct StatGroupPair
57+
{
58+
std::pair<const std::string, SST::ConfigStatGroup> const& group;
59+
std::vector<std::string> vec;
60+
SST::ConfigGraph const* graph;
61+
};
62+
63+
struct StatGroupParamPair
64+
{
65+
const std::string name;
66+
const SST::Params& stat;
67+
};
68+
5669
void
5770
to_json(json::ordered_json& j, StatPair const& sp)
5871
{
@@ -136,6 +149,49 @@ to_json(json::ordered_json& j, LinkConfPair const& pair)
136149
j["right"]["latency"] = link->latency_str[1];
137150
}
138151

152+
void
153+
to_json(json::ordered_json& j, StatGroupParamPair const& pair)
154+
{
155+
auto const& outParams = pair.stat;
156+
157+
j["name"] = pair.name;
158+
159+
for ( auto const& param : outParams.getKeys() ) {
160+
j["params"][param] = outParams.find<std::string>(param);
161+
}
162+
}
163+
164+
void
165+
to_json(json::ordered_json& j, StatGroupPair const& pair)
166+
{
167+
auto const& grp = pair.group.second;
168+
auto const* graph = pair.graph;
169+
auto vec = pair.vec;
170+
171+
j["name"] = grp.name;
172+
173+
if ( grp.outputFrequency.getValue() != 0 ) { j["frequency"] = grp.outputFrequency.toStringBestSI(); }
174+
175+
if ( grp.outputID != 0 ) {
176+
const SST::ConfigStatOutput& out = graph->getStatOutput(grp.outputID);
177+
j["output"]["type"] = out.type;
178+
if ( !out.params.empty() ) {
179+
const SST::Params& outParams = out.params;
180+
for ( auto const& param : vec ) {
181+
j["output"]["params"][param] = outParams.find<std::string>(param);
182+
}
183+
}
184+
}
185+
186+
for ( auto& i : grp.statMap ) {
187+
if ( !i.second.empty() ) { j["statistics"].emplace_back(StatGroupParamPair { i.first, i.second }); }
188+
}
189+
190+
for ( SST::ComponentId_t id : grp.components ) {
191+
const SST::ConfigComponent* comp = graph->findComponent(id);
192+
j["components"].emplace_back(comp->name);
193+
}
194+
}
139195
} // namespace
140196

141197
void
@@ -164,14 +220,38 @@ JSONConfigGraphOutput::generate(const Config* cfg, ConfigGraph* graph)
164220
outputJson["program_options"]["checkpoint-sim-period"] = cfg->checkpoint_sim_period();
165221
outputJson["program_options"]["checkpoint-wall-period"] = std::to_string(cfg->checkpoint_wall_period());
166222

167-
168223
// Put in the global param sets
169224
for ( const auto& set : getGlobalParamSetNames() ) {
170225
for ( const auto& kvp : getGlobalParamSet(set) ) {
171226
if ( kvp.first != "<set_name>" ) outputJson["global_params"][set][kvp.first] = kvp.second;
172227
}
173228
}
174229

230+
// Global statistics
231+
if ( 0 != graph->getStatLoadLevel() ) {
232+
outputJson["statistics_options"]["statisticLoadLevel"] = (uint64_t)graph->getStatLoadLevel();
233+
}
234+
235+
if ( !graph->getStatOutput().type.empty() ) {
236+
outputJson["statistics_options"]["statisticOutput"] = graph->getStatOutput().type.c_str();
237+
const Params& outParams = graph->getStatOutput().params;
238+
if ( !outParams.empty() ) {
239+
// generate the parameters
240+
for ( auto const& paramsItr : getParamsLocalKeys(outParams) ) {
241+
outputJson["statistics_options"]["params"][paramsItr] = outParams.find<std::string>(paramsItr);
242+
}
243+
}
244+
}
245+
246+
// Generate the stat groups
247+
if ( !graph->getStatGroups().empty() ) {
248+
outputJson["statistics_group"];
249+
for ( auto& grp : graph->getStatGroups() ) {
250+
auto vec = getParamsLocalKeys(graph->getStatOutput(grp.second.outputID).params);
251+
outputJson["statistics_group"].emplace_back(StatGroupPair { grp, vec, graph });
252+
}
253+
}
254+
175255
// no components exist in this rank
176256
if ( const_cast<ConfigComponentMap_t&>(compMap).size() == 0 ) { outputJson["components"]; }
177257

src/sst/core/checkpointAction.cc

Lines changed: 88 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,52 @@
2121
#include "sst/core/stringize.h"
2222
#include "sst/core/timeConverter.h"
2323

24-
// #include <filesystem>
24+
#include <filesystem>
2525
#include <sys/stat.h>
2626
#include <unistd.h>
2727

2828
namespace SST {
2929

30-
CheckpointAction::CheckpointAction(
31-
Config* UNUSED(cfg), RankInfo this_rank, Simulation_impl* sim, TimeConverter* period) :
30+
namespace pvt {
31+
32+
std::string
33+
createNameFromFormat(const std::string& format, const std::string& prefix, uint64_t checkpoint_id, SimTime_t time)
34+
{
35+
std::string ret;
36+
bool found_percent = false;
37+
for ( const auto& x : format ) {
38+
if ( found_percent ) {
39+
switch ( x ) {
40+
case 'p':
41+
ret += prefix;
42+
break;
43+
case 'n':
44+
ret += std::to_string(checkpoint_id);
45+
break;
46+
case 't':
47+
ret += std::to_string(time);
48+
break;
49+
default:
50+
// Should not happen since format string was already
51+
// checked, but if it does, just delete whole %
52+
// sequence (i.e. do nothing)
53+
break;
54+
}
55+
found_percent = false;
56+
}
57+
else if ( x == '%' ) {
58+
found_percent = true;
59+
}
60+
else {
61+
ret += x;
62+
}
63+
}
64+
return ret;
65+
}
66+
67+
} // namespace pvt
68+
69+
CheckpointAction::CheckpointAction(Config* cfg, RankInfo this_rank, Simulation_impl* sim, TimeConverter* period) :
3270
Action(),
3371
rank_(this_rank),
3472
period_(period),
@@ -53,7 +91,37 @@ CheckpointAction::CheckpointAction(
5391
next_sim_time_ = MAX_SIMTIME_T;
5492
}
5593

56-
if ( (0 == this_rank.rank) ) { last_cpu_time_ = sst_get_cpu_time(); }
94+
// Parse the format string. It was checked by the Config object
95+
// to make sure there was no more than one directory separator (/)
96+
// and that no invalid % sequences were used.
97+
std::string format = cfg->checkpoint_name_format();
98+
size_t split = format.find("/");
99+
if ( split == format.npos ) {
100+
dir_format_ = format;
101+
file_format_ = format;
102+
}
103+
else {
104+
dir_format_ = format.substr(0, split);
105+
file_format_ = format.substr(split + 1);
106+
}
107+
108+
if ( (0 == this_rank.rank) ) {
109+
// Check to make sure that there is at least one of %n or %t to
110+
// make checkpoint filenames unique.
111+
bool unique = false;
112+
if ( format.find("%n") != format.npos ) unique = true;
113+
if ( format.find("%t") != format.npos ) unique = true;
114+
115+
if ( !unique ) {
116+
sim->getSimulationOutput().output(
117+
"WARNING: checkpoint-name-format does not include one of %%n or %%t, which means that all checkpoints "
118+
"will use the same filename and previous files will be overwritten [%s].\n",
119+
format.c_str());
120+
}
121+
122+
last_cpu_time_ = sst_get_cpu_time();
123+
}
124+
57125
// Set the priority to be the same as the SyncManager so that
58126
// checkpointing happens in the same place for both serial and
59127
// parallel runs. We will never have both a SyncManager and a
@@ -78,7 +146,7 @@ CheckpointAction::execute()
78146
void
79147
CheckpointAction::createCheckpoint(Simulation_impl* sim)
80148
{
81-
if ( 0 == rank_.rank ) {
149+
if ( 0 == rank_.rank && 0 == rank_.thread ) {
82150
const double now = sst_get_cpu_time();
83151
sim->getSimulationOutput().output(
84152
"# Simulation Checkpoint: Simulated Time %s (Real CPU time since last checkpoint %.5f seconds)\n",
@@ -89,36 +157,31 @@ CheckpointAction::createCheckpoint(Simulation_impl* sim)
89157

90158
// Need to create a directory for this checkpoint
91159
std::string prefix = sim->checkpoint_prefix_;
92-
std::string basename = prefix + "_" + std::to_string(checkpoint_id) + "_" + std::to_string(sim->currentSimCycle);
160+
std::string basename = pvt::createNameFromFormat(dir_format_, prefix, checkpoint_id, sim->currentSimCycle);
93161

94162
// Every thread builds the directory name locally. Make sure we
95163
// barrier in the right places
96-
static std::string directory;
164+
std::string directory = sim->checkpoint_directory_ + "/" + basename;
97165

98166
// Only thread 0 will participate in setup
99167
if ( rank_.thread == 0 ) {
100168
// Rank 0 will create the directory for this checkpoint
101169
if ( rank_.rank == 0 ) {
102-
directory = Checkpointing::createUniqueDirectory(sim->checkpoint_directory_ + "/" + basename);
103-
#ifdef SST_CONFIG_HAVE_MPI
104-
Comms::broadcast(directory, 0);
105-
#endif
170+
directory = sim->checkpoint_directory_ + "/" + basename;
171+
std::filesystem::create_directory(directory);
106172
}
107-
else {
108-
// Get directory name (really just a barrier since each
109-
// rank already knows the name and it shouldn't have to
110-
// create a unique one)
111173
#ifdef SST_CONFIG_HAVE_MPI
112-
Comms::broadcast(directory, 0);
174+
Comms::broadcast(directory, 0);
113175
#endif
114-
}
115176
}
116-
barrier.wait();
117-
if ( rank_.thread == 0 ) checkpoint_id++;
118-
177+
basename = pvt::createNameFromFormat(file_format_, prefix, checkpoint_id, sim->currentSimCycle);
119178
std::string filename =
120179
directory + "/" + basename + "_" + std::to_string(rank_.rank) + "_" + std::to_string(rank_.thread) + ".bin";
121180

181+
barrier.wait();
182+
183+
if ( rank_.thread == 0 ) checkpoint_id++;
184+
122185
// Write out the checkpoints for the partitions
123186
sim->checkpoint(filename);
124187

@@ -245,55 +308,6 @@ doesDirectoryExist(const std::string& dirName, bool include_files)
245308
}
246309
}
247310

248-
/**
249-
Function to create a directory. We need this because
250-
std::filesystem isn't fully supported until GCC9
251-
*/
252-
bool
253-
createDirectory(const std::string& dirName)
254-
{
255-
if ( mkdir(dirName.c_str(), 0755) == 0 ) {
256-
return true; // Directory created successfully
257-
}
258-
else {
259-
return false; // Failed to create directory
260-
}
261-
}
262-
263-
std::string
264-
createUniqueDirectory(const std::string basename)
265-
{
266-
std::string dirName = basename;
267-
268-
// Check if the directory exists
269-
// if ( std::filesystem::exists(dirName) ) {
270-
if ( doesDirectoryExist(dirName, true) ) {
271-
// Append a unique random set of characters to the directory name
272-
std::string newDirName;
273-
int num = 0;
274-
do {
275-
++num;
276-
newDirName = dirName + "_" + std::to_string(num);
277-
// } while ( std::filesystem::exists(newDirName) ); // Ensure the new directory name is unique
278-
} while ( doesDirectoryExist(newDirName, true) ); // Ensure the new directory name is unique
279-
280-
dirName = newDirName;
281-
}
282-
283-
// Create the directory
284-
// if ( !std::filesystem::create_directory(dirName) ) {
285-
if ( !createDirectory(dirName) ) {
286-
Simulation_impl::getSimulationOutput().fatal(
287-
CALL_INFO_LONG, 1, "Failed to create directory: %s\n", dirName.c_str());
288-
}
289-
return dirName;
290-
}
291-
292-
void
293-
removeDirectory(const std::string UNUSED(name))
294-
{
295-
// Implement when adding logic to keep only N checkpoints
296-
}
297311

298312
std::string
299313
initializeCheckpointInfrastructure(Config* cfg, bool rt_can_ckpt, int myRank)
@@ -303,7 +317,11 @@ initializeCheckpointInfrastructure(Config* cfg, bool rt_can_ckpt, int myRank)
303317

304318
std::string checkpoint_dir_name = "";
305319

306-
if ( myRank == 0 ) { checkpoint_dir_name = createUniqueDirectory(cfg->checkpoint_prefix()); }
320+
if ( myRank == 0 ) {
321+
SST::Util::Filesystem& fs = Simulation_impl::getSimulation()->filesystem;
322+
checkpoint_dir_name = fs.createUniqueDirectory(cfg->checkpoint_prefix());
323+
}
324+
307325
#ifdef SST_CONFIG_HAVE_MPI
308326
// Broadcast the directory name
309327
Comms::broadcast(checkpoint_dir_name, 0);

src/sst/core/checkpointAction.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ class CheckpointAction : public Action
9494
double last_cpu_time_; // Last time a checkpoint was triggered
9595
bool generate_; // Whether a checkpoint should be done next time check() is called
9696
SimTime_t next_sim_time_; // Next simulation time a checkpoint should trigger at or 0 if not applicable
97+
std::string dir_format_; // Format string for checkpoint directory names
98+
std::string file_format_; // Format string for checkpoint file names
9799
};
98100

99101
} // namespace SST

0 commit comments

Comments
 (0)