Skip to content

Commit 30e5f52

Browse files
committed
Changed the exchange of link pointers from using names to using link IDs. Also removed the std::map currently used for the exchange and replaced it with std::vector. The vector is sorted and compared item by item to the data sent from the remote rank, removing the need to do a bunch of lookups in the map. This saves both memory and compute time.
1 parent 78f137a commit 30e5f52

12 files changed

+114
-150
lines changed

src/sst/core/link.cc

Lines changed: 18 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
8080
// Need a pointer to my simulation object
8181
Simulation_impl* sim = Simulation_impl::getSimulation();
8282

83-
// In order to uniquely identify links on restart, we need to
83+
// For restarts that use the same parallelism, we need to
8484
// track the rank of the link and its pair link. For regular
8585
// links, they are the same, but for sync link pairs, the pair
8686
// link will be on a different rank. For self links, this
@@ -109,53 +109,14 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
109109

110110
SST_SER(type);
111111

112-
/*
113-
Unique Identifiers
114-
115-
For non-selflinks, we need to be able to create a unique
116-
identifier so we can connect the pairs on restart. The
117-
unique identifiers are created using the MPI rank and pointer
118-
of the link cast as a uintptr_t.
119-
120-
For regular links, we only store the rank once since both
121-
links in the pair are on the same rank.
122-
123-
For SYNC links, the local link only knows the remote link by
124-
its pair link, so we will use that pointer for the unique
125-
ID.
126-
127-
For self links, no rank info is stored since we don't need
128-
to create a unique ID
129-
*/
130-
131112
if ( type == SYNC || type == REG ) {
132113
SST_SER(my_rank);
133114

134-
uintptr_t ptr;
135-
if ( type == SYNC )
136-
ptr = reinterpret_cast<uintptr_t>(s->pair_link);
137-
else
138-
ptr = reinterpret_cast<uintptr_t>(s);
139-
140-
SST_SER(ptr);
141-
142115
if ( type == SYNC ) {
143-
// The unique ID for the remote links is constructed from
144-
// the rank of the remote pair link and its pointer on
145-
// that rank. The remote pointer is stored in
146-
// delivery_info and we can get the remote rank from the
147-
// sync queue.
116+
// Get rank for pair
148117
SyncQueue* q = dynamic_cast<SyncQueue*>(s->send_queue);
149118
pair_rank = q->getToRank();
150119
SST_SER(pair_rank);
151-
SST_SER(s->delivery_info);
152-
}
153-
else {
154-
// Unique ID for my pair link is my rank and pair_link
155-
// pointer. Rank is already stored, just store pair
156-
// pointer
157-
uintptr_t pair_ptr = reinterpret_cast<uintptr_t>(s->pair_link);
158-
SST_SER(pair_ptr);
159120
}
160121
} // if ( type == SYNC || type == REG )
161122

@@ -280,35 +241,28 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
280241
*/
281242
bool is_orig_sync = (type == 3);
282243

283-
/*
284-
Unique identifiers
285-
286-
Get the ranks and tags for this link and its pair link
287-
*/
288244
RankInfo my_restart_rank = sim->getRank();
289245
RankInfo pair_restart_rank = my_restart_rank;
290246

291-
uintptr_t my_tag;
292-
uintptr_t pair_tag;
293247

294248
if ( type == SYNC || type == REG ) {
295249
SST_SER(my_rank);
296-
SST_SER(my_tag);
297250

298251
if ( type == SYNC )
299252
SST_SER(pair_rank);
300253
else
301254
pair_rank = my_rank;
302-
303-
SST_SER(pair_tag);
304255
}
305256

306257

258+
LinkId_t link_id;
259+
SST_SER(link_id);
260+
307261
/*
308262
Determine current sync state
309263
*/
310264
if ( type != SELF ) {
311-
pair_restart_rank = sim->getRankForLinkOnRestart(pair_rank, pair_tag);
265+
pair_restart_rank = sim->getRankForLinkOnRestart(pair_rank, link_id);
312266

313267
// If pair_restart_rank.rank == UNASSIGNED, then we have
314268
// the same partitioning as the checkpoint and the ranks
@@ -318,6 +272,7 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
318272

319273
bool is_restart_sync = (my_restart_rank != pair_restart_rank);
320274

275+
321276
/*
322277
Create or get link from tracker
323278
@@ -330,14 +285,12 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
330285
ser.unpacker().report_new_pointer(reinterpret_cast<uintptr_t>(s));
331286
}
332287
else {
333-
auto& link_tracker = sim->link_restart_tracking;
334-
std::pair<int, uintptr_t> my_unique_id = std::make_pair(my_rank.rank, my_tag);
335-
std::pair<int, uintptr_t> pair_unique_id = std::make_pair(pair_rank.rank, pair_tag);
288+
auto& link_tracker = sim->link_restart_tracking;
336289

337-
if ( !is_restart_sync && link_tracker.count(my_unique_id) ) {
290+
if ( !is_restart_sync && link_tracker.count(link_id) ) {
338291
// Get my link and erase it from the map
339-
s = link_tracker[my_unique_id];
340-
link_tracker.erase(my_unique_id);
292+
s = link_tracker[link_id];
293+
link_tracker.erase(link_id);
341294
}
342295
else {
343296
// Create a link pair and set s to the left link
@@ -350,11 +303,11 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
350303
s->pair_link->setLatency(0);
351304

352305
// Put my pair link in the tracking map
353-
link_tracker[pair_unique_id] = s->pair_link;
306+
link_tracker[link_id] = s->pair_link;
354307
}
355308
}
356309

357-
SST_SER(s->id);
310+
s->id = link_id;
358311

359312
/*
360313
Get the metadata for the link
@@ -462,17 +415,15 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
462415
s->pair_link->tag = s->tag;
463416

464417
s->pair_link->defaultTimeBase = 1;
418+
s->pair_link->id = s->id;
465419

466-
// Need to register with the SyncManager, but first
467-
// need to create a unique name
468-
std::string uname = s->createUniqueGlobalLinkName(my_rank, my_tag, pair_rank, pair_tag);
469-
ActivityQueue* sync_q =
470-
sim->syncManager->registerLink(pair_restart_rank, my_restart_rank, uname, s->pair_link);
471-
s->send_queue = sync_q;
420+
// Need to register with the SyncManager
421+
ActivityQueue* sync_q = sim->syncManager->registerLink(pair_restart_rank, my_restart_rank, s->pair_link);
422+
s->send_queue = sync_q;
472423
}
473424
} break;
474425
case serializer::MAP:
475-
// TODO: Implement Link mapping mode
426+
// No current plans to make Links mappable
476427
break;
477428
}
478429
}

src/sst/core/simulation.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,7 @@ Simulation_impl::prepareLinks(ConfigGraph& graph, const RankInfo& myRank, SimTim
810810

811811
// For local, just register link with threadSync object so
812812
// it can map link_id to link*
813-
ActivityQueue* sync_q = syncManager->registerLink(rank[remote], rank[local], clink->name_, lp.getRight());
813+
ActivityQueue* sync_q = syncManager->registerLink(rank[remote], rank[local], lp.getRight());
814814

815815
lp.getLeft()->send_queue = sync_q;
816816
lp.getRight()->setAsSyncLink();

src/sst/core/simulation_impl.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ class Simulation_impl
423423
between checkpoint and restart and the original rank info
424424
stored in the checkpoint should be used.
425425
*/
426-
RankInfo getRankForLinkOnRestart(RankInfo rank, uintptr_t UNUSED(tag))
426+
RankInfo getRankForLinkOnRestart(RankInfo rank, LinkId_t UNUSED(id))
427427
{
428428
if ( serial_restart_ ) return RankInfo(0, 0);
429429
return RankInfo(rank.rank, rank.thread);
@@ -668,15 +668,15 @@ class Simulation_impl
668668
static std::vector<Simulation_impl*> instanceVec_;
669669

670670
/******** Checkpoint/restart tracking data structures ***********/
671-
std::map<std::pair<int, uintptr_t>, Link*> link_restart_tracking;
672-
std::map<uintptr_t, uintptr_t> event_handler_restart_tracking;
673-
uint32_t checkpoint_id_ = 0;
674-
std::string checkpoint_prefix_ = "";
675-
std::string globalOutputFileName = "";
676-
std::string version_ = "";
677-
std::string arch_ = "";
678-
std::string os_ = "";
679-
bool serial_restart_ = false;
671+
std::map<LinkId_t, Link*> link_restart_tracking;
672+
std::map<uintptr_t, uintptr_t> event_handler_restart_tracking;
673+
uint32_t checkpoint_id_ = 0;
674+
std::string checkpoint_prefix_ = "";
675+
std::string globalOutputFileName = "";
676+
std::string version_ = "";
677+
std::string arch_ = "";
678+
std::string os_ = "";
679+
bool serial_restart_ = false;
680680

681681
// Config object used by the simulation
682682
static Config config;

src/sst/core/sync/rankSyncParallelSkip.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,7 @@ RankSyncParallelSkip::~RankSyncParallelSkip()
8484
}
8585

8686
ActivityQueue*
87-
RankSyncParallelSkip::registerLink(
88-
const RankInfo& to_rank, const RankInfo& from_rank, const std::string& name, Link* link)
87+
RankSyncParallelSkip::registerLink(const RankInfo& to_rank, const RankInfo& from_rank, Link* link)
8988
{
9089
std::scoped_lock slock(lock);
9190

@@ -113,7 +112,7 @@ RankSyncParallelSkip::registerLink(
113112
comm_recv_map[remote_rank_local_thread].local_size = 4096;
114113
}
115114

116-
link_maps[to_rank.rank][name] = reinterpret_cast<uintptr_t>(link);
115+
link_maps[to_rank.rank].emplace_back(link->getId(), reinterpret_cast<uintptr_t>(link));
117116
#ifdef __SST_DEBUG_EVENT_TRACKING__
118117
link->setSendingComponentInfo("SYNC", "SYNC", "");
119118
#endif

src/sst/core/sync/rankSyncParallelSkip.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,8 @@ class RankSyncParallelSkip : public RankSync
3636
virtual ~RankSyncParallelSkip();
3737

3838
/** Register a Link which this Sync Object is responsible for */
39-
ActivityQueue* registerLink(
40-
const RankInfo& to_rank, const RankInfo& from_rank, const std::string& name, Link* link) override;
41-
void execute(int thread) override;
39+
ActivityQueue* registerLink(const RankInfo& to_rank, const RankInfo& from_rank, Link* link) override;
40+
void execute(int thread) override;
4241

4342
/** Cause an exchange of Untimed Data to occur */
4443
void exchangeLinkUntimedData(int thread, std::atomic<int>& msg_count) override;

src/sst/core/sync/rankSyncSerialSkip.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ RankSyncSerialSkip::~RankSyncSerialSkip()
6464
}
6565

6666
ActivityQueue*
67-
RankSyncSerialSkip::registerLink(
68-
const RankInfo& to_rank, const RankInfo& UNUSED(from_rank), const std::string& name, Link* link)
67+
RankSyncSerialSkip::registerLink(const RankInfo& to_rank, const RankInfo& UNUSED(from_rank), Link* link)
6968
{
7069
std::scoped_lock slock(lock);
7170

@@ -80,7 +79,7 @@ RankSyncSerialSkip::registerLink(
8079
queue = comm_map[to_rank.rank].squeue;
8180
}
8281

83-
link_maps[to_rank.rank][name] = reinterpret_cast<uintptr_t>(link);
82+
link_maps[to_rank.rank].emplace_back(link->getId(), reinterpret_cast<uintptr_t>(link));
8483
#ifdef __SST_DEBUG_EVENT_TRACKING__
8584
link->setSendingComponentInfo("SYNC", "SYNC", "");
8685
#endif
@@ -222,7 +221,6 @@ RankSyncSerialSkip::exchange()
222221
deserializeTime += SST::Core::Profile::getElapsed(deserialStart);
223222

224223
for ( unsigned int j = 0; j < activities.size(); j++ ) {
225-
226224
Event* ev = static_cast<Event*>(activities[j]);
227225
SimTime_t delay = ev->getDeliveryTime() - current_cycle;
228226
getDeliveryLink(ev)->send(delay, ev);

src/sst/core/sync/rankSyncSerialSkip.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,8 @@ class RankSyncSerialSkip : public RankSync
3434
virtual ~RankSyncSerialSkip();
3535

3636
/** Register a Link which this Sync Object is responsible for */
37-
ActivityQueue* registerLink(
38-
const RankInfo& to_rank, const RankInfo& from_rank, const std::string& name, Link* link) override;
39-
void execute(int thread) override;
37+
ActivityQueue* registerLink(const RankInfo& to_rank, const RankInfo& from_rank, Link* link) override;
38+
void execute(int thread) override;
4039

4140
/** Cause an exchange of Untimed Data to occur */
4241
void exchangeLinkUntimedData(int thread, std::atomic<int>& msg_count) override;

0 commit comments

Comments
 (0)