Skip to content

Commit eda674f

Browse files
committed
Change LinkId_t to be 64-bits, make it globally unique, and store the ID in the Link object. This added an extra exchange between ConfigGraphs created on different ranks during a parallel load.
1 parent bb59918 commit eda674f

File tree

11 files changed

+354
-74
lines changed

11 files changed

+354
-74
lines changed

src/sst/core/link.cc

Lines changed: 65 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
127127
For self links, no rank info is stored since we don't need
128128
to create a unique ID
129129
*/
130+
130131
if ( type == SYNC || type == REG ) {
131132
SST_SER(my_rank);
132133

@@ -158,6 +159,13 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
158159
}
159160
} // if ( type == SYNC || type == REG )
160161

162+
163+
// Serialize the ID for the Link
164+
if ( !s->has_tool_list )
165+
SST_SER(s->id);
166+
else
167+
SST_SER((*s->attached_tools)[0].second);
168+
161169
/*
162170
Store the metadata for this link
163171
*/
@@ -217,24 +225,31 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
217225

218226
// Determine how many serializable tools there are
219227
Link::ToolList tools;
220-
if ( s->attached_tools ) {
221-
for ( auto x : *s->attached_tools ) {
222-
if ( dynamic_cast<SST::Core::Serialization::serializable*>(x.first) ) {
223-
tools.push_back(x);
228+
229+
if ( s->has_tool_list ) {
230+
for ( auto x = ++s->attached_tools->begin(); x != s->attached_tools->end(); ++x ) {
231+
if ( dynamic_cast<SST::Core::Serialization::serializable*>(x->first) ) {
232+
tools.push_back(*x);
224233
}
225234
}
226235
}
227236
size_t tool_count = tools.size();
228-
SST_SER(tool_count);
229-
if ( tool_count > 0 ) {
230-
// Serialize each tool, then call
231-
// serializeEventAttachPointKey() to serialize any data
232-
// associated with the key
233-
for ( auto x : tools ) {
234-
SST::Core::Serialization::serializable* obj =
235-
dynamic_cast<SST::Core::Serialization::serializable*>(x.first);
236-
SST_SER(obj);
237-
x.first->serializeEventAttachPointKey(ser, x.second);
237+
238+
// Need to determine if we'll have any tools attached on restart. We only have tools when tool_count > 0
239+
bool restart_tools = (tool_count > 0);
240+
SST_SER(restart_tools);
241+
242+
if ( restart_tools ) {
243+
SST_SER(tool_count);
244+
if ( tool_count > 0 ) {
245+
// Serialize each tool, then call serializeEventAttachPointKey() to serialize any data associated with
246+
// the key
247+
for ( auto x : tools ) {
248+
SST::Core::Serialization::serializable* obj =
249+
dynamic_cast<SST::Core::Serialization::serializable*>(x.first);
250+
SST_SER(obj);
251+
x.first->serializeEventAttachPointKey(ser, x.second);
252+
}
238253
}
239254
}
240255

@@ -339,6 +354,8 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
339354
}
340355
}
341356

357+
SST_SER(s->id);
358+
342359
/*
343360
Get the metadata for the link
344361
*/
@@ -401,13 +418,20 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
401418
s->pair_link->latency += latency;
402419
}
403420

404-
/*
405-
Restore attached tools
406-
*/
407-
size_t tool_count;
408-
SST_SER(tool_count);
409-
if ( tool_count > 0 ) {
410-
s->attached_tools = new Link::ToolList();
421+
SST_SER(s->has_tool_list);
422+
423+
if ( s->has_tool_list ) {
424+
/*
425+
Restore attached tools
426+
*/
427+
size_t tool_count;
428+
SST_SER(tool_count);
429+
430+
// If has_tool_list is true, then tool_count is greater than 0
431+
Link::ToolList* tools = new Link::ToolList();
432+
tools->emplace_back(nullptr, s->id);
433+
s->attached_tools = tools;
434+
s->has_tool_list = true;
411435
for ( size_t i = 0; i < tool_count; ++i ) {
412436
SST::Core::Serialization::serializable* tool;
413437
uintptr_t key;
@@ -417,9 +441,7 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
417441
s->attached_tools->emplace_back(ap, key);
418442
}
419443
}
420-
else {
421-
s->attached_tools = nullptr;
422-
}
444+
423445

424446
/*
425447
Deserialize the events targeting this link
@@ -477,7 +499,7 @@ class NullEvent : public Event
477499
};
478500

479501

480-
Link::Link(LinkId_t tag) :
502+
Link::Link(LinkId_t id) :
481503
send_queue(nullptr),
482504
delivery_info(0),
483505
defaultTimeBase(0),
@@ -486,8 +508,8 @@ Link::Link(LinkId_t tag) :
486508
current_time(Simulation_impl::getSimulation()->currentSimCycle),
487509
type(UNINITIALIZED),
488510
mode(INIT),
489-
tag(tag),
490-
attached_tools(nullptr)
511+
tag(0),
512+
id(id)
491513
{}
492514

493515
Link::Link() :
@@ -499,8 +521,7 @@ Link::Link() :
499521
current_time(Simulation_impl::getSimulation()->currentSimCycle),
500522
type(UNINITIALIZED),
501523
mode(INIT),
502-
tag(bit_util::type_max<uint32_t>),
503-
attached_tools(nullptr)
524+
tag(bit_util::type_max<uint32_t>)
504525
{}
505526

506527
Link::~Link()
@@ -514,7 +535,7 @@ Link::~Link()
514535
if ( SYNC == pair_link->type ) delete pair_link;
515536
}
516537

517-
if ( attached_tools ) delete attached_tools;
538+
if ( has_tool_list ) delete attached_tools;
518539
}
519540

520541
void
@@ -683,9 +704,10 @@ Link::send_impl(SimTime_t delay, Event* event)
683704
event->addRecvComponent(pair_link->comp, pair_link->ctype, pair_link->port);
684705
#endif
685706

686-
if ( attached_tools ) {
687-
for ( auto& x : *attached_tools ) {
688-
x.first->eventSent(x.second, event);
707+
if ( has_tool_list ) {
708+
// First entry just holds the Link id, so we can skip it
709+
for ( auto x = ++attached_tools->begin(); x != attached_tools->end(); ++x ) {
710+
x->first->eventSent(x->second, event);
689711
// Check to see if the event was deleted. If so, return.
690712
if ( nullptr == event ) return;
691713
}
@@ -840,17 +862,23 @@ Link::createUniqueGlobalLinkName(RankInfo local_rank, uintptr_t local_ptr, RankI
840862
void
841863
Link::attachTool(AttachPoint* tool, const AttachPointMetaData& mdata)
842864
{
843-
if ( !attached_tools ) attached_tools = new ToolList();
865+
if ( !has_tool_list ) {
866+
auto tools = new ToolList();
867+
tools->emplace_back(nullptr, id);
868+
attached_tools = tools;
869+
has_tool_list = true;
870+
}
844871
auto key = tool->registerLinkAttachTool(mdata);
845-
attached_tools->push_back(std::make_pair(tool, key));
872+
attached_tools->emplace_back(tool, key);
846873
}
847874

848875
void
849876
Link::detachTool(AttachPoint* tool)
850877
{
851-
if ( !attached_tools ) return;
878+
if ( !has_tool_list ) return;
852879

853-
for ( auto x = attached_tools->begin(); x != attached_tools->end(); ++x ) {
880+
// First entry just holds the Link id, so we can skip it
881+
for ( auto x = ++attached_tools->begin(); x != attached_tools->end(); ++x ) {
854882
if ( x->first == tool ) {
855883
attached_tools->erase(x);
856884
break;

src/sst/core/link.h

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class SST::Core::Serialization::serialize_impl<Link*>
5656
class alignas(64) Link
5757
{
5858
enum Type_t : uint16_t { POLL, HANDLER, SYNC, UNINITIALIZED };
59-
enum Mode_t : uint16_t { INIT, RUN, COMPLETE };
59+
enum Mode_t : uint8_t { INIT, RUN, COMPLETE };
6060

6161
friend class SST::Core::Serialization::serialize_impl<Link*>;
6262

@@ -280,7 +280,13 @@ class alignas(64) Link
280280
281281
@return the unique ID for this Link
282282
*/
283-
LinkId_t getId() { return tag; }
283+
LinkId_t getId()
284+
{
285+
if ( has_tool_list )
286+
return (*attached_tools)[0].second;
287+
else
288+
return id;
289+
}
284290

285291
/**
286292
Return the default timebase for this Link
@@ -418,6 +424,7 @@ class alignas(64) Link
418424
SimTime_t& current_time;
419425
Type_t type;
420426
Mode_t mode;
427+
bool has_tool_list = false;
421428
uint32_t tag;
422429

423430
/**
@@ -433,8 +440,6 @@ class alignas(64) Link
433440
*/
434441
explicit Link(LinkId_t id);
435442

436-
Link(const Link& l);
437-
438443
/**
439444
Specifies that this Link has no callback, and is poll-based only
440445
*/
@@ -458,7 +463,44 @@ class alignas(64) Link
458463

459464

460465
using ToolList = std::vector<std::pair<AttachPoint*, uintptr_t>>;
461-
ToolList* attached_tools;
466+
467+
/**
468+
We need to keep the Link object to 64 bytes, so the ID and the pointer to the attached tools share the same 8 bytes.
469+
This will normally hold the 64-bit id for the link, but if we have attached tools, then the id for the Link will
470+
be stored in the first entry of the attached_tools list.
471+
472+
The has_tool_list member variable stores which data is currently stored in this union. If has_tool_list is true,
473+
then the union's attached_tools variable is currently active. Otherwise, it is storing the Link id.
474+
*/
475+
union {
476+
/**
477+
The Link id is a globally unique id. For serial loads, the ids start at 1 and increment for each new Link.
478+
479+
On a parallel load, the top 32 bits are filled with the MPI rank and the bottom 32 bits start at 1 and
480+
increment for each new Link on that rank. The ids for links that cross a rank boundary need to be reconciled so that the
481+
Link objects on both ranks agree on the ID. This is done by having the "lower" rank send the Link name and
482+
its id for the Link to the "higher" rank, which will then use that id for the Link. "Higher" and "lower" are
483+
determined in a way that hopefully spreads out the MPI traffic across all the ranks and isn't just a direct
484+
greater/less than comparison. It is determined as follows:
485+
486+
1 - A rank is lower than the N/2 ranks after it, wrapping around to 0 when needed.
487+
488+
2 - A rank is higher than the N/2 ranks before it, wrapping around to N-1 when needed.
489+
490+
The id is used in several places:
491+
- When exchanging Link pointers to put in the delivery_info field
492+
- As the global name for connecting links on a restart from checkpoint
493+
494+
*/
495+
LinkId_t id;
496+
497+
/**
498+
This stores any tools connected to the Link::AttachPoint. The first entry in the list will always contain
499+
the Link id since the pointer to the attached_tools list is now using that memory location.
500+
*/
501+
ToolList* attached_tools;
502+
};
503+
462504

463505
/**
464506
Manually set the default time base

src/sst/core/linkPair.h

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,44 +24,29 @@ class LinkPair
2424
{
2525
public:
2626
/** Create a new LinkPair. This is used when the endpoints are in the same partition.
27-
* @param order Value used to enforce the link order.
28-
*/
29-
explicit LinkPair(LinkId_t order) :
30-
left(new Link(order)),
31-
right(new Link(order))
32-
{
33-
my_id = order;
3427
28+
@param id ID of the link
29+
*/
30+
explicit LinkPair(LinkId_t id) :
31+
left(new Link(id)),
32+
right(new Link(id))
33+
{
3534
left->pair_link = right;
3635
right->pair_link = left;
3736
}
3837

3938
/** Create a new LinkPair. This is used on restart.
40-
* @param order Value used to enforce the link order.
41-
*/
39+
40+
No id is passed in; both Links are created with the default Link constructor.
41+
*/
4242
LinkPair() :
4343
left(new Link()),
4444
right(new Link())
4545
{
46-
my_id = -1;
47-
4846
left->pair_link = right;
4947
right->pair_link = left;
5048
}
5149

52-
/** Create a new LinkPair. This is used when the endpoints are in different partitions.
53-
* @param order Value used to enforce the link order.
54-
* @param remote_tag Used to look up the correct link on the other side.
55-
*/
56-
LinkPair(LinkId_t order, LinkId_t remote_tag) :
57-
left(new Link(remote_tag)),
58-
right(new Link(order))
59-
{
60-
my_id = order;
61-
62-
left->pair_link = right;
63-
right->pair_link = left;
64-
}
6550

6651
virtual ~LinkPair() {}
6752

@@ -78,8 +63,6 @@ class LinkPair
7863
private:
7964
Link* left;
8065
Link* right;
81-
82-
LinkId_t my_id;
8366
};
8467

8568
} // namespace SST

0 commit comments

Comments
 (0)