Skip to content

Commit 5065851

Browse files
committed
xhc: example of one way to fix for sessions
Applies to the case of multiple session init/finalize sequences that result in the MCA framework being destructed prior to a restart with a new session. Related to #13013. Signed-off-by: Howard Pritchard <[email protected]>
1 parent 0bccfcd commit 5065851

File tree

1 file changed

+51
-0
lines changed

1 file changed

+51
-0
lines changed

ompi/mca/coll/xhc/coll_xhc_component.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,38 @@ mca_coll_xhc_component_t mca_coll_xhc_component = {
110110
.uniform_chunks = true,
111111
.uniform_chunks_min = 4096,
112112

113+
#if 0
114+
.op_mca[XHC_BCAST] = {
115+
.hierarchy = "numa,socket",
116+
.chunk_size = "16K",
117+
.cico_max = 256
118+
},
119+
120+
.op_mca[XHC_BARRIER] = {
121+
.hierarchy = "numa,socket",
122+
.chunk_size = "1",
123+
.cico_max = 0
124+
},
125+
126+
.op_mca[XHC_REDUCE] = {
127+
.hierarchy = "l3,numa,socket",
128+
.chunk_size = "16K",
129+
.cico_max = 4096
130+
},
131+
132+
.op_mca[XHC_ALLREDUCE] = {
133+
.hierarchy = "l3,numa,socket",
134+
.chunk_size = "16K",
135+
.cico_max = 4096
136+
}
137+
#endif
138+
};
139+
140+
/*
 * Holds one xhc_op_mca_t entry per collective type (XHC_COLLCOUNT
 * entries, indexed by the XHC_* collective constants such as XHC_BCAST).
 *
 * The file-scope instance mca_coll_op_mca_init_values supplies the
 * initial hierarchy / chunk_size / cico_max values that xhc_register()
 * copies into mca_coll_xhc_component.op_mca[] before registering the
 * corresponding MCA variables.
 */
struct xhc_op_mca_init_values_t {
141+
struct xhc_op_mca_t op_mca[XHC_COLLCOUNT];
142+
};
143+
144+
static struct xhc_op_mca_init_values_t mca_coll_op_mca_init_values = {
113145
.op_mca[XHC_BCAST] = {
114146
.hierarchy = "numa,socket",
115147
.chunk_size = "16K",
@@ -370,6 +402,10 @@ static int xhc_register(void) {
370402
"consider for the hierarchy (%s), for %s.", topo_list, xhc_colltype_to_str(t));
371403
if(err < 0) {free(topo_list); free(name); return OMPI_ERR_OUT_OF_RESOURCE;}
372404

405+
if (mca_coll_op_mca_init_values.op_mca[t].hierarchy != NULL) {
406+
mca_coll_xhc_component.op_mca[t].hierarchy = strdup(mca_coll_op_mca_init_values.op_mca[t].hierarchy);
407+
}
408+
373409
mca_base_component_var_register(&mca_coll_xhc_component.super.collm_version,
374410
name, desc, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_7,
375411
MCA_BASE_VAR_SCOPE_READONLY, &mca_coll_xhc_component.op_mca[t].hierarchy);
@@ -408,6 +444,12 @@ static int xhc_register(void) {
408444

409445
mca_base_var_get(vari, &var);
410446

447+
for(int t = 0; t < XHC_COLLCOUNT; t++) {
448+
if (mca_coll_op_mca_init_values.op_mca[t].chunk_size != NULL) {
449+
mca_coll_xhc_component.op_mca[t].chunk_size = strdup(mca_coll_op_mca_init_values.op_mca[t].chunk_size);
450+
}
451+
}
452+
411453
for(int t = 0; t < XHC_COLLCOUNT; t++) {
412454
if(XHC_BARRIER == t) {
413455
continue;
@@ -421,6 +463,12 @@ static int xhc_register(void) {
421463
"(bottom to top)), for %s.", xhc_colltype_to_str(t));
422464
if(err < 0) {free(name); return OMPI_ERR_OUT_OF_RESOURCE;}
423465

466+
#if 0
467+
if (mca_coll_op_mca_init_values.op_mca[t].chunk_size != NULL) {
468+
mca_coll_xhc_component.op_mca[t].chunk_size = strdup(mca_coll_op_mca_init_values.op_mca[t].chunk_size);
469+
}
470+
#endif
471+
424472
mca_base_component_var_register(&mca_coll_xhc_component.super.collm_version,
425473
name, desc, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_8,
426474
MCA_BASE_VAR_SCOPE_READONLY, &mca_coll_xhc_component.op_mca[t].chunk_size);
@@ -455,6 +503,9 @@ static int xhc_register(void) {
455503
mca_base_var_get(vari, &var);
456504

457505
for(int t = 0; t < XHC_COLLCOUNT; t++) {
506+
507+
mca_coll_xhc_component.op_mca[t].cico_max = mca_coll_op_mca_init_values.op_mca[t].cico_max;
508+
458509
if(XHC_BARRIER == t) {
459510
continue;
460511
}

0 commit comments

Comments
 (0)