Skip to content

Commit 7412e88

Browse files
authored
Merge pull request #6455 from hppritcha/topic/6238_for_v3.1.x
Fix 6238 for v3.1.x
2 parents 10de852 + c7ccbe9 commit 7412e88

File tree

5 files changed: 236 additions and 123 deletions.

orte/mca/rmaps/base/base.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,8 @@ ORTE_DECLSPEC int orte_rmaps_base_filter_nodes(orte_app_context_t *app,
121121
opal_list_t *nodes,
122122
bool remove);
123123

124-
ORTE_DECLSPEC int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
124+
ORTE_DECLSPEC int orte_rmaps_base_set_mapping_policy(orte_job_t *jdata,
125+
orte_mapping_policy_t *policy,
125126
char **device, char *spec);
126127
ORTE_DECLSPEC int orte_rmaps_base_set_ranking_policy(orte_ranking_policy_t *policy,
127128
orte_mapping_policy_t mapping,

orte/mca/rmaps/base/rmaps_base_frame.c

Lines changed: 122 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2014-2015 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
1818
* $COPYRIGHT$
@@ -287,7 +287,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
287287
"rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=<obj>:PE=N, default <obj>=NUMA");
288288
}
289289

290-
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
290+
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(NULL, &orte_rmaps_base.mapping,
291291
&orte_rmaps_base.device,
292292
rmaps_base_mapping_policy))) {
293293
return rc;
@@ -599,7 +599,8 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp)
599599
return ORTE_ERR_TAKE_NEXT_OPTION;
600600
}
601601

602-
int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
602+
int orte_rmaps_base_set_mapping_policy(orte_job_t *jdata,
603+
orte_mapping_policy_t *policy,
603604
char **device, char *inspec)
604605
{
605606
char *ck;
@@ -624,136 +625,144 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
624625

625626
if (NULL == inspec) {
626627
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
627-
} else {
628-
spec = strdup(inspec); // protect the input string
629-
/* see if a colon was included - if so, then we have a policy + modifier */
630-
ck = strchr(spec, ':');
631-
if (NULL != ck) {
632-
/* if the colon is the first character of the string, then we
633-
* just have modifiers on the default mapping policy */
634-
if (ck == spec) {
635-
ck++;
636-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
637-
"%s rmaps:base only modifiers %s provided - assuming bysocket mapping",
638-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ck);
639-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
640-
if (ORTE_ERR_SILENT == (rc = check_modifiers(ck, &tmp)) &&
641-
ORTE_ERR_BAD_PARAM != rc) {
642-
free(spec);
643-
return ORTE_ERR_SILENT;
644-
}
628+
goto setpolicy;
629+
}
630+
631+
spec = strdup(inspec); // protect the input string
632+
/* see if a colon was included - if so, then we have a policy + modifier */
633+
ck = strchr(spec, ':');
634+
if (NULL != ck) {
635+
/* if the colon is the first character of the string, then we
636+
* just have modifiers on the default mapping policy */
637+
if (ck == spec) {
638+
ck++; // step over the colon
639+
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
640+
"%s rmaps:base only modifiers %s provided - assuming bysocket mapping",
641+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ck);
642+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
643+
if (ORTE_ERR_SILENT == (rc = check_modifiers(ck, &tmp)) &&
644+
ORTE_ERR_BAD_PARAM != rc) {
645645
free(spec);
646-
goto setpolicy;
646+
return ORTE_ERR_SILENT;
647647
}
648-
/* split the string */
649-
*ck = '\0';
650-
ck++;
651-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
652-
"%s rmaps:base policy %s modifiers %s provided",
653-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), spec, ck);
654-
/* if the policy is "dist", then we set the policy to that value
655-
* and save the second argument as the device
648+
free(spec);
649+
goto setpolicy;
650+
}
651+
*ck = '\0'; // terminate spec where the colon was
652+
ck++; // step past the colon
653+
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
654+
"%s rmaps:base policy %s modifiers %s provided",
655+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), spec, ck);
656+
657+
if (0 == strncasecmp(spec, "ppr", strlen(spec))) {
658+
/* at this point, ck points to a string that contains at least
659+
* two fields (specifying the #procs/obj and the object we are
660+
* to map by). We have to allow additional modifiers here - e.g.,
661+
* specifying #pe's/proc or oversubscribe - so check for modifiers. If
662+
* they are present, ck will look like "N:obj:mod1,mod2,mod3"
656663
*/
657-
if (0 == strncasecmp(spec, "ppr", strlen(spec))) {
658-
/* we have to allow additional modifiers here - e.g., specifying
659-
* #pe's/proc or oversubscribe - so check for modifiers
664+
if (NULL == (ptr = strchr(ck, ':'))) {
665+
/* this is an error - there had to be at least one
666+
* colon to delimit the number from the object type
660667
*/
661-
if (NULL == (ptr = strrchr(ck, ':'))) {
662-
/* this is an error - there had to be at least one
663-
* colon to delimit the number from the object type
664-
*/
665-
orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec);
668+
orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec);
669+
free(spec);
670+
return ORTE_ERR_SILENT;
671+
}
672+
ptr++; // move past the colon
673+
/* at this point, ptr is pointing to the beginning of the string that describes
674+
* the object plus any modifiers (i.e., "obj:mod1,mod2"). We first check to see if there
675+
* is another colon indicating that there are modifiers to the request */
676+
if (NULL != (cptr = strchr(ptr, ':'))) {
677+
/* there are modifiers, so we terminate the object string
678+
* at the location of the colon */
679+
*cptr = '\0';
680+
/* step over that colon */
681+
cptr++;
682+
/* now check for modifiers - may be none, so
683+
* don't emit an error message if the modifier
684+
* isn't recognized */
685+
if (ORTE_ERR_SILENT == (rc = check_modifiers(cptr, &tmp)) &&
686+
ORTE_ERR_BAD_PARAM != rc) {
666687
free(spec);
667688
return ORTE_ERR_SILENT;
668689
}
669-
ptr++; // move past the colon
670-
/* at this point, ck is pointing to the number of procs/object
671-
* and ptr is pointing to the beginning of the string that describes
672-
* the object plus any modifiers. We first check to see if there
673-
* is a comma indicating that there are modifiers to the request */
674-
if (NULL != (cptr = strchr(ptr, ','))) {
675-
/* there are modifiers, so we terminate the object string
676-
* at the location of the first comma */
677-
*cptr = '\0';
678-
/* step over that comma */
679-
cptr++;
680-
/* now check for modifiers - may be none, so
681-
* don't emit an error message if the modifier
682-
* isn't recognized */
683-
if (ORTE_ERR_SILENT == (rc = check_modifiers(cptr, &tmp)) &&
684-
ORTE_ERR_BAD_PARAM != rc) {
685-
free(spec);
686-
return ORTE_ERR_SILENT;
687-
}
688-
}
689-
/* now save the pattern */
690+
}
691+
/* now save the pattern */
692+
if (NULL == jdata || NULL == jdata->map) {
690693
orte_rmaps_base.ppr = strdup(ck);
691-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_PPR);
692-
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
693-
free(spec);
694-
goto setpolicy;
694+
} else {
695+
jdata->map->ppr = strdup(ck);
695696
}
696-
if (ORTE_SUCCESS != (rc = check_modifiers(ck, &tmp)) &&
697-
ORTE_ERR_TAKE_NEXT_OPTION != rc) {
698-
if (ORTE_ERR_BAD_PARAM == rc) {
699-
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec);
700-
}
701-
free(spec);
702-
return rc;
697+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_PPR);
698+
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
699+
free(spec);
700+
goto setpolicy;
701+
}
702+
if (ORTE_SUCCESS != (rc = check_modifiers(ck, &tmp)) &&
703+
ORTE_ERR_TAKE_NEXT_OPTION != rc) {
704+
if (ORTE_ERR_BAD_PARAM == rc) {
705+
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec);
703706
}
707+
free(spec);
708+
return rc;
704709
}
705-
len = strlen(spec);
706-
if (0 == strncasecmp(spec, "slot", len)) {
707-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
708-
} else if (0 == strncasecmp(spec, "node", len)) {
709-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
710-
} else if (0 == strncasecmp(spec, "seq", len)) {
711-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_SEQ);
712-
} else if (0 == strncasecmp(spec, "core", len)) {
713-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
714-
} else if (0 == strncasecmp(spec, "l1cache", len)) {
715-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE);
716-
} else if (0 == strncasecmp(spec, "l2cache", len)) {
717-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE);
718-
} else if (0 == strncasecmp(spec, "l3cache", len)) {
719-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE);
720-
} else if (0 == strncasecmp(spec, "socket", len)) {
721-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
722-
} else if (0 == strncasecmp(spec, "numa", len)) {
723-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA);
724-
} else if (0 == strncasecmp(spec, "board", len)) {
725-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD);
726-
} else if (0 == strncasecmp(spec, "hwthread", len)) {
727-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD);
728-
/* if we are mapping processes to individual hwthreads, then
729-
* we need to treat those hwthreads as separate cpus
730-
*/
731-
opal_hwloc_use_hwthreads_as_cpus = true;
732-
} else if (0 == strncasecmp(spec, "dist", len)) {
733-
if (NULL != rmaps_dist_device) {
734-
if (NULL != (pch = strchr(rmaps_dist_device, ':'))) {
735-
*pch = '\0';
736-
}
737-
if (NULL != device) {
738-
*device = strdup(rmaps_dist_device);
739-
}
740-
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST);
741-
} else {
742-
orte_show_help("help-orte-rmaps-base.txt", "device-not-specified", true);
743-
free(spec);
744-
return ORTE_ERR_SILENT;
710+
}
711+
len = strlen(spec);
712+
if (0 == strncasecmp(spec, "slot", len)) {
713+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
714+
} else if (0 == strncasecmp(spec, "node", len)) {
715+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
716+
} else if (0 == strncasecmp(spec, "seq", len)) {
717+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_SEQ);
718+
} else if (0 == strncasecmp(spec, "core", len)) {
719+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
720+
} else if (0 == strncasecmp(spec, "l1cache", len)) {
721+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE);
722+
} else if (0 == strncasecmp(spec, "l2cache", len)) {
723+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE);
724+
} else if (0 == strncasecmp(spec, "l3cache", len)) {
725+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE);
726+
} else if (0 == strncasecmp(spec, "socket", len)) {
727+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
728+
} else if (0 == strncasecmp(spec, "numa", len)) {
729+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA);
730+
} else if (0 == strncasecmp(spec, "board", len)) {
731+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD);
732+
} else if (0 == strncasecmp(spec, "hwthread", len)) {
733+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD);
734+
/* if we are mapping processes to individual hwthreads, then
735+
* we need to treat those hwthreads as separate cpus
736+
*/
737+
opal_hwloc_use_hwthreads_as_cpus = true;
738+
} else if (0 == strncasecmp(spec, "dist", len)) {
739+
if (NULL != rmaps_dist_device) {
740+
if (NULL != (pch = strchr(rmaps_dist_device, ':'))) {
741+
*pch = '\0';
742+
}
743+
if (NULL != device) {
744+
*device = strdup(rmaps_dist_device);
745745
}
746+
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST);
746747
} else {
747-
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec);
748+
orte_show_help("help-orte-rmaps-base.txt", "device-not-specified", true);
748749
free(spec);
749750
return ORTE_ERR_SILENT;
750751
}
752+
} else {
753+
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec);
751754
free(spec);
752-
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
755+
return ORTE_ERR_SILENT;
753756
}
757+
free(spec);
758+
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
754759

755760
setpolicy:
756-
*policy = tmp;
761+
if (NULL == jdata || NULL == jdata->map) {
762+
*policy = tmp;
763+
} else {
764+
jdata->map->mapping = tmp;
765+
}
757766

758767
return ORTE_SUCCESS;
759768
}

orte/orted/orted_submit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -864,7 +864,7 @@ int orte_submit_job(char *argv[], int *index,
864864
jdata->map = OBJ_NEW(orte_job_map_t);
865865

866866
if (NULL != orte_cmd_options.mapping_policy) {
867-
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, NULL, orte_cmd_options.mapping_policy))) {
867+
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping, NULL, orte_cmd_options.mapping_policy))) {
868868
ORTE_ERROR_LOG(rc);
869869
return rc;
870870
}

orte/orted/pmix/pmix_server_dyn.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor,
289289
orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
290290
return ORTE_ERR_BAD_PARAM;
291291
}
292-
rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping,
292+
rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping,
293293
NULL, info->data.string);
294294
if (ORTE_SUCCESS != rc) {
295295
return rc;

0 commit comments

Comments
 (0)