Skip to content

Commit 23325c3

Browse files
authored
Merge pull request #3338 from jjhursey/topic/ompi_info_show_failed
`ompi_info --show-failed` feature
2 parents 39fa1d5 + 742d452 commit 23325c3

File tree

8 files changed

+112
-0
lines changed

8 files changed

+112
-0
lines changed

opal/mca/base/base.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* reserved.
1616
* Copyright (c) 2015 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18+
* Copyright (c) 2017 IBM Corporation. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -68,6 +69,7 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_base_component_priority_list_item_t);
6869
*/
6970
OPAL_DECLSPEC extern char *mca_base_component_path;
7071
OPAL_DECLSPEC extern bool mca_base_component_show_load_errors;
72+
OPAL_DECLSPEC extern bool mca_base_component_track_load_errors;
7173
OPAL_DECLSPEC extern bool mca_base_component_disable_dlopen;
7274
OPAL_DECLSPEC extern char *mca_base_system_default_path;
7375
OPAL_DECLSPEC extern char *mca_base_user_default_path;

opal/mca/base/mca_base_component_repository.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* reserved.
1616
* Copyright (c) 2015 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18+
* Copyright (c) 2017 IBM Corporation. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -55,6 +56,29 @@ OBJ_CLASS_INSTANCE(mca_base_component_repository_item_t, opal_list_item_t,
5556

5657
#endif /* OPAL_HAVE_DL_SUPPORT */
5758

59+
static void clf_constructor(opal_object_t *obj);
60+
static void clf_destructor(opal_object_t *obj);
61+
62+
OBJ_CLASS_INSTANCE(mca_base_failed_component_t, opal_list_item_t,
63+
clf_constructor, clf_destructor);
64+
65+
66+
/*
 * Constructor for mca_base_failed_component_t: start with no associated
 * repository item and no error message.
 */
static void clf_constructor(opal_object_t *obj)
{
    mca_base_failed_component_t *failed = (mca_base_failed_component_t *) obj;

    failed->error_msg = NULL;
    failed->comp = NULL;
}
72+
73+
/*
 * Destructor for mca_base_failed_component_t: free the stored error
 * message.  The repository item pointed to by comp is not released here;
 * the pointer is only cleared.
 */
static void clf_destructor(opal_object_t *obj)
{
    mca_base_failed_component_t *failed = (mca_base_failed_component_t *) obj;

    failed->comp = NULL;

    /* free(NULL) is a no-op, so no guard is needed */
    free(failed->error_msg);
    failed->error_msg = NULL;
}
5882

5983
/*
6084
* Private variables
@@ -408,6 +432,14 @@ int mca_base_component_repository_open (mca_base_framework_t *framework,
408432
}
409433
opal_output_verbose(vl, 0, "mca_base_component_repository_open: unable to open %s: %s (ignored)",
410434
ri->ri_base, err_msg);
435+
436+
if( mca_base_component_track_load_errors ) {
437+
mca_base_failed_component_t *f_comp = OBJ_NEW(mca_base_failed_component_t);
438+
f_comp->comp = ri;
439+
asprintf(&(f_comp->error_msg), "%s", err_msg);
440+
opal_list_append(&framework->framework_failed_components, &f_comp->super);
441+
}
442+
411443
return OPAL_ERR_BAD_PARAM;
412444
}
413445

opal/mca/base/mca_base_component_repository.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
1515
* reserved.
16+
* Copyright (c) 2017 IBM Corporation. All rights reserved.
1617
* $COPYRIGHT$
1718
*
1819
* Additional copyrights may follow
@@ -60,6 +61,17 @@ typedef struct mca_base_component_repository_item_t mca_base_component_repositor
6061

6162
OBJ_CLASS_DECLARATION(mca_base_component_repository_item_t);
6263

64+
/*
65+
* Structure to track information about why a component failed to load.
66+
*/
67+
struct mca_base_failed_component_t {
68+
opal_list_item_t super;
69+
mca_base_component_repository_item_t *comp;
70+
char *error_msg;
71+
};
72+
typedef struct mca_base_failed_component_t mca_base_failed_component_t;
73+
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_base_failed_component_t);
74+
6375
/**
6476
* @brief initialize the component repository
6577
*

opal/mca/base/mca_base_framework.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
44
* reserved.
55
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
6+
* Copyright (c) 2017 IBM Corporation. All rights reserved.
67
* $COPYRIGHT$
78
*
89
* Additional copyrights may follow
@@ -66,6 +67,7 @@ int mca_base_framework_register (struct mca_base_framework_t *framework,
6667
}
6768

6869
OBJ_CONSTRUCT(&framework->framework_components, opal_list_t);
70+
OBJ_CONSTRUCT(&framework->framework_failed_components, opal_list_t);
6971

7072
if (framework->framework_flags & MCA_BASE_FRAMEWORK_FLAG_NO_DSO) {
7173
flags |= MCA_BASE_REGISTER_STATIC_ONLY;
@@ -228,12 +230,16 @@ int mca_base_framework_close (struct mca_base_framework_t *framework) {
228230
framework->framework_output);
229231
OBJ_RELEASE(item);
230232
}
233+
while (NULL != (item = opal_list_remove_first (&framework->framework_failed_components))) {
234+
OBJ_RELEASE(item);
235+
}
231236
ret = OPAL_SUCCESS;
232237
}
233238

234239
framework->framework_flags &= ~(MCA_BASE_FRAMEWORK_FLAG_REGISTERED | MCA_BASE_FRAMEWORK_FLAG_OPEN);
235240

236241
OBJ_DESTRUCT(&framework->framework_components);
242+
OBJ_DESTRUCT(&framework->framework_failed_components);
237243

238244
framework_close_output (framework);
239245

opal/mca/base/mca_base_framework.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/*
33
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
44
* reserved.
5+
* Copyright (c) 2017 IBM Corporation. All rights reserved.
56
* $COPYRIGHT$
67
*
78
* Additional copyrights may follow
@@ -154,6 +155,8 @@ typedef struct mca_base_framework_t {
154155
/** List of selected components (filled in by mca_base_framework_register()
155156
or mca_base_framework_open() */
156157
opal_list_t framework_components;
158+
/** List of components that failed to load */
159+
opal_list_t framework_failed_components;
157160
} mca_base_framework_t;
158161

159162

opal/mca/base/mca_base_open.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ int mca_base_opened = 0;
4949
char *mca_base_system_default_path = NULL;
5050
char *mca_base_user_default_path = NULL;
5151
bool mca_base_component_show_load_errors = true;
52+
bool mca_base_component_track_load_errors = false;
5253
bool mca_base_component_disable_dlopen = false;
5354

5455
static char *mca_base_verbose = NULL;
@@ -111,6 +112,14 @@ int mca_base_open(void)
111112
(void) mca_base_var_register_synonym(var_id, "opal", "mca", NULL, "component_show_load_errors",
112113
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
113114

115+
mca_base_component_track_load_errors = false;
116+
var_id = mca_base_var_register("opal", "mca", "base", "component_track_load_errors",
117+
"Whether to track errors for components that failed to load or not",
118+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
119+
OPAL_INFO_LVL_9,
120+
MCA_BASE_VAR_SCOPE_READONLY,
121+
&mca_base_component_track_load_errors);
122+
114123
mca_base_component_disable_dlopen = false;
115124
var_id = mca_base_var_register("opal", "mca", "base", "component_disable_dlopen",
116125
"Whether to attempt to disable opening dynamic components or not",

opal/runtime/opal_info_support.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* reserved.
1616
* Copyright (c) 2011-2012 University of Houston. All rights reserved.
1717
* Copyright (c) 2016 Intel, Inc. All rights reserved.
18+
* Copyright (c) 2017 IBM Corporation. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -50,6 +51,7 @@
5051
#include "opal/mca/installdirs/installdirs.h"
5152

5253
#include "opal/runtime/opal_info_support.h"
54+
#include "opal/mca/base/mca_base_component_repository.h"
5355

5456
const char *opal_info_path_prefix = "prefix";
5557
const char *opal_info_path_bindir = "bindir";
@@ -109,6 +111,9 @@ OBJ_CLASS_INSTANCE(opal_info_component_map_t,
109111
component_map_construct,
110112
component_map_destruct);
111113

114+
static void opal_info_show_failed_component(const mca_base_component_repository_item_t* ri,
115+
const char *error_msg);
116+
112117
int opal_info_init(int argc, char **argv,
113118
opal_cmd_line_t *opal_info_cmd_line)
114119
{
@@ -157,6 +162,8 @@ int opal_info_init(int argc, char **argv,
157162
"Show only variables with at most this level (1-9)");
158163
opal_cmd_line_make_opt3(opal_info_cmd_line, 's', NULL, "selected-only", 0,
159164
"Show only variables from selected components");
165+
opal_cmd_line_make_opt3(opal_info_cmd_line, '\0', NULL, "show-failed", 0,
166+
"Show the components that failed to load along with the reason why they failed.");
160167

161168
/* set our threading level */
162169
opal_set_using_threads(false);
@@ -223,6 +230,10 @@ int opal_info_init(int argc, char **argv,
223230
opal_info_register_flags = MCA_BASE_REGISTER_DEFAULT;
224231
}
225232

233+
if( opal_cmd_line_is_taken(opal_info_cmd_line, "show-failed") ) {
234+
mca_base_component_track_load_errors = true;
235+
}
236+
226237
return OPAL_SUCCESS;
227238
}
228239

@@ -245,6 +256,7 @@ static int info_register_framework (mca_base_framework_t *framework, opal_pointe
245256
map = OBJ_NEW(opal_info_component_map_t);
246257
map->type = strdup(framework->framework_name);
247258
map->components = &framework->framework_components;
259+
map->failed_components = &framework->framework_failed_components;
248260
opal_pointer_array_add(component_map, map);
249261
}
250262

@@ -1012,6 +1024,7 @@ void opal_info_show_component_version(opal_pointer_array_t *mca_types,
10121024
bool want_all_types = false;
10131025
bool found;
10141026
mca_base_component_list_item_t *cli;
1027+
mca_base_failed_component_t *cli_failed;
10151028
int j;
10161029
char *pos;
10171030
opal_info_component_map_t *map;
@@ -1057,6 +1070,15 @@ void opal_info_show_component_version(opal_pointer_array_t *mca_types,
10571070
}
10581071
}
10591072

1073+
/* found it! */
1074+
OPAL_LIST_FOREACH(cli_failed, map->failed_components, mca_base_failed_component_t) {
1075+
mca_base_component_repository_item_t *ri = cli_failed->comp;
1076+
if (want_all_components ||
1077+
0 == strcmp(component_name, ri->ri_name) ) {
1078+
opal_info_show_failed_component(ri, cli_failed->error_msg);
1079+
}
1080+
}
1081+
10601082
if (!want_all_types) {
10611083
break;
10621084
}
@@ -1065,6 +1087,30 @@ void opal_info_show_component_version(opal_pointer_array_t *mca_types,
10651087
}
10661088

10671089

1090+
static void opal_info_show_failed_component(const mca_base_component_repository_item_t* ri,
1091+
const char *error_msg)
1092+
{
1093+
char *message, *content;
1094+
1095+
if (opal_info_pretty) {
1096+
asprintf(&message, "MCA %s", ri->ri_type);
1097+
asprintf(&content, "%s (failed to load) %s", ri->ri_name, error_msg);
1098+
1099+
opal_info_out(message, NULL, content);
1100+
1101+
free(message);
1102+
free(content);
1103+
} else {
1104+
asprintf(&message, "mca:%s:%s:failed", ri->ri_type, ri->ri_name);
1105+
asprintf(&content, "%s", error_msg);
1106+
1107+
opal_info_out(NULL, message, content);
1108+
1109+
free(message);
1110+
free(content);
1111+
}
1112+
}
1113+
10681114
/*
10691115
* Given a component, display its relevant version(s)
10701116
*/

opal/runtime/opal_info_support.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
33
* All rights reserved.
44
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
5+
* Copyright (c) 2017 IBM Corporation. All rights reserved.
56
* $COPYRIGHT$
67
*
78
* Additional copyrights may follow
@@ -50,6 +51,7 @@ typedef struct {
5051
opal_list_item_t super;
5152
char *type;
5253
opal_list_t *components;
54+
opal_list_t *failed_components;
5355
} opal_info_component_map_t;
5456
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_component_map_t);
5557

0 commit comments

Comments
 (0)