Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ompi/mca/osc/rdma/osc_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ struct ompi_osc_rdma_component_t {

/** aggregation free list */
opal_free_list_t aggregate;

/** directory where to place backing files */
char *backing_directory;
};
typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t;

Expand Down
18 changes: 15 additions & 3 deletions ompi/mca/osc/rdma/osc_rdma_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,18 @@ static int ompi_osc_rdma_component_register (void)
MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_mtl_names);
free(description_str);

if (0 == access ("/dev/shm", W_OK)) {
mca_osc_rdma_component.backing_directory = "/dev/shm";
} else {
mca_osc_rdma_component.backing_directory = ompi_process_info.proc_session_dir;
}

(void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "backing_directory",
"Directory to place backing files for memory windows. "
"This directory should be on a local filesystem such as /tmp or "
"/dev/shm (default: (linux) /dev/shm, (others) session directory)",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_rdma_component.backing_directory);

/* register performance variables */

Expand Down Expand Up @@ -602,9 +614,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
}

/* allocate the shared memory segment */
ret = asprintf (&data_file, "%s"OPAL_PATH_SEP"window_%d.%s",
ompi_process_info.job_session_dir, ompi_comm_get_cid (module->comm),
ompi_process_info.nodename);
ret = asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_rdma.%s.%x.%d",
mca_osc_rdma_component.backing_directory, ompi_process_info.nodename,
OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm));
if (0 > ret) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
break;
Expand Down
2 changes: 2 additions & 0 deletions ompi/mca/osc/sm/osc_sm.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ typedef struct ompi_osc_sm_node_state_t ompi_osc_sm_node_state_t;

/**
 * Component-level state for the osc/sm (shared memory one-sided) component.
 */
struct ompi_osc_sm_component_t {
/** base osc component structure; kept as the first member */
ompi_osc_base_component_t super;

/** directory where to place backing files for shared memory windows;
 *  exposed as the "backing_directory" MCA variable */
char *backing_directory;
};
typedef struct ompi_osc_sm_component_t ompi_osc_sm_component_t;
OMPI_DECLSPEC extern ompi_osc_sm_component_t mca_osc_sm_component;
Expand Down
50 changes: 42 additions & 8 deletions ompi/mca/osc/sm/osc_sm_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ static int component_finalize(void);
static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct opal_info_t *info,
int flavor);
static int component_register (void);
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct opal_info_t *info,
int flavor, int *model);
Expand All @@ -51,6 +52,7 @@ ompi_osc_sm_component_t mca_osc_sm_component = {
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
.mca_open_component = component_open,
.mca_register_component_params = component_register,
},
.osc_data = { /* mca_base_component_data */
/* The component is not checkpoint ready */
Expand Down Expand Up @@ -105,6 +107,23 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = {
}
};

/**
 * Register the MCA variables owned by the osc/sm component.
 *
 * The default backing directory is chosen first: "/dev/shm" when this
 * process has write access to it, otherwise the process session directory
 * (ompi_process_info.proc_session_dir). That default is then published as
 * the "backing_directory" string variable.
 *
 * @return OPAL_SUCCESS unconditionally; the result of the variable
 *         registration call is deliberately discarded.
 */
static int component_register (void)
{
    /* access() returns 0 only when the path exists and is writable */
    const int dev_shm_writable = (0 == access ("/dev/shm", W_OK));

    mca_osc_sm_component.backing_directory =
        dev_shm_writable ? "/dev/shm" : ompi_process_info.proc_session_dir;

    /* the storage pointer set above supplies the variable's default value */
    (void) mca_base_component_var_register (&mca_osc_sm_component.super.osc_version,
                                            "backing_directory",
                                            "Directory to place backing files for shared memory windows. "
                                            "This directory should be on a local filesystem such as /tmp or "
                                            "/dev/shm (default: (linux) /dev/shm, (others) session directory)",
                                            MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                            OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_READONLY,
                                            &mca_osc_sm_component.backing_directory);

    return OPAL_SUCCESS;
}

static int
component_open(void)
Expand Down Expand Up @@ -169,6 +188,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
{
ompi_osc_sm_module_t *module = NULL;
int comm_size = ompi_comm_size (comm);
bool unlink_needed = false;
int ret = OMPI_ERROR;

if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) {
Expand Down Expand Up @@ -262,10 +282,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
posts_size += OPAL_ALIGN_PAD_AMOUNT(posts_size, 64);
if (0 == ompi_comm_rank (module->comm)) {
char *data_file;
if (asprintf(&data_file, "%s"OPAL_PATH_SEP"shared_window_%d.%s",
ompi_process_info.proc_session_dir,
ompi_comm_get_cid(module->comm),
ompi_process_info.nodename) < 0) {
ret = asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_sm.%s.%x.%d",
mca_osc_sm_component.backing_directory, ompi_process_info.nodename,
OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm));
if (ret < 0) {
return OMPI_ERR_OUT_OF_RESOURCE;
}

Expand All @@ -274,6 +294,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
if (OPAL_SUCCESS != ret) {
goto error;
}

unlink_needed = true;
}

ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0,
Expand All @@ -287,6 +309,17 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
goto error;
}

ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0,
module->comm, module->comm->c_coll->coll_bcast_module);
if (OMPI_SUCCESS != ret) {
goto error;
}

if (0 == ompi_comm_rank (module->comm)) {
opal_shmem_unlink (&module->seg_ds);
unlink_needed = false;
}

module->sizes = malloc(sizeof(size_t) * comm_size);
if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
module->bases = malloc(sizeof(void*) * comm_size);
Expand Down Expand Up @@ -399,6 +432,11 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
return OMPI_SUCCESS;

error:

if (0 == ompi_comm_rank (module->comm) && unlink_needed) {
opal_shmem_unlink (&module->seg_ds);
}

ompi_osc_sm_free (win);

return ret;
Expand Down Expand Up @@ -477,10 +515,6 @@ ompi_osc_sm_free(struct ompi_win_t *win)
module->comm->c_coll->coll_barrier(module->comm,
module->comm->c_coll->coll_barrier_module);

if (0 == ompi_comm_rank (module->comm)) {
opal_shmem_unlink (&module->seg_ds);
}

opal_shmem_segment_detach (&module->seg_ds);
} else {
free(module->node_states);
Expand Down