Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions opal/mca/btl/ofi/btl_ofi.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ struct mca_btl_ofi_module_t {

/** registration cache */
mca_rcache_base_module_t *rcache;

mca_btl_base_module_error_cb_fn_t ofi_error_cb;
};
typedef struct mca_btl_ofi_module_t mca_btl_ofi_module_t;

Expand Down
126 changes: 75 additions & 51 deletions opal/mca/btl/ofi/btl_ofi_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,56 @@ mca_btl_ofi_context_t *get_ofi_context_rr(mca_btl_ofi_module_t *btl)
return &btl->contexts[rr_num++ % btl->num_contexts];
}

static void inline complete_op_context(mca_btl_ofi_context_t* context,
void *op_context, int rc)
{
mca_btl_ofi_completion_context_t *c_ctx =
(mca_btl_ofi_completion_context_t*) op_context;
/* We are casting to every type here just for simplicity. */
mca_btl_ofi_base_completion_t *comp =
(mca_btl_ofi_base_completion_t *) c_ctx->comp;
mca_btl_ofi_frag_completion_t *frag_comp =
(mca_btl_ofi_frag_completion_t *) c_ctx->comp;
mca_btl_ofi_rdma_completion_t *rdma_comp
= (mca_btl_ofi_rdma_completion_t *) c_ctx->comp;

switch (comp->type) {
case MCA_BTL_OFI_TYPE_GET:
case MCA_BTL_OFI_TYPE_PUT:
case MCA_BTL_OFI_TYPE_AOP:
case MCA_BTL_OFI_TYPE_AFOP:
case MCA_BTL_OFI_TYPE_CSWAP:
/* call the callback */
if (rdma_comp->cbfunc) {
rdma_comp->cbfunc(comp->btl, comp->endpoint, rdma_comp->local_address,
rdma_comp->local_handle, rdma_comp->cbcontext,
rdma_comp->cbdata, rc);
}

MCA_BTL_OFI_NUM_RDMA_DEC((mca_btl_ofi_module_t *) comp->btl);
break;

case MCA_BTL_OFI_TYPE_RECV:
mca_btl_ofi_recv_frag((mca_btl_ofi_module_t *) comp->btl,
(mca_btl_ofi_endpoint_t *) comp->endpoint, context,
frag_comp->frag, rc);
break;

case MCA_BTL_OFI_TYPE_SEND:
MCA_BTL_OFI_NUM_SEND_DEC((mca_btl_ofi_module_t *) comp->btl);
mca_btl_ofi_frag_complete(frag_comp->frag, rc);
break;

default:
/* catasthrophic */
BTL_ERROR(("unknown completion type"));
MCA_BTL_OFI_ABORT();
}

/* return the completion handler */
opal_free_list_return(comp->my_list, (opal_free_list_item_t *) comp);
}

int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context)
{

Expand All @@ -319,61 +369,14 @@ int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context)
struct fi_cq_entry cq_entry[MCA_BTL_OFI_DEFAULT_MAX_CQE];
struct fi_cq_err_entry cqerr = {0};

mca_btl_ofi_completion_context_t *c_ctx;
mca_btl_ofi_base_completion_t *comp;
mca_btl_ofi_rdma_completion_t *rdma_comp;
mca_btl_ofi_frag_completion_t *frag_comp;

ret = fi_cq_read(context->cq, &cq_entry, mca_btl_ofi_component.num_cqe_read);

if (0 < ret) {
events_read = ret;
for (int i = 0; i < events_read; i++) {
if (NULL != cq_entry[i].op_context) {
++events;

c_ctx = (mca_btl_ofi_completion_context_t *) cq_entry[i].op_context;

/* We are casting to every type here just for simplicity. */
comp = (mca_btl_ofi_base_completion_t *) c_ctx->comp;
frag_comp = (mca_btl_ofi_frag_completion_t *) c_ctx->comp;
rdma_comp = (mca_btl_ofi_rdma_completion_t *) c_ctx->comp;

switch (comp->type) {
case MCA_BTL_OFI_TYPE_GET:
case MCA_BTL_OFI_TYPE_PUT:
case MCA_BTL_OFI_TYPE_AOP:
case MCA_BTL_OFI_TYPE_AFOP:
case MCA_BTL_OFI_TYPE_CSWAP:
/* call the callback */
if (rdma_comp->cbfunc) {
rdma_comp->cbfunc(comp->btl, comp->endpoint, rdma_comp->local_address,
rdma_comp->local_handle, rdma_comp->cbcontext,
rdma_comp->cbdata, OPAL_SUCCESS);
}

MCA_BTL_OFI_NUM_RDMA_DEC((mca_btl_ofi_module_t *) comp->btl);
break;

case MCA_BTL_OFI_TYPE_RECV:
mca_btl_ofi_recv_frag((mca_btl_ofi_module_t *) comp->btl,
(mca_btl_ofi_endpoint_t *) comp->endpoint, context,
frag_comp->frag);
break;

case MCA_BTL_OFI_TYPE_SEND:
MCA_BTL_OFI_NUM_SEND_DEC((mca_btl_ofi_module_t *) comp->btl);
mca_btl_ofi_frag_complete(frag_comp->frag, OPAL_SUCCESS);
break;

default:
/* catasthrophic */
BTL_ERROR(("unknown completion type"));
MCA_BTL_OFI_ABORT();
}

/* return the completion handler */
opal_free_list_return(comp->my_list, (opal_free_list_item_t *) comp);
complete_op_context(context, cq_entry[i].op_context, OPAL_SUCCESS);
}
}
} else if (OPAL_UNLIKELY(ret == -FI_EAVAIL)) {
Expand All @@ -383,10 +386,31 @@ int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context)
if (0 > ret) {
BTL_ERROR(("%s:%d: Error returned from fi_cq_readerr: %s(%d)", __FILE__, __LINE__,
fi_strerror(-ret), ret));
} else {
BTL_ERROR(("fi_cq_readerr: (provider err_code = %d)\n", cqerr.prov_errno));
MCA_BTL_OFI_ABORT();
} else if(NULL != cqerr.op_context){
switch(cqerr.err) {
case -FI_EIO: {
mca_btl_ofi_completion_context_t *c_ctx =
(mca_btl_ofi_completion_context_t*) cqerr.op_context;
mca_btl_ofi_base_completion_t *comp =
(mca_btl_ofi_base_completion_t*) c_ctx->comp;
mca_btl_ofi_module_t *ofi_btl =
(mca_btl_ofi_module_t*) comp->btl;
if(ofi_btl->ofi_error_cb){
ofi_btl->ofi_error_cb(comp->btl, 0, comp->endpoint->ep_proc,
"IO error reported by libfabric");
}

++events;
complete_op_context(context, cqerr.op_context, OPAL_ERR_UNREACH);
break;
}
default:
BTL_ERROR(("fi_cq_readerr: %s(%d) (provider err_code = %d)\n",
fi_strerror(-cqerr.err), cqerr.err, cqerr.prov_errno));
MCA_BTL_OFI_ABORT();
}
}
MCA_BTL_OFI_ABORT();
}
#ifdef FI_EINTR
/* sometimes, sockets provider complain about interrupt. We do nothing. */
Expand Down
6 changes: 3 additions & 3 deletions opal/mca/btl/ofi/btl_ofi_frag.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ int mca_btl_ofi_send(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
}

int mca_btl_ofi_recv_frag(mca_btl_ofi_module_t *ofi_btl, mca_btl_base_endpoint_t *endpoint,
mca_btl_ofi_context_t *context, mca_btl_ofi_base_frag_t *frag)
mca_btl_ofi_context_t *context, mca_btl_ofi_base_frag_t *frag,
int rc)
{
int rc;
mca_btl_active_message_callback_t *reg = mca_btl_base_active_message_trigger + frag->hdr.tag;
mca_btl_base_segment_t segment = {.seg_addr.pval = (void *) (frag + 1),
.seg_len = frag->hdr.len};
Expand All @@ -160,7 +160,7 @@ int mca_btl_ofi_recv_frag(mca_btl_ofi_module_t *ofi_btl, mca_btl_base_endpoint_t

/* call the callback */
reg->cbfunc(&ofi_btl->super, &recv_desc);
mca_btl_ofi_frag_complete(frag, OPAL_SUCCESS);
mca_btl_ofi_frag_complete(frag, rc);

/* repost the recv */
rc = mca_btl_ofi_post_recvs((mca_btl_base_module_t *) ofi_btl, context, 1);
Expand Down
3 changes: 2 additions & 1 deletion opal/mca/btl/ofi/btl_ofi_frag.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ int mca_btl_ofi_send(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
mca_btl_base_descriptor_t *descriptor, mca_btl_base_tag_t tag);

int mca_btl_ofi_recv_frag(mca_btl_ofi_module_t *ofi_btl, mca_btl_base_endpoint_t *endpoint,
mca_btl_ofi_context_t *context, mca_btl_ofi_base_frag_t *frag);
mca_btl_ofi_context_t *context, mca_btl_ofi_base_frag_t *frag,
int rc);

struct mca_btl_base_descriptor_t *mca_btl_ofi_prepare_src(mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
Expand Down
9 changes: 9 additions & 0 deletions opal/mca/btl/ofi/btl_ofi_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,14 @@ static int mca_btl_ofi_del_procs(mca_btl_base_module_t *btl, size_t nprocs, opal
return OPAL_SUCCESS;
}

static int mca_btl_ofi_register_error(mca_btl_base_module_t *btl,
mca_btl_base_module_error_cb_fn_t cb)
{
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
ofi_btl->ofi_error_cb = cb;
return OPAL_SUCCESS;
}

void mca_btl_ofi_rcache_init(mca_btl_ofi_module_t *module)
{
if (!module->initialized) {
Expand Down Expand Up @@ -515,4 +523,5 @@ mca_btl_ofi_module_t mca_btl_ofi_module_template = {
.btl_add_procs = mca_btl_ofi_add_procs,
.btl_del_procs = mca_btl_ofi_del_procs,
.btl_finalize = mca_btl_ofi_finalize,
.btl_register_error = mca_btl_ofi_register_error,
}};