Skip to content

Commit 06930a0

Browse files
authored
Merge pull request #1840 from artpol84/yalla_perf_fix
pml/yalla: fix yalla performance regression
2 parents 2a98f9f + a4ff9be commit 06930a0

File tree

3 files changed: 21 additions (+21) and 23 deletions (-23).

ompi/mca/pml/yalla/pml_yalla.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -681,7 +681,6 @@ int mca_pml_yalla_mrecv(void *buf, size_t count, ompi_datatype_t *datatype,
681681
int mca_pml_yalla_start(size_t count, ompi_request_t** requests)
682682
{
683683
mca_pml_yalla_base_request_t *req;
684-
mca_pml_yalla_send_request_t *sreq;
685684
mxm_error_t error;
686685
size_t i;
687686
int rc;
@@ -696,10 +695,12 @@ int mca_pml_yalla_start(size_t count, ompi_request_t** requests)
696695

697696
PML_YALLA_ASSERT(req->ompi.req_state != OMPI_REQUEST_INVALID);
698697
PML_YALLA_RESET_OMPI_REQ(&req->ompi, OMPI_REQUEST_ACTIVE);
699-
PML_YALLA_RESET_PML_REQ(req);
700698

701699
if (req->flags & MCA_PML_YALLA_REQUEST_FLAG_SEND) {
700+
mca_pml_yalla_send_request_t *sreq;
702701
sreq = (mca_pml_yalla_send_request_t *)req;
702+
PML_YALLA_RESET_PML_REQ(req, PML_YALLA_MXM_REQBASE(sreq));
703+
703704
if (req->flags & MCA_PML_YALLA_REQUEST_FLAG_BSEND) {
704705
PML_YALLA_VERBOSE(8, "start bsend request %p", (void *)sreq);
705706
rc = mca_pml_yalla_bsend(&sreq->mxm);
@@ -716,8 +717,12 @@ int mca_pml_yalla_start(size_t count, ompi_request_t** requests)
716717
}
717718
}
718719
} else {
720+
mca_pml_yalla_recv_request_t *rreq;
721+
rreq = (mca_pml_yalla_recv_request_t *)req;
722+
PML_YALLA_RESET_PML_REQ(req, PML_YALLA_MXM_REQBASE(rreq));
723+
719724
PML_YALLA_VERBOSE(8, "start recv request %p", (void *)req);
720-
error = mxm_req_recv(&((mca_pml_yalla_recv_request_t *)req)->mxm);
725+
error = mxm_req_recv(&rreq->mxm);
721726
if (MXM_OK != error) {
722727
return OMPI_ERROR;
723728
}

ompi/mca/pml/yalla/pml_yalla_request.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ static inline void mca_pml_yalla_request_release(mca_pml_yalla_base_request_t *r
3232
}
3333

3434
static inline int
35-
mca_pml_yalla_check_request_state(mca_pml_yalla_base_request_t *req)
35+
mca_pml_yalla_check_request_state(mca_pml_yalla_base_request_t *req, mxm_req_base_t *mxm_base)
3636
{
37-
if (req->mxm_base->state != MXM_REQ_COMPLETED) {
38-
PML_YALLA_VERBOSE(8, "request %p free called before completed", (void *)req);
37+
if (mxm_base->state != MXM_REQ_COMPLETED) {
38+
PML_YALLA_VERBOSE(8, "request %p free called before completed", (void*)req);
3939
req->flags |= MCA_PML_YALLA_REQUEST_FLAG_FREE_CALLED;
4040
return 0;
4141
}
@@ -45,12 +45,12 @@ mca_pml_yalla_check_request_state(mca_pml_yalla_base_request_t *req)
4545

4646
static int mca_pml_yalla_send_request_free(ompi_request_t **request)
4747
{
48-
mca_pml_yalla_base_request_t *req = (mca_pml_yalla_base_request_t*)(*request);
48+
mca_pml_yalla_send_request_t *sreq = (mca_pml_yalla_send_request_t*)(*request);
4949

5050
PML_YALLA_VERBOSE(9, "free send request *%p=%p", (void *)request, (void *)*request);
5151

52-
if (mca_pml_yalla_check_request_state(req)) {
53-
mca_pml_yalla_request_release(req, &ompi_pml_yalla.send_reqs);
52+
if (mca_pml_yalla_check_request_state(&sreq->super, PML_YALLA_MXM_REQBASE(sreq))) {
53+
mca_pml_yalla_request_release(&sreq->super, &ompi_pml_yalla.send_reqs);
5454
}
5555

5656
*request = MPI_REQUEST_NULL;
@@ -84,12 +84,12 @@ static int mca_pml_yalla_send_request_cancel(ompi_request_t *request, int flag)
8484

8585
static int mca_pml_yalla_recv_request_free(ompi_request_t **request)
8686
{
87-
mca_pml_yalla_base_request_t *req = (mca_pml_yalla_base_request_t*)(*request);
87+
mca_pml_yalla_recv_request_t *rreq = (mca_pml_yalla_recv_request_t*)(*request);
8888

8989
PML_YALLA_VERBOSE(9, "free receive request *%p=%p", (void *)request, (void *)*request);
9090

91-
if (mca_pml_yalla_check_request_state(req)) {
92-
mca_pml_yalla_request_release(req, &ompi_pml_yalla.recv_reqs);
91+
if (mca_pml_yalla_check_request_state(&rreq->super, PML_YALLA_MXM_REQBASE(rreq))) {
92+
mca_pml_yalla_request_release(&rreq->super, &ompi_pml_yalla.recv_reqs);
9393
}
9494

9595
*request = MPI_REQUEST_NULL;

ompi/mca/pml/yalla/pml_yalla_request.h

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,6 @@ struct pml_yalla_base_request {
2525
ompi_request_t ompi;
2626
mca_pml_yalla_convertor_t *convertor;
2727
int flags;
28-
/* overlaps with base of send/recv
29-
* In ISO C90, you would have to give contents a length of 1,
30-
* which means either you waste space or complicate the argument to malloc.
31-
* Note:
32-
* - 1 was the portable way to go, though it was rather strange
33-
* - 0 was better at indicating intent, but not legal as far as
34-
* the Standard was concerned and supported as an extension by some compilers (including gcc)
35-
*/
36-
mxm_req_base_t mxm_base[1];
3728
};
3829

3930
struct pml_yalla_send_request {
@@ -58,6 +49,8 @@ OBJ_CLASS_DECLARATION(mca_pml_yalla_recv_request_t);
5849

5950
void mca_pml_yalla_init_reqs(void);
6051

52+
/* Evaluate to the address of the `mxm.base` member of the request that `x`
 * points to; used with both send and receive request pointers. */
#define PML_YALLA_MXM_REQBASE(x) (&(x)->mxm.base)
53+
6154
#define PML_YALLA_RESET_OMPI_REQ(_ompi_req, _state) \
6255
{ \
6356
(_ompi_req)->req_state = _state; \
@@ -72,9 +65,9 @@ void mca_pml_yalla_init_reqs(void);
7265
OBJ_RETAIN(_comm); \
7366
}
7467

/*
 * Reset a PML request: mark its MXM base request as new and then reset the
 * PML-level request data.
 *
 *   _pml_req  - request passed on to PML_YALLA_RESET_PML_REQ_DATA().
 *   mxm_base  - pointer expression designating the MXM base request whose
 *               `state` field is set to MXM_REQ_NEW.
 *
 * `mxm_base` is parenthesized in the expansion because `->` binds tighter
 * than unary `&` and binary `+`; without the parentheses an argument such as
 * `&req->mxm.base` or `base + 1` would be parsed incorrectly.
 *
 * The expansion is kept as a plain brace block (not do/while(0)) so that
 * existing call sites continue to compile unchanged.
 */
#define PML_YALLA_RESET_PML_REQ(_pml_req, mxm_base) \
    { \
        (mxm_base)->state = MXM_REQ_NEW; \
        PML_YALLA_RESET_PML_REQ_DATA(_pml_req); \
    }
8073

0 commit comments

Comments
 (0)