@@ -39,7 +39,9 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target,
3939 ptl_match_bits_t match_bits , ptl_size_t remote_offset ,
4040 ompi_mtl_portals4_recv_request_t * request )
4141{
42- int ret ;
42+ int ret , i ;
43+ ptl_size_t rest = length , asked = 0 , frag_size ;
44+ int32_t pending_reply ;
4345
4446#if OMPI_MTL_PORTALS4_FLOW_CONTROL
4547 while (OPAL_UNLIKELY (OPAL_THREAD_ADD32 (& ompi_mtl_portals4 .flowctl .send_slots , -1 ) < 0 )) {
@@ -48,19 +50,29 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target,
4850 }
4951#endif
5052
51- ret = PtlGet (ompi_mtl_portals4 .send_md_h ,
52- (ptl_size_t ) start ,
53- length ,
54- target ,
55- ompi_mtl_portals4 .read_idx ,
56- match_bits ,
57- remote_offset ,
58- request );
59- if (OPAL_UNLIKELY (PTL_OK != ret )) {
60- opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
61- "%s:%d: PtlGet failed: %d" ,
62- __FILE__ , __LINE__ , ret );
63- return OMPI_ERR_OUT_OF_RESOURCE ;
53+ request -> pending_reply = (length + ompi_mtl_portals4 .max_msg_size_mtl - 1 ) / ompi_mtl_portals4 .max_msg_size_mtl ;
54+ pending_reply = request -> pending_reply ;
55+
56+ for (i = 0 ; i < pending_reply ; i ++ ) {
57+ OPAL_OUTPUT_VERBOSE ((90 , ompi_mtl_base_framework .framework_output , "GET (fragment %d/%d) send" ,
58+ i + 1 , pending_reply ));
59+ frag_size = (OPAL_UNLIKELY (rest > ompi_mtl_portals4 .max_msg_size_mtl )) ? ompi_mtl_portals4 .max_msg_size_mtl : rest ;
60+ ret = PtlGet (ompi_mtl_portals4 .send_md_h ,
61+ (ptl_size_t ) start + i * ompi_mtl_portals4 .max_msg_size_mtl ,
62+ frag_size ,
63+ target ,
64+ ompi_mtl_portals4 .read_idx ,
65+ match_bits ,
66+ remote_offset + i * ompi_mtl_portals4 .max_msg_size_mtl ,
67+ request );
68+ if (OPAL_UNLIKELY (PTL_OK != ret )) {
69+ opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
70+ "%s:%d: PtlGet failed: %d" ,
71+ __FILE__ , __LINE__ , ret );
72+ return OMPI_ERR_OUT_OF_RESOURCE ;
73+ }
74+ rest -= frag_size ;
75+ asked += frag_size ;
6476 }
6577
6678 return OMPI_SUCCESS ;
@@ -109,26 +121,30 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
109121 ptl_request -> super .super .ompi_req -> req_status .MPI_ERROR = MPI_ERR_TRUNCATE ;
110122 }
111123
124+ if (ev -> mlength < msg_length )
125+ OPAL_OUTPUT_VERBOSE ((90 , ompi_mtl_base_framework .framework_output , "Truncated message, some PtlGet are required (protocol = %d)" ,
126+ ompi_mtl_portals4 .protocol ));
127+
112128#if OPAL_ENABLE_DEBUG
113129 ptl_request -> hdr_data = ev -> hdr_data ;
114130#endif
115131
116- if (!MTL_PORTALS4_IS_SHORT_MSG (ev -> match_bits ) && ompi_mtl_portals4 .protocol == rndv ) {
117- /* If it's not a short message and we're doing rndv, we
132+ ptl_request -> super .super .ompi_req -> req_status ._ucount = ev -> mlength ;
133+ if (!MTL_PORTALS4_IS_SHORT_MSG (ev -> match_bits ) && msg_length > ev -> mlength ) {
134+ /* If it's not a short message and the message is not complete, we
118135 only have the first part of the message. Issue the get
119136 to pull the second part of the message. */
120- ret = read_msg ((char * ) ptl_request -> delivery_ptr + ompi_mtl_portals4 . eager_limit ,
137+ ret = read_msg ((char * ) ptl_request -> delivery_ptr + ev -> mlength ,
121138 ((msg_length > ptl_request -> delivery_len ) ?
122- ptl_request -> delivery_len : msg_length ) - ompi_mtl_portals4 . eager_limit ,
139+ ptl_request -> delivery_len : msg_length ) - ev -> mlength ,
123140 ev -> initiator ,
124141 ev -> hdr_data ,
125- ompi_mtl_portals4 . eager_limit ,
142+ ev -> mlength ,
126143 ptl_request );
127144 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
128145 if (NULL != ptl_request -> buffer_ptr ) free (ptl_request -> buffer_ptr );
129146 goto callback_error ;
130147 }
131-
132148 } else {
133149 /* If we're either using the eager protocol or were a
134150 short message, all data has been received, so complete
@@ -142,8 +158,6 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
142158 __FILE__ , __LINE__ , ret );
143159 ptl_request -> super .super .ompi_req -> req_status .MPI_ERROR = ret ;
144160 }
145- ptl_request -> super .super .ompi_req -> req_status ._ucount = ev -> mlength ;
146-
147161 OPAL_OUTPUT_VERBOSE ((50 , ompi_mtl_base_framework .framework_output ,
148162 "Recv %lu (0x%lx) completed, expected" ,
149163 ptl_request -> opcount , ptl_request -> hdr_data ));
@@ -165,12 +179,14 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
165179 }
166180
167181 /* set the received length in the status, now that we know
168- excatly how much data was sent. */
169- ptl_request -> super .super .ompi_req -> req_status ._ucount = ev -> mlength ;
170- if (ompi_mtl_portals4 .protocol == rndv ) {
171- ptl_request -> super .super .ompi_req -> req_status ._ucount +=
172- ompi_mtl_portals4 .eager_limit ;
182+ exactly how much data was sent. */
183+ ptl_request -> super .super .ompi_req -> req_status ._ucount += ev -> mlength ;
184+
185+ ret = OPAL_THREAD_ADD32 (& (ptl_request -> pending_reply ), -1 );
186+ if (ret > 0 ) {
187+ return OMPI_SUCCESS ;
173188 }
189+ assert (ptl_request -> pending_reply == 0 );
174190
175191#if OMPI_MTL_PORTALS4_FLOW_CONTROL
176192 OPAL_THREAD_ADD32 (& ompi_mtl_portals4 .flowctl .send_slots , 1 );
@@ -192,8 +208,8 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
192208 }
193209
194210 OPAL_OUTPUT_VERBOSE ((50 , ompi_mtl_base_framework .framework_output ,
195- "Recv %lu (0x%lx) completed, reply" ,
196- ptl_request -> opcount , ptl_request -> hdr_data ));
211+ "Recv %lu (0x%lx) completed , reply (pending_reply: %d) " ,
212+ ptl_request -> opcount , ptl_request -> hdr_data , ptl_request -> pending_reply ));
197213 ptl_request -> super .super .completion_callback (& ptl_request -> super .super );
198214 break ;
199215
@@ -281,17 +297,16 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
281297 ptl_request -> super .super .completion_callback (& ptl_request -> super .super );
282298
283299 } else {
284- if (ev -> mlength > 0 ) {
285- /* if rndv or triggered, copy the eager part to the right place */
286- memcpy (ptl_request -> delivery_ptr , ev -> start , ev -> mlength );
287- }
288300
289- ret = read_msg ((char * ) ptl_request -> delivery_ptr + ev -> mlength ,
290- ((msg_length > ptl_request -> delivery_len ) ?
291- ptl_request -> delivery_len : msg_length ) - ev -> mlength ,
301+ /* For long messages in the overflow list, ev->mlength = 0 */
302+ ptl_request -> super .super .ompi_req -> req_status ._ucount = 0 ;
303+
304+ ret = read_msg ((char * ) ptl_request -> delivery_ptr ,
305+ (msg_length > ptl_request -> delivery_len ) ?
306+ ptl_request -> delivery_len : msg_length ,
292307 ev -> initiator ,
293308 ev -> hdr_data ,
294- ev -> mlength ,
309+ 0 ,
295310 ptl_request );
296311 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
297312 if (NULL != ptl_request -> buffer_ptr ) free (ptl_request -> buffer_ptr );
@@ -373,6 +388,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
373388 ptl_request -> delivery_len = length ;
374389 ptl_request -> req_started = false;
375390 ptl_request -> super .super .ompi_req -> req_status .MPI_ERROR = OMPI_SUCCESS ;
391+ ptl_request -> pending_reply = 0 ;
376392
377393 OPAL_OUTPUT_VERBOSE ((50 , ompi_mtl_base_framework .framework_output ,
378394 "Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n" ,
@@ -389,7 +405,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
389405 PTL_ME_OP_PUT |
390406 PTL_ME_USE_ONCE |
391407 PTL_ME_EVENT_UNLINK_DISABLE ;
392- if (length <= ompi_mtl_portals4 .eager_limit ) {
408+ if (length <= ompi_mtl_portals4 .short_limit ) {
393409 me .options |= PTL_ME_EVENT_LINK_DISABLE ;
394410 }
395411 me .match_id = remote_proc ;
@@ -413,7 +429,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
413429 /* if a long message, spin until we either have a comm event or a
414430 link event, guaranteeing progress for long unexpected
415431 messages. */
416- if (length > ompi_mtl_portals4 .eager_limit ) {
432+ if (length > ompi_mtl_portals4 .short_limit ) {
417433 while (true != ptl_request -> req_started ) {
418434 ompi_mtl_portals4_progress ();
419435 }
@@ -454,6 +470,7 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl,
454470 ptl_request -> delivery_ptr = start ;
455471 ptl_request -> delivery_len = length ;
456472 ptl_request -> super .super .ompi_req -> req_status .MPI_ERROR = OMPI_SUCCESS ;
473+ ptl_request -> pending_reply = 0 ;
457474
458475 OPAL_OUTPUT_VERBOSE ((50 , ompi_mtl_base_framework .framework_output ,
459476 "Mrecv %lu of length %ld (0x%lx)\n" ,
0 commit comments