@@ -178,6 +178,97 @@ ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt)
178178 return 0 ;
179179}
180180
181+ static ptl_size_t
182+ number_of_fragment (ptl_size_t length , ptl_size_t maxlength )
183+ {
184+ ptl_size_t nb_frag = length == 0 ? 1 : (length - 1 ) / maxlength + 1 ;
185+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
186+ "%s,%d : %ld fragment(s)" , __FUNCTION__ , __LINE__ , nb_frag ));
187+ return nb_frag ;
188+ }
189+
190+ static int
191+ splittedPtlPut (ptl_handle_md_t md_h ,
192+ ptl_size_t loc_offset ,
193+ ptl_size_t length ,
194+ ptl_ack_req_t ack_req ,
195+ ptl_process_t target_id ,
196+ ptl_pt_index_t pt_index ,
197+ ptl_match_bits_t match_b ,
198+ ptl_size_t rem_offset ,
199+ void * usr_ptr ,
200+ ptl_hdr_data_t hdr_data )
201+ {
202+ ptl_size_t length_sent = 0 ;
203+ do {
204+ ptl_size_t length_frag ;
205+ int ret ;
206+
207+ length_frag = (length > mca_osc_portals4_component .ptl_max_msg_size ) ?
208+ mca_osc_portals4_component .ptl_max_msg_size :
209+ length ;
210+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
211+ "Put size : %lu/%lu, offset:%lu" , length_frag , length , length_sent ));
212+ ret = PtlPut (md_h ,
213+ loc_offset + length_sent ,
214+ length_frag ,
215+ ack_req ,
216+ target_id ,
217+ pt_index ,
218+ match_b ,
219+ rem_offset + length_sent ,
220+ usr_ptr ,
221+ hdr_data );
222+ if (PTL_OK != ret ) {
223+ opal_output_verbose (1 , ompi_osc_base_framework .framework_output ,
224+ "%s:%d PtlPut failed with return value %d" ,
225+ __FUNCTION__ , __LINE__ , ret );
226+ return ret ;
227+ }
228+ length -= length_frag ;
229+ length_sent += length_frag ;
230+ } while (length );
231+ return PTL_OK ;
232+ }
233+
234+ static int
235+ splittedPtlGet (ptl_handle_md_t md_h ,
236+ ptl_size_t loc_offset ,
237+ ptl_size_t length ,
238+ ptl_process_t target_id ,
239+ ptl_pt_index_t pt_index ,
240+ ptl_match_bits_t match_b ,
241+ ptl_size_t rem_offset ,
242+ void * usr_ptr )
243+ {
244+ ptl_size_t length_submitted = 0 ;
245+ OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output , "Get" ));
246+
247+ do {
248+ ptl_size_t length_frag ;
249+ int ret ;
250+ length_frag = (length > mca_osc_portals4_component .ptl_max_msg_size ) ?
251+ mca_osc_portals4_component .ptl_max_msg_size :
252+ length ;
253+ ret = PtlGet (md_h ,
254+ (ptl_size_t ) loc_offset + length_submitted ,
255+ length_frag ,
256+ target_id ,
257+ pt_index ,
258+ match_b ,
259+ rem_offset + length_submitted ,
260+ usr_ptr );
261+ if (PTL_OK != ret ) {
262+ opal_output_verbose (1 , ompi_osc_base_framework .framework_output ,
263+ "%s:%d PtlGet failed with return value %d" ,
264+ __FUNCTION__ , __LINE__ , ret );
265+ return ret ;
266+ }
267+ length -= length_frag ;
268+ length_submitted += length_frag ;
269+ } while (length );
270+ return PTL_OK ;
271+ }
181272
182273int
183274ompi_osc_portals4_rput (const void * origin_addr ,
@@ -218,15 +309,18 @@ ompi_osc_portals4_rput(const void *origin_addr,
218309 "MPI_Rput: transfer of non-contiguous memory is not currently supported.\n" );
219310 return OMPI_ERR_NOT_SUPPORTED ;
220311 } else {
221- (void )opal_atomic_add_64 (& module -> opcount , 1 );
222- request -> ops_expected = 1 ;
223312 ret = ompi_datatype_type_size (origin_dt , & length );
224313 if (OMPI_SUCCESS != ret ) {
225314 OMPI_OSC_PORTALS4_REQUEST_RETURN (request );
226315 return ret ;
227316 }
228317 length *= origin_count ;
229- ret = PtlPut (module -> req_md_h ,
318+ request -> ops_expected = number_of_fragment (length , mca_osc_portals4_component .ptl_max_msg_size );
319+ opal_atomic_add_64 (& module -> opcount , request -> ops_expected );
320+ OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output ,
321+ "%s,%d Put" , __FUNCTION__ , __LINE__ ));
322+
323+ ret = splittedPtlPut (module -> req_md_h ,
230324 (ptl_size_t ) origin_addr ,
231325 length ,
232326 PTL_ACK_REQ ,
@@ -285,15 +379,17 @@ ompi_osc_portals4_rget(void *origin_addr,
285379 "MPI_Rget: transfer of non-contiguous memory is not currently supported.\n" );
286380 return OMPI_ERR_NOT_SUPPORTED ;
287381 } else {
288- (void )opal_atomic_add_64 (& module -> opcount , 1 );
289- request -> ops_expected = 1 ;
290382 ret = ompi_datatype_type_size (origin_dt , & length );
291383 if (OMPI_SUCCESS != ret ) {
292384 OMPI_OSC_PORTALS4_REQUEST_RETURN (request );
293385 return ret ;
294386 }
295387 length *= origin_count ;
296- ret = PtlGet (module -> req_md_h ,
388+ request -> ops_expected = number_of_fragment (length , mca_osc_portals4_component .ptl_max_msg_size );
389+ opal_atomic_add_64 (& module -> opcount , request -> ops_expected );
390+ OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output ,
391+ "%s,%d Get" , __FUNCTION__ , __LINE__ ));
392+ ret = splittedPtlGet (module -> req_md_h ,
297393 (ptl_size_t ) origin_addr ,
298394 length ,
299395 peer ,
@@ -368,11 +464,13 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
368464
369465 do {
370466 size_t msg_length = MIN (module -> atomic_max , length - sent );
371- (void )opal_atomic_add_64 (& module -> opcount , 1 );
372- request -> ops_expected ++ ;
373467
374468 if (MPI_REPLACE == op ) {
375- ret = PtlPut (module -> req_md_h ,
469+ request -> ops_expected += number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size );
470+ opal_atomic_add_64 (& module -> opcount , number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size ));
471+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
472+ "%s,%d Put" , __FUNCTION__ , __LINE__ ));
473+ ret = splittedPtlPut (module -> req_md_h ,
376474 md_offset + sent ,
377475 msg_length ,
378476 PTL_ACK_REQ ,
@@ -383,6 +481,8 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
383481 request ,
384482 0 );
385483 } else {
484+ request -> ops_expected ++ ;
485+ opal_atomic_add_64 (& module -> opcount , 1 );
386486 ret = ompi_osc_portals4_get_dt (origin_dt , & ptl_dt );
387487 if (OMPI_SUCCESS != ret ) {
388488 opal_output (ompi_osc_base_framework .framework_output ,
@@ -396,7 +496,8 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
396496 "MPI_Raccumulate: operation is not currently supported" );
397497 return OMPI_ERR_NOT_SUPPORTED ;
398498 }
399-
499+ OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output ,
500+ "%s,%d Atomic" , __FUNCTION__ , __LINE__ ));
400501 ret = PtlAtomic (module -> req_md_h ,
401502 offset + sent ,
402503 msg_length ,
@@ -499,6 +600,8 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
499600 (void )opal_atomic_add_64 (& module -> opcount , 1 );
500601 request -> ops_expected ++ ;
501602
603+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
604+ "%s,%d Swap" , __FUNCTION__ , __LINE__ ));
502605 ret = PtlSwap (module -> req_md_h ,
503606 result_md_offset + sent ,
504607 module -> md_h ,
@@ -530,10 +633,11 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
530633 do {
531634 size_t msg_length = MIN (module -> fetch_atomic_max , length - sent );
532635
533- (void )opal_atomic_add_64 (& module -> opcount , 1 );
534- request -> ops_expected ++ ;
535-
536- ret = PtlGet (module -> req_md_h ,
636+ opal_atomic_add_64 (& module -> opcount , number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size ));
637+ request -> ops_expected += number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size );
638+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
639+ "%s,%d Get" , __FUNCTION__ , __LINE__ ));
640+ ret = splittedPtlGet (module -> req_md_h ,
537641 md_offset + sent ,
538642 msg_length ,
539643 peer ,
@@ -558,14 +662,14 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
558662
559663 ret = ompi_osc_portals4_get_dt (origin_dt , & ptl_dt );
560664 if (OMPI_SUCCESS != ret ) {
561- opal_output (ompi_osc_base_framework .framework_output ,
665+ opal_output (ompi_osc_base_framework .framework_output ,
562666 "MPI_Rget_accumulate: datatype is not currently supported" );
563667 return OMPI_ERR_NOT_SUPPORTED ;
564668 }
565669
566670 ret = ompi_osc_portals4_get_op (op , & ptl_op );
567671 if (OMPI_SUCCESS != ret ) {
568- opal_output (ompi_osc_base_framework .framework_output ,
672+ opal_output (ompi_osc_base_framework .framework_output ,
569673 "MPI_Rget_accumulate: operation is not currently supported" );
570674 return OMPI_ERR_NOT_SUPPORTED ;
571675 }
@@ -576,6 +680,8 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
576680 (void )opal_atomic_add_64 (& module -> opcount , 1 );
577681 request -> ops_expected ++ ;
578682
683+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
684+ "%s,%d FetchAtomic" , __FUNCTION__ , __LINE__ ));
579685 ret = PtlFetchAtomic (module -> req_md_h ,
580686 result_md_offset + sent ,
581687 module -> md_h ,
@@ -634,13 +740,15 @@ ompi_osc_portals4_put(const void *origin_addr,
634740 "MPI_Put: transfer of non-contiguous memory is not currently supported.\n" );
635741 return OMPI_ERR_NOT_SUPPORTED ;
636742 } else {
637- (void )opal_atomic_add_64 (& module -> opcount , 1 );
638743 ret = ompi_datatype_type_size (origin_dt , & length );
639744 if (OMPI_SUCCESS != ret ) {
640745 return ret ;
641746 }
642747 length *= origin_count ;
643- ret = PtlPut (module -> md_h ,
748+ opal_atomic_add_64 (& module -> opcount , number_of_fragment (length , mca_osc_portals4_component .ptl_max_msg_size ));
749+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
750+ "%s,%d Put" , __FUNCTION__ , __LINE__ ));
751+ ret = splittedPtlPut (module -> md_h ,
644752 (ptl_size_t ) origin_addr ,
645753 length ,
646754 PTL_ACK_REQ ,
@@ -691,13 +799,15 @@ ompi_osc_portals4_get(void *origin_addr,
691799 "MPI_Get: transfer of non-contiguous memory is not currently supported.\n" );
692800 return OMPI_ERR_NOT_SUPPORTED ;
693801 } else {
694- (void )opal_atomic_add_64 (& module -> opcount , 1 );
695802 ret = ompi_datatype_type_size (origin_dt , & length );
696803 if (OMPI_SUCCESS != ret ) {
697804 return ret ;
698805 }
699806 length *= origin_count ;
700- ret = PtlGet (module -> md_h ,
807+ opal_atomic_add_64 (& module -> opcount , number_of_fragment (length , mca_osc_portals4_component .ptl_max_msg_size ));
808+ OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output ,
809+ "%s,%d Get" , __FUNCTION__ , __LINE__ ));
810+ ret = splittedPtlGet (module -> md_h ,
701811 (ptl_size_t ) origin_addr ,
702812 length ,
703813 peer ,
@@ -763,10 +873,12 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
763873
764874 do {
765875 size_t msg_length = MIN (module -> atomic_max , length - sent );
766- (void )opal_atomic_add_64 (& module -> opcount , 1 );
767876
768877 if (MPI_REPLACE == op ) {
769- ret = PtlPut (module -> md_h ,
878+ opal_atomic_add_64 (& module -> opcount , number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size ));
879+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
880+ "%s,%d Put" , __FUNCTION__ , __LINE__ ));
881+ ret = splittedPtlPut (module -> md_h ,
770882 md_offset + sent ,
771883 msg_length ,
772884 PTL_ACK_REQ ,
@@ -777,6 +889,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
777889 NULL ,
778890 0 );
779891 } else {
892+ (void )opal_atomic_add_64 (& module -> opcount , 1 );
780893 ret = ompi_osc_portals4_get_dt (origin_dt , & ptl_dt );
781894 if (OMPI_SUCCESS != ret ) {
782895 opal_output (ompi_osc_base_framework .framework_output ,
@@ -791,6 +904,8 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
791904 return OMPI_ERR_NOT_SUPPORTED ;
792905 }
793906
907+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
908+ "%s,%d Atomic" , __FUNCTION__ , __LINE__ ));
794909 ret = PtlAtomic (module -> md_h ,
795910 md_offset + sent ,
796911 msg_length ,
@@ -882,6 +997,8 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
882997
883998 (void )opal_atomic_add_64 (& module -> opcount , 1 );
884999
1000+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1001+ "%s,%d Swap" , __FUNCTION__ , __LINE__ ));
8851002 ret = PtlSwap (module -> md_h ,
8861003 result_md_offset + sent ,
8871004 module -> md_h ,
@@ -912,9 +1029,10 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
9121029 do {
9131030 size_t msg_length = MIN (module -> fetch_atomic_max , length - sent );
9141031
915- (void )opal_atomic_add_64 (& module -> opcount , 1 );
916-
917- ret = PtlGet (module -> md_h ,
1032+ opal_atomic_add_64 (& module -> opcount , number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size ));
1033+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1034+ "%s,%d Get" , __FUNCTION__ , __LINE__ ));
1035+ ret = splittedPtlGet (module -> md_h ,
9181036 md_offset + sent ,
9191037 msg_length ,
9201038 peer ,
@@ -955,6 +1073,8 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
9551073
9561074 (void )opal_atomic_add_64 (& module -> opcount , 1 );
9571075
1076+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1077+ "%s,%d FetchAtomic" , __FUNCTION__ , __LINE__ ));
9581078 ret = PtlFetchAtomic (module -> md_h ,
9591079 result_md_offset + sent ,
9601080 module -> md_h ,
@@ -1025,6 +1145,8 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr,
10251145
10261146 (void )opal_atomic_add_64 (& module -> opcount , 1 );
10271147
1148+ OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output ,
1149+ "%s,%d Swap" , __FUNCTION__ , __LINE__ ));
10281150 ret = PtlSwap (module -> md_h ,
10291151 result_md_offset ,
10301152 module -> md_h ,
@@ -1087,14 +1209,15 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
10871209
10881210 assert (length <= module -> fetch_atomic_max );
10891211
1090- (void )opal_atomic_add_64 (& module -> opcount , 1 );
1091-
10921212 if (MPI_REPLACE == op ) {
10931213 ptl_size_t result_md_offset , origin_md_offset ;
10941214
10951215 result_md_offset = (ptl_size_t ) result_addr ;
10961216 origin_md_offset = (ptl_size_t ) origin_addr ;
10971217
1218+ (void )opal_atomic_add_64 (& module -> opcount , 1 );
1219+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1220+ "%s,%d Swap" , __FUNCTION__ , __LINE__ ));
10981221 ret = PtlSwap (module -> md_h ,
10991222 result_md_offset ,
11001223 module -> md_h ,
@@ -1114,7 +1237,10 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
11141237
11151238 md_offset = (ptl_size_t ) result_addr ;
11161239
1117- ret = PtlGet (module -> md_h ,
1240+ opal_atomic_add_64 (& module -> opcount , number_of_fragment (length , mca_osc_portals4_component .ptl_max_msg_size ));
1241+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1242+ "%s,%d Get" , __FUNCTION__ , __LINE__ ));
1243+ ret = splittedPtlGet (module -> md_h ,
11181244 md_offset ,
11191245 length ,
11201246 peer ,
@@ -1124,6 +1250,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
11241250 NULL );
11251251 } else {
11261252 ptl_size_t result_md_offset , origin_md_offset ;
1253+ (void )opal_atomic_add_64 (& module -> opcount , 1 );
11271254
11281255 ret = ompi_osc_portals4_get_op (op , & ptl_op );
11291256 if (OMPI_SUCCESS != ret ) {
@@ -1135,6 +1262,8 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
11351262 result_md_offset = (ptl_size_t ) result_addr ;
11361263 origin_md_offset = (ptl_size_t ) origin_addr ;
11371264
1265+ OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1266+ "%s,%d FetchAtomic" , __FUNCTION__ , __LINE__ ));
11381267 ret = PtlFetchAtomic (module -> md_h ,
11391268 result_md_offset ,
11401269 module -> md_h ,
0 commit comments