Skip to content

Commit 471d767

Browse files
committed
UCX osc: fence active operations before releasing accumulate lock and free memory if required
Signed-off-by: Joseph Schuchart <[email protected]>
1 parent 4d7a385 commit 471d767

File tree

1 file changed

+24
-29
lines changed

1 file changed

+24
-29
lines changed

ompi/mca/osc/ucx/osc_ucx_comm.c

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -257,15 +257,30 @@ static inline int start_atomicity(ompi_osc_ucx_module_t *module, int target) {
257257
}
258258
}
259259

260-
static inline int end_atomicity(ompi_osc_ucx_module_t *module, int target) {
260+
static inline int end_atomicity(
261+
ompi_osc_ucx_module_t *module,
262+
int target,
263+
void *free_ptr) {
261264
uint64_t result_value = 0;
262265
uint64_t remote_addr = (module->state_addrs)[target] + OSC_UCX_STATE_ACC_LOCK_OFFSET;
263266
int ret = OMPI_SUCCESS;
264267

268+
/* fence any still active operations */
269+
ret = opal_common_ucx_wpmem_fence(module->mem);
270+
if (ret != OMPI_SUCCESS) {
271+
OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_fence failed: %d", ret);
272+
return OMPI_ERROR;
273+
}
274+
265275
ret = opal_common_ucx_wpmem_fetch(module->state_mem,
266276
UCP_ATOMIC_FETCH_OP_SWAP, TARGET_LOCK_UNLOCKED,
267277
target, &result_value, sizeof(result_value),
268278
remote_addr);
279+
280+
/* TODO: encapsulate in a request and make the release non-blocking */
281+
if (NULL != free_ptr) {
282+
free(free_ptr);
283+
}
269284
if (ret != OMPI_SUCCESS) {
270285
OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_fetch failed: %d", ret);
271286
return OMPI_ERROR;
@@ -546,6 +561,7 @@ int accumulate_req(const void *origin_addr, int origin_count,
546561

547562
ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
548563
int ret = OMPI_SUCCESS;
564+
void *free_ptr = NULL;
549565

550566
ret = check_sync_state(module, target, false);
551567
if (ret != OMPI_SUCCESS) {
@@ -576,7 +592,6 @@ int accumulate_req(const void *origin_addr, int origin_count,
576592
return ret;
577593
}
578594
} else {
579-
void *temp_addr_holder = NULL;
580595
void *temp_addr = NULL;
581596
uint32_t temp_count;
582597
ompi_datatype_t *temp_dt;
@@ -593,7 +608,7 @@ int accumulate_req(const void *origin_addr, int origin_count,
593608
}
594609
}
595610
ompi_datatype_get_true_extent(temp_dt, &temp_lb, &temp_extent);
596-
temp_addr = temp_addr_holder = malloc(temp_extent * temp_count);
611+
temp_addr = free_ptr = malloc(temp_extent * temp_count);
597612
if (temp_addr == NULL) {
598613
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
599614
}
@@ -659,20 +674,14 @@ int accumulate_req(const void *origin_addr, int origin_count,
659674
return ret;
660675
}
661676

662-
ret = opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_EP, target);
663-
if (ret != OMPI_SUCCESS) {
664-
return ret;
665-
}
666-
667-
free(temp_addr_holder);
668677
}
669678

670679
if (NULL != ucx_req) {
671680
// nothing to wait for, mark request as completed
672681
ompi_request_complete(&ucx_req->super, true);
673682
}
674683

675-
return end_atomicity(module, target);
684+
return end_atomicity(module, target, free_ptr);
676685
}
677686

678687
int ompi_osc_ucx_accumulate(const void *origin_addr, int origin_count,
@@ -729,14 +738,7 @@ int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_a
729738
return ret;
730739
}
731740

732-
// fence before releasing the accumulate lock
733-
ret = opal_common_ucx_wpmem_fence(module->mem);
734-
if (ret != OMPI_SUCCESS) {
735-
OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_fence failed: %d", ret);
736-
// don't return error, try to release the accumulate lock
737-
}
738-
739-
return end_atomicity(module, target);
741+
return end_atomicity(module, target, NULL);
740742
}
741743

742744
int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr,
@@ -790,7 +792,7 @@ int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr,
790792
return ret;
791793
}
792794

793-
return end_atomicity(module, target);
795+
return end_atomicity(module, target, NULL);
794796
} else {
795797
return ompi_osc_ucx_get_accumulate(origin_addr, 1, dt, result_addr, 1, dt,
796798
target, target_disp, 1, dt, op, win);
@@ -808,6 +810,7 @@ int get_accumulate_req(const void *origin_addr, int origin_count,
808810
ompi_osc_ucx_request_t *ucx_req) {
809811
ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
810812
int ret = OMPI_SUCCESS;
813+
void *free_addr = NULL;
811814

812815
ret = check_sync_state(module, target, false);
813816
if (ret != OMPI_SUCCESS) {
@@ -841,7 +844,6 @@ int get_accumulate_req(const void *origin_addr, int origin_count,
841844
return ret;
842845
}
843846
} else {
844-
void *temp_addr_holder = NULL;
845847
void *temp_addr = NULL;
846848
uint32_t temp_count;
847849
ompi_datatype_t *temp_dt;
@@ -858,7 +860,7 @@ int get_accumulate_req(const void *origin_addr, int origin_count,
858860
}
859861
}
860862
ompi_datatype_get_true_extent(temp_dt, &temp_lb, &temp_extent);
861-
temp_addr = temp_addr_holder = malloc(temp_extent * temp_count);
863+
temp_addr = free_addr = malloc(temp_extent * temp_count);
862864
if (temp_addr == NULL) {
863865
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
864866
}
@@ -922,13 +924,6 @@ int get_accumulate_req(const void *origin_addr, int origin_count,
922924
if (ret != OMPI_SUCCESS) {
923925
return ret;
924926
}
925-
926-
ret = opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_EP, target);
927-
if (ret != OMPI_SUCCESS) {
928-
return ret;
929-
}
930-
931-
free(temp_addr_holder);
932927
}
933928
}
934929

@@ -938,7 +933,7 @@ int get_accumulate_req(const void *origin_addr, int origin_count,
938933
}
939934

940935

941-
return end_atomicity(module, target);
936+
return end_atomicity(module, target, free_addr);
942937
}
943938

944939
int ompi_osc_ucx_get_accumulate(const void *origin_addr, int origin_count,

0 commit comments

Comments
 (0)