|
1 | 1 | /* |
| 2 | + * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. |
2 | 3 | * Copyright (c) 2004-2015 The University of Tennessee and The University |
3 | 4 | * of Tennessee Research Foundation. All rights |
4 | 5 | * reserved. |
@@ -78,3 +79,50 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, |
78 | 79 | } |
79 | 80 | return rc; |
80 | 81 | } |
| 82 | + |
| 83 | +int |
| 84 | +mca_coll_cuda_reduce_local(const void *sbuf, void *rbuf, int count, |
| 85 | + struct ompi_datatype_t *dtype, |
| 86 | + struct ompi_op_t *op, |
| 87 | + mca_coll_base_module_t *module) |
| 88 | +{ |
| 89 | + ptrdiff_t gap; |
| 90 | + char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; |
| 91 | + size_t bufsize; |
| 92 | + int rc; |
| 93 | + |
| 94 | + bufsize = opal_datatype_span(&dtype->super, count, &gap); |
| 95 | + |
| 96 | + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { |
| 97 | + sbuf1 = (char*)malloc(bufsize); |
| 98 | + if (NULL == sbuf1) { |
| 99 | + return OMPI_ERR_OUT_OF_RESOURCE; |
| 100 | + } |
| 101 | + opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); |
| 102 | + sbuf = sbuf1 - gap; |
| 103 | + } |
| 104 | + |
| 105 | + if (opal_cuda_check_bufs((char *)rbuf, NULL)) { |
| 106 | + rbuf1 = (char*)malloc(bufsize); |
| 107 | + if (NULL == rbuf1) { |
| 108 | + if (NULL != sbuf1) free(sbuf1); |
| 109 | + return OMPI_ERR_OUT_OF_RESOURCE; |
| 110 | + } |
| 111 | + opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); |
| 112 | + rbuf2 = rbuf; /* save away original buffer */ |
| 113 | + rbuf = rbuf1 - gap; |
| 114 | + } |
| 115 | + |
| 116 | + ompi_op_reduce(op, (void *)sbuf, rbuf, count, dtype); |
| 117 | + rc = OMPI_SUCCESS; |
| 118 | + |
| 119 | + if (NULL != sbuf1) { |
| 120 | + free(sbuf1); |
| 121 | + } |
| 122 | + if (NULL != rbuf1) { |
| 123 | + rbuf = rbuf2; |
| 124 | + opal_cuda_memcpy_sync(rbuf, rbuf1, bufsize); |
| 125 | + free(rbuf1); |
| 126 | + } |
| 127 | + return rc; |
| 128 | +} |
0 commit comments