Skip to content

Commit 1f59fbd

Browse files
committed
UCP: Merged with master
2 parents 464887d + fb479dd commit 1f59fbd

File tree

5 files changed

+309
-80
lines changed

5 files changed

+309
-80
lines changed

src/tools/perf/cuda/ucp_cuda_kernel.cu

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,19 @@ public:
139139
const ucp_perf_cuda_params &get_params() const { return m_params; }
140140

141141
private:
142+
/* Tell whether the benchmark uses a counter element: true for every perf
 * command except UCX_PERF_CMD_PUT_SINGLE. */
static bool has_counter(const ucx_perf_context_t &perf)
143+
{
144+
return (perf.params.command != UCX_PERF_CMD_PUT_SINGLE);
145+
}
146+
142147
void init_mem_list(const ucx_perf_context_t &perf)
143148
{
144-
/* +1 for the counter */
145-
size_t count = perf.params.msg_size_cnt + 1;
146-
size_t offset = 0;
149+
size_t data_count = perf.params.msg_size_cnt;
150+
size_t count = data_count + (has_counter(perf) ? 1 : 0);
151+
size_t offset = 0;
147152
ucp_device_mem_list_elem_t elems[count];
148153

149-
for (size_t i = 0; i < count; ++i) {
154+
for (size_t i = 0; i < data_count; ++i) {
150155
elems[i].field_mask = UCP_DEVICE_MEM_LIST_ELEM_FIELD_MEMH |
151156
UCP_DEVICE_MEM_LIST_ELEM_FIELD_RKEY |
152157
UCP_DEVICE_MEM_LIST_ELEM_FIELD_LOCAL_ADDR |
@@ -156,11 +161,19 @@ private:
156161
elems[i].rkey = perf.ucp.rkey;
157162
elems[i].local_addr = UCS_PTR_BYTE_OFFSET(perf.send_buffer, offset);
158163
elems[i].remote_addr = perf.ucp.remote_addr + offset;
159-
elems[i].length = (i == count - 1) ? ONESIDED_SIGNAL_SIZE :
160-
perf.params.msg_size_list[i];
164+
elems[i].length = perf.params.msg_size_list[i];
161165
offset += elems[i].length;
162166
}
163167

168+
if (has_counter(perf)) {
169+
elems[data_count].field_mask = UCP_DEVICE_MEM_LIST_ELEM_FIELD_RKEY |
170+
UCP_DEVICE_MEM_LIST_ELEM_FIELD_REMOTE_ADDR |
171+
UCP_DEVICE_MEM_LIST_ELEM_FIELD_LENGTH;
172+
elems[data_count].rkey = perf.ucp.rkey;
173+
elems[data_count].remote_addr = perf.ucp.remote_addr + offset;
174+
elems[data_count].length = ONESIDED_SIGNAL_SIZE;
175+
}
176+
164177
ucp_device_mem_list_params_t params;
165178
params.field_mask = UCP_DEVICE_MEM_LIST_PARAMS_FIELD_ELEMENTS |
166179
UCP_DEVICE_MEM_LIST_PARAMS_FIELD_ELEMENT_SIZE |
@@ -178,18 +191,22 @@ private:
178191

179192
void init_elements(const ucx_perf_context_t &perf)
180193
{
181-
/* +1 for the counter */
182-
size_t count = perf.params.msg_size_cnt + 1;
194+
size_t data_count = perf.params.msg_size_cnt;
195+
size_t count = data_count + (has_counter(perf) ? 1 : 0);
183196

184197
std::vector<unsigned> indices(count);
185198
std::vector<size_t> local_offsets(count, 0);
186199
std::vector<size_t> remote_offsets(count, 0);
187200
std::vector<size_t> lengths(count);
188201

189-
for (unsigned i = 0; i < count; ++i) {
202+
for (unsigned i = 0; i < data_count; ++i) {
190203
indices[i] = i;
191-
lengths[i] = (i == count - 1) ? ONESIDED_SIGNAL_SIZE :
192-
perf.params.msg_size_list[i];
204+
lengths[i] = perf.params.msg_size_list[i];
205+
}
206+
207+
if (has_counter(perf)) {
208+
indices[data_count] = data_count;
209+
lengths[data_count] = ONESIDED_SIGNAL_SIZE;
193210
}
194211

195212
m_params.indices = device_vector(indices);

src/ucp/api/device/ucp_device_impl.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,11 @@ UCS_F_DEVICE ucs_status_t ucp_device_counter_inc(
232232
* This operation can be polled on the receiver to detect completion of all the
233233
* operations of the batch, started during the same routine call.
234234
*
235-
* The last entry in the descriptor list contains
236-
* the remote memory registration descriptors to be used for the increment
237-
* operation.
235+
* All the elements except the last one are data elements that must contain all
236+
* @ref ucp_device_mem_list_elem_field fields of @ref ucp_device_mem_list_elem_t.
237+
*
238+
* The last entry in the descriptor list contains the remote memory
239+
* registration descriptors to be used for the increment operation.
238240
*
239241
* The routine returns a request that can be progressed and checked for
240242
* completion with @ref ucp_device_progress_req.

src/ucp/api/device/ucp_host.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,16 @@ BEGIN_C_DECLS
3131
* The enumeration allows specifying which fields in @ref
3232
* ucp_device_mem_list_elem are present.
3333
*
34+
* @note Counter elements can omit the @a UCP_DEVICE_MEM_LIST_ELEM_FIELD_MEMH
35+
* and @a UCP_DEVICE_MEM_LIST_ELEM_FIELD_LOCAL_ADDR fields.
36+
*
3437
* It is used to enable backward compatibility support.
3538
*/
3639
enum ucp_device_mem_list_elem_field {
3740
UCP_DEVICE_MEM_LIST_ELEM_FIELD_MEMH = UCS_BIT(0), /**< Source memory handle */
38-
UCP_DEVICE_MEM_LIST_ELEM_FIELD_RKEY = UCS_BIT(1), /**< Unpacked remote memory key */
41+
UCP_DEVICE_MEM_LIST_ELEM_FIELD_RKEY = UCS_BIT(1), /**< Unpacked remote memory key (always required) */
3942
UCP_DEVICE_MEM_LIST_ELEM_FIELD_LOCAL_ADDR = UCS_BIT(2), /**< Local address */
40-
UCP_DEVICE_MEM_LIST_ELEM_FIELD_REMOTE_ADDR = UCS_BIT(3), /**< Remote address */
43+
UCP_DEVICE_MEM_LIST_ELEM_FIELD_REMOTE_ADDR = UCS_BIT(3), /**< Remote address */
4144
UCP_DEVICE_MEM_LIST_ELEM_FIELD_LENGTH = UCS_BIT(4) /**< Length of the local buffer in bytes */
4245
};
4346

@@ -48,6 +51,8 @@ enum ucp_device_mem_list_elem_field {
4851
*
4952
* This describes a pair of local and remote memory for which a memory operation
5053
* can later be performed multiple times, possibly with varying memory offsets.
54+
*
55+
* @note Counter elements can omit the @a memh and @a local_addr fields.
5156
*/
5257
typedef struct ucp_device_mem_list_elem {
5358
/**
@@ -80,6 +85,7 @@ typedef struct ucp_device_mem_list_elem {
8085

8186
/**
8287
* Unpacked memory key for the remote memory endpoint.
88+
* Always required.
8389
*/
8490
ucp_rkey_h rkey;
8591
} ucp_device_mem_list_elem_t;

0 commit comments

Comments
 (0)