Skip to content

Commit 03dbcd8

Browse files
Sourabh Betigeriagunashe
authored andcommitted
SWDEV-440866 - [hip-roclr] Adds support to batch memory operations APIs
Change-Id: I5ac63a6626af8c2b4ac382c52dfe1aaf0b3716b8
1 parent 3ad8f1b commit 03dbcd8

File tree

18 files changed

+220
-20
lines changed

18 files changed

+220
-20
lines changed

hipamd/include/hip/amd_detail/hip_api_trace.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
// - Reset any of the *_STEP_VERSION defines to zero if the corresponding *_MAJOR_VERSION increases
6262
#define HIP_API_TABLE_STEP_VERSION 0
6363
#define HIP_COMPILER_API_TABLE_STEP_VERSION 0
64-
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 6
64+
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 7
6565

6666
// HIP API interface
6767
typedef hipError_t (*t___hipPopCallConfiguration)(dim3* gridDim, dim3* blockDim, size_t* sharedMem,
@@ -722,6 +722,8 @@ typedef hipError_t (*t_hipStreamWriteValue32)(hipStream_t stream, void* ptr, uin
722722
unsigned int flags);
723723
typedef hipError_t (*t_hipStreamWriteValue64)(hipStream_t stream, void* ptr, uint64_t value,
724724
unsigned int flags);
725+
typedef hipError_t (*t_hipStreamBatchMemOp)(hipStream_t stream, unsigned int count,
726+
hipStreamBatchMemOpParams* paramArray, unsigned int flags);
725727
typedef hipError_t (*t_hipTexObjectCreate)(hipTextureObject_t* pTexObject,
726728
const HIP_RESOURCE_DESC* pResDesc,
727729
const HIP_TEXTURE_DESC* pTexDesc,
@@ -1519,6 +1521,9 @@ struct HipDispatchTable {
15191521
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 6
15201522
t_hipDeviceGetTexture1DLinearMaxWidth hipDeviceGetTexture1DLinearMaxWidth_fn;
15211523

1524+
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 7
1525+
t_hipStreamBatchMemOp hipStreamBatchMemOp_fn;
1526+
15221527
// DO NOT EDIT ABOVE!
15231528
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 7
15241529

hipamd/include/hip/amd_detail/hip_prof_str.h

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,8 @@ enum hip_api_id_t {
425425
HIP_API_ID_hipMemcpyHtoAAsync = 405,
426426
HIP_API_ID_hipSetValidDevices = 406,
427427
HIP_API_ID_hipExtHostAlloc = 407,
428-
HIP_API_ID_LAST = 407,
428+
HIP_API_ID_hipStreamBatchMemOp = 408,
429+
HIP_API_ID_LAST = 408,
429430

430431
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
431432
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
@@ -545,7 +546,6 @@ static inline const char* hip_api_name(const uint32_t id) {
545546
case HIP_API_ID_hipEventQuery: return "hipEventQuery";
546547
case HIP_API_ID_hipEventRecord: return "hipEventRecord";
547548
case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize";
548-
case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError";
549549
case HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount";
550550
case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel";
551551
case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice";
@@ -859,6 +859,8 @@ static inline const char* hip_api_name(const uint32_t id) {
859859
case HIP_API_ID_hipUserObjectRelease: return "hipUserObjectRelease";
860860
case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain";
861861
case HIP_API_ID_hipWaitExternalSemaphoresAsync: return "hipWaitExternalSemaphoresAsync";
862+
case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError";
863+
case HIP_API_ID_hipStreamBatchMemOp: return "hipStreamBatchMemOp";
862864
};
863865
return "unknown";
864866
};
@@ -1262,6 +1264,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
12621264
if (strcmp("hipUserObjectRelease", name) == 0) return HIP_API_ID_hipUserObjectRelease;
12631265
if (strcmp("hipUserObjectRetain", name) == 0) return HIP_API_ID_hipUserObjectRetain;
12641266
if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync;
1267+
if (strcmp("hipStreamBatchMemOp", name) == 0) return HIP_API_ID_hipStreamBatchMemOp;
12651268
return HIP_API_ID_NONE;
12661269
}
12671270

@@ -3623,6 +3626,13 @@ typedef struct hip_api_data_s {
36233626
unsigned int numExtSems;
36243627
hipStream_t stream;
36253628
} hipWaitExternalSemaphoresAsync;
3629+
struct {
3630+
hipStream_t stream;
3631+
unsigned int count;
3632+
hipStreamBatchMemOpParams* paramArray;
3633+
hipStreamBatchMemOpParams paramArray__val;
3634+
unsigned int flags;
3635+
} hipStreamBatchMemOp;
36263636
} args;
36273637
uint64_t *phase_data;
36283638
} hip_api_data_t;
@@ -5892,6 +5902,15 @@ typedef struct hip_api_data_s {
58925902
cb_data.args.hipStreamWriteValue64.value = (uint64_t)value; \
58935903
cb_data.args.hipStreamWriteValue64.flags = (unsigned int)flags; \
58945904
};
5905+
5906+
// hipStreamBatchMemOp[('hipStream_t', 'stream'), ('unsigned int', 'count'),
5907+
// ('hipStreamBatchMemOpParams*', 'paramArray'), ('unsigned int', 'flags')]
5908+
#define INIT_hipStreamBatchMemOp_CB_ARGS_DATA(cb_data) { \
5909+
cb_data.args.hipStreamBatchMemOp.stream = (hipStream_t)stream; \
5910+
cb_data.args.hipStreamBatchMemOp.count = (unsigned int)count; \
5911+
cb_data.args.hipStreamBatchMemOp.paramArray= (hipStreamBatchMemOpParams*)paramArray; \
5912+
cb_data.args.hipStreamBatchMemOp.flags = (unsigned int)flags; \
5913+
};
58955914
// hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')]
58965915
#define INIT_hipTexRefGetAddress_CB_ARGS_DATA(cb_data) { \
58975916
cb_data.args.hipTexRefGetAddress.dev_ptr = (hipDeviceptr_t*)dptr; \
@@ -7527,6 +7546,11 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
75277546
// hipStreamWriteValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags')]
75287547
case HIP_API_ID_hipStreamWriteValue64:
75297548
break;
7549+
// hipStreamBatchMemOp[('hipStream_t', 'stream'), ('unsigned int', 'count'),
7550+
// ('hipStreamBatchMemOpParams*', 'paramArray'), ('unsigned int', 'flags')]
7551+
case HIP_API_ID_hipStreamBatchMemOp:
7552+
if (data->args.hipStreamBatchMemOp.paramArray) data->args.hipStreamBatchMemOp.paramArray__val = *(data->args.hipStreamBatchMemOp.paramArray);
7553+
break;
75307554
// hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')]
75317555
case HIP_API_ID_hipTexRefGetAddress:
75327556
if (data->args.hipTexRefGetAddress.dev_ptr) data->args.hipTexRefGetAddress.dev_ptr__val = *(data->args.hipTexRefGetAddress.dev_ptr);
@@ -10614,6 +10638,15 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
1061410638
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.flags);
1061510639
oss << ")";
1061610640
break;
10641+
case HIP_API_ID_hipStreamBatchMemOp:
10642+
oss << "hipStreamBatchMemOp(";
10643+
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.stream);
10644+
oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.count);
10645+
if (data->args.hipStreamBatchMemOp.paramArray == NULL) oss << ", paramArray=NULL";
10646+
else { oss << ", paramArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.paramArray__val); }
10647+
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.flags);
10648+
oss << ")";
10649+
break;
1061710650
case HIP_API_ID_hipTexRefGetAddress:
1061810651
oss << "hipTexRefGetAddress(";
1061910652
if (data->args.hipTexRefGetAddress.dev_ptr == NULL) oss << "dev_ptr=NULL";

hipamd/src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ if(WIN32 OR NOT BUILD_SHARED_LIBS)
299299
# rocprofiler-register is not support on Windows
300300
set(HIP_ENABLE_ROCPROFILER_REGISTER OFF)
301301
else()
302-
option(HIP_ENABLE_ROCPROFILER_REGISTER "Enable rocprofiler-register support" ON)
302+
option(HIP_ENABLE_ROCPROFILER_REGISTER "Enable rocprofiler-register support" ON)
303303
endif()
304304

305305
if(HIP_ENABLE_ROCPROFILER_REGISTER)

hipamd/src/amdhip.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,3 +480,4 @@ hipGraphExecNodeSetParams
480480
hipDrvGraphMemcpyNodeSetParams
481481
hipDrvGraphMemcpyNodeGetParams
482482
hipExtHostAlloc
483+
hipStreamBatchMemOp

hipamd/src/hip_api_trace.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,8 @@ hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, uint64_t value, u
602602
uint64_t mask);
603603
hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags);
604604
hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags);
605+
hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count,
606+
hipStreamBatchMemOpParams* paramArray, unsigned int flags);
605607
hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc,
606608
const HIP_TEXTURE_DESC* pTexDesc,
607609
const HIP_RESOURCE_VIEW_DESC* pResViewDesc);
@@ -1197,6 +1199,7 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
11971199
ptrDispatchTable->hipStreamWaitValue64_fn = hip::hipStreamWaitValue64;
11981200
ptrDispatchTable->hipStreamWriteValue32_fn = hip::hipStreamWriteValue32;
11991201
ptrDispatchTable->hipStreamWriteValue64_fn = hip::hipStreamWriteValue64;
1202+
ptrDispatchTable->hipStreamBatchMemOp_fn = hip::hipStreamBatchMemOp;
12001203
ptrDispatchTable->hipTexObjectCreate_fn = hip::hipTexObjectCreate;
12011204
ptrDispatchTable->hipTexObjectDestroy_fn = hip::hipTexObjectDestroy;
12021205
ptrDispatchTable->hipTexObjectGetResourceDesc_fn = hip::hipTexObjectGetResourceDesc;
@@ -1887,16 +1890,19 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipDrvGraphMemcpyNodeSetParams_fn, 460)
18871890
HIP_ENFORCE_ABI(HipDispatchTable, hipExtHostAlloc_fn, 461)
18881891
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 6
18891892
HIP_ENFORCE_ABI(HipDispatchTable, hipDeviceGetTexture1DLinearMaxWidth_fn, 462)
1893+
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 7
1894+
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamBatchMemOp_fn, 463);
1895+
18901896

18911897
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
18921898
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
18931899
//
18941900
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
18951901
//
18961902
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
1897-
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 463)
1903+
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 464)
18981904

1899-
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 6,
1905+
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 7,
19001906
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
19011907
"pointers and then update this check so it is true");
19021908
#endif

hipamd/src/hip_hcc.map.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,7 @@ local:
583583
hip_6.3 {
584584
global:
585585
hipExtHostAlloc;
586+
hipStreamBatchMemOp;
586587
local:
587588
*;
588589
} hip_6.2;

hipamd/src/hip_stream_ops.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,34 @@
2323
#include "platform/command_utils.hpp"
2424

2525
namespace hip {
26+
hipError_t ihipBatchMemOperation(hipStream_t stream, cl_command_type cmdType, unsigned int count,
27+
hipStreamBatchMemOpParams* paramArray, unsigned int flags) {
28+
if (paramArray == nullptr || flags != 0 || count > 256) {
29+
return hipErrorInvalidValue;
30+
}
31+
32+
if (!hip::isValid(stream)) {
33+
return hipErrorContextIsDestroyed;
34+
}
35+
36+
hip::Stream* hip_stream = hip::getStream(stream);
37+
amd::Command::EventWaitList waitList;
38+
39+
amd::BatchMemoryOperationCommand* command = new amd::BatchMemoryOperationCommand(
40+
*hip_stream, cmdType, count, flags, waitList, paramArray, sizeof(hipStreamBatchMemOpParams));
41+
42+
if (command == nullptr) {
43+
return hipErrorOutOfMemory;
44+
}
45+
command->enqueue();
46+
command->release();
47+
HIP_RETURN(hipSuccess);
48+
}
49+
50+
2651
hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void* ptr,
27-
uint64_t value, uint64_t mask, unsigned int flags, size_t sizeBytes) {
52+
uint64_t value, uint64_t mask, unsigned int flags,
53+
size_t sizeBytes) {
2854
size_t offset = 0;
2955
unsigned int outFlags = 0;
3056

@@ -136,4 +162,15 @@ hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value,
136162
0, // flags un-used for now set it to 0
137163
sizeof(uint64_t)));
138164
}
165+
166+
hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count,
167+
hipStreamBatchMemOpParams* paramArray, unsigned int flags) {
168+
HIP_INIT_API(hipStreamBatchMemOp, count, paramArray, flags);
169+
HIP_RETURN_DURATION(ihipBatchMemOperation(
170+
stream,
171+
ROCCLR_COMMAND_BATCH_STREAM,
172+
count,
173+
paramArray,
174+
flags));
175+
}
139176
} // namespace hip

hipamd/src/hip_table_interface.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1413,6 +1413,10 @@ hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value,
14131413
unsigned int flags) {
14141414
return hip::GetHipDispatchTable()->hipStreamWriteValue64_fn(stream, ptr, value, flags);
14151415
}
1416+
hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count,
1417+
hipStreamBatchMemOpParams* paramArray, unsigned int flags) {
1418+
return hip::GetHipDispatchTable()->hipStreamBatchMemOp_fn(stream, count, paramArray, flags);
1419+
}
14161420
hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc,
14171421
const HIP_TEXTURE_DESC* pTexDesc,
14181422
const HIP_RESOURCE_VIEW_DESC* pResViewDesc) {

rocclr/device/blit.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,12 @@ class BlitManager : public amd::HeapObject {
233233
uint64_t mask
234234
) const = 0;
235235

236+
//! Stream batch memory operation
237+
virtual bool batchMemOps(const void* paramArray,
238+
size_t paramSize,
239+
uint32_t count
240+
) const = 0;
241+
236242
//! Enables synchronization on blit operations
237243
void enableSynchronization() { syncOperation_ = true; }
238244

rocclr/device/blitcl.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
4343

4444
extern void __amd_streamOpsWait(__global uint*, __global ulong*, ulong, ulong, ulong);
4545

46+
extern void __amd_batchMemOp(__global void*, uint count);
47+
4648
extern void __ockl_dm_init_v1(ulong, ulong, uint, uint);
4749

4850
extern void __ockl_gws_init(uint nwm1, uint rid);
@@ -162,6 +164,10 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
162164
ulong4 srcRect, ulong4 dstRect, ulong4 size) {
163165
__amd_copyBufferRectAligned(src, dst, srcRect, dstRect, size);
164166
}
167+
168+
__kernel void __amd_rocclr_batchMemOp(__global void* params, uint count) {
169+
__amd_batchMemOp(params, count);
170+
}
165171
);
166172

167173
const char* HipExtraSourceCode = BLIT_KERNELS(
@@ -254,7 +260,6 @@ const char* BlitImageSourceCode = BLIT_KERNELS(
254260
__amd_copyImageToBuffer(src, dstUInt, dstUShort, dstUChar, srcOrigin, dstOrigin, size, format,
255261
pitch);
256262
}
257-
258263
);
259264

260265
} // namespace amd::device

0 commit comments

Comments
 (0)