Skip to content

Commit 8a1b617

Browse files
committed
Promote till commit 'e3b87544482f43760e0bf1c49e628039199c4bdf'
Change-Id: Ibdf9d022f18b264f484dbe57ff15e3f09f1d0e61
2 parents 220465d + e3b8754 commit 8a1b617

File tree

18 files changed

+158
-60
lines changed

18 files changed

+158
-60
lines changed

.azuredevops/rocm-ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ resources:
88
type: github
99
endpoint: ROCm
1010
name: ROCm/HIP
11-
ref: amd-staging
11+
ref: $(Build.SourceBranch)
1212
- repository: hipother_repo
1313
type: github
1414
endpoint: ROCm
1515
name: ROCm/hipother
16-
ref: amd-staging
16+
ref: $(Build.SourceBranch)
1717
pipelines:
1818
- pipeline: hip_pipeline
1919
source: \HIP

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs.amd.com/projects/HIP/en/latest/index.html)
44

5+
## HIP 6.4 (For ROCm 6.4)
6+
7+
### Changed
8+
* Added new environment variable
9+
- `DEBUG_HIP_7_PREVIEW` This is used for enabling the backward incompatible changes before the next major ROCm release 7.0. By default this is set to 0. Users can set this variable to 0x1, to match the behavior of hipGetLastError with its corresponding CUDA API.
10+
511
## HIP 6.3 for ROCm 6.3
612

713
### Changed

hipamd/include/hip/amd_detail/amd_warp_functions.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,19 +99,16 @@ int __any(int predicate) {
9999
return __ockl_wfany_i32(predicate);
100100
}
101101

102-
// XXX from llvm/include/llvm/IR/InstrTypes.h
103-
#define ICMP_NE 33
104-
105102
__device__
106103
inline
107104
unsigned long long int __ballot(int predicate) {
108-
return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
105+
return __builtin_amdgcn_ballot_w64(predicate);
109106
}
110107

111108
__device__
112109
inline
113110
unsigned long long int __ballot64(int predicate) {
114-
return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
111+
return __ballot(predicate);
115112
}
116113

117114
// See amd_warp_sync_functions.h for an explanation of this preprocessor flag.

hipamd/src/hip_device.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -257,15 +257,30 @@ void Device::destroyAllStreams() {
257257
}
258258

259259
// ================================================================================================
260-
void Device::SyncAllStreams( bool cpu_wait) {
260+
void Device::SyncAllStreams(bool cpu_wait, bool wait_blocking_streams_only) {
261261
// Make a local copy to avoid stalls for GPU finish with multiple threads
262262
std::vector<hip::Stream*> streams;
263263
streams.reserve(streamSet.size());
264264
{
265265
amd::ScopedLock lock(streamSetLock);
266-
for (auto it : streamSet) {
267-
streams.push_back(it);
268-
it->retain();
266+
if (wait_blocking_streams_only) {
267+
auto null_stream = GetNullStream();
268+
for (auto it : streamSet) {
269+
if (it != null_stream && (it->Flags() & hipStreamNonBlocking) == 0) {
270+
streams.push_back(it);
271+
it->retain();
272+
}
273+
}
274+
// Add null stream to the end of the list so that wait happens after all blocking streams.
275+
if (null_stream != nullptr) {
276+
streams.push_back(null_stream);
277+
null_stream->retain();
278+
}
279+
} else {
280+
for (auto it : streamSet) {
281+
streams.push_back(it);
282+
it->retain();
283+
}
269284
}
270285
}
271286
for (auto it : streams) {

hipamd/src/hip_event.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ hipError_t hipEventSynchronize(hipEvent_t event) {
439439
hip::Stream* s = reinterpret_cast<hip::Stream*>(e->GetCaptureStream());
440440
if ((s != nullptr) && (s->GetCaptureStatus() == hipStreamCaptureStatusActive)) {
441441
s->SetCaptureStatus(hipStreamCaptureStatusInvalidated);
442-
return HIP_RETURN(hipErrorCapturedEvent);
442+
HIP_RETURN(hipErrorCapturedEvent);
443443
}
444444
if (hip::Stream::StreamCaptureOngoing(e->GetCaptureStream()) == true) {
445445
HIP_RETURN(hipErrorStreamCaptureUnsupported);
@@ -460,7 +460,7 @@ hipError_t ihipEventQuery(hipEvent_t event) {
460460
hip::Stream* s = reinterpret_cast<hip::Stream*>(e->GetCaptureStream());
461461
if ((s != nullptr) && (s->GetCaptureStatus() == hipStreamCaptureStatusActive)) {
462462
s->SetCaptureStatus(hipStreamCaptureStatusInvalidated);
463-
return HIP_RETURN(hipErrorCapturedEvent);
463+
HIP_RETURN(hipErrorCapturedEvent);
464464
}
465465
return e->query();
466466
}

hipamd/src/hip_graph.cpp

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,7 +1231,7 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraph
12311231
HIP_INIT_API(hipGraphExecMemcpyNodeSetParams1D, hGraphExec, node, dst, src, count, kind);
12321232
hip::GraphNode* n = reinterpret_cast<hip::GraphNode*>(node);
12331233
if (hGraphExec == nullptr || !hip::GraphNode::isNodeValid(n) || dst == nullptr ||
1234-
src == nullptr || count == 0 || src == dst) {
1234+
src == nullptr || count == 0 || src == dst || n->GetType() != hipGraphNodeTypeMemcpy) {
12351235
HIP_RETURN(hipErrorInvalidValue);
12361236
}
12371237
hip::GraphNode* clonedNode = reinterpret_cast<hip::GraphNode*>(
@@ -1354,7 +1354,11 @@ hipError_t ihipGraphInstantiate(hip::GraphExec** pGraphExec, hip::Graph* graph,
13541354
}
13551355
std::vector<std::vector<hip::GraphNode*>> parallelLists;
13561356
std::unordered_map<hip::GraphNode*, std::vector<hip::GraphNode*>> nodeWaitLists;
1357-
clonedGraph->GetRunList(parallelLists, nodeWaitLists);
1357+
if (DEBUG_HIP_FORCE_GRAPH_QUEUES == 1) {
1358+
parallelLists.push_back(graphNodes);
1359+
} else {
1360+
clonedGraph->GetRunList(parallelLists, nodeWaitLists);
1361+
}
13581362
if (DEBUG_HIP_FORCE_GRAPH_QUEUES != 0) {
13591363
clonedGraph->ScheduleNodes();
13601364
}
@@ -1632,7 +1636,8 @@ hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNo
16321636
HIP_INIT_API(hipGraphExecMemcpyNodeSetParams, hGraphExec, node, pNodeParams);
16331637
hip::GraphNode* n = reinterpret_cast<hip::GraphNode*>(node);
16341638
if (hGraphExec == nullptr ||
1635-
!hip::GraphNode::isNodeValid(reinterpret_cast<hip::GraphNode*>(n))) {
1639+
!hip::GraphNode::isNodeValid(reinterpret_cast<hip::GraphNode*>(n)) ||
1640+
n->GetType() != hipGraphNodeTypeMemcpy) {
16361641
HIP_RETURN(hipErrorInvalidValue);
16371642
}
16381643
if (ihipMemcpy3D_validate(pNodeParams) != hipSuccess) {
@@ -1693,7 +1698,7 @@ hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNo
16931698
hip::GraphNode* n = reinterpret_cast<hip::GraphNode*>(node);
16941699

16951700
if (hGraphExec == nullptr || !hip::GraphNode::isNodeValid(n) || pNodeParams == nullptr ||
1696-
pNodeParams->dst == nullptr) {
1701+
pNodeParams->dst == nullptr || n->GetType() != hipGraphNodeTypeMemset) {
16971702
HIP_RETURN(hipErrorInvalidValue);
16981703
}
16991704
if (ihipGraphMemsetParams_validate(pNodeParams) != hipSuccess) {
@@ -1758,7 +1763,7 @@ hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNo
17581763
hip::GraphNode* n = reinterpret_cast<hip::GraphNode*>(node);
17591764
if (hGraphExec == nullptr ||
17601765
!hip::GraphNode::isNodeValid(n) ||
1761-
pNodeParams == nullptr || pNodeParams->func == nullptr) {
1766+
pNodeParams == nullptr || pNodeParams->func == nullptr || n->GetType() != hipGraphNodeTypeKernel) {
17621767
HIP_RETURN(hipErrorInvalidValue);
17631768
}
17641769
hip::GraphNode* clonedNode = reinterpret_cast<hip::GraphExec*>(hGraphExec)->GetClonedNode(n);
@@ -1795,7 +1800,7 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGra
17951800
hip::GraphNode* n = reinterpret_cast<hip::GraphNode*>(node);
17961801
hip::Graph* cg = reinterpret_cast<hip::Graph*>(childGraph);
17971802
if (hGraphExec == nullptr || !hip::GraphNode::isNodeValid(n) || childGraph == nullptr ||
1798-
!hip::Graph::isGraphValid(cg)) {
1803+
!hip::Graph::isGraphValid(cg) || n->GetType() != hipGraphNodeTypeGraph) {
17991804
HIP_RETURN(hipErrorInvalidValue);
18001805
}
18011806

@@ -2424,7 +2429,7 @@ hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipGraph
24242429
hip::GraphNode* n = reinterpret_cast<hip::GraphNode*>(hNode);
24252430

24262431
if (hGraphExec == nullptr || hNode == nullptr || event == nullptr ||
2427-
(n->GetType() != hipGraphNodeTypeWaitEvent)) {
2432+
(n->GetType() != hipGraphNodeTypeWaitEvent) || n->GetType() != hipGraphNodeTypeWaitEvent) {
24282433
HIP_RETURN(hipErrorInvalidValue);
24292434
}
24302435
hip::GraphNode* clonedNode = reinterpret_cast<hip::GraphExec*>(hGraphExec)->GetClonedNode(n);
@@ -2475,7 +2480,7 @@ hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode
24752480
HIP_INIT_API(hipGraphExecHostNodeSetParams, hGraphExec, node, pNodeParams);
24762481
hip::GraphNode* n = reinterpret_cast<hip::GraphNode*>(node);
24772482
if (hGraphExec == nullptr || pNodeParams == nullptr || pNodeParams->fn == nullptr ||
2478-
!hip::GraphNode::isNodeValid(n)) {
2483+
!hip::GraphNode::isNodeValid(n) || n->GetType() != hipGraphNodeTypeHost) {
24792484
HIP_RETURN(hipErrorInvalidValue);
24802485
}
24812486
hip::GraphNode* clonedNode = reinterpret_cast<hip::GraphExec*>(hGraphExec)->GetClonedNode(n);
@@ -3161,7 +3166,8 @@ hipError_t hipGraphExecExternalSemaphoresSignalNodeSetParams(hipGraphExec_t hGra
31613166
hip::GraphNode* n = reinterpret_cast<hip::GraphNode*>(hNode);
31623167
hip::GraphExec* graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
31633168
if (hGraphExec == nullptr || hNode == nullptr || !hip::GraphExec::isGraphExecValid(graphExec) ||
3164-
!hip::GraphNode::isNodeValid(n) || nodeParams == nullptr) {
3169+
!hip::GraphNode::isNodeValid(n) || nodeParams == nullptr ||
3170+
n->GetType() != hipGraphNodeTypeExtSemaphoreSignal) {
31653171
HIP_RETURN(hipErrorInvalidValue);
31663172
}
31673173
hip::GraphNode* clonedNode = graphExec->GetClonedNode(n);
@@ -3179,7 +3185,8 @@ hipError_t hipGraphExecExternalSemaphoresWaitNodeSetParams(hipGraphExec_t hGraph
31793185
hip::GraphNode* n = reinterpret_cast<hip::GraphNode*>(hNode);
31803186
hip::GraphExec* graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
31813187
if (hGraphExec == nullptr || hNode == nullptr || !hip::GraphExec::isGraphExecValid(graphExec) ||
3182-
!hip::GraphNode::isNodeValid(n) || nodeParams == nullptr) {
3188+
!hip::GraphNode::isNodeValid(n) || nodeParams == nullptr ||
3189+
n->GetType() != hipGraphNodeTypeExtSemaphoreWait) {
31833190
HIP_RETURN(hipErrorInvalidValue);
31843191
}
31853192
hip::GraphNode* clonedNode = graphExec->GetClonedNode(n);
@@ -3322,12 +3329,12 @@ hipError_t ihipGraphNodeSetParams(hip::GraphNode* n, hipGraphNodeParams *nodePar
33223329
nodeParams->eventRecord.event);
33233330
break;
33243331
case hipGraphNodeTypeExtSemaphoreSignal:
3325-
status = hipErrorNotSupported;
3326-
// to be added.
3332+
status = reinterpret_cast<hip::hipGraphExternalSemSignalNode*>(n)->SetParams(
3333+
&nodeParams->extSemSignal);
33273334
break;
33283335
case hipGraphNodeTypeExtSemaphoreWait:
3329-
status = hipErrorNotSupported;
3330-
// to be added.
3336+
status = reinterpret_cast<hip::hipGraphExternalSemWaitNode*>(n)->SetParams(
3337+
&nodeParams->extSemWait);
33313338
break;
33323339
case hipGraphNodeTypeMemAlloc:
33333340
status = hipErrorNotSupported;

hipamd/src/hip_graph_internal.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1129,6 +1129,9 @@ class GraphKernelNode : public GraphNode {
11291129
const amd::KernelSignature& signature = kernel->signature();
11301130
numParams_ = signature.numParameters();
11311131

1132+
// Copy gridDim, blockDim, sharedMemBytes and func
1133+
kernelParams_ = *pNodeParams;
1134+
11321135
// Allocate/assign memory if params are passed part of 'kernelParams'
11331136
if (pNodeParams->kernelParams != nullptr) {
11341137
kernelParams_.kernelParams = (void**)malloc(numParams_ * sizeof(void*));
@@ -1303,7 +1306,6 @@ class GraphKernelNode : public GraphNode {
13031306
return status;
13041307
}
13051308
freeParams();
1306-
kernelParams_ = *params;
13071309
status = copyParams(params);
13081310
if (status != hipSuccess) {
13091311
ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed to set params");

hipamd/src/hip_internal.hpp

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -164,19 +164,36 @@ const char* ihipGetErrorName(hipError_t hip_error);
164164
HIP_RETURN(hipErrorNoDevice); \
165165
}
166166

167-
#define HIP_INIT_API_NO_RETURN(cid, ...) \
167+
#define HIP_INIT_API_NO_RETURN(cid, ...) \
168168
HIP_INIT_API_INTERNAL(1, cid, __VA_ARGS__)
169169

170170
#define HIP_RETURN_DURATION(ret, ...) \
171-
hip::tls.last_error_ = ret; \
171+
hip::tls.last_command_error_ = ret; \
172+
if (DEBUG_HIP_7_PREVIEW & amd::CHANGE_HIP_GET_LAST_ERROR) { \
173+
if (hip::tls.last_command_error_ != hipSuccess && \
174+
hip::tls.last_command_error_ != hipErrorNotReady) { \
175+
hip::tls.last_error_ = hip::tls.last_command_error_; \
176+
} \
177+
} else { \
178+
hip::tls.last_error_ = hip::tls.last_command_error_; \
179+
} \
172180
HIPPrintDuration(amd::LOG_INFO, amd::LOG_API, &startTimeUs, "%s: Returned %s : %s", __func__, \
173-
hip::ihipGetErrorName(hip::tls.last_error_), ToString(__VA_ARGS__).c_str()); \
174-
return hip::tls.last_error_;
175-
176-
#define HIP_RETURN(ret, ...) \
177-
hip::tls.last_error_ = ret; \
178-
HIP_ERROR_PRINT(hip::tls.last_error_, __VA_ARGS__) \
179-
return hip::tls.last_error_;
181+
hip::ihipGetErrorName(hip::tls.last_command_error_), \
182+
ToString(__VA_ARGS__).c_str()); \
183+
return hip::tls.last_command_error_;
184+
185+
#define HIP_RETURN(ret, ...) \
186+
hip::tls.last_command_error_ = ret; \
187+
if (DEBUG_HIP_7_PREVIEW & amd::CHANGE_HIP_GET_LAST_ERROR) { \
188+
if (hip::tls.last_command_error_ != hipSuccess && \
189+
hip::tls.last_command_error_ != hipErrorNotReady) { \
190+
hip::tls.last_error_ = hip::tls.last_command_error_; \
191+
} \
192+
} else { \
193+
hip::tls.last_error_ = hip::tls.last_command_error_; \
194+
} \
195+
HIP_ERROR_PRINT(hip::tls.last_command_error_, __VA_ARGS__) \
196+
return hip::tls.last_command_error_;
180197

181198
#define HIP_RETURN_ONFAIL(func) \
182199
do { \
@@ -595,7 +612,7 @@ class stream_per_thread {
595612

596613
void destroyAllStreams();
597614

598-
void SyncAllStreams( bool cpu_wait = true);
615+
void SyncAllStreams( bool cpu_wait = true, bool wait_blocking_streams_only = false);
599616

600617
bool StreamCaptureBlocking();
601618

@@ -610,14 +627,15 @@ class stream_per_thread {
610627
public:
611628
Device* device_;
612629
std::stack<Device*> ctxt_stack_;
613-
hipError_t last_error_;
630+
hipError_t last_error_, last_command_error_;
614631
std::vector<hip::Stream*> capture_streams_;
615632
hipStreamCaptureMode stream_capture_mode_;
616633
std::stack<ihipExec_t> exec_stack_;
617634
stream_per_thread stream_per_thread_obj_;
618635

619636
TlsAggregator(): device_(nullptr),
620637
last_error_(hipSuccess),
638+
last_command_error_(hipSuccess),
621639
stream_capture_mode_(hipStreamCaptureModeGlobal) {
622640
}
623641
~TlsAggregator() {

hipamd/src/hip_memory.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,7 +1063,7 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder,
10631063
imageType,
10641064
CL_MEM_READ_WRITE,
10651065
imageFormat,
1066-
(imageWidth == 0) ? 1 : imageWidth,
1066+
imageWidth,
10671067
(imageHeight == 0) ? 1 : imageHeight,
10681068
(imageDepth == 0) ? 1 : imageDepth,
10691069
imageRowPitch,
@@ -4621,7 +4621,7 @@ hipError_t hipExternalMemoryGetMappedMipmappedArray(
46214621
hip::getNumChannels(mipmapDesc->formatDesc),
46224622
mipmapDesc->flags};
46234623
if (!hip::CheckArrayFormat(mipmapDesc->formatDesc)) {
4624-
return HIP_RETURN(hipErrorInvalidValue);
4624+
HIP_RETURN(hipErrorInvalidValue);
46254625
}
46264626

46274627
HIP_RETURN(ihipMipmapArrayCreate(mipmap, &allocateArray, mipmapDesc->numLevels,

hipamd/src/hip_mempool.cpp

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,19 @@ hipError_t hipMallocAsync(void** dev_ptr, size_t size, hipStream_t stream) {
9292
*dev_ptr = nullptr;
9393
HIP_RETURN(hipSuccess);
9494
}
95+
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
9596
auto hip_stream = (stream == nullptr || stream == hipStreamLegacy) ?
96-
hip::getCurrentDevice()->NullStream() : reinterpret_cast<hip::Stream*>(stream);
97+
hip::getCurrentDevice()->NullStream() : s;
9798
auto device = hip_stream->GetDevice();
9899
auto mem_pool = device->GetCurrentMemoryPool();
99100

101+
// Return error if any stream other than the current stream is in capture mode
102+
if (device->StreamCaptureBlocking()) {
103+
if (s->GetCaptureStatus() != hipStreamCaptureStatusActive) {
104+
return hipErrorStreamCaptureUnsupported;
105+
}
106+
}
107+
100108
STREAM_CAPTURE(hipMallocAsync, stream, reinterpret_cast<hipMemPool_t>(mem_pool), size, dev_ptr);
101109

102110
*dev_ptr = mem_pool->AllocateMemory(size, hip_stream);
@@ -138,17 +146,28 @@ class FreeAsyncCommand : public amd::Command {
138146
// ================================================================================================
139147
hipError_t hipFreeAsync(void* dev_ptr, hipStream_t stream) {
140148
HIP_INIT_API(hipFreeAsync, dev_ptr, stream);
141-
if (dev_ptr == nullptr) {
142-
HIP_RETURN(hipErrorInvalidValue);
143-
}
149+
144150
if (!hip::isValid(stream)) {
145151
HIP_RETURN(hipErrorInvalidHandle);
146152
}
147153

148-
STREAM_CAPTURE(hipFreeAsync, stream, dev_ptr);
149-
154+
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
150155
auto hip_stream = (stream == nullptr || stream == hipStreamLegacy) ?
151-
hip::getCurrentDevice()->NullStream(): reinterpret_cast<hip::Stream*>(stream);
156+
hip::getCurrentDevice()->NullStream(): s;
157+
158+
auto device = hip_stream->GetDevice();
159+
// Return error if any stream other than the current stream is in capture mode
160+
if (device->StreamCaptureBlocking()) {
161+
if (s->GetCaptureStatus() != hipStreamCaptureStatusActive) {
162+
return hipErrorStreamCaptureUnsupported;
163+
}
164+
}
165+
166+
if (dev_ptr == nullptr) {
167+
HIP_RETURN(hipErrorInvalidValue);
168+
}
169+
170+
STREAM_CAPTURE(hipFreeAsync, stream, dev_ptr);
152171

153172
hip::Event* event = nullptr;
154173
bool graph_in_use = false;

0 commit comments

Comments
 (0)