Skip to content

Commit 65ff110

Browse files
committed
Refactor stream mutex
1 parent 98f9ea4 commit 65ff110

File tree

5 files changed

+29
-32
lines changed

5 files changed

+29
-32
lines changed

offload/include/Shared/APITypes.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@ struct __tgt_async_info {
7777
/// should be freed after finalization.
7878
llvm::SmallVector<void *, 2> AssociatedAllocations;
7979

80-
/// Mutex to guard access to AssociatedAllocations
81-
std::mutex AllocationsMutex;
80+
/// Mutex to guard access to AssociatedAllocations and the Queue
81+
std::mutex Mutex;
8282

8383
/// The kernel launch environment used to issue a kernel. Stored here to
8484
/// ensure it is a valid location while the transfer to the device is

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2232,17 +2232,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
22322232
/// Get the stream of the asynchronous info structure or get a new one.
22332233
Error getStream(AsyncInfoWrapperTy &AsyncInfoWrapper,
22342234
AMDGPUStreamTy *&Stream) {
2235-
std::lock_guard<std::mutex> StreamLock{StreamMutex};
2236-
// Get the stream (if any) from the async info.
2237-
Stream = AsyncInfoWrapper.getQueueAs<AMDGPUStreamTy *>();
2238-
if (!Stream) {
2239-
// There was no stream; get an idle one.
2240-
if (auto Err = AMDGPUStreamManager.getResource(Stream))
2241-
return Err;
2242-
2243-
// Modify the async info's stream.
2244-
AsyncInfoWrapper.setQueueAs<AMDGPUStreamTy *>(Stream);
2245-
}
2235+
auto WrapperStream =
2236+
AsyncInfoWrapper.getOrInitQueue<AMDGPUStreamTy *>(AMDGPUStreamManager);
2237+
if (!WrapperStream)
2238+
return WrapperStream.takeError();
2239+
Stream = *WrapperStream;
22462240
return Plugin::success();
22472241
}
22482242

@@ -3072,9 +3066,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
30723066
/// True is the system is configured with XNACK-Enabled.
30733067
/// False otherwise.
30743068
bool IsXnackEnabled = false;
3075-
3076-
/// Mutex to guard getting/setting the stream
3077-
std::mutex StreamMutex;
30783069
};
30793070

30803071
Error AMDGPUDeviceImageTy::loadExecutable(const AMDGPUDeviceTy &Device) {

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct GenericPluginTy;
6060
struct GenericKernelTy;
6161
struct GenericDeviceTy;
6262
struct RecordReplayTy;
63+
template <typename ResourceRef> class GenericDeviceResourceManagerTy;
6364

6465
namespace Plugin {
6566
/// Create a success error. This is the same as calling Error::success(), but
@@ -127,6 +128,20 @@ struct AsyncInfoWrapperTy {
127128
AsyncInfoPtr->Queue = Queue;
128129
}
129130

131+
/// Get the queue, using the provided resource manager to initialise it if it
132+
/// doesn't exist.
133+
template <typename Ty, typename RMTy>
134+
Expected<Ty>
135+
getOrInitQueue(GenericDeviceResourceManagerTy<RMTy> &ResourceManager) {
136+
std::lock_guard<std::mutex> Lock(AsyncInfoPtr->Mutex);
137+
if (!AsyncInfoPtr->Queue) {
138+
if (auto Err = ResourceManager.getResource(
139+
*reinterpret_cast<Ty *>(&AsyncInfoPtr->Queue)))
140+
return Err;
141+
}
142+
return getQueueAs<Ty>();
143+
}
144+
130145
/// Synchronize with the __tgt_async_info's pending operations if it's the
131146
/// internal async info. The error associated to the asynchronous operations
132147
/// issued in this queue must be provided in \p Err. This function will update
@@ -138,7 +153,7 @@ struct AsyncInfoWrapperTy {
138153
/// Register \p Ptr as an associated allocation that is freed after
139154
/// finalization.
140155
void freeAllocationAfterSynchronization(void *Ptr) {
141-
std::lock_guard<std::mutex> AllocationGuard{AsyncInfoPtr->AllocationsMutex};
156+
std::lock_guard<std::mutex> AllocationGuard{AsyncInfoPtr->Mutex};
142157
AsyncInfoPtr->AssociatedAllocations.push_back(Ptr);
143158
}
144159

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1337,7 +1337,7 @@ Error PinnedAllocationMapTy::unlockUnmappedHostBuffer(void *HstPtr) {
13371337

13381338
Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo,
13391339
bool RemoveQueue) {
1340-
std::lock_guard<std::mutex> AllocationGuard{AsyncInfo->AllocationsMutex};
1340+
std::lock_guard<std::mutex> AllocationGuard{AsyncInfo->Mutex};
13411341

13421342
if (!AsyncInfo || !AsyncInfo->Queue)
13431343
return Plugin::error(ErrorCode::INVALID_ARGUMENT,

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -522,17 +522,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
522522

523523
/// Get the stream of the asynchronous info structure or get a new one.
524524
Error getStream(AsyncInfoWrapperTy &AsyncInfoWrapper, CUstream &Stream) {
525-
std::lock_guard<std::mutex> StreamLock{StreamMutex};
526-
// Get the stream (if any) from the async info.
527-
Stream = AsyncInfoWrapper.getQueueAs<CUstream>();
528-
if (!Stream) {
529-
// There was no stream; get an idle one.
530-
if (auto Err = CUDAStreamManager.getResource(Stream))
531-
return Err;
532-
533-
// Modify the async info's stream.
534-
AsyncInfoWrapper.setQueueAs<CUstream>(Stream);
535-
}
525+
auto WrapperStream =
526+
AsyncInfoWrapper.getOrInitQueue<CUstream>(CUDAStreamManager);
527+
if (!WrapperStream)
528+
return WrapperStream.takeError();
529+
Stream = *WrapperStream;
536530
return Plugin::success();
537531
}
538532

@@ -1293,9 +1287,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
12931287
/// The maximum number of warps that can be resident on all the SMs
12941288
/// simultaneously.
12951289
uint32_t HardwareParallelism = 0;
1296-
1297-
/// Mutex to guard getting/setting the stream
1298-
std::mutex StreamMutex;
12991290
};
13001291

13011292
Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,

0 commit comments

Comments
 (0)