Skip to content

Commit 8e4208f

Browse files
authored
[flang][cuda][rt] Add entry point to get the allocation stream (#169608)
1 parent 75ca835 commit 8e4208f

File tree

3 files changed

+63
-2
lines changed

3 files changed

+63
-2
lines changed

flang-rt/lib/cuda/allocator.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
#include "flang/Runtime/CUDA/common.h"
2020
#include "flang/Support/Fortran.h"
2121

22-
#include "cuda_runtime.h"
23-
2422
namespace Fortran::runtime::cuda {
2523

2624
struct DeviceAllocation {
@@ -133,6 +131,15 @@ void RTDEF(CUFRegisterAllocator)() {
133131
allocatorRegistry.Register(
134132
kUnifiedAllocatorPos, {&CUFAllocUnified, CUFFreeUnified});
135133
}
134+
135+
cudaStream_t RTDECL(CUFAssociatedGetStream)(void *p) {
136+
int pos = findAllocation(p);
137+
if (pos >= 0) {
138+
cudaStream_t stream = deviceAllocations[pos].stream;
139+
return stream;
140+
}
141+
return nullptr;
142+
}
136143
}
137144

138145
void *CUFAllocPinned(

flang-rt/unittests/Runtime/CUDA/Allocatable.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,54 @@ TEST(AllocatableCUFTest, StreamDeviceAllocatable) {
121121
cudaDeviceSynchronize();
122122
EXPECT_EQ(cudaSuccess, cudaGetLastError());
123123
}
124+
125+
TEST(AllocatableAsyncTest, StreamDeviceAllocatable) {
126+
using Fortran::common::TypeCategory;
127+
RTNAME(CUFRegisterAllocator)();
128+
// REAL(4), DEVICE, ALLOCATABLE :: a(:)
129+
auto a{createAllocatable(TypeCategory::Real, 4)};
130+
a->SetAllocIdx(kDeviceAllocatorPos);
131+
EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx());
132+
EXPECT_FALSE(a->HasAddendum());
133+
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
134+
135+
cudaStream_t stream;
136+
cudaStreamCreate(&stream);
137+
EXPECT_EQ(cudaSuccess, cudaGetLastError());
138+
139+
RTNAME(AllocatableAllocate)
140+
(*a, /*asyncObject=*/(int64_t *)&stream, /*hasStat=*/false,
141+
/*errMsg=*/nullptr, __FILE__, __LINE__);
142+
EXPECT_TRUE(a->IsAllocated());
143+
cudaDeviceSynchronize();
144+
EXPECT_EQ(cudaSuccess, cudaGetLastError());
145+
cudaStream_t s = RTDECL(CUFAssociatedGetStream)(a->raw().base_addr);
146+
EXPECT_EQ(s, stream);
147+
RTNAME(AllocatableDeallocate)
148+
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
149+
EXPECT_FALSE(a->IsAllocated());
150+
cudaDeviceSynchronize();
151+
152+
cudaStream_t defaultStream = 0;
153+
RTNAME(AllocatableAllocate)
154+
(*a, /*asyncObject=*/(int64_t *)&defaultStream, /*hasStat=*/false,
155+
/*errMsg=*/nullptr, __FILE__, __LINE__);
156+
EXPECT_TRUE(a->IsAllocated());
157+
cudaDeviceSynchronize();
158+
EXPECT_EQ(cudaSuccess, cudaGetLastError());
159+
cudaStream_t d = RTDECL(CUFAssociatedGetStream)(a->raw().base_addr);
160+
EXPECT_EQ(d, defaultStream);
161+
RTNAME(AllocatableDeallocate)
162+
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
163+
EXPECT_FALSE(a->IsAllocated());
164+
cudaDeviceSynchronize();
165+
166+
RTNAME(AllocatableAllocate)
167+
(*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
168+
__LINE__);
169+
EXPECT_TRUE(a->IsAllocated());
170+
cudaDeviceSynchronize();
171+
EXPECT_EQ(cudaSuccess, cudaGetLastError());
172+
cudaStream_t empty = RTDECL(CUFAssociatedGetStream)(a->raw().base_addr);
173+
EXPECT_EQ(empty, nullptr);
174+
}

flang/include/flang/Runtime/CUDA/allocator.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,14 @@
1313
#include "flang/Runtime/descriptor-consts.h"
1414
#include "flang/Runtime/entry-names.h"
1515

16+
#include "cuda_runtime.h"
17+
1618
namespace Fortran::runtime::cuda {
1719

1820
extern "C" {
1921

2022
void RTDECL(CUFRegisterAllocator)();
23+
cudaStream_t RTDECL(CUFAssociatedGetStream)(void *);
2124
}
2225

2326
void *CUFAllocPinned(std::size_t, std::int64_t *);

0 commit comments

Comments
 (0)