llvm
diff --git a/‎flang-rt/include/flang-rt/runtime/allocator-registry.h‎
Lines changed: 2 additions & 2 deletions b/‎flang-rt/include/flang-rt/runtime/allocator-registry.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎flang-rt/include/flang-rt/runtime/descriptor.h‎
Lines changed: 3 additions & 3 deletions b/‎flang-rt/include/flang-rt/runtime/descriptor.h‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎flang-rt/include/flang-rt/runtime/reduction-templates.h‎
Lines changed: 1 addition & 1 deletion b/‎flang-rt/include/flang-rt/runtime/reduction-templates.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎flang-rt/lib/cuda/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎flang-rt/lib/cuda/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎flang-rt/lib/cuda/allocatable.cpp‎
Lines changed: 4 additions & 4 deletions b/‎flang-rt/lib/cuda/allocatable.cpp‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎flang-rt/lib/cuda/allocator.cpp‎
Lines changed: 10 additions & 10 deletions b/‎flang-rt/lib/cuda/allocator.cpp‎
Lines changed: 10 additions & 10 deletions
diff --git a/‎flang-rt/lib/cuda/descriptor.cpp‎
Lines changed: 1 addition & 1 deletion b/‎flang-rt/lib/cuda/descriptor.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎flang-rt/lib/cuda/pointer.cpp‎
Lines changed: 4 additions & 4 deletions b/‎flang-rt/lib/cuda/pointer.cpp‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎flang-rt/lib/runtime/allocatable.cpp‎
Lines changed: 6 additions & 6 deletions b/‎flang-rt/lib/runtime/allocatable.cpp‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎flang-rt/lib/runtime/array-constructor.cpp‎
Lines changed: 2 additions & 2 deletions b/‎flang-rt/lib/runtime/array-constructor.cpp‎
Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@
 
 namespace Fortran::runtime {
 
-using AllocFct = void *(*)(std::size_t, std::int64_t);
+using AllocFct = void *(*)(std::size_t, std::int64_t *);
 using FreeFct = void (*)(void *);
 
 typedef struct Allocator_t {
@@ -28,7 +28,7 @@ typedef struct Allocator_t {
 } Allocator_t;
 
 static RT_API_ATTRS void *MallocWrapper(
-    std::size_t size, [[maybe_unused]] std::int64_t) {
+    std::size_t size, [[maybe_unused]] std::int64_t *) {
   return std::malloc(size);
 }
 #ifdef RT_DEVICE_COMPILATION
 
@@ -29,8 +29,8 @@
 #include <cstdio>
 #include <cstring>
 
-/// Value used for asyncId when no specific stream is specified.
-static constexpr std::int64_t kNoAsyncId = -1;
+/// Value used for asyncObject when no specific stream is specified.
+static constexpr std::int64_t *kNoAsyncObject = nullptr;
 
 namespace Fortran::runtime {
 
@@ -372,7 +372,7 @@ class Descriptor {
   // before calling.  It (re)computes the byte strides after
   // allocation.  Does not allocate automatic components or
   // perform default component initialization.
-  RT_API_ATTRS int Allocate(std::int64_t asyncId);
+  RT_API_ATTRS int Allocate(std::int64_t *asyncObject);
   RT_API_ATTRS void SetByteStrides();
 
   // Deallocates storage; does not call FINAL subroutines or
 
@@ -347,7 +347,7 @@ inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
     // as the element size of the source.
     result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
         CFI_attribute_allocatable);
-    if (int stat{result.Allocate(kNoAsyncId)}) {
+    if (int stat{result.Allocate(kNoAsyncObject)}) {
       terminator.Crash(
           "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
     }
 
@@ -14,6 +14,7 @@ add_flangrt_library(flang_rt.cuda STATIC SHARED
   kernel.cpp
   memmove-function.cpp
   memory.cpp
+  pointer.cpp
   registration.cpp
 
   TARGET_PROPERTIES
 
@@ -23,7 +23,7 @@ namespace Fortran::runtime::cuda {
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
-int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
+int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream,
     bool *pinned, bool hasStat, const Descriptor *errMsg,
     const char *sourceFile, int sourceLine) {
   int stat{RTNAME(CUFAllocatableAllocate)(
@@ -41,7 +41,7 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
   return stat;
 }
 
-int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
+int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream,
     bool *pinned, bool hasStat, const Descriptor *errMsg,
     const char *sourceFile, int sourceLine) {
   if (desc.HasAddendum()) {
@@ -63,7 +63,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
 }
 
 int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
-    const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
+    const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
     const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
   int stat{RTNAME(CUFAllocatableAllocate)(
       alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
@@ -76,7 +76,7 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
 }
 
 int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
-    const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
+    const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
     const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
   int stat{RTNAME(CUFAllocatableAllocateSync)(
       alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
 
@@ -98,15 +98,15 @@ static unsigned findAllocation(void *ptr) {
   return allocNotFound;
 }
 
-static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
+static void insertAllocation(void *ptr, std::size_t size, cudaStream_t stream) {
   CriticalSection critical{lock};
   initAllocations();
   if (numDeviceAllocations >= maxDeviceAllocations) {
     doubleAllocationArray();
   }
   deviceAllocations[numDeviceAllocations].ptr = ptr;
   deviceAllocations[numDeviceAllocations].size = size;
-  deviceAllocations[numDeviceAllocations].stream = (cudaStream_t)stream;
+  deviceAllocations[numDeviceAllocations].stream = stream;
   ++numDeviceAllocations;
   qsort(deviceAllocations, numDeviceAllocations, sizeof(DeviceAllocation),
       compareDeviceAlloc);
@@ -136,26 +136,26 @@ void RTDEF(CUFRegisterAllocator)() {
 }
 
 void *CUFAllocPinned(
-    std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+    std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
   void *p;
   CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
   return p;
 }
 
 void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
 
-void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t asyncId) {
+void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t *asyncObject) {
   void *p;
   if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
     CUDA_REPORT_IF_ERROR(
         cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
   } else {
-    if (asyncId == kNoAsyncId) {
+    if (asyncObject == kNoAsyncObject) {
       CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
     } else {
       CUDA_REPORT_IF_ERROR(
-          cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)asyncId));
-      insertAllocation(p, sizeInBytes, asyncId);
+          cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)*asyncObject));
+      insertAllocation(p, sizeInBytes, (cudaStream_t)*asyncObject);
     }
   }
   return p;
@@ -174,7 +174,7 @@ void CUFFreeDevice(void *p) {
 }
 
 void *CUFAllocManaged(
-    std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+    std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
   void *p;
   CUDA_REPORT_IF_ERROR(
       cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
@@ -184,9 +184,9 @@ void *CUFAllocManaged(
 void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
 
 void *CUFAllocUnified(
-    std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+    std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
   // Call alloc managed for the time being.
-  return CUFAllocManaged(sizeInBytes, asyncId);
+  return CUFAllocManaged(sizeInBytes, asyncObject);
 }
 
 void CUFFreeUnified(void *p) {
 
@@ -21,7 +21,7 @@ RT_EXT_API_GROUP_BEGIN
 Descriptor *RTDEF(CUFAllocDescriptor)(
     std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {
   return reinterpret_cast<Descriptor *>(
-      CUFAllocManaged(sizeInBytes, /*asyncId*/ -1));
+      CUFAllocManaged(sizeInBytes, /*asyncObject=*/nullptr));
 }
 
 void RTDEF(CUFFreeDescriptor)(
 
@@ -22,7 +22,7 @@ namespace Fortran::runtime::cuda {
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
-int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool *pinned,
+int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t *stream, bool *pinned,
     bool hasStat, const Descriptor *errMsg, const char *sourceFile,
     int sourceLine) {
   if (desc.HasAddendum()) {
@@ -43,7 +43,7 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool *pinned,
   return stat;
 }
 
-int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
+int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t *stream,
     bool *pinned, bool hasStat, const Descriptor *errMsg,
     const char *sourceFile, int sourceLine) {
   int stat{RTNAME(CUFPointerAllocate)(
@@ -62,7 +62,7 @@ int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
 }
 
 int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
-    const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
+    const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
     const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
   int stat{RTNAME(CUFPointerAllocate)(
       pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
@@ -75,7 +75,7 @@ int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
 }
 
 int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
-    const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
+    const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
     const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
   int stat{RTNAME(CUFPointerAllocateSync)(
       pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
 
@@ -133,17 +133,17 @@ void RTDEF(AllocatableApplyMold)(
   }
 }
 
-int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId,
-    bool hasStat, const Descriptor *errMsg, const char *sourceFile,
-    int sourceLine) {
+int RTDEF(AllocatableAllocate)(Descriptor &descriptor,
+    std::int64_t *asyncObject, bool hasStat, const Descriptor *errMsg,
+    const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
   if (!descriptor.IsAllocatable()) {
     return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat);
   } else if (descriptor.IsAllocated()) {
     return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat);
   } else {
-    int stat{
-        ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)};
+    int stat{ReturnError(
+        terminator, descriptor.Allocate(asyncObject), errMsg, hasStat)};
     if (stat == StatOk) {
       if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
         if (const auto *derived{addendum->derivedType()}) {
@@ -162,7 +162,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc,
     const Descriptor &source, bool hasStat, const Descriptor *errMsg,
     const char *sourceFile, int sourceLine) {
   int stat{RTNAME(AllocatableAllocate)(
-      alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)};
+      alloc, /*asyncObject=*/nullptr, hasStat, errMsg, sourceFile, sourceLine)};
   if (stat == StatOk) {
     Terminator terminator{sourceFile, sourceLine};
     DoFromSourceAssign(alloc, source, terminator);
 
@@ -50,7 +50,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
           initialAllocationSize(fromElements, to.ElementBytes())};
       to.GetDimension(0).SetBounds(1, allocationSize);
       RTNAME(AllocatableAllocate)
-      (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
+      (to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
           vector.sourceFile, vector.sourceLine);
       to.GetDimension(0).SetBounds(1, fromElements);
       vector.actualAllocationSize = allocationSize;
@@ -59,7 +59,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
       // first value: there should be no reallocation.
       RUNTIME_CHECK(terminator, previousToElements >= fromElements);
       RTNAME(AllocatableAllocate)
-      (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
+      (to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
           vector.sourceFile, vector.sourceLine);
       vector.actualAllocationSize = previousToElements;
     }
Original file line number	Diff line number	Diff line change
`@@ -347,7 +347,7 @@ inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,`
`347`	`347`	`// as the element size of the source.`
`348`	`348`	`result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,`
`349`	`349`	`CFI_attribute_allocatable);`
`350`		`- if (int stat{result.Allocate(kNoAsyncId)}) {`
	`350`	`+ if (int stat{result.Allocate(kNoAsyncObject)}) {`
`351`	`351`	`terminator.Crash(`
`352`	`352`	`"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);`
`353`	`353`	`}`
Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ RT_EXT_API_GROUP_BEGIN`
`21`	`21`	`Descriptor *RTDEF(CUFAllocDescriptor)(`
`22`	`22`	`std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {`
`23`	`23`	`return reinterpret_cast<Descriptor *>(`
`24`		`- CUFAllocManaged(sizeInBytes, /asyncId/ -1));`
	`24`	`+ CUFAllocManaged(sizeInBytes, /asyncObject=/nullptr));`
`25`	`25`	`}`
`26`	`26`
`27`	`27`	`void RTDEF(CUFFreeDescriptor)(`