Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions offload/liboffload/API/Memory.td
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,17 @@
def ol_alloc_type_t : Enum {
  let desc = "Represents the type of allocation made with olMemAlloc.";
  // Visibility semantics for each allocation kind; DEVICE allocations are not
  // addressable from the host or from other devices.
  let etors = [
    Etor<"HOST", "Host allocation. Allocated on the host and visible to the host and all devices sharing the same platform.">,
    Etor<"DEVICE", "Device allocation. Allocated on a specific device and visible only to that device.">,
    Etor<"MANAGED", "Managed allocation. Allocated on a specific device and visible to the host and all devices sharing the same platform.">
  ];
}

def olMemAlloc : Function {
let desc = "Creates a memory allocation on the specified device.";
let details = [
"`DEVICE` allocations do not share the same address space as the host or other devices. The `AllocationOut` pointer cannot be used to uniquely identify the allocation in these cases.",
];
let params = [
Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>,
Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
Expand All @@ -36,10 +39,18 @@ def olMemAlloc : Function {

def olMemFree : Function {
  let desc = "Frees a memory allocation previously made by olMemAlloc.";
  let details = [
    "`Address` must be the beginning of the allocation.",
    "`Device` must be provided for memory allocated as `OL_ALLOC_TYPE_DEVICE`, and may be provided for other types.",
    "If `Device` is provided, it must match the device used to allocate the memory with `olMemAlloc`.",
  ];
  let params = [
    Param<"ol_device_handle_t", "Device", "handle of the device this allocation was allocated on", PARAM_IN_OPTIONAL>,
    Param<"void*", "Address", "address of the allocation to free", PARAM_IN>,
  ];
  // NOT_FOUND covers both an unknown address and a Device mismatch.
  let returns = [
    Return<"OL_ERRC_NOT_FOUND", ["The address was not found in the list of allocations"]>
  ];
}

def olMemcpy : Function {
Expand Down
46 changes: 27 additions & 19 deletions offload/liboffload/src/OffloadImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ namespace llvm {
namespace offload {

// Bookkeeping record for a single olMemAlloc allocation; stored in
// OffloadContext so olMemFree can validate and release it.
struct AllocInfo {
// Base address returned by olMemAlloc; frees are matched against this exactly.
void *Base;
// Device the allocation was made on (used to dispatch the eventual free).
ol_device_handle_t Device;
// Allocation kind (OL_ALLOC_TYPE_HOST / DEVICE / MANAGED).
ol_alloc_type_t Type;
};
Expand All @@ -201,8 +202,8 @@ struct OffloadContext {

bool TracingEnabled = false;
bool ValidationEnabled = true;
DenseMap<void *, AllocInfo> AllocInfoMap{};
std::mutex AllocInfoMapMutex{};
SmallVector<AllocInfo> AllocInfoList{};
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I considered having this be a Map<void *, SmallVector<AllocInfo>>, however I think it's better if functions like olMemGetInfo accept any address inside the allocation instead of just the start. This makes that logic much less clunky.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

however I think it's better if functions like olMemGetInfo accept any address inside the allocation instead of just the start.

Why? I don't think we have such use case in SYCL. Does OpenMP?
This decision makes the implementation more complicated, and I'm not sure if there's much value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, the implementation of urEnqueueUSMMemcpy contains this:

auto GetDevice = [&](const void *Ptr) {
    auto Res = hQueue->UrContext->getAllocType(Ptr);
    if (!Res)
      return Adapter->HostDevice;
    return Res->Type == OL_ALLOC_TYPE_HOST ? Adapter->HostDevice
                                           : hQueue->OffloadDevice;
  };

  return doMemcpy(UR_COMMAND_USM_MEMCPY, hQueue, pDst, GetDevice(pDst), pSrc,
                  GetDevice(pSrc), size, blocking, numEventsInWaitList,
                  phEventWaitList, phEvent);

We need to look up whether the allocation is from the host or the device inside liboffload. Afaik, all UR functions working on USM pointers don't require the pointer to be at the start of an allocation, so we'd need to be able to look up the allocation type from anywhere in the buffer.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason olMemcpy needs to take destination/source devices? L0/ur/opencl/cuda don't. The underlying APIs all seem to support inferring the devices types implicitly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UR does require the destination and source device — it needs to know which backend (AMD/CUDA/OpenCL/etc.) to dispatch to, which is stored in ur_queue_handle_t. OpenCL also requires the cl_intel_unified_shared_memory extension to query the source and destination device.

Since the queue is optional in olMemcpy, a device to lookup the backend (AMD or Nvidia) is required. And if we need one device, we may as well have the user specify both instead of requiring the backend to support determining whether the copy is h2d/h2h/d2d. It also allows the API to be extended in the future to allow copying data between two platforms if needed.

I wasn't involved in the initial design for this, so @jhuber6 and @callumfare might be able to shed some more light on why it is how it is.

Copy link
Contributor

@pbalcer pbalcer Sep 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UR does require the destination and source device - It needs to know which backend (AMD/CUDA/OpenCL/etc) to dispatch to, which is stored in ur_queue_handle_t.

Not sure I follow, here's the UR entry point:

UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
    ur_queue_handle_t hQueue,
    bool blocking,
    void *pDst,
    const void *pSrc,
    size_t size,
    uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent);

Yes, we need a hQueue to do the dispatch, but it's there because memcpy is logically an operation on the queue.

Since the queue is optional in olMemcpy, a device to lookup the backend (AMD or Nvidia) is required. And if we need one device, we may as well have the user specify both instead of requiring the backend to support determining whether the copy is h2d/h2h/d2d.

I think we should revisit the decision for queue to be optional. Given the current API, the best we can probably do is something like an interval map lookup (2x for both src and dst) - which is going to be both complex and expensive. Do we really want to pay the cost of doing those lookups for every usm memory operation?

In UR, we have a separate non-queued blocking copy that operates on a context:

UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp(
    /// [in] Context associated with the device(s) that own the allocations
    /// `pSrc` and `pDst`.
    ur_context_handle_t hContext,
    void *pDst,
    const void *pSrc,
    size_t size);

We could do something similar instead of trying to overload a single function with multiple different functionalities.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it necessary to have the allocations of all devices in the same structure? I couldn't find a use for that requirement in the current code.

If it's not necessary, you could have a map in each device object with its own allocations — keeping a map to speed up lookups, and improving concurrency between operations on distinct devices.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory it's possible to allocate host memory which isn't tied to a specific device, although we don't expose that functionality in any liboffload entry points yet.

However, what we do allow is looking up host and managed allocations without providing a device. To do that where each device managed its own allocations we'd need to iterate (and lock) through each device and search its allocation list.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the clarification. I would use a map here instead of the vector, as @pbalcer said.

If you need lookups using non-base pointers (e.g., for a olGetMemInfo), you could use std::map with a base pointer + size as the key, and use lower_bound/upper_bound functions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure it's possible to use a binary search to scan through allocations since memory regions can overlap in weird ways that make it hard to establish an order.

std::mutex AllocInfoListMutex{};
SmallVector<ol_platform_impl_t, 4> Platforms{};
size_t RefCount;

Expand Down Expand Up @@ -625,30 +626,37 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type,

*AllocationOut = *Alloc;
{
std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoMapMutex);
OffloadContext::get().AllocInfoMap.insert_or_assign(
*Alloc, AllocInfo{Device, Type});
std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoListMutex);
OffloadContext::get().AllocInfoList.emplace_back(
AllocInfo{*AllocationOut, Device, Type});
}
return Error::success();
}

Error olMemFree_impl(void *Address) {
ol_device_handle_t Device;
ol_alloc_type_t Type;
Error olMemFree_impl(ol_device_handle_t Device, void *Address) {
AllocInfo Removed;
{
std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoMapMutex);
if (!OffloadContext::get().AllocInfoMap.contains(Address))
return createOffloadError(ErrorCode::INVALID_ARGUMENT,
"address is not a known allocation");

auto AllocInfo = OffloadContext::get().AllocInfoMap.at(Address);
Device = AllocInfo.Device;
Type = AllocInfo.Type;
OffloadContext::get().AllocInfoMap.erase(Address);
std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoListMutex);

auto &List = OffloadContext::get().AllocInfoList;
auto Entry = std::find_if(List.begin(), List.end(), [&](AllocInfo &Entry) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm concerned about a linear search over all allocations during every free. I don't really understand the argument against using a map. You are doing an exact lookup here.

In GetMemInfo, if you really need to be able to find this for any arbitrary address, you can search for greater equals key in the map and look at the allocation size.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I originally considered using std::pair<ol_device_handle_t, AllocInfo> as the key or the value being a SmallVector<AllocInfo> containing the device. However, that caused the implementation to spiral out in complexity and become a mess.

I'm not sure there'd be enough allocations to create a noticeable performance difference.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes the performance of liboffload's free (and, really, based on your other comment, of any function that accepts a USM pointer) O(N), where N is the number of allocations. Unless we can reasonably say that N is going to be very small — and I'm not sure we can, because e.g. SYCL doesn't restrict users from arbitrarily allocating memory — there will be a noticeable performance difference for at least some applications.

return Address == Entry.Base && (!Device || Entry.Device == Device);
});

if (Entry == List.end())
return Plugin::error(ErrorCode::NOT_FOUND,
"could not find memory allocated by olMemAlloc");
if (!Device && Entry->Type == OL_ALLOC_TYPE_DEVICE)
return Plugin::error(
ErrorCode::NOT_FOUND,
"specifying the Device parameter is required to query device memory");

Removed = std::move(*Entry);
*Entry = List.pop_back_val();
}

if (auto Res =
Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type)))
if (auto Res = Removed.Device->Device->dataDelete(
Removed.Base, convertOlToPluginAllocTy(Removed.Type)))
return Res;

return Error::success();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class DeviceContext;

namespace detail {

void freeDeviceMemory(void *Address) noexcept;
void freeDeviceMemory(ol_device_handle_t Device, void *Address) noexcept;
} // namespace detail

//===----------------------------------------------------------------------===//
Expand All @@ -40,7 +40,7 @@ template <typename T> class [[nodiscard]] ManagedBuffer {
public:
// Releases the owned allocation, if any. Passes a null device handle since
// the buffer does not record its owning device; olMemFree accepts an
// optional device for non-DEVICE allocations.
~ManagedBuffer() noexcept {
    if (Address)
      detail::freeDeviceMemory(nullptr, Address);
  }

ManagedBuffer(const ManagedBuffer &) = delete;
Expand Down
5 changes: 3 additions & 2 deletions offload/unittests/Conformance/lib/DeviceResources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ using namespace mathtest;
// Helpers
//===----------------------------------------------------------------------===//

void detail::freeDeviceMemory(void *Address) noexcept {
void detail::freeDeviceMemory(ol_device_handle_t Device,
void *Address) noexcept {
if (Address)
OL_CHECK(olMemFree(Address));
OL_CHECK(olMemFree(Device, Address));
}

//===----------------------------------------------------------------------===//
Expand Down
16 changes: 8 additions & 8 deletions offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ TEST_P(olLaunchKernelFooTest, Success) {
ASSERT_EQ(Data[i], i);
}

ASSERT_SUCCESS(olMemFree(Mem));
ASSERT_SUCCESS(olMemFree(Device, Mem));
}

TEST_P(olLaunchKernelFooTest, SuccessThreaded) {
Expand All @@ -123,7 +123,7 @@ TEST_P(olLaunchKernelFooTest, SuccessThreaded) {
ASSERT_EQ(Data[i], i);
}

ASSERT_SUCCESS(olMemFree(Mem));
ASSERT_SUCCESS(olMemFree(Device, Mem));
});
}

Expand Down Expand Up @@ -151,7 +151,7 @@ TEST_P(olLaunchKernelFooTest, SuccessSynchronous) {
ASSERT_EQ(Data[i], i);
}

ASSERT_SUCCESS(olMemFree(Mem));
ASSERT_SUCCESS(olMemFree(Device, Mem));
}

TEST_P(olLaunchKernelLocalMemTest, Success) {
Expand All @@ -176,7 +176,7 @@ TEST_P(olLaunchKernelLocalMemTest, Success) {
for (uint32_t i = 0; i < LaunchArgs.GroupSize.x * LaunchArgs.NumGroups.x; i++)
ASSERT_EQ(Data[i], (i % 64) * 2);

ASSERT_SUCCESS(olMemFree(Mem));
ASSERT_SUCCESS(olMemFree(Device, Mem));
}

TEST_P(olLaunchKernelLocalMemReductionTest, Success) {
Expand All @@ -199,7 +199,7 @@ TEST_P(olLaunchKernelLocalMemReductionTest, Success) {
for (uint32_t i = 0; i < LaunchArgs.NumGroups.x; i++)
ASSERT_EQ(Data[i], 2 * LaunchArgs.GroupSize.x);

ASSERT_SUCCESS(olMemFree(Mem));
ASSERT_SUCCESS(olMemFree(Device, Mem));
}

TEST_P(olLaunchKernelLocalMemStaticTest, Success) {
Expand All @@ -222,7 +222,7 @@ TEST_P(olLaunchKernelLocalMemStaticTest, Success) {
for (uint32_t i = 0; i < LaunchArgs.NumGroups.x; i++)
ASSERT_EQ(Data[i], 2 * LaunchArgs.GroupSize.x);

ASSERT_SUCCESS(olMemFree(Mem));
ASSERT_SUCCESS(olMemFree(Device, Mem));
}

TEST_P(olLaunchKernelGlobalTest, Success) {
Expand All @@ -245,7 +245,7 @@ TEST_P(olLaunchKernelGlobalTest, Success) {
ASSERT_EQ(Data[i], i * 2);
}

ASSERT_SUCCESS(olMemFree(Mem));
ASSERT_SUCCESS(olMemFree(Device, Mem));
}

TEST_P(olLaunchKernelGlobalTest, InvalidNotAKernel) {
Expand Down Expand Up @@ -273,7 +273,7 @@ TEST_P(olLaunchKernelGlobalCtorTest, Success) {
ASSERT_EQ(Data[i], i + 100);
}

ASSERT_SUCCESS(olMemFree(Mem));
ASSERT_SUCCESS(olMemFree(Device, Mem));
}

TEST_P(olLaunchKernelGlobalDtorTest, Success) {
Expand Down
6 changes: 3 additions & 3 deletions offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,21 @@ TEST_P(olMemAllocTest, SuccessAllocManaged) {
void *Alloc = nullptr;
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, 1024, &Alloc));
ASSERT_NE(Alloc, nullptr);
olMemFree(Alloc);
olMemFree(Device, Alloc);
}

// A host allocation succeeds, returns a non-null pointer, and can be freed.
TEST_P(olMemAllocTest, SuccessAllocHost) {
  void *Alloc = nullptr;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, &Alloc));
  ASSERT_NE(Alloc, nullptr);
  // Check the free result too; a silent failure here would leak.
  ASSERT_SUCCESS(olMemFree(Device, Alloc));
}

// A device allocation succeeds, returns a non-null pointer, and can be freed.
TEST_P(olMemAllocTest, SuccessAllocDevice) {
  void *Alloc = nullptr;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
  ASSERT_NE(Alloc, nullptr);
  // Check the free result too; a silent failure here would leak.
  ASSERT_SUCCESS(olMemFree(Device, Alloc));
}

TEST_P(olMemAllocTest, InvalidNullDevice) {
Expand Down
12 changes: 6 additions & 6 deletions offload/unittests/OffloadAPI/memory/olMemFill.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct olMemFillTest : OffloadQueueTest {
ASSERT_EQ(AllocPtr[i], Pattern);
}

olMemFree(Alloc);
olMemFree(Device, Alloc);
}
};
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olMemFillTest);
Expand Down Expand Up @@ -92,7 +92,7 @@ TEST_P(olMemFillTest, SuccessLarge) {
ASSERT_EQ(AllocPtr[i].B, UINT64_MAX);
}

olMemFree(Alloc);
olMemFree(Device, Alloc);
}

TEST_P(olMemFillTest, SuccessLargeEnqueue) {
Expand Down Expand Up @@ -120,7 +120,7 @@ TEST_P(olMemFillTest, SuccessLargeEnqueue) {
ASSERT_EQ(AllocPtr[i].B, UINT64_MAX);
}

olMemFree(Alloc);
olMemFree(Device, Alloc);
}

TEST_P(olMemFillTest, SuccessLargeByteAligned) {
Expand All @@ -146,7 +146,7 @@ TEST_P(olMemFillTest, SuccessLargeByteAligned) {
ASSERT_EQ(AllocPtr[i].C, 255);
}

olMemFree(Alloc);
olMemFree(Device, Alloc);
}

TEST_P(olMemFillTest, SuccessLargeByteAlignedEnqueue) {
Expand Down Expand Up @@ -176,7 +176,7 @@ TEST_P(olMemFillTest, SuccessLargeByteAlignedEnqueue) {
ASSERT_EQ(AllocPtr[i].C, 255);
}

olMemFree(Alloc);
olMemFree(Device, Alloc);
}

TEST_P(olMemFillTest, InvalidPatternSize) {
Expand All @@ -189,5 +189,5 @@ TEST_P(olMemFillTest, InvalidPatternSize) {
olMemFill(Queue, Alloc, sizeof(Pattern), &Pattern, Size));

olSyncQueue(Queue);
olMemFree(Alloc);
olMemFree(Device, Alloc);
}
71 changes: 52 additions & 19 deletions offload/unittests/OffloadAPI/memory/olMemFree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,63 @@
#include <OffloadAPI.h>
#include <gtest/gtest.h>

using olMemFreeTest = OffloadDeviceTest;
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olMemFreeTest);
// Fixture parameterized on the allocation type: makes one 0x1000-byte
// allocation of that type in SetUp for each test to free.
template <ol_alloc_type_t Type> struct olMemFreeTestBase : OffloadDeviceTest {
  void SetUp() override {
    RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::SetUp());
    ASSERT_SUCCESS(olMemAlloc(Device, Type, 0x1000, &Alloc));
  }

  // Initialized to null so a failed SetUp leaves a well-defined value.
  void *Alloc = nullptr;
};

struct olMemFreeDeviceTest : olMemFreeTestBase<OL_ALLOC_TYPE_DEVICE> {};
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olMemFreeDeviceTest);

struct olMemFreeHostTest : olMemFreeTestBase<OL_ALLOC_TYPE_HOST> {};
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olMemFreeHostTest);

struct olMemFreeManagedTest : olMemFreeTestBase<OL_ALLOC_TYPE_MANAGED> {};
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olMemFreeManagedTest);

// Managed allocations can be freed with the allocating device specified.
TEST_P(olMemFreeManagedTest, SuccessFree) {
ASSERT_SUCCESS(olMemFree(Device, Alloc));
}

// Managed allocations can also be freed without a device handle.
TEST_P(olMemFreeManagedTest, SuccessFreeNull) {
ASSERT_SUCCESS(olMemFree(nullptr, Alloc));
}

// Host allocations can be freed with the allocating device specified.
TEST_P(olMemFreeHostTest, SuccessFree) {
ASSERT_SUCCESS(olMemFree(Device, Alloc));
}

// Host allocations can also be freed without a device handle.
TEST_P(olMemFreeHostTest, SuccessFreeNull) {
ASSERT_SUCCESS(olMemFree(nullptr, Alloc));
}

// Device allocations require the allocating device and succeed with it.
TEST_P(olMemFreeDeviceTest, SuccessFree) {
ASSERT_SUCCESS(olMemFree(Device, Alloc));
}

// Freeing a null address is rejected with INVALID_NULL_POINTER.
TEST_P(olMemFreeDeviceTest, InvalidNullPtr) {
ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olMemFree(Device, nullptr));
}

// Device allocations cannot be freed without a device handle (NOT_FOUND).
TEST_P(olMemFreeDeviceTest, InvalidNullDevice) {
ASSERT_ERROR(OL_ERRC_NOT_FOUND, olMemFree(nullptr, Alloc));
}

TEST_P(olMemFreeTest, SuccessFreeHost) {
void *Alloc = nullptr;
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, &Alloc));
ASSERT_SUCCESS(olMemFree(Alloc));
// Freeing a device allocation through a device other than the one it was
// allocated on must fail with NOT_FOUND.
TEST_P(olMemFreeDeviceTest, InvalidFreeWrongDevice) {
ASSERT_ERROR(OL_ERRC_NOT_FOUND,
olMemFree(TestEnvironment::getHostDevice(), Alloc));
}

TEST_P(olMemFreeTest, SuccessFreeDevice) {
void *Alloc = nullptr;
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
ASSERT_SUCCESS(olMemFree(Alloc));
// If a device is supplied for a host allocation, it must still match the
// allocating device; a mismatching device yields NOT_FOUND.
TEST_P(olMemFreeHostTest, InvalidFreeWrongDevice) {
ASSERT_ERROR(OL_ERRC_NOT_FOUND,
olMemFree(TestEnvironment::getHostDevice(), Alloc));
}

TEST_P(olMemFreeTest, InvalidNullPtr) {
void *Alloc = nullptr;
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olMemFree(nullptr));
ASSERT_SUCCESS(olMemFree(Alloc));
// If a device is supplied for a managed allocation, it must still match the
// allocating device; a mismatching device yields NOT_FOUND.
TEST_P(olMemFreeManagedTest, InvalidFreeWrongDevice) {
ASSERT_ERROR(OL_ERRC_NOT_FOUND,
olMemFree(TestEnvironment::getHostDevice(), Alloc));
}
Loading
Loading