Skip to content

Commit ec77765

Browse files
committed
Add fixes and improvements
1 parent 65545d2 commit ec77765

File tree

5 files changed

+59
-51
lines changed

5 files changed

+59
-51
lines changed

offload/libomptarget/device.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,9 @@ bool DeviceTy::isAccessiblePtr(const void *Ptr, size_t Size) {
373373
}
374374

375375
uint64_t DeviceTy::getMaxSharedTeamMemory() {
376-
InfoTreeNode Info = RTL->query_device_info(RTLDeviceID);
376+
InfoTreeNode Info = RTL->obtain_device_info(RTLDeviceID);
377377

378-
auto EntryOpt = Info.get(DeviceInfo::WORK_GROUP_SHARED_MEM_SIZE);
378+
auto EntryOpt = Info.get(DeviceInfo::WORK_GROUP_LOCAL_MEM_SIZE);
379379
if (!EntryOpt)
380380
return 0;
381381

offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ typedef enum {
5252
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6,
5353
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7,
5454
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15,
55-
HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE = 16,
5655
} hsa_amd_memory_pool_info_t;
5756

5857
typedef enum {

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,18 @@ struct InfoTreeNode {
299299
}
300300
};
301301

302+
/// Configuration of dynamic block memory needed for launching a kernel.
303+
struct DynBlockMemConfTy {
304+
/// The size of the dynamic block memory buffer.
305+
uint32_t Size = 0;
306+
/// The size of dynamic shared memory natively provided by the device.
307+
uint32_t NativeSize = 0;
308+
/// The fallback that was triggered (if any).
309+
DynCGroupMemFallbackType Fallback = DynCGroupMemFallbackType::None;
310+
/// The fallback pointer if global memory was used as alternative.
311+
void *FallbackPtr = nullptr;
312+
};
313+
302314
/// Class wrapping a __tgt_device_image and its offload entry table on a
303315
/// specific device. This class is responsible for storing and managing
304316
/// the offload entries for an image on a device.
@@ -386,10 +398,11 @@ struct GenericKernelTy {
386398
}
387399

388400
/// Return a device pointer to a new kernel launch environment.
389-
Expected<KernelLaunchEnvironmentTy *> getKernelLaunchEnvironment(
390-
GenericDeviceTy &GenericDevice, const KernelArgsTy &KernelArgs,
391-
uint32_t BlockMemSize, DynCGroupMemFallbackType DynBlockMemFb,
392-
void *DynBlockMemFbPtr, AsyncInfoWrapperTy &AsyncInfoWrapper) const;
401+
Expected<KernelLaunchEnvironmentTy *>
402+
getKernelLaunchEnvironment(GenericDeviceTy &GenericDevice,
403+
const KernelArgsTy &KernelArgs,
404+
const DynBlockMemConfTy &DynBlockMemConf,
405+
AsyncInfoWrapperTy &AsyncInfoWrapper) const;
393406

394407
/// Indicate whether an execution mode is valid.
395408
static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode) {
@@ -435,19 +448,11 @@ struct GenericKernelTy {
435448
uint32_t NumBlocks[3]) const;
436449

437450
private:
438-
/// Information about the dynamic block memory needed for launching a kernel.
439-
struct DynBlockMemInfoTy {
440-
/// The size of the dynamic block memory buffer.
441-
uint32_t Size = 0;
442-
/// The size of dynamic shared memory natively provided by the device.
443-
uint32_t NativeSize = 0;
444-
/// The fallback that was triggered (if any).
445-
DynCGroupMemFallbackType DynBlockMemFb = DynCGroupMemFallbackType::None;
446-
/// The fallback pointer if global memory was used as alternative.
447-
void *FallbackPtr = nullptr;
448-
};
449-
450-
Expected<DynBlockMemInfoTy> prepareBlockMemory(GenericDeviceTy &GenericDevice, KernelArgsTy &KernelArgs);
451+
/// Prepare the block memory buffer requested for the kernel and execute the
452+
/// specified fallback if necessary.
453+
Expected<DynBlockMemConfTy> prepareBlockMemory(GenericDeviceTy &GenericDevice,
454+
KernelArgsTy &KernelArgs,
455+
uint32_t NumBlocks) const;
451456

452457
/// Prepare the arguments before launching the kernel.
453458
KernelLaunchParamsTy

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -437,8 +437,8 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
437437
Expected<KernelLaunchEnvironmentTy *>
438438
GenericKernelTy::getKernelLaunchEnvironment(
439439
GenericDeviceTy &GenericDevice, const KernelArgsTy &KernelArgs,
440-
uint32_t BlockMemSize, DynCGroupMemFallbackType DynBlockMemFb,
441-
void *DynBlockMemFbPtr, AsyncInfoWrapperTy &AsyncInfoWrapper) const {
440+
const DynBlockMemConfTy &DynBlockMemConf,
441+
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
442442
// Ctor/Dtor have no arguments, replaying uses the original kernel launch
443443
// environment. Older versions of the compiler do not generate a kernel
444444
// launch environment.
@@ -480,9 +480,9 @@ GenericKernelTy::getKernelLaunchEnvironment(
480480
LocalKLE.ReductionBuffer = nullptr;
481481
}
482482

483-
LocalKLE.DynCGroupMemSize = BlockMemSize;
484-
LocalKLE.DynCGroupMemFbPtr = DynBlockMemFbPtr;
485-
LocalKLE.DynCGroupMemFb = DynBlockMemFb;
483+
LocalKLE.DynCGroupMemSize = DynBlockMemConf.Size;
484+
LocalKLE.DynCGroupMemFbPtr = DynBlockMemConf.FallbackPtr;
485+
LocalKLE.DynCGroupMemFb = DynBlockMemConf.Fallback;
486486

487487
INFO(OMP_INFOTYPE_DATA_TRANSFER, GenericDevice.getDeviceId(),
488488
"Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
@@ -518,47 +518,51 @@ Error GenericKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
518518
return Plugin::success();
519519
}
520520

521-
Expected<DynBlockMemInfoTy> prepareBlockMemory(GenericDeviceTy &GenericDevice, KernelArgsTy &KernelArgs) {
522-
uint32_t MaxSize = GenericDevice.getMaxBlockSharedMemSize();
523-
uint32_t DynSize = KernelArgs.DynCGroupMem;
524-
uint32_t TotalSize = StaticSize + DynSize;
525-
uint32_t DynNativeSize = DynSize;
521+
Expected<DynBlockMemConfTy>
522+
GenericKernelTy::prepareBlockMemory(GenericDeviceTy &GenericDevice,
523+
KernelArgsTy &KernelArgs,
524+
uint32_t NumBlocks) const {
525+
uint32_t MaxBlockMemSize = GenericDevice.getMaxBlockSharedMemSize();
526+
uint32_t DynBlockMemSize = KernelArgs.DynCGroupMem;
527+
uint32_t TotalBlockMemSize = StaticBlockMemSize + DynBlockMemSize;
528+
uint32_t DynNativeBlockMemSize = DynBlockMemSize;
526529
void *DynFallbackPtr = nullptr;
527530

528531
// Not enough block memory to cover the static one. Cannot run the kernel.
529-
if (StaticSize > MaxSize)
532+
if (StaticBlockMemSize > MaxBlockMemSize)
530533
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
531534
"Static block memory size exceeds maximum");
532535
// Not enough block memory to cover the dynamic one, and the fallback is aborting.
533536
else if (static_cast<DynCGroupMemFallbackType>(
534537
KernelArgs.Flags.DynCGroupMemFallback) ==
535538
DynCGroupMemFallbackType::Abort &&
536-
TotalSize > MaxSize)
539+
TotalBlockMemSize > MaxBlockMemSize)
537540
return Plugin::error(
538541
ErrorCode::INVALID_ARGUMENT,
539542
"Static and dynamic block memory size exceeds maximum");
540543

541544
DynCGroupMemFallbackType DynFallback = DynCGroupMemFallbackType::None;
542-
if (DynSize && (!GenericDevice.hasNativeBlockSharedMem() ||
543-
TotalSize > MaxSize)) {
545+
if (DynBlockMemSize && (!GenericDevice.hasNativeBlockSharedMem() ||
546+
TotalBlockMemSize > MaxBlockMemSize)) {
544547
// Launch without native dynamic block memory.
545-
DynNativeSize = 0;
548+
DynNativeBlockMemSize = 0;
546549
DynFallback = static_cast<DynCGroupMemFallbackType>(
547550
KernelArgs.Flags.DynCGroupMemFallback);
548551
if (DynFallback == DynCGroupMemFallbackType::DefaultMem) {
549552
// Get global memory as fallback.
550553
auto AllocOrErr = GenericDevice.dataAlloc(
551-
NumBlocks[0] * DynSize,
554+
NumBlocks * DynBlockMemSize,
552555
/*HostPtr=*/nullptr, TargetAllocTy::TARGET_ALLOC_DEVICE);
553556
if (!AllocOrErr)
554557
return AllocOrErr.takeError();
555558
DynFallbackPtr = *AllocOrErr;
556559
} else {
557560
// Do not provide any memory as fallback.
558-
DynSize = 0;
561+
DynBlockMemSize = 0;
559562
}
560563
}
561-
return { DynSize, DynNativeSize, DynFallback, DynFallbackPtr };
564+
return DynBlockMemConfTy{DynBlockMemSize, DynNativeBlockMemSize, DynFallback,
565+
DynFallbackPtr};
562566
}
563567

564568
Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
@@ -578,17 +582,18 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
578582
NumThreads[0], KernelArgs.ThreadLimit[0] > 0);
579583
}
580584

581-
auto DynBlockMemInfoOrErr = prepareBlockMemory(GenericDevice, KernelArgs);
582-
if (!DynBlockMemInfoOrErr)
583-
return DynBlockMemInfoOrErr.takeError();
585+
auto DynBlockMemConfOrErr =
586+
prepareBlockMemory(GenericDevice, KernelArgs, NumBlocks[0]);
587+
if (!DynBlockMemConfOrErr)
588+
return DynBlockMemConfOrErr.takeError();
584589

585-
DynBlockMemInfoTy &DynBlockMemInfo = *DynBlockMemInfoOrErr;
586-
if (DynBlockMemInfo.FallbackPtr)
587-
AsyncInfoWrapper.freeAllocationAfterSynchronization(DynBlockMemInfo.FallbackPtr);
590+
DynBlockMemConfTy &DynBlockMemConf = *DynBlockMemConfOrErr;
591+
if (DynBlockMemConf.FallbackPtr)
592+
AsyncInfoWrapper.freeAllocationAfterSynchronization(
593+
DynBlockMemConf.FallbackPtr);
588594

589595
auto KernelLaunchEnvOrErr = getKernelLaunchEnvironment(
590-
GenericDevice, KernelArgs, DynBlockMemInfo.Size, DynBlockMemInfo.Fallback,
591-
DynBlockMemInfo.FallbackPtr, AsyncInfoWrapper);
596+
GenericDevice, KernelArgs, DynBlockMemConf, AsyncInfoWrapper);
592597
if (!KernelLaunchEnvOrErr)
593598
return KernelLaunchEnvOrErr.takeError();
594599

@@ -619,8 +624,9 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
619624
printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
620625
return Err;
621626

622-
return launchImpl(GenericDevice, NumThreads, NumBlocks, DynBlockMemInfo.NativeSize,
623-
KernelArgs, LaunchParams, AsyncInfoWrapper);
627+
return launchImpl(GenericDevice, NumThreads, NumBlocks,
628+
DynBlockMemConf.NativeSize, KernelArgs, LaunchParams,
629+
AsyncInfoWrapper);
624630
}
625631

626632
KernelLaunchParamsTy GenericKernelTy::prepareArgs(
@@ -2044,7 +2050,7 @@ InfoTreeNode GenericPluginTy::obtain_device_info(int32_t DeviceId) {
20442050
toString(std::move(Err)).data());
20452051
return InfoTreeNode{};
20462052
}
2047-
return *InfoOrErr;
2053+
return std::move(*InfoOrErr);
20482054
}
20492055

20502056
void GenericPluginTy::print_device_info(int32_t DeviceId) {

offload/tools/deviceinfo/llvm-offload-device-info.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,6 @@ ol_result_t printDevice(std::ostream &S, ol_device_handle_t D) {
205205
S, D, OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, "Max Mem Allocation Size", "B"));
206206
OFFLOAD_ERR(printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_GLOBAL_MEM_SIZE,
207207
"Global Mem Size", "B"));
208-
OFFLOAD_ERR(printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE,
209-
"Work Group Shared Mem Size", "B"));
210208
OFFLOAD_ERR(
211209
printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE,
212210
"Work Group Shared Mem Size", "B"));

0 commit comments

Comments
 (0)