Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions offload/liboffload/API/Device.td
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def ol_device_info_t : Enum {
TaggedEtor<"ADDRESS_BITS", "uint32_t", "Number of bits used to represent an address in device memory">,
TaggedEtor<"MAX_MEM_ALLOC_SIZE", "uint64_t", "The maximum size of memory object allocation in bytes">,
TaggedEtor<"GLOBAL_MEM_SIZE", "uint64_t", "The size of global device memory in bytes">,
TaggedEtor<"WORK_GROUP_SHARED_MEM_SIZE", "uint64_t", "The maximum size of shared memory per work group in bytes">,
];
list<TaggedEtor> fp_configs = !foreach(type, ["Single", "Double", "Half"], TaggedEtor<type # "_FP_CONFIG", "ol_device_fp_capability_flags_t", type # " precision floating point capability">);
list<TaggedEtor> native_vec_widths = !foreach(type, ["char","short","int","long","float","double","half"], TaggedEtor<"NATIVE_VECTOR_WIDTH_" # type, "uint32_t", "Native vector width for " # type>);
Expand Down
8 changes: 8 additions & 0 deletions offload/liboffload/src/OffloadImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,13 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
return Info.write(static_cast<uint32_t>(Value));
}

case OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE: {
  // Forward the plugin-provided entry only when it is stored as a uint64_t;
  // any other stored alternative is reported as a backend failure.
  if (std::holds_alternative<uint64_t>(Entry->Value))
    return Info.write(std::get<uint64_t>(Entry->Value));
  return makeError(ErrorCode::BACKEND_FAILURE,
                   "plugin returned incorrect type");
}

case OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION:
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION: {
// {x, y, z} triples
Expand Down Expand Up @@ -590,6 +597,7 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
return Info.write<uint32_t>(std::numeric_limits<uintptr_t>::digits);
case OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE:
case OL_DEVICE_INFO_GLOBAL_MEM_SIZE:
case OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE:
return Info.write<uint64_t>(0);
default:
return createOffloadError(ErrorCode::INVALID_ENUMERATION,
Expand Down
14 changes: 14 additions & 0 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2186,6 +2186,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (auto Err = checkIfAPU())
return Err;

// Record the size of the first memory pool that reports itself as group
// memory; pools that are not group memory are skipped.
// NOTE(review): the pool size is read as size_t but MaxBlockSharedMemSize is
// declared as uint32_t, so the assignment below narrows implicitly - confirm
// a group pool can never exceed the 32-bit range.
for (const auto *Pool : AllMemoryPools) {
  if (!Pool->isGroup())
    continue;

  size_t GroupPoolBytes = 0;
  if (auto Err = Pool->getAttr(HSA_AMD_MEMORY_POOL_INFO_SIZE, GroupPoolBytes))
    return Err;

  MaxBlockSharedMemSize = GroupPoolBytes;
  // Only the first group pool is considered.
  break;
}

return Plugin::success();
}

Expand Down Expand Up @@ -2923,6 +2934,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Status == HSA_STATUS_SUCCESS)
Info.add("Cacheline Size", TmpUInt);

Info.add("Max Shared Memory per Work Group", MaxBlockSharedMemSize, "bytes",
DeviceInfo::WORK_GROUP_SHARED_MEM_SIZE);

Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Max Clock Freq", TmpUInt, "MHz",
Expand Down
6 changes: 6 additions & 0 deletions offload/plugins-nextgen/common/include/PluginInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Get the unique identifier of the device.
const char *getDeviceUid() const { return DeviceUid.c_str(); }

/// Return the per-block shared memory limit recorded for this device, i.e.
/// the current value of MaxBlockSharedMemSize (0 unless a plugin assigned it).
uint32_t getMaxBlockSharedMemSize() const {
  return MaxBlockSharedMemSize;
}

/// Set the context of the device if needed, before calling device-specific
/// functions. Plugins may implement this function as a no-op if not needed.
virtual Error setContext() = 0;
Expand Down Expand Up @@ -1251,6 +1254,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Internal representation for OMPT device (initialize & finalize)
std::atomic<bool> OmptInitialized;
#endif

/// The total per-block native shared memory that a kernel may use. Plugins
/// report this value with a "bytes" unit. It defaults to 0 and is expected to
/// be assigned by the device plugin (the AMDGPU plugin uses the size of its
/// group memory pool, the CUDA plugin uses
/// CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK).
uint32_t MaxBlockSharedMemSize = 0;
};

/// Class implementing common functionalities of offload plugins. Each plugin
Expand Down
12 changes: 8 additions & 4 deletions offload/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Err;
HardwareParallelism = NumMuliprocessors * (MaxThreadsPerSM / WarpSize);

// Query the per-block shared memory limit into a zero-initialized temporary
// so the local never holds an indeterminate value, and publish it to
// MaxBlockSharedMemSize only after the attribute query succeeded.
uint32_t MaxSharedMem = 0;
if (auto Err = getDeviceAttr(
        CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, MaxSharedMem))
  return Err;
MaxBlockSharedMemSize = MaxSharedMem;

return Plugin::success();
}

Expand Down Expand Up @@ -1089,10 +1095,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Res == CUDA_SUCCESS)
Info.add("Total Constant Memory", TmpInt, "bytes");

Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
TmpInt);
if (Res == CUDA_SUCCESS)
Info.add("Max Shared Memory per Block", TmpInt, "bytes");
Info.add("Max Shared Memory per Block", MaxBlockSharedMemSize, "bytes",
DeviceInfo::WORK_GROUP_SHARED_MEM_SIZE);

Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, TmpInt);
if (Res == CUDA_SUCCESS)
Expand Down
3 changes: 3 additions & 0 deletions offload/tools/deviceinfo/llvm-offload-device-info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@ ol_result_t printDevice(std::ostream &S, ol_device_handle_t D) {
S, D, OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, "Max Mem Allocation Size", "B"));
OFFLOAD_ERR(printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_GLOBAL_MEM_SIZE,
"Global Mem Size", "B"));
OFFLOAD_ERR(printDeviceValue<uint64_t>(
S, D, OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE,
"Work Group Shared Mem Size", "B"));
OFFLOAD_ERR(
(printDeviceValue<ol_device_fp_capability_flags_t, PrintKind::FP_FLAGS>(
S, D, OL_DEVICE_INFO_SINGLE_FP_CONFIG,
Expand Down
5 changes: 5 additions & 0 deletions offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,11 @@ OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(GlobalMemSize, uint64_t,
OL_DEVICE_INFO_GLOBAL_MEM_SIZE, 0);
OL_DEVICE_INFO_TEST_HOST_SUCCESS(GlobalMemSize, uint64_t,
OL_DEVICE_INFO_GLOBAL_MEM_SIZE);
OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(SharedMemSize, uint64_t,
OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE,
0);
OL_DEVICE_INFO_TEST_HOST_SUCCESS(SharedMemSize, uint64_t,
OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE);

TEST_P(olGetDeviceInfoTest, InvalidNullHandleDevice) {
ol_device_type_t DeviceType;
Expand Down
2 changes: 2 additions & 0 deletions offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ OL_DEVICE_INFO_SIZE_TEST_EQ(MaxMemAllocSize, uint64_t,
OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE);
OL_DEVICE_INFO_SIZE_TEST_EQ(GlobalMemSize, uint64_t,
OL_DEVICE_INFO_GLOBAL_MEM_SIZE);
OL_DEVICE_INFO_SIZE_TEST_EQ(SharedMemSize, uint64_t,
OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE);

TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSizePerDimension) {
size_t Size = 0;
Expand Down
Loading