Skip to content

Commit 67a60f8

Browse files
committed
Revert "[Offload] Add device info for shared memory (llvm#167817)"
This reverts commit 1a86f0a.
1 parent da1534d commit 67a60f8

File tree

9 files changed: 7 additions and 46 deletions.

offload/liboffload/API/Device.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ def ol_device_info_t : Enum {
4343
TaggedEtor<"ADDRESS_BITS", "uint32_t", "Number of bits used to represent an address in device memory">,
4444
TaggedEtor<"MAX_MEM_ALLOC_SIZE", "uint64_t", "The maximum size of memory object allocation in bytes">,
4545
TaggedEtor<"GLOBAL_MEM_SIZE", "uint64_t", "The size of global device memory in bytes">,
46-
TaggedEtor<"WORK_GROUP_LOCAL_MEM_SIZE", "uint64_t", "The maximum size of local shared memory per work group in bytes">,
4746
];
4847
list<TaggedEtor> fp_configs = !foreach(type, ["Single", "Double", "Half"], TaggedEtor<type # "_FP_CONFIG", "ol_device_fp_capability_flags_t", type # " precision floating point capability">);
4948
list<TaggedEtor> native_vec_widths = !foreach(type, ["char","short","int","long","float","double","half"], TaggedEtor<"NATIVE_VECTOR_WIDTH_" # type, "uint32_t", "Native vector width for " # type>);

offload/liboffload/src/OffloadImpl.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -495,13 +495,6 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
495495
return Info.write(static_cast<uint32_t>(Value));
496496
}
497497

498-
case OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE: {
499-
if (!std::holds_alternative<uint64_t>(Entry->Value))
500-
return makeError(ErrorCode::BACKEND_FAILURE,
501-
"plugin returned incorrect type");
502-
return Info.write(std::get<uint64_t>(Entry->Value));
503-
}
504-
505498
case OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION:
506499
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION: {
507500
// {x, y, z} triples
@@ -597,7 +590,6 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
597590
return Info.write<uint32_t>(std::numeric_limits<uintptr_t>::digits);
598591
case OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE:
599592
case OL_DEVICE_INFO_GLOBAL_MEM_SIZE:
600-
case OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE:
601593
return Info.write<uint64_t>(0);
602594
default:
603595
return createOffloadError(ErrorCode::INVALID_ENUMERATION,

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3452,15 +3452,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
34523452
// for map and zero-copy control
34533453
// TODO: put them back in constructor
34543454
// readEnvVars();
3455-
// Retrieve the size of the group memory.
3456-
for (const auto *Pool : AllMemoryPools) {
3457-
if (Pool->isGroup()) {
3458-
if (auto Err = Pool->getAttr(HSA_AMD_MEMORY_POOL_INFO_SIZE,
3459-
MaxBlockSharedMemSize))
3460-
return Err;
3461-
break;
3462-
}
3463-
}
34643455

34653456
return Plugin::success();
34663457
}
@@ -4336,9 +4327,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
43364327
if (Status == HSA_STATUS_SUCCESS)
43374328
Info.add("Cacheline Size", TmpUInt);
43384329

4339-
Info.add("Max Shared Memory per Work Group", MaxBlockSharedMemSize, "bytes",
4340-
DeviceInfo::WORK_GROUP_LOCAL_MEM_SIZE);
4341-
43424330
Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, TmpUInt);
43434331
if (Status == HSA_STATUS_SUCCESS)
43444332
Info.add("Max Clock Freq", TmpUInt, "MHz",

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -869,10 +869,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
869869
/// Get the unique identifier of the device.
870870
const char *getDeviceUid() const { return DeviceUid.c_str(); }
871871

872-
/// Get the total shared memory per block (in bytes) that can be used in any
873-
/// kernel.
874-
size_t getMaxBlockSharedMemSize() const { return MaxBlockSharedMemSize; }
875-
876872
/// Set the context of the device if needed, before calling device-specific
877873
/// functions. Plugins may implement this function as a no-op if not needed.
878874
virtual Error setContext() = 0;
@@ -1583,9 +1579,6 @@ struct KernelRunRecordTy {
15831579
std::unordered_map<std::string, TuningMetadataTy> TuningData;
15841580
/// Internal representation for OMPT device (initialize & finalize)
15851581
std::atomic<bool> OmptInitialized;
1586-
1587-
/// The total per-block native shared memory that a kernel may use.
1588-
size_t MaxBlockSharedMemSize = 0;
15891582
};
15901583

15911584
/// Class implementing common functionalities of offload plugins. Each plugin

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -382,12 +382,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
382382
return Err;
383383
HardwareParallelism = NumMuliprocessors * (MaxThreadsPerSM / WarpSize);
384384

385-
uint32_t MaxSharedMem;
386-
if (auto Err = getDeviceAttr(
387-
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, MaxSharedMem))
388-
return Err;
389-
MaxBlockSharedMemSize = MaxSharedMem;
390-
391385
return Plugin::success();
392386
}
393387

@@ -1098,8 +1092,10 @@ struct CUDADeviceTy : public GenericDeviceTy {
10981092
if (Res == CUDA_SUCCESS)
10991093
Info.add("Total Constant Memory", TmpInt, "bytes");
11001094

1101-
Info.add("Max Shared Memory per Block", MaxBlockSharedMemSize, "bytes",
1102-
DeviceInfo::WORK_GROUP_LOCAL_MEM_SIZE);
1095+
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
1096+
TmpInt);
1097+
if (Res == CUDA_SUCCESS)
1098+
Info.add("Max Shared Memory per Block", TmpInt, "bytes");
11031099

11041100
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, TmpInt);
11051101
if (Res == CUDA_SUCCESS)

offload/tools/deviceinfo/llvm-offload-device-info.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,6 @@ ol_result_t printDevice(std::ostream &S, ol_device_handle_t D) {
205205
S, D, OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, "Max Mem Allocation Size", "B"));
206206
OFFLOAD_ERR(printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_GLOBAL_MEM_SIZE,
207207
"Global Mem Size", "B"));
208-
OFFLOAD_ERR(
209-
printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE,
210-
"Work Group Shared Mem Size", "B"));
211208
OFFLOAD_ERR(
212209
(printDeviceValue<ol_device_fp_capability_flags_t, PrintKind::FP_FLAGS>(
213210
S, D, OL_DEVICE_INFO_SINGLE_FP_CONFIG,

offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -217,11 +217,6 @@ OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(GlobalMemSize, uint64_t,
217217
OL_DEVICE_INFO_GLOBAL_MEM_SIZE, 0);
218218
OL_DEVICE_INFO_TEST_HOST_SUCCESS(GlobalMemSize, uint64_t,
219219
OL_DEVICE_INFO_GLOBAL_MEM_SIZE);
220-
OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(SharedMemSize, uint64_t,
221-
OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE,
222-
0);
223-
OL_DEVICE_INFO_TEST_HOST_SUCCESS(SharedMemSize, uint64_t,
224-
OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE);
225220

226221
TEST_P(olGetDeviceInfoTest, InvalidNullHandleDevice) {
227222
ol_device_type_t DeviceType;

offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,6 @@ OL_DEVICE_INFO_SIZE_TEST_EQ(MaxMemAllocSize, uint64_t,
7171
OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE);
7272
OL_DEVICE_INFO_SIZE_TEST_EQ(GlobalMemSize, uint64_t,
7373
OL_DEVICE_INFO_GLOBAL_MEM_SIZE);
74-
OL_DEVICE_INFO_SIZE_TEST_EQ(SharedMemSize, uint64_t,
75-
OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE);
7674

7775
TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSizePerDimension) {
7876
size_t Size = 0;

revert_patches.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,6 @@ d57230c7 [AMDGPU][MC] Disallow op_sel in some VOP3P dot instructions (#100485)
55
breaks build of ROCmValidationSuite
66
[C2y] Support WG14 N3457, the __COUNTER__ macro (#162662)
77
---
8+
needs more integration offload.
9+
[Offload] Add device info for shared memory (#167817)
10+
--

0 commit comments

Comments
 (0)