Skip to content

Commit b859179

Browse files
committed
cleanup
1 parent e550bf1 commit b859179

File tree

4 files changed

+11
-18
lines changed

4 files changed

+11
-18
lines changed

offload/plugins-nextgen/level_zero/include/L0Kernel.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ class L0KernelTy : public GenericKernelTy {
147147
return zeKernel;
148148
}
149149

150-
int32_t getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
150+
int32_t getGroupsShape(L0DeviceTy &Device, int32_t NumTeams,
151151
int32_t ThreadLimit, uint32_t *GroupSizes,
152152
ze_group_count_t &GroupCounts, void *LoopDesc,
153153
bool &AllowCooperative) const;

offload/plugins-nextgen/level_zero/include/TLS.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,6 @@ namespace plugin {
2525

2626
/// All thread-local data used by the Plugin
2727
class L0ThreadTLSTy {
28-
/// Subdevice encoding
29-
int64_t SubDeviceCode = 0;
30-
3128
/// Async info tracking
3229
static constexpr int32_t PerThreadQueues = 10;
3330
AsyncQueueTy AsyncQueues[PerThreadQueues];
@@ -43,10 +40,6 @@ class L0ThreadTLSTy {
4340

4441
void clear() {}
4542

46-
int64_t getSubDeviceCode() { return SubDeviceCode; }
47-
48-
void setSubDeviceCode(int64_t Code) { SubDeviceCode = Code; }
49-
5043
AsyncQueueTy *getAsyncQueue() {
5144
AsyncQueueTy *ret = nullptr;
5245
if (UsedQueues < PerThreadQueues) {

offload/plugins-nextgen/level_zero/src/L0Device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ Error L0DeviceTy::synchronizeImpl(__tgt_async_info &AsyncInfo,
445445
" not implemented yet\n",
446446
__func__);
447447
}
448-
int32_t RC = synchronize(&AsyncInfo);
448+
int32_t RC = synchronize(&AsyncInfo, ReleaseQueue);
449449
return Plugin::check(RC, "Error in synchronizeImpl %d", RC);
450450
}
451451

offload/plugins-nextgen/level_zero/src/L0Kernel.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -397,13 +397,13 @@ int32_t L0KernelTy::decideLoopKernelGroupArguments(
397397
return OFFLOAD_SUCCESS;
398398
}
399399

400-
int32_t L0KernelTy::getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
400+
int32_t L0KernelTy::getGroupsShape(L0DeviceTy &Device, int32_t NumTeams,
401401
int32_t ThreadLimit, uint32_t *GroupSizes,
402402
ze_group_count_t &GroupCounts,
403403
void *LoopDesc,
404404
bool &AllowCooperative) const {
405405

406-
const auto SubId = SubDevice.getDeviceId();
406+
const auto DeviceId = Device.getDeviceId();
407407
const auto &KernelPR = getProperties();
408408

409409
// Detect if we need to reduce available HW threads. We need this adjustment
@@ -419,13 +419,13 @@ int32_t L0KernelTy::getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
419419
// Read the most recent global thread limit and max teams.
420420
auto [NumTeamsICV, ThreadLimitICV] = readTeamsThreadLimit();
421421

422-
bool IsXeHPG = SubDevice.isDeviceArch(DeviceArchTy::DeviceArch_XeHPG);
422+
bool IsXeHPG = Device.isDeviceArch(DeviceArchTy::DeviceArch_XeHPG);
423423
bool HalfNumThreads = ZeDebugEnabled && IsXeHPG;
424424
uint32_t KernelWidth = KernelPR.Width;
425425
uint32_t SIMDWidth = KernelPR.SIMDWidth;
426-
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, SubId,
426+
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
427427
"Assumed kernel SIMD width is %" PRIu32 "\n", SIMDWidth);
428-
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, SubId,
428+
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
429429
"Preferred team size is multiple of %" PRIu32 "\n", KernelWidth);
430430
assert(SIMDWidth <= KernelWidth && "Invalid SIMD width.");
431431

@@ -439,10 +439,10 @@ int32_t L0KernelTy::getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
439439
DP("Max team size is set to %" PRId32 " (thread-limit-icv)\n", ThreadLimit);
440440
}
441441

442-
size_t MaxThreadLimit = SubDevice.getMaxGroupSize();
442+
size_t MaxThreadLimit = Device.getMaxGroupSize();
443443
// Set correct max group size if the kernel was compiled with explicit SIMD
444444
if (SIMDWidth == 1) {
445-
MaxThreadLimit = SubDevice.getNumThreadsPerSubslice();
445+
MaxThreadLimit = Device.getNumThreadsPerSubslice();
446446
}
447447

448448
if (KernelPR.MaxThreadGroupSize < MaxThreadLimit) {
@@ -463,7 +463,7 @@ int32_t L0KernelTy::getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
463463
NumTeams);
464464
} else if (NumTeamsICV > 0) {
465465
// OMP_NUM_TEAMS only matters if the num_teams() clause is absent.
466-
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, SubId,
466+
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
467467
"OMP_NUM_TEAMS(%" PRId32 ") is ignored\n", NumTeamsICV);
468468

469469
NumTeams = NumTeamsICV;
@@ -473,7 +473,7 @@ int32_t L0KernelTy::getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
473473

474474
bool UseLoopTC = LoopDesc;
475475
decideKernelGroupArguments(
476-
SubDevice, (uint32_t)NumTeams, (uint32_t)ThreadLimit,
476+
Device, (uint32_t)NumTeams, (uint32_t)ThreadLimit,
477477
UseLoopTC ? (TgtNDRangeDescTy *)LoopDesc : nullptr, GroupSizes,
478478
GroupCounts, HalfNumThreads, false);
479479
AllowCooperative = false;

0 commit comments

Comments
 (0)