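cleanup: remove sub-device code from L0ThreadTLSTy, rename SubDevice to Device in L0KernelTy::getGroupsShape, and pass ReleaseQueue to synchronize() in L0DeviceTy::synchronizeImpl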

adurang · adurang · commit b8591792fa44 · 2025-09-26T10:47:28.000+02:00
diff --git a/offload/plugins-nextgen/level_zero/include/L0Kernel.h b/offload/plugins-nextgen/level_zero/include/L0Kernel.h
@@ -147,7 +147,7 @@ class L0KernelTy : public GenericKernelTy {
     return zeKernel;
   }
 
-  int32_t getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
+  int32_t getGroupsShape(L0DeviceTy &Device, int32_t NumTeams,
                          int32_t ThreadLimit, uint32_t *GroupSizes,
                          ze_group_count_t &GroupCounts, void *LoopDesc,
                          bool &AllowCooperative) const;
diff --git a/offload/plugins-nextgen/level_zero/include/TLS.h b/offload/plugins-nextgen/level_zero/include/TLS.h
@@ -25,9 +25,6 @@ namespace plugin {
 
 /// All thread-local data used by the Plugin
 class L0ThreadTLSTy {
-  /// Subdevice encoding
-  int64_t SubDeviceCode = 0;
-
   /// Async info tracking
   static constexpr int32_t PerThreadQueues = 10;
   AsyncQueueTy AsyncQueues[PerThreadQueues];
@@ -43,10 +40,6 @@ class L0ThreadTLSTy {
 
   void clear() {}
 
-  int64_t getSubDeviceCode() { return SubDeviceCode; }
-
-  void setSubDeviceCode(int64_t Code) { SubDeviceCode = Code; }
-
   AsyncQueueTy *getAsyncQueue() {
     AsyncQueueTy *ret = nullptr;
     if (UsedQueues < PerThreadQueues) {
diff --git a/offload/plugins-nextgen/level_zero/src/L0Device.cpp b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
@@ -445,7 +445,7 @@ Error L0DeviceTy::synchronizeImpl(__tgt_async_info &AsyncInfo,
                          " not implemented yet\n",
                          __func__);
   }
-  int32_t RC = synchronize(&AsyncInfo);
+  int32_t RC = synchronize(&AsyncInfo, ReleaseQueue);
   return Plugin::check(RC, "Error in synchronizeImpl %d", RC);
 }
 
diff --git a/offload/plugins-nextgen/level_zero/src/L0Kernel.cpp b/offload/plugins-nextgen/level_zero/src/L0Kernel.cpp
@@ -397,13 +397,13 @@ int32_t L0KernelTy::decideLoopKernelGroupArguments(
   return OFFLOAD_SUCCESS;
 }
 
-int32_t L0KernelTy::getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
+int32_t L0KernelTy::getGroupsShape(L0DeviceTy &Device, int32_t NumTeams,
                                    int32_t ThreadLimit, uint32_t *GroupSizes,
                                    ze_group_count_t &GroupCounts,
                                    void *LoopDesc,
                                    bool &AllowCooperative) const {
 
-  const auto SubId = SubDevice.getDeviceId();
+  const auto DeviceId = Device.getDeviceId();
   const auto &KernelPR = getProperties();
 
   // Detect if we need to reduce available HW threads. We need this adjustment
@@ -419,13 +419,13 @@ int32_t L0KernelTy::getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
   // Read the most recent global thread limit and max teams.
   auto [NumTeamsICV, ThreadLimitICV] = readTeamsThreadLimit();
 
-  bool IsXeHPG = SubDevice.isDeviceArch(DeviceArchTy::DeviceArch_XeHPG);
+  bool IsXeHPG = Device.isDeviceArch(DeviceArchTy::DeviceArch_XeHPG);
   bool HalfNumThreads = ZeDebugEnabled && IsXeHPG;
   uint32_t KernelWidth = KernelPR.Width;
   uint32_t SIMDWidth = KernelPR.SIMDWidth;
-  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, SubId,
+  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
        "Assumed kernel SIMD width is %" PRIu32 "\n", SIMDWidth);
-  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, SubId,
+  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
        "Preferred team size is multiple of %" PRIu32 "\n", KernelWidth);
   assert(SIMDWidth <= KernelWidth && "Invalid SIMD width.");
 
@@ -439,10 +439,10 @@ int32_t L0KernelTy::getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
     DP("Max team size is set to %" PRId32 " (thread-limit-icv)\n", ThreadLimit);
   }
 
-  size_t MaxThreadLimit = SubDevice.getMaxGroupSize();
+  size_t MaxThreadLimit = Device.getMaxGroupSize();
   // Set correct max group size if the kernel was compiled with explicit SIMD
   if (SIMDWidth == 1) {
-    MaxThreadLimit = SubDevice.getNumThreadsPerSubslice();
+    MaxThreadLimit = Device.getNumThreadsPerSubslice();
   }
 
   if (KernelPR.MaxThreadGroupSize < MaxThreadLimit) {
@@ -463,7 +463,7 @@ int32_t L0KernelTy::getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
          NumTeams);
     } else if (NumTeamsICV > 0) {
       // OMP_NUM_TEAMS only matters, if num_teams() clause is absent.
-      INFO(OMP_INFOTYPE_PLUGIN_KERNEL, SubId,
+      INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
            "OMP_NUM_TEAMS(%" PRId32 ") is ignored\n", NumTeamsICV);
 
       NumTeams = NumTeamsICV;
@@ -473,7 +473,7 @@ int32_t L0KernelTy::getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,
 
     bool UseLoopTC = LoopDesc;
     decideKernelGroupArguments(
-        SubDevice, (uint32_t)NumTeams, (uint32_t)ThreadLimit,
+        Device, (uint32_t)NumTeams, (uint32_t)ThreadLimit,
         UseLoopTC ? (TgtNDRangeDescTy *)LoopDesc : nullptr, GroupSizes,
         GroupCounts, HalfNumThreads, false);
     AllowCooperative = false;

Original file line number	Diff line number	Diff line change
`@@ -147,7 +147,7 @@ class L0KernelTy : public GenericKernelTy {`
`147`	`147`	`return zeKernel;`
`148`	`148`	`}`
`149`	`149`
`150`		`- int32_t getGroupsShape(L0DeviceTy &SubDevice, int32_t NumTeams,`
	`150`	`+ int32_t getGroupsShape(L0DeviceTy &Device, int32_t NumTeams,`
`151`	`151`	`int32_t ThreadLimit, uint32_t *GroupSizes,`
`152`	`152`	`ze_group_count_t &GroupCounts, void *LoopDesc,`
`153`	`153`	`bool &AllowCooperative) const;`
Original file line number	Diff line number	Diff line change
`@@ -445,7 +445,7 @@ Error L0DeviceTy::synchronizeImpl(__tgt_async_info &AsyncInfo,`
`445`	`445`	`" not implemented yet\n",`
`446`	`446`	`__func__);`
`447`	`447`	`}`
`448`		`- int32_t RC = synchronize(&AsyncInfo);`
	`448`	`+ int32_t RC = synchronize(&AsyncInfo, ReleaseQueue);`
`449`	`449`	`return Plugin::check(RC, "Error in synchronizeImpl %d", RC);`
`450`	`450`	`}`
`451`	`451`