@@ -49,44 +49,21 @@ __SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize,
   return WGSize;
 }

-#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
-// Inline this helper:
-#endif
-uint32_t reduGetMaxNumConcurrentWorkGroups(device_impl &Dev) {
+// Returns the estimated number of physical threads on the device associated
+// with the given queue.
+__SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups(handler &cgh) {
+  const device_impl &Dev = getSyclObjImpl(cgh)->get_device();
   uint32_t NumThreads = Dev.get_info<sycl::info::device::max_compute_units>();
   // TODO: The heuristics here require additional tuning for various devices
   // and vendors. Also, it would be better to check vendor/generation/etc.
   if (Dev.is_gpu() && Dev.get_info<sycl::info::device::host_unified_memory>())
     NumThreads *= 8;
   return NumThreads;
 }
-// Returns the estimated number of physical threads on the device associated
-// with the given queue.
-__SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups(handler &cgh) {
-  return reduGetMaxNumConcurrentWorkGroups(getSyclObjImpl(cgh)->get_device());
-}

-#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
-__SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups(
-    std::shared_ptr<sycl::detail::queue_impl> Queue) {
-  // TODO: Graphs extension explicit API uses a handler with no queue attached,
-  // so return some value here. In the future we should have access to the
-  // device so can remove this.
-  //
-  // The 8 value was chosen as the hardcoded value as it is the returned
-  // value for sycl::info::device::max_compute_units on
-  // Intel HD Graphics devices used as a L0 backend during development.
-  if (Queue == nullptr) {
-    return 8;
-  }
-  return reduGetMaxNumConcurrentWorkGroups(Queue->getDeviceImpl());
-}
-#endif
-
-#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
-// Inline this helper:
-#endif
-size_t reduGetMaxWGSize(device_impl &Dev, size_t LocalMemBytesPerWorkItem) {
+__SYCL_EXPORT size_t reduGetMaxWGSize(handler &cgh,
+                                      size_t LocalMemBytesPerWorkItem) {
+  const device_impl &Dev = getSyclObjImpl(cgh)->get_device();
   size_t MaxWGSize = Dev.get_info<sycl::info::device::max_work_group_size>();

   size_t WGSizePerMem = MaxWGSize * 2;
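For context, the handler-based reduGetMaxNumConcurrentWorkGroups above keeps the pre-existing heuristic and only changes how the device_impl is obtained. A minimal standalone sketch of that heuristic follows; the function name and the example device figures are illustrative, not taken from this patch:

#include <cstdint>

// Mirror of the heuristic in reduGetMaxNumConcurrentWorkGroups: start from
// max_compute_units and assume roughly 8 concurrent work-groups per compute
// unit on GPUs with host-unified (integrated) memory.
uint32_t estimateMaxConcurrentWorkGroups(uint32_t MaxComputeUnits, bool IsGpu,
                                         bool HasHostUnifiedMemory) {
  uint32_t NumThreads = MaxComputeUnits;
  if (IsGpu && HasHostUnifiedMemory)
    NumThreads *= 8;
  return NumThreads;
}

// For example: a discrete GPU reporting 512 compute units yields 512, while an
// integrated GPU reporting 24 compute units with host-unified memory yields
// 24 * 8 = 192.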
@@ -123,24 +100,9 @@ size_t reduGetMaxWGSize(device_impl &Dev, size_t LocalMemBytesPerWorkItem) {

   return WGSize;
 }
-__SYCL_EXPORT size_t reduGetMaxWGSize(handler &cgh,
-                                      size_t LocalMemBytesPerWorkItem) {
-  return reduGetMaxWGSize(getSyclObjImpl(cgh)->get_device(),
-                          LocalMemBytesPerWorkItem);
-}
-#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
-__SYCL_EXPORT
-size_t reduGetMaxWGSize(std::shared_ptr<sycl::detail::queue_impl> Queue,
-                        size_t LocalMemBytesPerWorkItem) {
-  return reduGetMaxWGSize(Queue->getDeviceImpl(), LocalMemBytesPerWorkItem);
-}
-#endif

-#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
-// Inline this helper:
-#endif
-size_t reduGetPreferredWGSize(device_impl &Dev,
-                              size_t LocalMemBytesPerWorkItem) {
+__SYCL_EXPORT size_t reduGetPreferredWGSize(handler &cgh,
+                                            size_t LocalMemBytesPerWorkItem) {
   // The maximum WGSize returned by CPU devices is very large and does not
   // help the reduction implementation: since all work associated with a
   // work-group is typically assigned to one CPU thread, selecting a large
@@ -150,6 +112,7 @@ size_t reduGetPreferredWGSize(device_impl &Dev,
   // behavior.
   using PrefWGConfig = sycl::detail::SYCLConfig<
       sycl::detail::SYCL_REDUCTION_PREFERRED_WORKGROUP_SIZE>;
+  const device_impl &Dev = getSyclObjImpl(cgh)->get_device();
   if (Dev.is_cpu()) {
     size_t CPUMaxWGSize = PrefWGConfig::get(sycl::info::device_type::cpu);
     if (CPUMaxWGSize == 0)
@@ -177,46 +140,8 @@ size_t reduGetPreferredWGSize(device_impl &Dev,
   }

   // Use the maximum work-group size otherwise.
-  return reduGetMaxWGSize(Dev, LocalMemBytesPerWorkItem);
-}
-__SYCL_EXPORT size_t reduGetPreferredWGSize(handler &cgh,
-                                            size_t LocalMemBytesPerWorkItem) {
-  return reduGetPreferredWGSize(getSyclObjImpl(cgh)->get_device(),
-                                LocalMemBytesPerWorkItem);
-}
-#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
-__SYCL_EXPORT size_t reduGetPreferredWGSize(std::shared_ptr<queue_impl> &Queue,
-                                            size_t LocalMemBytesPerWorkItem) {
-  // TODO: Graphs extension explicit API uses a handler with a null queue to
-  // process CGFs, in future we should have access to the device so we can
-  // correctly calculate this.
-  //
-  // The 32 value was chosen as the hardcoded value as it is the returned
-  // value for SYCL_REDUCTION_PREFERRED_WORKGROUP_SIZE on
-  // Intel HD Graphics devices used as a L0 backend during development.
-  if (Queue == nullptr) {
-    return 32;
-  }
-  device_impl &Dev = Queue->getDeviceImpl();
-
-  return reduGetPreferredWGSize(Dev, LocalMemBytesPerWorkItem);
-}
-#endif
-
-#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
-__SYCL_EXPORT void
-addCounterInit(handler &CGH, std::shared_ptr<sycl::detail::queue_impl> &Queue,
-               std::shared_ptr<int> &Counter) {
-  auto EventImpl = detail::event_impl::create_device_event(*Queue);
-  EventImpl->setContextImpl(Queue->getContextImpl());
-  EventImpl->setStateIncomplete();
-  ur_event_handle_t UREvent = nullptr;
-  MemoryManager::fill_usm(Counter.get(), *Queue, sizeof(int), {0}, {},
-                          &UREvent);
-  EventImpl->setHandle(UREvent);
-  CGH.depends_on(createSyclObjFromImpl<event>(EventImpl));
+  return reduGetMaxWGSize(cgh, LocalMemBytesPerWorkItem);
 }
-#endif

 __SYCL_EXPORT void verifyReductionProps(const property_list &Props) {
   auto CheckDataLessProperties = [](int PropertyKind) {
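With the queue_impl overloads and addCounterInit gone, the handler-based overloads above are the remaining exported entry points. A hypothetical call site, assuming the helpers are reachable as sycl::detail::reduGet* (the wrapper function below and that namespace qualification are assumptions, not shown in this diff):

#include <cstdint>
#include <sycl/sycl.hpp>

// Hypothetical sketch: size a reduction from inside a command-group function
// using only the handler. Not part of the patch.
static void sizeReduction(sycl::handler &CGH, size_t LocalMemBytesPerWorkItem) {
  uint32_t MaxConcurrentWGs =
      sycl::detail::reduGetMaxNumConcurrentWorkGroups(CGH);
  size_t MaxWGSize =
      sycl::detail::reduGetMaxWGSize(CGH, LocalMemBytesPerWorkItem);
  size_t PrefWGSize =
      sycl::detail::reduGetPreferredWGSize(CGH, LocalMemBytesPerWorkItem);
  // A real caller would use these values to shape the reduction's nd_range;
  // the casts below only silence unused-variable warnings in this sketch.
  (void)MaxConcurrentWGs;
  (void)MaxWGSize;
  (void)PrefWGSize;
}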