Skip to content

Commit e1f0d50

Browse files
authored
[SYCL][ABI-break] Cleanup reduction ABI entries (#20815)
1 parent b4aedf0 commit e1f0d50

File tree

3 files changed

+11
-94
lines changed

3 files changed

+11
-94
lines changed

sycl/source/detail/reduction.cpp

Lines changed: 11 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -49,44 +49,21 @@ __SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize,
4949
return WGSize;
5050
}
5151

52-
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
53-
// Inline this helper:
54-
#endif
55-
uint32_t reduGetMaxNumConcurrentWorkGroups(device_impl &Dev) {
52+
// Returns the estimated number of physical threads on the device associated
53+
// with the given handler.
54+
__SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups(handler &cgh) {
55+
const device_impl &Dev = getSyclObjImpl(cgh)->get_device();
5656
uint32_t NumThreads = Dev.get_info<sycl::info::device::max_compute_units>();
5757
// TODO: The heuristics here require additional tuning for various devices
5858
// and vendors. Also, it would be better to check vendor/generation/etc.
5959
if (Dev.is_gpu() && Dev.get_info<sycl::info::device::host_unified_memory>())
6060
NumThreads *= 8;
6161
return NumThreads;
6262
}
63-
// Returns the estimated number of physical threads on the device associated
64-
// with the given queue.
65-
__SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups(handler &cgh) {
66-
return reduGetMaxNumConcurrentWorkGroups(getSyclObjImpl(cgh)->get_device());
67-
}
6863

69-
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
70-
__SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups(
71-
std::shared_ptr<sycl::detail::queue_impl> Queue) {
72-
// TODO: Graphs extension explicit API uses a handler with no queue attached,
73-
// so return some value here. In the future we should have access to the
74-
// device so can remove this.
75-
//
76-
// The 8 value was chosen as the hardcoded value as it is the returned
77-
// value for sycl::info::device::max_compute_units on
78-
// Intel HD Graphics devices used as a L0 backend during development.
79-
if (Queue == nullptr) {
80-
return 8;
81-
}
82-
return reduGetMaxNumConcurrentWorkGroups(Queue->getDeviceImpl());
83-
}
84-
#endif
85-
86-
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
87-
// Inline this helper:
88-
#endif
89-
size_t reduGetMaxWGSize(device_impl &Dev, size_t LocalMemBytesPerWorkItem) {
64+
__SYCL_EXPORT size_t reduGetMaxWGSize(handler &cgh,
65+
size_t LocalMemBytesPerWorkItem) {
66+
const device_impl &Dev = getSyclObjImpl(cgh)->get_device();
9067
size_t MaxWGSize = Dev.get_info<sycl::info::device::max_work_group_size>();
9168

9269
size_t WGSizePerMem = MaxWGSize * 2;
@@ -123,24 +100,9 @@ size_t reduGetMaxWGSize(device_impl &Dev, size_t LocalMemBytesPerWorkItem) {
123100

124101
return WGSize;
125102
}
126-
__SYCL_EXPORT size_t reduGetMaxWGSize(handler &cgh,
127-
size_t LocalMemBytesPerWorkItem) {
128-
return reduGetMaxWGSize(getSyclObjImpl(cgh)->get_device(),
129-
LocalMemBytesPerWorkItem);
130-
}
131-
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
132-
__SYCL_EXPORT
133-
size_t reduGetMaxWGSize(std::shared_ptr<sycl::detail::queue_impl> Queue,
134-
size_t LocalMemBytesPerWorkItem) {
135-
return reduGetMaxWGSize(Queue->getDeviceImpl(), LocalMemBytesPerWorkItem);
136-
}
137-
#endif
138103

139-
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
140-
// Inline this helper:
141-
#endif
142-
size_t reduGetPreferredWGSize(device_impl &Dev,
143-
size_t LocalMemBytesPerWorkItem) {
104+
__SYCL_EXPORT size_t reduGetPreferredWGSize(handler &cgh,
105+
size_t LocalMemBytesPerWorkItem) {
144106
// The maximum WGSize returned by CPU devices is very large and does not
145107
// help the reduction implementation: since all work associated with a
146108
// work-group is typically assigned to one CPU thread, selecting a large
@@ -150,6 +112,7 @@ size_t reduGetPreferredWGSize(device_impl &Dev,
150112
// behavior.
151113
using PrefWGConfig = sycl::detail::SYCLConfig<
152114
sycl::detail::SYCL_REDUCTION_PREFERRED_WORKGROUP_SIZE>;
115+
const device_impl &Dev = getSyclObjImpl(cgh)->get_device();
153116
if (Dev.is_cpu()) {
154117
size_t CPUMaxWGSize = PrefWGConfig::get(sycl::info::device_type::cpu);
155118
if (CPUMaxWGSize == 0)
@@ -177,46 +140,8 @@ size_t reduGetPreferredWGSize(device_impl &Dev,
177140
}
178141

179142
// Use the maximum work-group size otherwise.
180-
return reduGetMaxWGSize(Dev, LocalMemBytesPerWorkItem);
181-
}
182-
__SYCL_EXPORT size_t reduGetPreferredWGSize(handler &cgh,
183-
size_t LocalMemBytesPerWorkItem) {
184-
return reduGetPreferredWGSize(getSyclObjImpl(cgh)->get_device(),
185-
LocalMemBytesPerWorkItem);
186-
}
187-
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
188-
__SYCL_EXPORT size_t reduGetPreferredWGSize(std::shared_ptr<queue_impl> &Queue,
189-
size_t LocalMemBytesPerWorkItem) {
190-
// TODO: Graphs extension explicit API uses a handler with a null queue to
191-
// process CGFs, in future we should have access to the device so we can
192-
// correctly calculate this.
193-
//
194-
// The 32 value was chosen as the hardcoded value as it is the returned
195-
// value for SYCL_REDUCTION_PREFERRED_WORKGROUP_SIZE on
196-
// Intel HD Graphics devices used as a L0 backend during development.
197-
if (Queue == nullptr) {
198-
return 32;
199-
}
200-
device_impl &Dev = Queue->getDeviceImpl();
201-
202-
return reduGetPreferredWGSize(Dev, LocalMemBytesPerWorkItem);
203-
}
204-
#endif
205-
206-
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
207-
__SYCL_EXPORT void
208-
addCounterInit(handler &CGH, std::shared_ptr<sycl::detail::queue_impl> &Queue,
209-
std::shared_ptr<int> &Counter) {
210-
auto EventImpl = detail::event_impl::create_device_event(*Queue);
211-
EventImpl->setContextImpl(Queue->getContextImpl());
212-
EventImpl->setStateIncomplete();
213-
ur_event_handle_t UREvent = nullptr;
214-
MemoryManager::fill_usm(Counter.get(), *Queue, sizeof(int), {0}, {},
215-
&UREvent);
216-
EventImpl->setHandle(UREvent);
217-
CGH.depends_on(createSyclObjFromImpl<event>(EventImpl));
143+
return reduGetMaxWGSize(cgh, LocalMemBytesPerWorkItem);
218144
}
219-
#endif
220145

221146
__SYCL_EXPORT void verifyReductionProps(const property_list &Props) {
222147
auto CheckDataLessProperties = [](int PropertyKind) {

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3273,7 +3273,6 @@ _ZN4sycl3_V16detail13lgamma_r_implEfPi
32733273
_ZN4sycl3_V16detail13make_platformEmNS0_7backendE
32743274
_ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEE
32753275
_ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEERKNS0_7contextE
3276-
_ZN4sycl3_V16detail14addCounterInitERNS0_7handlerERSt10shared_ptrINS1_10queue_implEERS4_IiE
32773276
_ZN4sycl3_V16detail14getBorderColorENS0_19image_channel_orderE
32783277
_ZN4sycl3_V16detail14tls_code_loc_t5queryEv
32793278
_ZN4sycl3_V16detail14tls_code_loc_tC1ERKNS1_13code_locationE
@@ -3294,7 +3293,6 @@ _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6a
32943293
_ZN4sycl3_V16detail16get_pointer_typeEPKvRNS1_12context_implE
32953294
_ZN4sycl3_V16detail16openIPCMemHandleEPKSt4bytemRKNS0_7contextERKNS0_6deviceE
32963295
_ZN4sycl3_V16detail16reduGetMaxWGSizeERNS0_7handlerEm
3297-
_ZN4sycl3_V16detail16reduGetMaxWGSizeESt10shared_ptrINS1_10queue_implEEm
32983296
_ZN4sycl3_V16detail17HostProfilingInfo3endEv
32993297
_ZN4sycl3_V16detail17HostProfilingInfo5startEv
33003298
_ZN4sycl3_V16detail17device_global_map3addEPKvPKc
@@ -3332,7 +3330,6 @@ _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6devi
33323330
_ZN4sycl3_V16detail22has_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE
33333331
_ZN4sycl3_V16detail22has_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE
33343332
_ZN4sycl3_V16detail22reduGetPreferredWGSizeERNS0_7handlerEm
3335-
_ZN4sycl3_V16detail22reduGetPreferredWGSizeERSt10shared_ptrINS1_10queue_implEEm
33363333
_ZN4sycl3_V16detail22removeDuplicateDevicesERKSt6vectorINS0_6deviceESaIS3_EE
33373334
_ZN4sycl3_V16detail23constructorNotificationEPvS2_NS0_6access6targetENS3_4modeERKNS1_13code_locationE
33383335
_ZN4sycl3_V16detail24find_device_intersectionERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EE
@@ -3351,7 +3348,6 @@ _ZN4sycl3_V16detail30UnsampledImageAccessorBaseHostC1ENS0_5rangeILi3EEENS0_6acce
33513348
_ZN4sycl3_V16detail30UnsampledImageAccessorBaseHostC2ENS0_5rangeILi3EEENS0_6access4modeEPviiNS0_2idILi3EEENS0_18image_channel_typeENS0_19image_channel_orderERKNS0_13property_listE
33523349
_ZN4sycl3_V16detail33enable_ext_oneapi_default_contextEb
33533350
_ZN4sycl3_V16detail33reduGetMaxNumConcurrentWorkGroupsERNS0_7handlerE
3354-
_ZN4sycl3_V16detail33reduGetMaxNumConcurrentWorkGroupsESt10shared_ptrINS1_10queue_implEE
33553351
_ZN4sycl3_V16detail34addHostSampledImageAccessorAndWaitEPNS1_28SampledImageAccessorImplHostE
33563352
_ZN4sycl3_V16detail35sampledImageConstructorNotificationEPvS2_RKSt8optionalINS0_12image_targetEEPKvjRKNS1_13code_locationE
33573353
_ZN4sycl3_V16detail36addHostUnsampledImageAccessorAndWaitEPNS1_30UnsampledImageAccessorImplHostE

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3692,7 +3692,6 @@
36923692
?add@host_pipe_map@detail@_V1@sycl@@YAXPEBXPEBD@Z
36933693
?add@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA?AVnode@34567@AEBVproperty_list@67@@Z
36943694
?addArg@handler@_V1@sycl@@AEAAXW4kernel_param_kind_t@detail@23@PEAXHH@Z
3695-
?addCounterInit@detail@_V1@sycl@@YAXAEAVhandler@23@AEAV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@AEAV?$shared_ptr@H@6@@Z
36963695
?addGraphLeafDependencies@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXVnode@34567@@Z
36973696
?addHostAccessorAndWait@detail@_V1@sycl@@YAXPEAVAccessorImplHost@123@@Z
36983697
?addHostSampledImageAccessorAndWait@detail@_V1@sycl@@YAXPEAVSampledImageAccessorImplHost@123@@Z
@@ -4287,10 +4286,7 @@
42874286
?query@tls_code_loc_t@detail@_V1@sycl@@QEAAAEBUcode_location@234@XZ
42884287
?reduComputeWGSize@detail@_V1@sycl@@YA_K_K0AEA_K@Z
42894288
?reduGetMaxNumConcurrentWorkGroups@detail@_V1@sycl@@YAIAEAVhandler@23@@Z
4290-
?reduGetMaxNumConcurrentWorkGroups@detail@_V1@sycl@@YAIV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@@Z
42914289
?reduGetMaxWGSize@detail@_V1@sycl@@YA_KAEAVhandler@23@_K@Z
4292-
?reduGetMaxWGSize@detail@_V1@sycl@@YA_KV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K@Z
4293-
?reduGetPreferredWGSize@detail@_V1@sycl@@YA_KAEAV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K@Z
42944290
?reduGetPreferredWGSize@detail@_V1@sycl@@YA_KAEAVhandler@23@_K@Z
42954291
?registerDynamicParameter@handler@_V1@sycl@@AEAAXPEAVdynamic_parameter_impl@detail@experimental@oneapi@ext@23@H@Z
42964292
?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_mem@12345@AEBVdevice@45@AEBVcontext@45@@Z

0 commit comments

Comments
 (0)