File tree Expand file tree Collapse file tree 2 files changed +7
-5
lines changed
source/adapters/native_cpu Expand file tree Collapse file tree 2 files changed +7
-5
lines changed Original file line number Diff line number Diff line change @@ -138,12 +138,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
138138#else
139139 bool isLocalSizeOne =
140140 ndr.LocalSize [0 ] == 1 && ndr.LocalSize [1 ] == 1 && ndr.LocalSize [2 ] == 1 ;
141- if (isLocalSizeOne && ndr.GlobalSize [0 ] > numParallelThreads) {
141+ if (isLocalSizeOne && ndr.GlobalSize [0 ] > numParallelThreads &&
142+ !hKernel->hasLocalArgs ()) {
142143 // If the local size is one, we make the assumption that we are running a
143144 // parallel_for over a sycl::range.
144- // Todo: we could add compiler checks and
145- // kernel properties for this (e.g. check that no barriers are called, no
146- // local memory args).
145+ // Todo: we could add more compiler checks and
146+ // kernel properties for this (e.g. check that no barriers are called).
147147
148148 // Todo: this assumes that dim 0 is the best dimension over which we want to
149149 // parallelize
Original file line number Diff line number Diff line change @@ -142,7 +142,9 @@ struct ur_kernel_handle_t_ : RefCounted {
142142 _localMemPoolSize = reqSize;
143143 }
144144
145- // To be called before executing a work group
145+ bool hasLocalArgs () const { return !_localArgInfo.empty (); } // True when _localArgInfo is non-empty.
146+
147+ // To be called before executing a work group if local args are present
146148 void handleLocalArgs (size_t numParallelThread, size_t threadId) {
147149 // For each local argument we have size*numthreads
148150 size_t offset = 0 ;
You can’t perform that action at this time.
0 commit comments