diff --git a/source/adapters/native_cpu/enqueue.cpp b/source/adapters/native_cpu/enqueue.cpp
index 6e4094ddef..ec5a6cf339 100644
--- a/source/adapters/native_cpu/enqueue.cpp
+++ b/source/adapters/native_cpu/enqueue.cpp
@@ -138,12 +138,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
 #else
   bool isLocalSizeOne =
       ndr.LocalSize[0] == 1 && ndr.LocalSize[1] == 1 && ndr.LocalSize[2] == 1;
-  if (isLocalSizeOne && ndr.GlobalSize[0] > numParallelThreads) {
+  if (isLocalSizeOne && ndr.GlobalSize[0] > numParallelThreads &&
+      !hKernel->hasLocalArgs()) {
     // If the local size is one, we make the assumption that we are running a
     // parallel_for over a sycl::range.
-    // Todo: we could add compiler checks and
-    // kernel properties for this (e.g. check that no barriers are called, no
-    // local memory args).
+    // Todo: we could add more compiler checks and
+    // kernel properties for this (e.g. check that no barriers are called).
 
     // Todo: this assumes that dim 0 is the best dimension over which we want to
     // parallelize
diff --git a/source/adapters/native_cpu/kernel.hpp b/source/adapters/native_cpu/kernel.hpp
index e2df672d05..4d2dec85cb 100644
--- a/source/adapters/native_cpu/kernel.hpp
+++ b/source/adapters/native_cpu/kernel.hpp
@@ -142,7 +142,9 @@ struct ur_kernel_handle_t_ : RefCounted {
     _localMemPoolSize = reqSize;
   }
 
-  // To be called before executing a work group
+  bool hasLocalArgs() const { return !_localArgInfo.empty(); }
+
+  // To be called before executing a work group if hasLocalArgs() returns true
   void handleLocalArgs(size_t numParallelThread, size_t threadId) {
     // For each local argument we have size*numthreads
     size_t offset = 0;