diff --git a/source/adapters/native_cpu/common.hpp b/source/adapters/native_cpu/common.hpp
index af0d11c5af..2b4aabfbad 100644
--- a/source/adapters/native_cpu/common.hpp
+++ b/source/adapters/native_cpu/common.hpp
@@ -12,7 +12,6 @@
 #include "logger/ur_logger.hpp"
 #include "ur/ur.hpp"
-#include <chrono>
 
 constexpr size_t MaxMessageSize = 256;
 
@@ -71,31 +70,3 @@ template <typename T> inline void decrementOrDelete(T *refC) {
   if (refC->decrementReferenceCount() == 0)
     delete refC;
 }
-
-inline uint64_t get_timestamp() {
-  return std::chrono::duration_cast<std::chrono::nanoseconds>(
-             std::chrono::high_resolution_clock::now().time_since_epoch())
-      .count();
-}
-
-namespace native_cpu {
-
-inline void *aligned_malloc(size_t alignment, size_t size) {
-  void *ptr = nullptr;
-#ifdef _MSC_VER
-  ptr = _aligned_malloc(size, alignment);
-#else
-  ptr = std::aligned_alloc(alignment, size);
-#endif
-  return ptr;
-}
-
-inline void aligned_free(void *ptr) {
-#ifdef _MSC_VER
-  _aligned_free(ptr);
-#else
-  free(ptr);
-#endif
-}
-
-} // namespace native_cpu
diff --git a/source/adapters/native_cpu/context.hpp b/source/adapters/native_cpu/context.hpp
index b9d2d22dd1..c59ab4eafb 100644
--- a/source/adapters/native_cpu/context.hpp
+++ b/source/adapters/native_cpu/context.hpp
@@ -64,10 +64,17 @@ static size_t get_padding(uint32_t alignment) {
 // allocation so that the pointer returned to the user
 // always satisfies (ptr % align) == 0.
 static inline void *malloc_impl(uint32_t alignment, size_t size) {
+  void *ptr = nullptr;
   assert(alignment >= alignof(usm_alloc_info) &&
          "memory not aligned to usm_alloc_info");
-  void *ptr = native_cpu::aligned_malloc(
-      alignment, alloc_header_size + get_padding(alignment) + size);
+#ifdef _MSC_VER
+  ptr = _aligned_malloc(alloc_header_size + get_padding(alignment) + size,
+                        alignment);
+
+#else
+  ptr = std::aligned_alloc(alignment,
+                           alloc_header_size + get_padding(alignment) + size);
+#endif
   return ptr;
 }
 
@@ -93,8 +100,11 @@ struct ur_context_handle_t_ : RefCounted {
     const native_cpu::usm_alloc_info &info = native_cpu::get_alloc_info(ptr);
     UR_ASSERT(info.type != UR_USM_TYPE_UNKNOWN,
               UR_RESULT_ERROR_INVALID_MEM_OBJECT);
-
-    native_cpu::aligned_free(info.base_alloc_ptr);
+#ifdef _MSC_VER
+    _aligned_free(info.base_alloc_ptr);
+#else
+    free(info.base_alloc_ptr);
+#endif
     allocations.erase(ptr);
     return UR_RESULT_SUCCESS;
   }
diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp
index 93122c8fe6..2a829a82e1 100644
--- a/source/adapters/native_cpu/device.cpp
+++ b/source/adapters/native_cpu/device.cpp
@@ -10,7 +10,6 @@
 #include <ur_api.h>
 
-#include "common.hpp"
 #include "platform.hpp"
 
 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__)
@@ -248,6 +247,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(uint32_t{4});
   case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF:
     return ReturnValue(uint32_t{16});
+  // Imported from level_zero
   case UR_DEVICE_INFO_USM_HOST_SUPPORT:
   case UR_DEVICE_INFO_USM_DEVICE_SUPPORT:
   case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT:
@@ -469,12 +469,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
 UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(
     ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp,
     uint64_t *pHostTimestamp) {
-  std::ignore = hDevice;
+  std::ignore = hDevice; // todo
   if (pHostTimestamp) {
-    *pHostTimestamp = get_timestamp();
+    using namespace std::chrono;
+    *pHostTimestamp =
+        duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
+            .count();
   }
   if (pDeviceTimestamp) {
-    *pDeviceTimestamp = get_timestamp();
+    // todo: calculate elapsed time properly
+    using namespace std::chrono;
+    *pDeviceTimestamp =
+        duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
+            .count();
   }
   return UR_RESULT_SUCCESS;
 }
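A note on the context.hpp change above: `malloc_impl` now inlines the platform-specific aligned allocation that previously lived in `native_cpu::aligned_malloc`. A minimal self-contained sketch of that pattern — not part of the patch; `aligned_malloc_portable` is a hypothetical name, and `alloc_header_size`/`get_padding` refer to the helpers from context.hpp:

```cpp
#include <cstddef>
#include <cstdlib>
#ifdef _MSC_VER
#include <malloc.h> // _aligned_malloc / _aligned_free
#endif

// Illustrative layout: the header and padding precede the pointer handed
// back to the user, so the returned address satisfies (ptr % alignment) == 0.
//
//   base_alloc_ptr                 user pointer (aligned)
//   |                              |
//   [usm_alloc_info][padding......][user data...................]
inline void *aligned_malloc_portable(std::size_t alignment, std::size_t size) {
#ifdef _MSC_VER
  return _aligned_malloc(size, alignment); // must be freed with _aligned_free
#else
  // Strictly conforming std::aligned_alloc requires size to be a multiple of
  // alignment (glibc is lenient), so portable callers may need to round up.
  return std::aligned_alloc(alignment, size);
#endif
}
```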
diff --git a/source/adapters/native_cpu/enqueue.cpp b/source/adapters/native_cpu/enqueue.cpp
index 7e03b323cc..33d8c35c36 100644
--- a/source/adapters/native_cpu/enqueue.cpp
+++ b/source/adapters/native_cpu/enqueue.cpp
@@ -13,7 +13,6 @@
 #include "ur_api.h"
 
 #include "common.hpp"
-#include "event.hpp"
 #include "kernel.hpp"
 #include "memory.hpp"
 #include "queue.hpp"
@@ -68,8 +67,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
     const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
     const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
     const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  std::ignore = numEventsInWaitList;
+  std::ignore = phEventWaitList;
+  std::ignore = phEvent;
 
-  urEventWait(numEventsInWaitList, phEventWaitList);
   UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
   UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
   UR_ASSERT(pGlobalWorkOffset, UR_RESULT_ERROR_INVALID_NULL_POINTER);
@@ -102,9 +103,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
   }
   // TODO: add proper error checking
+  // TODO: add proper event dep management
   native_cpu::NDRDescT ndr(workDim, pGlobalWorkOffset, pGlobalWorkSize,
                            pLocalWorkSize);
-  auto &tp = hQueue->getDevice()->tp;
+  auto &tp = hQueue->device->tp;
   const size_t numParallelThreads = tp.num_threads();
   hKernel->updateMemPool(numParallelThreads);
   std::vector<std::future<void>> futures;
@@ -116,9 +118,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
                          ndr.GlobalSize[2], ndr.LocalSize[0], ndr.LocalSize[1],
                          ndr.LocalSize[2], ndr.GlobalOffset[0],
                          ndr.GlobalOffset[1], ndr.GlobalOffset[2]);
-  auto event = new ur_event_handle_t_(hQueue, UR_COMMAND_KERNEL_LAUNCH);
-  event->tick_start();
-
 #ifndef NATIVECPU_USE_OCK
   hKernel->handleLocalArgs(1, 0);
   for (unsigned g2 = 0; g2 < numWG2; g2++) {
@@ -128,7 +127,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
           for (unsigned local1 = 0; local1 < ndr.LocalSize[1]; local1++) {
             for (unsigned local0 = 0; local0 < ndr.LocalSize[0]; local0++) {
               state.update(g0, g1, g2, local0, local1, local2);
-              hKernel->_subhandler(hKernel->getArgs().data(), &state);
+              hKernel->_subhandler(hKernel->_args.data(), &state);
             }
           }
         }
@@ -159,12 +158,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
     for (unsigned g2 = 0; g2 < numWG2; g2++) {
       for (unsigned g1 = 0; g1 < numWG1; g1++) {
         for (unsigned g0 = 0; g0 < new_num_work_groups_0; g0 += 1) {
-          futures.emplace_back(tp.schedule_task(
-              [ndr, itemsPerThread, kernel = *hKernel, g0, g1, g2](size_t) {
+          futures.emplace_back(
+              tp.schedule_task([&ndr = std::as_const(ndr), itemsPerThread,
+                                hKernel, g0, g1, g2](size_t) {
                 native_cpu::state resized_state =
                     getResizedState(ndr, itemsPerThread);
                 resized_state.update(g0, g1, g2);
-                kernel._subhandler(kernel.getArgs().data(), &resized_state);
+                hKernel->_subhandler(hKernel->_args.data(), &resized_state);
               }));
         }
         // Peel the remaining work items. Since the local size is 1, we iterate
@@ -172,7 +172,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
         for (unsigned g0 = new_num_work_groups_0 * itemsPerThread;
              g0 < numWG0; g0++) {
           state.update(g0, g1, g2);
-          hKernel->_subhandler(hKernel->getArgs().data(), &state);
+          hKernel->_subhandler(hKernel->_args.data(), &state);
         }
       }
     }
@@ -190,7 +190,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
             for (unsigned g0 = 0; g0 < numWG0; g0++) {
               kernel.handleLocalArgs(numParallelThreads, threadId);
               state.update(g0, g1, g2);
-              kernel._subhandler(kernel.getArgs().data(), &state);
+              kernel._subhandler(kernel._args.data(), &state);
             }
           }));
     }
@@ -207,7 +207,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
                                 size_t threadId, ur_kernel_handle_t_ kernel) mutable {
             kernel.handleLocalArgs(numParallelThreads, threadId);
             state.update(g0, g1, g2);
-            kernel._subhandler(kernel.getArgs().data(), &state);
+            kernel._subhandler(kernel._args.data(), &state);
           });
       }
     }
@@ -216,12 +216,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
       auto groupsPerThread = numGroups / numParallelThreads;
       auto remainder = numGroups % numParallelThreads;
       for (unsigned thread = 0; thread < numParallelThreads; thread++) {
-        futures.emplace_back(
-            tp.schedule_task([groups, thread, groupsPerThread,
-                              kernel = *hKernel](size_t threadId) {
+        futures.emplace_back(tp.schedule_task(
+            [&groups, thread, groupsPerThread, hKernel](size_t threadId) {
              for (unsigned i = 0; i < groupsPerThread; i++) {
                auto index = thread * groupsPerThread + i;
-                groups[index](threadId, kernel);
+                groups[index](threadId, *hKernel);
              }
            }));
      }
@@ -229,32 +228,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
       // schedule the remaining tasks
       if (remainder) {
         futures.emplace_back(
-            tp.schedule_task([groups, remainder,
+            tp.schedule_task([&groups, remainder,
                               scheduled = numParallelThreads * groupsPerThread,
-                              kernel = *hKernel](size_t threadId) {
+                              hKernel](size_t threadId) {
              for (unsigned i = 0; i < remainder; i++) {
                auto index = scheduled + i;
-                groups[index](threadId, kernel);
+                groups[index](threadId, *hKernel);
              }
            }));
      }
    }
  }
+  for (auto &f : futures)
+    f.get();
 #endif // NATIVECPU_USE_OCK
-  event->set_futures(futures);
-
-  *phEvent = event;
-  event->set_callback([hKernel, event]() {
-    event->tick_end();
-    // TODO: avoid calling clear() here.
-    hKernel->_localArgInfo.clear();
-  });
-
-  if (hQueue->isInOrder()) {
-    urEventWait(1, phEvent);
-  }
-
+  // TODO: we should avoid calling clear here by avoiding using push_back
+  // in setKernelArgs.
+  hKernel->_args.clear();
+  hKernel->_localArgInfo.clear();
   return UR_RESULT_SUCCESS;
 }
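With the event machinery gone, `urEnqueueKernelLaunch` now blocks by draining the futures returned by the thread pool before it returns. A self-contained approximation of that scheduling pattern, using `std::async` as a stand-in for the adapter's `tp.schedule_task`:

```cpp
#include <future>
#include <vector>

int main() {
  constexpr unsigned numGroups = 8;
  std::vector<std::future<void>> futures;
  for (unsigned g = 0; g < numGroups; ++g)
    futures.emplace_back(std::async(std::launch::async, [g] {
      // stand-in for executing work-group g
      (void)g;
    }));
  for (auto &f : futures)
    f.get(); // synchronous launch: block until every group has finished
  return 0;
}
```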
@@ -282,23 +274,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
 template <bool IsRead>
 static inline ur_result_t enqueueMemBufferReadWriteRect_impl(
-    ur_queue_handle_t hQueue, ur_mem_handle_t Buff, bool,
+    ur_queue_handle_t, ur_mem_handle_t Buff, bool,
     ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset,
     ur_rect_region_t region, size_t BufferRowPitch, size_t BufferSlicePitch,
     size_t HostRowPitch, size_t HostSlicePitch,
     typename std::conditional<IsRead, void *, const void *>::type DstMem,
-    uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
-    ur_event_handle_t *phEvent) {
-  ur_event_handle_t event;
-  if constexpr (IsRead)
-    event = new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_READ_RECT);
-  else
-    event = new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_WRITE_RECT);
-  event->tick_start();
-  // TODO: blocking, check other constraints, performance optimizations
+    uint32_t, const ur_event_handle_t *, ur_event_handle_t *) {
+  // TODO: events, blocking, check other constraints, performance optimizations
   //       More sharing with level_zero where possible
-  urEventWait(NumEventsInWaitList, phEventWaitList);
   if (BufferRowPitch == 0)
     BufferRowPitch = region.width;
   if (BufferSlicePitch == 0)
@@ -322,26 +306,21 @@ static inline ur_result_t enqueueMemBufferReadWriteRect_impl(
       else
         buff_mem = ur_cast<const char *>(DstMem)[host_origin];
     }
-
-  event->tick_end();
-  *phEvent = event;
   return UR_RESULT_SUCCESS;
 }
 
 static inline ur_result_t doCopy_impl(ur_queue_handle_t hQueue, void *DstPtr,
                                       const void *SrcPtr, size_t Size,
                                       uint32_t numEventsInWaitList,
-                                      const ur_event_handle_t *phEventWaitList,
-                                      ur_event_handle_t *phEvent,
-                                      ur_command_t command_type) {
-  ur_event_handle_t event = new ur_event_handle_t_(hQueue, command_type);
-  event->tick_start();
-  urEventWait(numEventsInWaitList, phEventWaitList);
+                                      const ur_event_handle_t *EventWaitList,
+                                      ur_event_handle_t *Event) {
+  // todo: non-blocking, events, UR integration
+  std::ignore = EventWaitList;
+  std::ignore = Event;
+  std::ignore = hQueue;
+  std::ignore = numEventsInWaitList;
   if (SrcPtr != DstPtr && Size)
     memmove(DstPtr, SrcPtr, Size);
-  event->tick_end();
-  if (phEvent)
-    *phEvent = event;
   return UR_RESULT_SUCCESS;
 }
 
@@ -352,9 +331,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead(
   std::ignore = blockingRead;
 
   void *FromPtr = /*Src*/ hBuffer->_mem + offset;
-  auto res = doCopy_impl(hQueue, pDst, FromPtr, size, numEventsInWaitList,
-                         phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_READ);
-  return res;
+  return doCopy_impl(hQueue, pDst, FromPtr, size, numEventsInWaitList,
+                     phEventWaitList, phEvent);
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(
   std::ignore = blockingWrite;
 
   void *ToPtr = hBuffer->_mem + offset;
-  auto res = doCopy_impl(hQueue, ToPtr, pSrc, size, numEventsInWaitList,
-                         phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE);
-  return res;
+  return doCopy_impl(hQueue, ToPtr, pSrc, size, numEventsInWaitList,
+                     phEventWaitList, phEvent);
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect(
@@ -400,11 +377,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy(
     ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size,
     uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
     ur_event_handle_t *phEvent) {
-  urEventWait(numEventsInWaitList, phEventWaitList);
   const void *SrcPtr = hBufferSrc->_mem + srcOffset;
   void *DstPtr = hBufferDst->_mem + dstOffset;
   return doCopy_impl(hQueue, DstPtr, SrcPtr, size, numEventsInWaitList,
-                     phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY);
+                     phEventWaitList, phEvent);
 }
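For reference, the index arithmetic in `enqueueMemBufferReadWriteRect_impl` maps a 3D offset onto a flat buffer through the row and slice pitches (a pitch of 0 defaults to a tightly packed region). A small worked example with assumed pitch values:

```cpp
#include <cstddef>
#include <cstdio>

int main() {
  // Assumed: a 16-byte-wide region, 4 rows per slice, tightly packed.
  std::size_t rowPitch = 16, slicePitch = 16 * 4;
  std::size_t x = 2, y = 1, z = 3;
  // Byte offset of element (x, y, z), matching the adapter's
  // "z * slicePitch + y * rowPitch + x" origin computation.
  std::size_t origin = z * slicePitch + y * rowPitch + x;
  std::printf("byte offset = %zu\n", origin); // 3*64 + 1*16 + 2 = 210
  return 0;
}
```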
 UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect(
@@ -508,17 +484,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
     ur_map_flags_t mapFlags, size_t offset, size_t size,
     uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
     ur_event_handle_t *phEvent, void **ppRetMap) {
+  std::ignore = hQueue;
   std::ignore = blockingMap;
   std::ignore = mapFlags;
   std::ignore = size;
+  std::ignore = numEventsInWaitList;
+  std::ignore = phEventWaitList;
+  std::ignore = phEvent;
 
-  urEventWait(numEventsInWaitList, phEventWaitList);
-  ur_event_handle_t event =
-      new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_MAP);
-  event->tick_start();
   *ppRetMap = hBuffer->_mem + offset;
-  event->tick_end();
-  *phEvent = event;
   return UR_RESULT_SUCCESS;
 }
 
@@ -527,10 +501,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
     ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr,
     uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
     ur_event_handle_t *phEvent) {
+  std::ignore = hQueue;
   std::ignore = hMem;
   std::ignore = pMappedPtr;
 
-  urEventWait(numEventsInWaitList, phEventWaitList);
-  *phEvent = new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_UNMAP);
+  std::ignore = numEventsInWaitList;
+  std::ignore = phEventWaitList;
+  std::ignore = phEvent;
 
   return UR_RESULT_SUCCESS;
 }
 
@@ -539,10 +515,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
     ur_queue_handle_t hQueue, void *ptr, size_t patternSize,
     const void *pPattern, size_t size, uint32_t numEventsInWaitList,
     const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
-  urEventWait(numEventsInWaitList, phEventWaitList);
-  ur_event_handle_t event =
-      new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_MAP);
-  event->tick_start();
+  std::ignore = hQueue;
+  std::ignore = numEventsInWaitList;
+  std::ignore = phEventWaitList;
+  std::ignore = phEvent;
 
   UR_ASSERT(ptr, UR_RESULT_ERROR_INVALID_NULL_POINTER);
   UR_ASSERT(pPattern, UR_RESULT_ERROR_INVALID_NULL_POINTER);
@@ -588,10 +564,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
       }
     }
   }
-
-  event->tick_end();
-  *phEvent = event;
-
   return UR_RESULT_SUCCESS;
 }
 
@@ -599,19 +571,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
     ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc,
     size_t size, uint32_t numEventsInWaitList,
     const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  std::ignore = hQueue;
   std::ignore = blocking;
 
-  urEventWait(numEventsInWaitList, phEventWaitList);
-  ur_event_handle_t event =
-      new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_MAP);
-  event->tick_start();
+  std::ignore = numEventsInWaitList;
+  std::ignore = phEventWaitList;
+  std::ignore = phEvent;
 
   UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_QUEUE);
   UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER);
   UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER);
 
   memcpy(pDst, pSrc, size);
-  event->tick_end();
-  *phEvent = event;
   return UR_RESULT_SUCCESS;
 }
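The body of `urEnqueueUSMFill` is mostly unchanged context in the hunk above. As a reminder of the semantics it implements, here is a plausible reference version of the fill loop — `fill_pattern` is a hypothetical name, and a real adapter would typically special-case `patternSize == 1` with `memset`:

```cpp
#include <cassert>
#include <cstddef>
#include <cstring>

void fill_pattern(void *ptr, std::size_t patternSize, const void *pPattern,
                  std::size_t size) {
  // UR requires size to be a multiple of patternSize.
  assert(size % patternSize == 0);
  char *dst = static_cast<char *>(ptr);
  for (std::size_t i = 0; i < size; i += patternSize)
    std::memcpy(dst + i, pPattern, patternSize); // replicate the pattern
}
```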
diff --git a/source/adapters/native_cpu/event.cpp b/source/adapters/native_cpu/event.cpp
index 37eaf1f6d1..9049e3c1b6 100644
--- a/source/adapters/native_cpu/event.cpp
+++ b/source/adapters/native_cpu/event.cpp
@@ -11,70 +11,50 @@
 #include "ur_api.h"
 
 #include "common.hpp"
-#include "event.hpp"
-#include "queue.hpp"
-#include <future>
-#include <mutex>
 
 UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent,
                                                    ur_event_info_t propName,
                                                    size_t propSize,
                                                    void *pPropValue,
                                                    size_t *pPropSizeRet) {
-  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
-  switch (propName) {
-  case UR_EVENT_INFO_COMMAND_QUEUE:
-    return ReturnValue(hEvent->getQueue());
-  case UR_EVENT_INFO_COMMAND_TYPE:
-    return ReturnValue(hEvent->getCommandType());
-  case UR_EVENT_INFO_REFERENCE_COUNT:
-    return ReturnValue(hEvent->getReferenceCount());
-  case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS:
-    return ReturnValue(hEvent->getExecutionStatus());
-  case UR_EVENT_INFO_CONTEXT:
-    return ReturnValue(hEvent->getContext());
-  default:
-    break;
-  }
-
-  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+  std::ignore = hEvent;
+  std::ignore = propName;
+  std::ignore = propSize;
+  std::ignore = pPropValue;
+  std::ignore = pPropSizeRet;
+
+  DIE_NO_IMPLEMENTATION;
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
     ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize,
     void *pPropValue, size_t *pPropSizeRet) {
-  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
-  switch (propName) {
-  case UR_PROFILING_INFO_COMMAND_START:
-    return ReturnValue(hEvent->get_start_timestamp());
-  case UR_PROFILING_INFO_COMMAND_END:
-    return ReturnValue(hEvent->get_end_timestamp());
-  case UR_PROFILING_INFO_COMMAND_QUEUED:
-  case UR_PROFILING_INFO_COMMAND_SUBMIT:
-  case UR_PROFILING_INFO_COMMAND_COMPLETE:
-  default:
-    break;
-  }
-
-  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+  std::ignore = hEvent;
+  std::ignore = propName;
+  std::ignore = propSize;
+  std::ignore = pPropValue;
+  std::ignore = pPropSizeRet;
+
+  DIE_NO_IMPLEMENTATION;
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL
 urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) {
-  for (uint32_t i = 0; i < numEvents; i++) {
-    phEventWaitList[i]->wait();
-  }
+  std::ignore = numEvents;
+  std::ignore = phEventWaitList;
+  // TODO: currently we do everything synchronously so this is a no-op
   return UR_RESULT_SUCCESS;
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) {
-  hEvent->incrementReferenceCount();
-  return UR_RESULT_SUCCESS;
+  std::ignore = hEvent;
+
+  DIE_NO_IMPLEMENTATION;
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) {
-  decrementOrDelete(hEvent);
-  return UR_RESULT_SUCCESS;
+  std::ignore = hEvent;
+  DIE_NO_IMPLEMENTATION;
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle(
@@ -119,47 +99,3 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
 
   DIE_NO_IMPLEMENTATION;
 }
-
-ur_event_handle_t_::ur_event_handle_t_(ur_queue_handle_t queue,
-                                       ur_command_t command_type)
-    : queue(queue), context(queue->getContext()), command_type(command_type),
-      done(false) {
-  this->queue->addEvent(this);
-}
-
-ur_event_handle_t_::~ur_event_handle_t_() {
-  if (!done) {
-    wait();
-  }
-}
-
-void ur_event_handle_t_::wait() {
-  std::unique_lock<std::mutex> lock(mutex);
-  if (done) {
-    return;
-  }
-  for (auto &f : futures) {
-    f.wait();
-  }
-  queue->removeEvent(this);
-  done = true;
-  // The callback may need to acquire the lock, so we unlock it here
-  lock.unlock();
-
-  if (callback)
-    callback();
-}
-
-void ur_event_handle_t_::tick_start() {
-  if (!queue->isProfiling())
-    return;
-  std::lock_guard<std::mutex> lock(mutex);
-  timestamp_start = get_timestamp();
-}
-
-void ur_event_handle_t_::tick_end() {
-  if (!queue->isProfiling())
-    return;
-  std::lock_guard<std::mutex> lock(mutex);
-  timestamp_end = get_timestamp();
-}
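`urEventRetain`/`urEventRelease` previously relied on the `RefCounted` base class and the `decrementOrDelete` helper from common.hpp. The helper below is reproduced from common.hpp; the `RefCounted` struct is an approximation for illustration only:

```cpp
#include <atomic>
#include <cstdint>

// Approximation of the RefCounted base used by the adapter's handle types.
struct RefCounted {
  std::atomic<uint32_t> refCount{1};
  uint32_t incrementReferenceCount() { return ++refCount; }
  uint32_t decrementReferenceCount() { return --refCount; }
};

// As in common.hpp: delete the object once the last reference is dropped.
template <typename T> inline void decrementOrDelete(T *refC) {
  if (refC->decrementReferenceCount() == 0)
    delete refC;
}
```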
diff --git a/source/adapters/native_cpu/event.hpp b/source/adapters/native_cpu/event.hpp
deleted file mode 100644
index 60176a33a6..0000000000
--- a/source/adapters/native_cpu/event.hpp
+++ /dev/null
@@ -1,66 +0,0 @@
-//===----------- event.hpp - Native CPU Adapter ---------------------------===//
-//
-// Copyright (C) 2023 Intel Corporation
-//
-// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
-// Exceptions. See LICENSE.TXT
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#pragma once
-#include "common.hpp"
-#include "ur_api.h"
-#include <functional>
-#include <future>
-#include <mutex>
-#include <vector>
-
-struct ur_event_handle_t_ : RefCounted {
-
-  ur_event_handle_t_(ur_queue_handle_t queue, ur_command_t command_type);
-
-  ~ur_event_handle_t_();
-
-  void set_callback(const std::function<void()> &cb) { callback = cb; }
-
-  void wait();
-
-  uint32_t getExecutionStatus() {
-    // TODO: add support for UR_EVENT_STATUS_RUNNING
-    std::lock_guard<std::mutex> lock(mutex);
-    if (done) {
-      return UR_EVENT_STATUS_COMPLETE;
-    }
-    return UR_EVENT_STATUS_SUBMITTED;
-  }
-
-  ur_queue_handle_t getQueue() const { return queue; }
-
-  ur_context_handle_t getContext() const { return context; }
-
-  ur_command_t getCommandType() const { return command_type; }
-
-  void set_futures(std::vector<std::future<void>> &fs) {
-    std::lock_guard<std::mutex> lock(mutex);
-    futures = std::move(fs);
-  }
-
-  void tick_start();
-
-  void tick_end();
-
-  uint64_t get_start_timestamp() const { return timestamp_start; }
-
-  uint64_t get_end_timestamp() const { return timestamp_end; }
-
-private:
-  ur_queue_handle_t queue;
-  ur_context_handle_t context;
-  ur_command_t command_type;
-  bool done;
-  std::mutex mutex;
-  std::vector<std::future<void>> futures;
-  std::function<void()> callback;
-  uint64_t timestamp_start = 0;
-  uint64_t timestamp_end = 0;
-};
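The deleted `ur_event_handle_t_::wait` combined three ingredients: a `done` flag guarded by a mutex, a vector of futures to drain, and an optional completion callback invoked after unlocking. A self-contained approximation (`MockEvent` is a hypothetical stand-in, without the queue bookkeeping):

```cpp
#include <functional>
#include <future>
#include <mutex>
#include <vector>

struct MockEvent {
  std::mutex mutex;
  bool done = false;
  std::vector<std::future<void>> futures;
  std::function<void()> callback;

  void wait() {
    std::unique_lock<std::mutex> lock(mutex);
    if (done)
      return;
    for (auto &f : futures)
      f.wait(); // drain all outstanding work
    done = true;
    // The callback may need to acquire the lock, so unlock first
    // (mirroring the comment in the deleted implementation).
    lock.unlock();
    if (callback)
      callback();
  }
};
```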
diff --git a/source/adapters/native_cpu/kernel.cpp b/source/adapters/native_cpu/kernel.cpp
index 596a3ffdf1..af8906245c 100644
--- a/source/adapters/native_cpu/kernel.cpp
+++ b/source/adapters/native_cpu/kernel.cpp
@@ -59,14 +59,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue(
     ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize,
     const ur_kernel_arg_value_properties_t *pProperties,
     const void *pArgValue) {
-  // TODO: error checking
+  // Todo: error checking
+  // Todo: I think that the OpenCL spec (and therefore the PI spec) mandates
+  // that the arg is copied (this is why it is defined as const void *); I
+  // guess we should do it
+  // TODO: can args arrive out of order?
   std::ignore = argIndex;
   std::ignore = pProperties;
 
   UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
   UR_ASSERT(argSize, UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE);
 
-  hKernel->addArg(pArgValue, argIndex, argSize);
+  hKernel->_args.emplace_back(const_cast<void *>(pArgValue));
   return UR_RESULT_SUCCESS;
 }
 
@@ -77,7 +81,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal(
   std::ignore = pProperties;
 
   // emplace a placeholder kernel arg, gets replaced with a pointer to the
   // memory pool before enqueueing the kernel.
-  hKernel->addPtrArg(nullptr, argIndex);
+  hKernel->_args.emplace_back(nullptr);
   hKernel->_localArgInfo.emplace_back(argIndex, argSize);
   return UR_RESULT_SUCCESS;
 }
 
@@ -217,13 +221,14 @@ UR_APIEXPORT ur_result_t UR_APICALL
 urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex,
                       const ur_kernel_arg_pointer_properties_t *pProperties,
                       const void *pArgValue) {
+  // TODO: out_of_order args?
   std::ignore = argIndex;
   std::ignore = pProperties;
 
   UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
   UR_ASSERT(pArgValue, UR_RESULT_ERROR_INVALID_NULL_POINTER);
 
-  hKernel->addPtrArg(const_cast<void *>(pArgValue), argIndex);
+  hKernel->_args.push_back(const_cast<void *>(pArgValue));
   return UR_RESULT_SUCCESS;
 }
 
@@ -257,6 +262,7 @@ UR_APIEXPORT ur_result_t UR_APICALL
 urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex,
                      const ur_kernel_arg_mem_obj_properties_t *pProperties,
                      ur_mem_handle_t hArgValue) {
+  // TODO: out_of_order args?
   std::ignore = argIndex;
   std::ignore = pProperties;
 
@@ -265,11 +271,11 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex,
   // Taken from ur/adapters/cuda/kernel.cpp
   // zero-sized buffers are expected to be null.
   if (hArgValue == nullptr) {
-    hKernel->addPtrArg(nullptr, argIndex);
+    hKernel->_args.emplace_back(nullptr);
     return UR_RESULT_SUCCESS;
   }
 
-  hKernel->addPtrArg(hArgValue->_mem, argIndex);
+  hKernel->_args.emplace_back(hArgValue->_mem);
   return UR_RESULT_SUCCESS;
 }
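The TODO in `urKernelSetArgValue` notes that the spec requires the value to be copied, while `_args` now stores the caller's pointer directly. One possible owning variant, with hypothetical names (`OwnedArg`, `setArgValueOwned`) purely to illustrate the copy:

```cpp
#include <cstddef>
#include <cstring>
#include <memory>
#include <vector>

struct OwnedArg {
  std::unique_ptr<char[]> data;
  std::size_t size;
};

void setArgValueOwned(std::vector<OwnedArg> &args, std::size_t argSize,
                      const void *pArgValue) {
  OwnedArg arg{std::make_unique<char[]>(argSize), argSize};
  std::memcpy(arg.data.get(), pArgValue, argSize);
  args.emplace_back(std::move(arg)); // the kernel now owns a stable copy
}
```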
diff --git a/source/adapters/native_cpu/kernel.hpp b/source/adapters/native_cpu/kernel.hpp
index e2df672d05..084a0ee695 100644
--- a/source/adapters/native_cpu/kernel.hpp
+++ b/source/adapters/native_cpu/kernel.hpp
@@ -11,11 +11,22 @@
 #include "common.hpp"
 #include "nativecpu_state.hpp"
 #include "program.hpp"
-#include <cstring>
+#include <functional>
 #include <utility>
 #include <vector>
 
-using nativecpu_kernel_t = void(void *const *, native_cpu::state *);
+namespace native_cpu {
+
+struct NativeCPUArgDesc {
+  void *MPtr;
+
+  NativeCPUArgDesc(void *Ptr) : MPtr(Ptr){};
+};
+
+} // namespace native_cpu
+
+using nativecpu_kernel_t = void(const native_cpu::NativeCPUArgDesc *,
+                                native_cpu::state *);
 using nativecpu_ptr_t = nativecpu_kernel_t *;
 using nativecpu_task_t = std::function<nativecpu_kernel_t>;
 
@@ -33,9 +44,9 @@ struct ur_kernel_handle_t_ : RefCounted {
       : hProgram(hProgram), _name{name}, _subhandler{std::move(subhandler)} {}
 
   ur_kernel_handle_t_(const ur_kernel_handle_t_ &other)
-      : Args(other.Args), hProgram(other.hProgram), _name(other._name),
-        _subhandler(other._subhandler), _localArgInfo(other._localArgInfo),
-        _localMemPool(other._localMemPool),
+      : hProgram(other.hProgram), _name(other._name),
+        _subhandler(other._subhandler), _args(other._args),
+        _localArgInfo(other._localArgInfo), _localMemPool(other._localMemPool),
         _localMemPoolSize(other._localMemPoolSize),
         ReqdWGSize(other.ReqdWGSize) {
     incrementReferenceCount();
@@ -44,10 +55,8 @@ struct ur_kernel_handle_t_ : RefCounted {
   ~ur_kernel_handle_t_() {
     if (decrementReferenceCount() == 0) {
       free(_localMemPool);
-      Args.deallocate();
     }
   }
-
   ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *name,
                       nativecpu_task_t subhandler,
                       std::optional<native_cpu::WGSize_t> ReqdWGSize,
@@ -57,67 +66,10 @@
         ReqdWGSize(ReqdWGSize), MaxWGSize(MaxWGSize),
         MaxLinearWGSize(MaxLinearWGSize) {}
 
-  struct arguments {
-    using args_index_t = std::vector<void *>;
-    args_index_t Indices;
-    std::vector<size_t> ParamSizes;
-    std::vector<bool> OwnsMem;
-    static constexpr size_t MaxAlign = 16 * sizeof(double);
-
-    /// Add an argument to the kernel.
-    /// If the argument existed before, it is replaced.
-    /// Otherwise, it is added.
-    /// Gaps are filled with empty arguments.
-    /// Implicit offset argument is kept at the back of the indices collection.
-    void addArg(size_t Index, size_t Size, const void *Arg) {
-      if (Index + 1 > Indices.size()) {
-        Indices.resize(Index + 1);
-        OwnsMem.resize(Index + 1);
-        ParamSizes.resize(Index + 1);
-
-        // Update the stored value for the argument
-        Indices[Index] = native_cpu::aligned_malloc(MaxAlign, Size);
-        OwnsMem[Index] = true;
-        ParamSizes[Index] = Size;
-      } else {
-        if (ParamSizes[Index] != Size) {
-          Indices[Index] = realloc(Indices[Index], Size);
-          ParamSizes[Index] = Size;
-        }
-      }
-      std::memcpy(Indices[Index], Arg, Size);
-    }
-
-    void addPtrArg(size_t Index, void *Arg) {
-      if (Index + 1 > Indices.size()) {
-        Indices.resize(Index + 1);
-        OwnsMem.resize(Index + 1);
-        ParamSizes.resize(Index + 1);
-
-        OwnsMem[Index] = false;
-        ParamSizes[Index] = sizeof(uint8_t *);
-      }
-      Indices[Index] = Arg;
-    }
-
-    // This is called by the destructor of ur_kernel_handle_t_, since
-    // ur_kernel_handle_t_ implements reference counting and we want
-    // to deallocate only when the reference count is 0.
-    void deallocate() {
-      assert(OwnsMem.size() == Indices.size() && "Size mismatch");
-      for (size_t Index = 0; Index < Indices.size(); Index++) {
-        if (OwnsMem[Index])
-          native_cpu::aligned_free(Indices[Index]);
-      }
-    }
-
-    const args_index_t &getIndices() const noexcept { return Indices; }
-
-  } Args;
-
   ur_program_handle_t hProgram;
   std::string _name;
   nativecpu_task_t _subhandler;
+  std::vector<native_cpu::NativeCPUArgDesc> _args;
   std::vector<local_arg_info_t> _localArgInfo;
 
   std::optional<native_cpu::WGSize_t> getReqdWGSize() const {
@@ -147,21 +99,13 @@ struct ur_kernel_handle_t_ : RefCounted {
     // For each local argument we have size*numthreads
     size_t offset = 0;
     for (auto &entry : _localArgInfo) {
-      Args.Indices[entry.argIndex] =
+      _args[entry.argIndex].MPtr =
           _localMemPool + offset + (entry.argSize * threadId);
       // update offset in the memory pool
       offset += entry.argSize * numParallelThread;
     }
   }
 
-  const std::vector<void *> &getArgs() const { return Args.getIndices(); }
-
-  void addArg(const void *Ptr, size_t Index, size_t Size) {
-    Args.addArg(Index, Size, Ptr);
-  }
-
-  void addPtrArg(void *Ptr, size_t Index) { Args.addPtrArg(Index, Ptr); }
-
 private:
   char *_localMemPool = nullptr;
   size_t _localMemPoolSize = 0;
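`handleLocalArgs` in kernel.hpp carves per-thread slices out of a single local-memory pool: each local argument reserves `argSize * numParallelThread` bytes, and thread `threadId` receives the slice starting at `offset + argSize * threadId`. A worked example with assumed sizes:

```cpp
#include <cstddef>
#include <cstdio>

int main() {
  const std::size_t numParallelThread = 4, threadId = 2;
  const std::size_t argSizes[] = {64, 128}; // two hypothetical local args
  std::size_t offset = 0;
  for (std::size_t argSize : argSizes) {
    std::size_t sliceStart = offset + argSize * threadId;
    std::printf("arg slice starts at byte %zu\n", sliceStart);
    offset += argSize * numParallelThread; // reserve size * numthreads
  }
  // prints 128 (= 64*2) and 512 (= 64*4 + 128*2)
  return 0;
}
```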
diff --git a/source/adapters/native_cpu/queue.cpp b/source/adapters/native_cpu/queue.cpp
index e2dda24236..7ee1fdf04c 100644
--- a/source/adapters/native_cpu/queue.cpp
+++ b/source/adapters/native_cpu/queue.cpp
@@ -31,9 +31,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue,
 UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
     ur_context_handle_t hContext, ur_device_handle_t hDevice,
     const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) {
-  // TODO: UR_QUEUE_FLAG_PROFILING_ENABLE and other props
+  std::ignore = hContext;
+  std::ignore = hDevice;
+  std::ignore = pProperties;
 
-  auto Queue = new ur_queue_handle_t_(hDevice, hContext, pProperties);
+  auto Queue = new ur_queue_handle_t_(hDevice);
   *phQueue = Queue;
 
   return UR_RESULT_SUCCESS;
@@ -76,7 +78,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) {
-  hQueue->finish();
+  std::ignore = hQueue;
+  // TODO: is this fine as no-op?
   return UR_RESULT_SUCCESS;
 }
diff --git a/source/adapters/native_cpu/queue.hpp b/source/adapters/native_cpu/queue.hpp
index 05ff78d822..8c34af6327 100644
--- a/source/adapters/native_cpu/queue.hpp
+++ b/source/adapters/native_cpu/queue.hpp
@@ -9,48 +9,10 @@
 //===----------------------------------------------------------------------===//
 #pragma once
 #include "common.hpp"
-#include "event.hpp"
-#include "ur_api.h"
-#include <set>
+#include "device.hpp"
 
 struct ur_queue_handle_t_ : RefCounted {
-  ur_queue_handle_t_(ur_device_handle_t device, ur_context_handle_t context,
-                     const ur_queue_properties_t *pProps)
-      : device(device), context(context),
-        inOrder(pProps ? !(pProps->flags &
-                           UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE)
-                       : true),
-        profilingEnabled(pProps ? pProps->flags & UR_QUEUE_FLAG_PROFILING_ENABLE
-                                : false) {}
+  ur_device_handle_t_ *const device;
 
-  ur_device_handle_t getDevice() const { return device; }
-
-  ur_context_handle_t getContext() const { return context; }
-
-  void addEvent(ur_event_handle_t event) { events.insert(event); }
-
-  void removeEvent(ur_event_handle_t event) { events.erase(event); }
-
-  void finish() {
-    while (!events.empty()) {
-      auto ev = *events.begin();
-      // ur_event_handle_t_::wait removes itself from the events set in the
-      // queue
-      ev->wait();
-    }
-    events.clear();
-  }
-
-  ~ur_queue_handle_t_() { finish(); }
-
-  bool isInOrder() const { return inOrder; }
-
-  bool isProfiling() const { return profilingEnabled; }
-
-private:
-  ur_device_handle_t device;
-  ur_context_handle_t context;
-  std::set<ur_event_handle_t> events;
-  const bool inOrder;
-  const bool profilingEnabled;
+  ur_queue_handle_t_(ur_device_handle_t_ *device) : device(device) {}
 };
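On the `urQueueFinish` TODO: a no-op is sound only while every enqueue entry point completes its work before returning, as `urEnqueueKernelLaunch` now does by draining its futures. A toy illustration of that invariant (hypothetical `SyncQueue`, not adapter code):

```cpp
#include <cassert>

struct SyncQueue {
  int pending = 0;
  void enqueue() {
    ++pending;
    // ... run the work inline, as the adapter now does ...
    --pending;
  }
  void finish() { assert(pending == 0); } // nothing left to drain: no-op safe
};

int main() {
  SyncQueue q;
  q.enqueue();
  q.finish();
  return 0;
}
```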
diff --git a/test/conformance/event/event_adapter_native_cpu.match b/test/conformance/event/event_adapter_native_cpu.match
index 2989926af4..17066b6d52 100644
--- a/test/conformance/event/event_adapter_native_cpu.match
+++ b/test/conformance/event/event_adapter_native_cpu.match
@@ -1,12 +1,33 @@
 {{NONDETERMINISTIC}}
+urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_COMMAND_QUEUE
+urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_CONTEXT
+urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_COMMAND_TYPE
+urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_COMMAND_EXECUTION_STATUS
+urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_REFERENCE_COUNT
+urEventGetInfoNegativeTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventGetInfoNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventGetInfoNegativeTest.InvalidSizePropSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventGetInfoNegativeTest.InvalidSizePropSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventGetInfoNegativeTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventGetInfoNegativeTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_QUEUED
 urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_SUBMIT
+urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_START
+urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_END
 urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_COMPLETE
 urEventGetProfilingInfoWithTimingComparisonTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urEventGetProfilingInfoNegativeTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventGetProfilingInfoNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urEventGetProfilingInfoNegativeTest.InvalidValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urEventWaitTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventGetNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventGetNativeHandleTest.InvalidNullPointerNativeEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventCreateWithNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urEventSetCallbackTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urEventSetCallbackTest.ValidateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urEventSetCallbackTest.AllStates/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urEventSetCallbackTest.EventAlreadyCompleted/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventSetCallbackNegativeTest.InvalidNullPointerCallback/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
+urEventSetCallbackNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
diff --git a/test/conformance/queue/queue_adapter_native_cpu.match b/test/conformance/queue/queue_adapter_native_cpu.match
index 5d39450e12..32ea573390 100644
--- a/test/conformance/queue/queue_adapter_native_cpu.match
+++ b/test/conformance/queue/queue_adapter_native_cpu.match
@@ -23,6 +23,7 @@ urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_C
 urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE
 urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_USE_DEFAULT_STREAM
 urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM
+urQueueFinishTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urQueueFlushTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_CONTEXT
 urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_DEVICE
diff --git a/test/conformance/usm/usm_adapter_native_cpu.match b/test/conformance/usm/usm_adapter_native_cpu.match
index 5bf8aaed90..84d214c97f 100644
--- a/test/conformance/usm/usm_adapter_native_cpu.match
+++ b/test/conformance/usm/usm_adapter_native_cpu.match
@@ -2,6 +2,7 @@
 urUSMDeviceAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
 urUSMDeviceAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled
 urUSMDeviceAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
+urUSMDeviceAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled
 urUSMDeviceAllocTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
 urUSMDeviceAllocTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
 urUSMDeviceAllocTest.InvalidNullPtrResult/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
@@ -23,6 +24,21 @@ urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_N
 urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled_64_8
 urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled_64_512
 urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled_64_2048
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_8
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_512
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_2048
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_8
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_512
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_2048
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_8
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_512
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_2048
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_8
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_512
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_2048
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_8
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_512
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_2048
 urUSMFreeTest.SuccessDeviceAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urUSMFreeTest.SuccessHostAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urUSMFreeTest.SuccessSharedAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
@@ -38,6 +54,7 @@ urUSMGetMemAllocInfoNegativeTest.InvalidValuePropSize/SYCL_NATIVE_CPU___SYCL_Nat
 urUSMHostAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
 urUSMHostAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled
 urUSMHostAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
+urUSMHostAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled
 urUSMHostAllocTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
 urUSMHostAllocTest.InvalidNullPtrMem/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
 urUSMHostAllocTest.InvalidUSMSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
@@ -58,6 +75,21 @@ urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Nat
 urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled_64_8
 urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled_64_512
 urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled_64_2048
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_8
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_512
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_2048
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_8
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_512
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_2048
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_8
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_512
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_2048
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_8
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_512
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_2048
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_8
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_512
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_2048
 urUSMPoolCreateTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urUSMPoolCreateTest.SuccessWithFlag/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urUSMPoolGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_POOL_INFO_CONTEXT
@@ -73,8 +105,11 @@ urUSMPoolDestroyTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU_
 urUSMPoolRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urUSMPoolRetainTest.InvalidNullHandlePool/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 urUSMSharedAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
+urUSMSharedAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled
 urUSMSharedAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
+urUSMSharedAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled
 urUSMSharedAllocTest.SuccessWithMultipleAdvices/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
+urUSMSharedAllocTest.SuccessWithMultipleAdvices/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled
 urUSMSharedAllocTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
 urUSMSharedAllocTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
 urUSMSharedAllocTest.InvalidNullPtrMem/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled
@@ -96,3 +131,18 @@ urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_N
 urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled_64_8
 urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled_64_512
 urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled_64_2048
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_8
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_512
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_2048
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_8
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_512
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_2048
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_8
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_512
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_2048
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_8
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_512
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_2048
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_8
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_512
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_2048