Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,13 +467,14 @@ EventImplPtr queue_impl::submit_kernel_scheduler_bypass(
BinImage = detail::retrieveKernelBinary(*this, KData.getKernelName());
assert(BinImage && "Failed to obtain a binary image.");
}
enqueueImpKernel(*this, KData.getNDRDesc(), KData.getArgs(),
KernelBundleImpPtr, KernelImplPtr,
*KData.getDeviceKernelInfoPtr(), RawEvents,
ResultEvent.get(), nullptr, KData.getKernelCacheConfig(),
KData.isCooperative(), KData.usesClusterLaunch(),
KData.getKernelWorkGroupMemorySize(), BinImage,
KData.getKernelFuncPtr());
enqueueImpKernel(
*this, KData.getNDRDesc(), KData.getArgs(), KernelBundleImpPtr,
KernelImplPtr, *KData.getDeviceKernelInfoPtr(), RawEvents,
ResultEvent.get(), nullptr, KData.getKernelCacheConfig(),
KData.isCooperative(), KData.usesClusterLaunch(),
KData.getKernelWorkGroupMemorySize(), BinImage,
KData.getKernelFuncPtr(), KData.getKernelNumArgs(),
KData.getKernelParamDescGetter(), KData.hasSpecialCaptures());
#ifdef XPTI_ENABLE_INSTRUMENTATION
if (xptiEnabled) {
// Emit signal only when event is created
Expand Down
244 changes: 178 additions & 66 deletions sycl/source/detail/scheduler/commands.cpp

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion sycl/source/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,9 @@ void enqueueImpKernel(
ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative,
const bool KernelUsesClusterLaunch, const size_t WorkGroupMemorySize,
const RTDeviceBinaryImage *BinImage = nullptr,
void *KernelFuncPtr = nullptr);
void *KernelFuncPtr = nullptr, int KernelNumArgs = 0,
detail::kernel_param_desc_t (*KernelParamDescGetter)(int) = nullptr,
bool KernelHasSpecialCaptures = true);

/// The exec CG command enqueues execution of kernel or explicit memory
/// operation.
Expand Down
4 changes: 2 additions & 2 deletions sycl/test-e2e/Adapters/level_zero/batch_barrier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ int main(int argc, char *argv[]) {
queue q;

submit_kernel(q); // starts a batch
// CHECK: ---> urEnqueueKernelLaunch
// CHECK: ---> urEnqueueKernelLaunchWithArgsExp
// CHECK-NOT: zeCommandQueueExecuteCommandLists

// Initializing Level Zero driver is required if this test is linked
Expand All @@ -42,7 +42,7 @@ int main(int argc, char *argv[]) {
// CHECK-NOT: zeCommandQueueExecuteCommandLists

submit_kernel(q);
// CHECK: ---> urEnqueueKernelLaunch
// CHECK: ---> urEnqueueKernelLaunchWithArgsExp
// CHECK-NOT: zeCommandQueueExecuteCommandLists

// interop should close the batch
Expand Down
65 changes: 32 additions & 33 deletions sycl/test-e2e/Adapters/level_zero/batch_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,55 +55,54 @@
// variable SYCL_PI_LEVEL_ZERO_BATCH_SIZE=N.
// This test enqueues 8 kernels and then does a wait. And it does this 3 times.
// Expected output is that for batching=1 you will see zeCommandListClose,
// and zeCommandQueueExecuteCommandLists after every urEnqueueKernelLaunch.
// For batching=3 you will see that after 3rd and 6th enqueues, and then after
// urQueueFinish. For 5, after 5th urEnqueue, and then after urQueueFinish. For
// 4 you will see these after 4th and 8th Enqueue, and for 8, only after the
// 8th enqueue. And lastly for 9, you will see the Close and Execute calls
// only after the urQueueFinish.
// Since the test does this 3 times, this pattern will repeat 2 more times,
// and then the test will print Test Passed 8 times, once for each kernel
// validation check.
// and zeCommandQueueExecuteCommandLists after every
// urEnqueueKernelLaunchWithArgsExp. For batching=3 you will see that after 3rd
// and 6th enqueues, and then after urQueueFinish. For 5, after 5th urEnqueue,
// and then after urQueueFinish. For 4 you will see these after 4th and 8th
// Enqueue, and for 8, only after the 8th enqueue. And lastly for 9, you will
// see the Close and Execute calls only after the urQueueFinish. Since the test
// does this 3 times, this pattern will repeat 2 more times, and then the test
// will print Test Passed 8 times, once for each kernel validation check.
// Pattern starts first set of kernel executions.
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB3: zeCommandListClose(
// CKB3: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB4: zeCommandListClose(
// CKB4: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB5: zeCommandListClose(
// CKB5: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB3: zeCommandListClose(
// CKB3: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB7: zeCommandListClose(
// CKB7: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
Expand All @@ -121,45 +120,45 @@
// CKB9: zeCommandListClose(
// CKB9: zeCommandQueueExecuteCommandLists(
// Pattern starts 2nd set of kernel executions
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB3: zeCommandListClose(
// CKB3: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB4: zeCommandListClose(
// CKB4: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB5: zeCommandListClose(
// CKB5: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB3: zeCommandListClose(
// CKB3: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB7: zeCommandListClose(
// CKB7: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
Expand All @@ -177,45 +176,45 @@
// CKB9: zeCommandListClose(
// CKB9: zeCommandQueueExecuteCommandLists(
// Pattern starts 3rd set of kernel executions
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB3: zeCommandListClose(
// CKB3: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB4: zeCommandListClose(
// CKB4: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB5: zeCommandListClose(
// CKB5: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB3: zeCommandListClose(
// CKB3: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
// CKB7: zeCommandListClose(
// CKB7: zeCommandQueueExecuteCommandLists(
// CKALL: ---> urEnqueueKernelLaunch
// CKALL: ---> urEnqueueKernelLaunchWithArgsExp
// CKALL: zeCommandListAppendLaunchKernel(
// CKB1: zeCommandListClose(
// CKB1: zeCommandQueueExecuteCommandLists(
Expand Down
Loading
Loading