Skip to content

Commit 038e82a

Browse files
authored
optimize command buffer dependencies (#146)
* Explicitly pass NULL for an empty event wait list.
* Optimize the in-order command-buffer case: if there is only one command-queue associated with the command-buffer (so there are no cross-queue dependencies), and that command-queue is an in-order queue (so there are no intra-queue dependencies), then we don't need to track dependencies within the command-buffer.
1 parent 942bf52 commit 038e82a

File tree

1 file changed

+18
-15
lines changed

1 file changed

+18
-15
lines changed

layers/10_cmdbufemu/emulate.cpp

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,7 @@ struct BarrierWithWaitList : Command
215215
return g_pNextDispatch->clEnqueueBarrierWithWaitList(
216216
queue,
217217
static_cast<cl_uint>(wait_list.size()),
218-
wait_list.data(),
218+
wait_list.size() ? wait_list.data() : nullptr,
219219
signal);
220220
}
221221

@@ -271,7 +271,7 @@ struct CopyBuffer : Command
271271
dst_offset,
272272
size,
273273
static_cast<cl_uint>(wait_list.size()),
274-
wait_list.data(),
274+
wait_list.size() ? wait_list.data() : nullptr,
275275
signal);
276276
}
277277

@@ -345,7 +345,7 @@ struct CopyBufferRect : Command
345345
dst_row_pitch,
346346
dst_slice_pitch,
347347
static_cast<cl_uint>(wait_list.size()),
348-
wait_list.data(),
348+
wait_list.size() ? wait_list.data() : nullptr,
349349
signal);
350350
}
351351

@@ -411,7 +411,7 @@ struct CopyBufferToImage : Command
411411
dst_origin.data(),
412412
region.data(),
413413
static_cast<cl_uint>(wait_list.size()),
414-
wait_list.data(),
414+
wait_list.size() ? wait_list.data() : nullptr,
415415
signal);
416416
}
417417

@@ -473,7 +473,7 @@ struct CopyImage : Command
473473
dst_origin.data(),
474474
region.data(),
475475
static_cast<cl_uint>(wait_list.size()),
476-
wait_list.data(),
476+
wait_list.size() ? wait_list.data() : nullptr,
477477
signal);
478478
}
479479

@@ -535,7 +535,7 @@ struct CopyImageToBuffer : Command
535535
region.data(),
536536
dst_offset,
537537
static_cast<cl_uint>(wait_list.size()),
538-
wait_list.data(),
538+
wait_list.size() ? wait_list.data() : nullptr,
539539
signal);
540540
}
541541

@@ -601,7 +601,7 @@ struct FillBuffer : Command
601601
offset,
602602
size,
603603
static_cast<cl_uint>(wait_list.size()),
604-
wait_list.data(),
604+
wait_list.size() ? wait_list.data() : nullptr,
605605
signal);
606606
}
607607

@@ -678,7 +678,7 @@ struct FillImage : Command
678678
origin.data(),
679679
region.data(),
680680
static_cast<cl_uint>(wait_list.size()),
681-
wait_list.data(),
681+
wait_list.size() ? wait_list.data() : nullptr,
682682
signal);
683683
}
684684

@@ -725,7 +725,7 @@ struct SVMMemcpy : Command
725725
src_ptr,
726726
size,
727727
static_cast<cl_uint>(wait_list.size()),
728-
wait_list.data(),
728+
wait_list.size() ? wait_list.data() : nullptr,
729729
signal);
730730
}
731731

@@ -779,7 +779,7 @@ struct SVMMemFill : Command
779779
pattern.size(),
780780
size,
781781
static_cast<cl_uint>(wait_list.size()),
782-
wait_list.data(),
782+
wait_list.size() ? wait_list.data() : nullptr,
783783
signal);
784784
}
785785

@@ -1073,7 +1073,7 @@ struct NDRangeKernel : Command
10731073
global_work_size.data(),
10741074
local_work_size.size() ? local_work_size.data() : nullptr,
10751075
static_cast<cl_uint>(wait_list.size()),
1076-
wait_list.data(),
1076+
wait_list.size() ? wait_list.data() : nullptr,
10771077
signal);
10781078
}
10791079

@@ -1510,10 +1510,13 @@ typedef struct _cl_command_buffer_khr
15101510
NextSyncPoint.fetch_add(1, std::memory_order_relaxed) :
15111511
0;
15121512

1513-
command->addDependencies(
1514-
num_sync_points,
1515-
wait_list,
1516-
syncPoint);
1513+
// We only need to add dependencies if there is more than one queue (so
1514+
// we have possible cross-queue dependencies) or the queue is an
1515+
// out-of-order queue (so we have possible intra-queue dependencies).
1516+
if( Queues.size() > 1 || !IsInOrder[0] )
1517+
{
1518+
command->addDependencies(num_sync_points, wait_list, syncPoint);
1519+
}
15171520

15181521
if( sync_point != nullptr )
15191522
{

0 commit comments

Comments
 (0)