@@ -57,6 +57,27 @@ bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr) {
5757 return (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_SHARED);
5858}
5959
60+ // Helper function that decides whether the copy engine should be preferred
61+ // for a copy from Src to Dst, based on the device and the pointer types.
62+ bool PreferCopyEngineUsage (ur_device_handle_t Device,
63+ ur_context_handle_t Context, const void *Src,
64+ void *Dst) {
65+ bool PreferCopyEngine = false ;
66+ // On integrated devices, copy engines are not preferred for any copy
67+ // operation, so the checks below apply to non-integrated devices only.
68+ if (!Device->isIntegrated ()) {
69+ // For non-D2D copies (Src or Dst is not a device pointer), copy engines are
70+ // preferred only if the device has both the main and link copy engines.
71+ if (Device->hasLinkCopyEngine () && Device->hasMainCopyEngine () &&
72+ (!IsDevicePointer (Context, Src) || !IsDevicePointer (Context, Dst))) {
73+ PreferCopyEngine = true ;
74+ }
75+ }
76+ // Temporary option to force use of the copy engine; it overrides the above.
77+ PreferCopyEngine |= UseCopyEngineForD2DCopy;
78+ return PreferCopyEngine;
79+ }
80+
6081// Shared by all memory read/write/copy PI interfaces.
6182// PI interfaces must have queue's and destination buffer's mutexes locked for
6283// exclusive use and source buffer's mutex locked for shared use on entry.
@@ -1189,23 +1210,10 @@ ur_result_t urEnqueueUSMMemcpy(
11891210) {
11901211 std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
11911212
1192- // Device to Device copies are found to execute slower on copy engine
1193- // (versus compute engine).
1194- bool PreferCopyEngine = !IsDevicePointer (Queue->Context , Src) ||
1195- !IsDevicePointer (Queue->Context , Dst);
1196- // For better performance, Copy Engines are not preferred given Shared
1197- // pointers on DG2.
1198- if (Queue->Device ->isDG2 () && (IsSharedPointer (Queue->Context , Src) ||
1199- IsSharedPointer (Queue->Context , Dst))) {
1200- PreferCopyEngine = false ;
1201- }
1202-
1203- // Temporary option added to use copy engine for D2D copy
1204- PreferCopyEngine |= UseCopyEngineForD2DCopy;
1205-
12061213 return enqueueMemCopyHelper ( // TODO: do we need a new command type for this?
12071214 UR_COMMAND_MEM_BUFFER_COPY, Queue, Dst, Blocking, Size, Src,
1208- NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine);
1215+ NumEventsInWaitList, EventWaitList, OutEvent,
1216+ PreferCopyEngineUsage (Queue->Device , Queue->Context , Src, Dst));
12091217}
12101218
12111219ur_result_t urEnqueueUSMPrefetch (
@@ -1396,26 +1404,13 @@ ur_result_t urEnqueueUSMMemcpy2D(
13961404
13971405 std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
13981406
1399- // Device to Device copies are found to execute slower on copy engine
1400- // (versus compute engine).
1401- bool PreferCopyEngine = !IsDevicePointer (Queue->Context , Src) ||
1402- !IsDevicePointer (Queue->Context , Dst);
1403- // For better performance, Copy Engines are not preferred given Shared
1404- // pointers on DG2.
1405- if (Queue->Device ->isDG2 () && (IsSharedPointer (Queue->Context , Src) ||
1406- IsSharedPointer (Queue->Context , Dst))) {
1407- PreferCopyEngine = false ;
1408- }
1409-
1410- // Temporary option added to use copy engine for D2D copy
1411- PreferCopyEngine |= UseCopyEngineForD2DCopy;
1412-
14131407 return enqueueMemCopyRectHelper ( // TODO: do we need a new command type for
14141408 // this?
14151409 UR_COMMAND_MEM_BUFFER_COPY_RECT, Queue, Src, Dst, ZeroOffset, ZeroOffset,
14161410 Region, SrcPitch, DstPitch, 0 , /* SrcSlicePitch=*/
14171411 0 , /* DstSlicePitch=*/
1418- Blocking, NumEventsInWaitList, EventWaitList, Event, PreferCopyEngine);
1412+ Blocking, NumEventsInWaitList, EventWaitList, Event,
1413+ PreferCopyEngineUsage (Queue->Device , Queue->Context , Src, Dst));
14191414}
14201415
14211416static ur_result_t ur2zeImageDesc (const ur_image_format_t *ImageFormat,
0 commit comments