@@ -3127,8 +3127,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
31273127
31283128 // intel extensions for GPU information
31293129 case PI_DEVICE_INFO_DEVICE_ID:
3130- return ReturnValue (
3131- pi_uint32{Device->ZeDeviceProperties ->deviceId });
3130+ return ReturnValue (pi_uint32{Device->ZeDeviceProperties ->deviceId });
31323131 case PI_DEVICE_INFO_PCI_ADDRESS: {
31333132 if (getenv (" ZES_ENABLE_SYSMAN" ) == nullptr ) {
31343133 zePrint (" Set SYCL_ENABLE_PCI=1 to obtain PCI data.\n " );
@@ -3958,8 +3957,12 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
39583957 if ((Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 ||
39593958 (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0 ) {
39603959
3960+ // We don't yet know which device needs this buffer, so make the first
3961+ // device in the context be the master, and hold the initial valid
3962+ // allocation.
39613963 char *ZeHandleDst;
3962- PI_CALL (Buffer->getZeHandle (ZeHandleDst, _pi_mem::write_only));
3964+ PI_CALL (Buffer->getZeHandle (ZeHandleDst, _pi_mem::write_only,
3965+ Context->Devices [0 ]));
39633966 if (Buffer->OnHost ) {
39643967 // Do a host to host copy.
39653968 // For an imported HostPtr the copy is unneeded.
@@ -4331,7 +4334,7 @@ pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
43314334 // represent the buffer in this context) copy the data to a newly
43324335 // created device allocation.
43334336 char *ZeHandleDst;
4334- PI_CALL (Buffer->getZeHandle (ZeHandleDst, _pi_mem::write_only));
4337+ PI_CALL (Buffer->getZeHandle (ZeHandleDst, _pi_mem::write_only, Device ));
43354338
43364339 // zeCommandListAppendMemoryCopy must not be called from simultaneous
43374340 // threads with the same command list handle, so we need exclusive lock.
@@ -8757,9 +8760,16 @@ size_t _pi_buffer::getAlignment() const {
87578760pi_result _pi_buffer::getZeHandle (char *&ZeHandle, access_mode_t AccessMode,
87588761 pi_device Device) {
87598762
8763+ // NOTE: There might be no valid allocation at all yet and we get
8764+ // here from piEnqueueKernelLaunch that would be doing the buffer
8765+ // initialization. In this case the Device is not null as kernel
8766+ // launch is always on a specific device.
87608767 if (!Device)
87618768 Device = LastDeviceWithValidAllocation;
8762- PI_ASSERT (Device, PI_ERROR_INVALID_DEVICE);
8769+ // If the device is still not selected then use the first one in
8770+ // the context of the buffer.
8771+ if (!Device)
8772+ Device = Context->Devices [0 ];
87638773
87648774 auto &Allocation = Allocations[Device];
87658775
@@ -8814,6 +8824,9 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
88148824 // devices in the context have the same root.
88158825 PI_CALL (getZeHandle (ZeHandle, AccessMode, Context->SingleRootDevice ));
88168826 Allocation.ReleaseAction = allocation_t ::keep;
8827+ Allocation.ZeHandle = ZeHandle;
8828+ Allocation.Valid = true ;
8829+ return PI_SUCCESS;
88178830 } else { // Create device allocation
88188831 if (enableBufferPooling ()) {
88198832 Allocation.ReleaseAction = allocation_t ::free;
@@ -8836,13 +8849,27 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
88368849 if (Device == LastDeviceWithValidAllocation)
88378850 die (" getZeHandle: last used allocation is not valid" );
88388851
8839- Allocation.Valid = true ;
88408852 // For write-only access the allocation contents is not going to be used.
88418853 // So don't do anything to make it "valid".
8842- if (AccessMode != _pi_mem::write_only) {
8843- char *ZeHandleSrc;
8854+ bool NeedCopy = AccessMode != _pi_mem::write_only;
8855+ // It's also possible that the buffer doesn't have a valid allocation
8856+ // yet presumably when it is passed to a kernel that will perform
8857+ // it's intialization.
8858+ if (NeedCopy && !LastDeviceWithValidAllocation) {
8859+ NeedCopy = false ;
8860+ }
8861+ char *ZeHandleSrc = nullptr ;
8862+ if (NeedCopy) {
88448863 PI_CALL (getZeHandle (ZeHandleSrc, _pi_mem::read_only,
88458864 LastDeviceWithValidAllocation));
8865+ // It's possible with the single root-device contexts that
8866+ // the buffer is represented by the single root-device
8867+ // allocation and then skip the copy to itself.
8868+ if (ZeHandleSrc == ZeHandle)
8869+ NeedCopy = false ;
8870+ }
8871+
8872+ if (NeedCopy) {
88468873 // Copy valid buffer data to this allocation.
88478874 // TODO: see if we should better use peer's device allocation used
88488875 // directly, if that capability is reported with zeDeviceCanAccessPeer,
@@ -8851,7 +8878,7 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
88518878 //
88528879 // zeCommandListAppendMemoryCopy must not be called from simultaneous
88538880 // threads with the same command list handle, so we need exclusive lock.
8854- ze_bool_t P2P;
8881+ ze_bool_t P2P = false ;
88558882 ZE_CALL (
88568883 zeDeviceCanAccessPeer,
88578884 (Device->ZeDevice , LastDeviceWithValidAllocation->ZeDevice , &P2P));
@@ -8895,9 +8922,10 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
88958922 Size, nullptr , 0 , nullptr ));
88968923 }
88978924 }
8925+ Allocation.Valid = true ;
8926+ LastDeviceWithValidAllocation = Device;
88988927 }
88998928
8900- LastDeviceWithValidAllocation = Device;
89018929 // Invalidate other allocations that would become not valid if
89028930 // this access is not read-only.
89038931 if (AccessMode != _pi_mem::read_only) {
0 commit comments