@@ -641,8 +641,7 @@ static pi_result copyModule(ze_context_handle_t ZeContext,
641
641
642
642
static bool setEnvVar (const char *var, const char *value);
643
643
644
- static pi_result getOrCreatePlatform (ze_driver_handle_t ZeDriver,
645
- pi_platform *Platform);
644
+ static pi_result populateDeviceCacheIfNeeded (pi_platform Platform);
646
645
647
646
// Forward declarations for mock implementations of Level Zero APIs that
648
647
// do not yet work in the driver.
@@ -730,40 +729,22 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
730
729
return mapError (ZeResult);
731
730
}
732
731
733
- // Level Zero does not have concept of Platforms, but Level Zero driver is the
734
- // closest match.
735
- if (Platforms && NumEntries > 0 ) {
736
- uint32_t ZeDriverCount = 0 ;
737
- ZE_CALL (zeDriverGet (&ZeDriverCount, nullptr ));
738
- if (ZeDriverCount == 0 ) {
739
- assert (NumPlatforms != 0 );
740
- *NumPlatforms = 0 ;
741
- return PI_SUCCESS;
742
- }
743
- ze_driver_handle_t ZeDriver;
744
- assert (ZeDriverCount == 1 );
745
- ZE_CALL (zeDriverGet (&ZeDriverCount, &ZeDriver));
746
-
747
- pi_result Res = getOrCreatePlatform (ZeDriver, Platforms);
748
- if (Res != PI_SUCCESS) {
749
- return Res;
750
- }
751
- }
752
-
753
- if (NumPlatforms)
754
- *NumPlatforms = 1 ;
755
-
756
- return PI_SUCCESS;
757
- }
758
-
759
- // Retrieve a cached Platform that has a matching driver handle or use the
760
- // driver handle to create and initialize a new Platform.
761
- static pi_result getOrCreatePlatform (ze_driver_handle_t ZeDriver,
762
- pi_platform *Platform) {
763
-
764
- // We will retrieve the Max CommandList Cache in this lamda function so that
765
- // it only has to be executed once
766
- static pi_uint32 CommandListCacheSizeValue = ([] {
732
+ // Cache pi_platforms for reuse in the future
733
+ // It solves two problems;
734
+ // 1. sycl::platform equality issue; we always return the same pi_platform.
735
+ // 2. performance; we can save time by immediately return from cache.
736
+ //
737
+ // Note: The memory for "PiPlatformsCache" and "PiPlatformsCacheMutex" is
738
+ // intentionally leaked because the application may call into the SYCL
739
+ // runtime from a global destructor, and such a call could eventually
740
+ // access these variables. Therefore, there is no safe time when
741
+ // "PiPlatformsCache" and "PiPlatformsCacheMutex" could be deleted.
742
+ static auto PiPlatformsCache = new std::vector<pi_platform>;
743
+ static auto PiPlatformsCacheMutex = new std::mutex;
744
+ static bool PiPlatformCachePopulated = false ;
745
+
746
+ std::lock_guard<std::mutex> Lock (*PiPlatformsCacheMutex);
747
+ if (!PiPlatformCachePopulated) {
767
748
const char *CommandListCacheSize =
768
749
std::getenv (" SYCL_PI_LEVEL_ZERO_MAX_COMMAND_LIST_CACHE" );
769
750
pi_uint32 CommandListCacheSizeValue;
@@ -776,62 +757,69 @@ static pi_result getOrCreatePlatform(ze_driver_handle_t ZeDriver,
776
757
" default set.\n " );
777
758
CommandListCacheSizeValue = 20000 ;
778
759
}
779
- return CommandListCacheSizeValue;
780
- })();
781
760
782
- try {
783
- // Cache pi_platforms for reuse in the future
784
- // It solves two problems;
785
- // 1. sycl::device equality issue; we always return the same pi_device.
786
- // 2. performance; we can save time by immediately return from cache.
787
- //
788
- // Note: The memory for "PiPlatformsCache" and "PiPlatformsCacheMutex" is
789
- // intentionally leaked because the application may call into the SYCL
790
- // runtime from a global destructor, and such a call could eventually
791
- // access these variables. Therefore, there is no safe time when
792
- // "PiPlatformsCache" and "PiPlatformsCacheMutex" could be deleted.
793
- static auto PiPlatformsCache = new std::vector<pi_platform>;
794
- static auto PiPlatformsCacheMutex = new std::mutex;
795
-
796
- std::lock_guard<std::mutex> Lock (*PiPlatformsCacheMutex);
797
- for (const pi_platform &CachedPlatform : *PiPlatformsCache) {
798
- if (CachedPlatform->ZeDriver == ZeDriver) {
799
- Platform[0 ] = CachedPlatform;
800
- return PI_SUCCESS;
761
+ try {
762
+
763
+ // Level Zero does not have concept of Platforms, but Level Zero driver is
764
+ // the closest match.
765
+ uint32_t ZeDriverCount = 0 ;
766
+ ZE_CALL (zeDriverGet (&ZeDriverCount, nullptr ));
767
+ if (ZeDriverCount == 0 ) {
768
+ PiPlatformCachePopulated = true ;
769
+ } else {
770
+ ze_driver_handle_t ZeDriver;
771
+ assert (ZeDriverCount == 1 );
772
+ ZE_CALL (zeDriverGet (&ZeDriverCount, &ZeDriver));
773
+ pi_platform Platform = new _pi_platform (ZeDriver);
774
+
775
+ // Cache driver properties
776
+ ze_driver_properties_t ZeDriverProperties;
777
+ ZE_CALL (zeDriverGetProperties (ZeDriver, &ZeDriverProperties));
778
+ uint32_t ZeDriverVersion = ZeDriverProperties.driverVersion ;
779
+ // Intel Level-Zero GPU driver stores version as:
780
+ // | 31 - 24 | 23 - 16 | 15 - 0 |
781
+ // | Major | Minor | Build |
782
+ auto VersionMajor =
783
+ std::to_string ((ZeDriverVersion & 0xFF000000 ) >> 24 );
784
+ auto VersionMinor =
785
+ std::to_string ((ZeDriverVersion & 0x00FF0000 ) >> 16 );
786
+ auto VersionBuild = std::to_string (ZeDriverVersion & 0x0000FFFF );
787
+ Platform->ZeDriverVersion =
788
+ VersionMajor + " ." + VersionMinor + " ." + VersionBuild;
789
+
790
+ ze_api_version_t ZeApiVersion;
791
+ ZE_CALL (zeDriverGetApiVersion (ZeDriver, &ZeApiVersion));
792
+ Platform->ZeDriverApiVersion =
793
+ std::to_string (ZE_MAJOR_VERSION (ZeApiVersion)) + " ." +
794
+ std::to_string (ZE_MINOR_VERSION (ZeApiVersion));
795
+
796
+ Platform->ZeMaxCommandListCache = CommandListCacheSizeValue;
797
+ // Save a copy in the cache for future uses.
798
+ PiPlatformsCache->push_back (Platform);
799
+ PiPlatformCachePopulated = true ;
801
800
}
801
+ } catch (const std::bad_alloc &) {
802
+ return PI_OUT_OF_HOST_MEMORY;
803
+ } catch (...) {
804
+ return PI_ERROR_UNKNOWN;
802
805
}
806
+ }
803
807
804
- // TODO: figure out how/when to release this memory
805
- *Platform = new _pi_platform (ZeDriver);
806
-
807
- // Cache driver properties
808
- ze_driver_properties_t ZeDriverProperties;
809
- ZE_CALL (zeDriverGetProperties (ZeDriver, &ZeDriverProperties));
810
- uint32_t ZeDriverVersion = ZeDriverProperties.driverVersion ;
811
- // Intel Level-Zero GPU driver stores version as:
812
- // | 31 - 24 | 23 - 16 | 15 - 0 |
813
- // | Major | Minor | Build |
814
- auto VersionMajor = std::to_string ((ZeDriverVersion & 0xFF000000 ) >> 24 );
815
- auto VersionMinor = std::to_string ((ZeDriverVersion & 0x00FF0000 ) >> 16 );
816
- auto VersionBuild = std::to_string (ZeDriverVersion & 0x0000FFFF );
817
- Platform[0 ]->ZeDriverVersion =
818
- VersionMajor + " ." + VersionMinor + " ." + VersionBuild;
819
-
820
- ze_api_version_t ZeApiVersion;
821
- ZE_CALL (zeDriverGetApiVersion (ZeDriver, &ZeApiVersion));
822
- Platform[0 ]->ZeDriverApiVersion =
823
- std::to_string (ZE_MAJOR_VERSION (ZeApiVersion)) + " ." +
824
- std::to_string (ZE_MINOR_VERSION (ZeApiVersion));
825
-
826
- Platform[0 ]->ZeMaxCommandListCache = CommandListCacheSizeValue;
827
- // save a copy in the cache for future uses.
828
- PiPlatformsCache->push_back (Platform[0 ]);
829
- } catch (const std::bad_alloc &) {
830
- return PI_OUT_OF_HOST_MEMORY;
831
- } catch (...) {
832
- return PI_ERROR_UNKNOWN;
808
+ if (Platforms && NumEntries > 0 ) {
809
+ uint32_t I = 0 ;
810
+ for (const pi_platform &CachedPlatform : *PiPlatformsCache) {
811
+ if (I < NumEntries) {
812
+ *Platforms++ = CachedPlatform;
813
+ I++;
814
+ } else {
815
+ break ;
816
+ }
817
+ }
833
818
}
834
819
820
+ if (NumPlatforms)
821
+ *NumPlatforms = PiPlatformsCache->size ();
822
+
835
823
return PI_SUCCESS;
836
824
}
837
825
@@ -900,10 +888,35 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle,
900
888
assert (NativeHandle);
901
889
assert (Platform);
902
890
903
- // Create PI platform from the given Level Zero driver handle or retrieve it
904
- // from the cache.
905
891
auto ZeDriver = pi_cast<ze_driver_handle_t >(NativeHandle);
906
- return getOrCreatePlatform (ZeDriver, Platform);
892
+
893
+ pi_uint32 NumPlatforms = 0 ;
894
+ pi_result Res = piPlatformsGet (0 , nullptr , &NumPlatforms);
895
+ if (Res != PI_SUCCESS) {
896
+ return Res;
897
+ }
898
+
899
+ if (NumPlatforms) {
900
+ std::vector<pi_platform> Platforms (NumPlatforms);
901
+ Res = piPlatformsGet (NumPlatforms, Platforms.data (), nullptr );
902
+ if (Res != PI_SUCCESS) {
903
+ return Res;
904
+ }
905
+
906
+ // The SYCL spec requires that the set of platforms must remain fixed for
907
+ // the duration of the application's execution. We assume that we found all
908
+ // of the Level Zero drivers when we initialized the platform cache, so the
909
+ // "NativeHandle" must already be in the cache. If it is not, this must not
910
+ // be a valid Level Zero driver.
911
+ for (const pi_platform &CachedPlatform : Platforms) {
912
+ if (CachedPlatform->ZeDriver == ZeDriver) {
913
+ *Platform = CachedPlatform;
914
+ return PI_SUCCESS;
915
+ }
916
+ }
917
+ }
918
+
919
+ return PI_INVALID_VALUE;
907
920
}
908
921
909
922
// Get the cahched PI device created for the L0 device handle.
@@ -912,9 +925,11 @@ pi_device _pi_platform::getDeviceFromNativeHandle(ze_device_handle_t ZeDevice) {
912
925
913
926
std::lock_guard<std::mutex> Lock (this ->PiDevicesCacheMutex );
914
927
auto it = std::find_if (PiDevicesCache.begin (), PiDevicesCache.end (),
915
- [&](pi_device &D) { return D->ZeDevice == ZeDevice; });
928
+ [&](std::unique_ptr<_pi_device> &D) {
929
+ return D.get ()->ZeDevice == ZeDevice;
930
+ });
916
931
if (it != PiDevicesCache.end ()) {
917
- return *it;
932
+ return ( *it). get () ;
918
933
}
919
934
return nullptr ;
920
935
}
@@ -924,20 +939,20 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
924
939
pi_uint32 *NumDevices) {
925
940
926
941
assert (Platform);
927
- ze_driver_handle_t ZeDriver = Platform->ZeDriver ;
928
942
929
943
// Get number of devices supporting Level Zero
930
944
uint32_t ZeDeviceCount = 0 ;
931
945
std::lock_guard<std::mutex> Lock (Platform->PiDevicesCacheMutex );
932
- ZeDeviceCount = Platform->PiDevicesCache .size ();
933
946
947
+ pi_result Res = populateDeviceCacheIfNeeded (Platform);
948
+ if (Res != PI_SUCCESS) {
949
+ return Res;
950
+ }
951
+
952
+ ZeDeviceCount = Platform->PiDevicesCache .size ();
934
953
const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU);
935
954
const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT);
936
955
937
- if (ZeDeviceCount == 0 ) {
938
- ZE_CALL (zeDeviceGet (ZeDriver, &ZeDeviceCount, nullptr ));
939
- }
940
-
941
956
if (ZeDeviceCount == 0 || !(AskingForGPU || AskingForDefault)) {
942
957
if (NumDevices)
943
958
*NumDevices = 0 ;
@@ -953,34 +968,53 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
953
968
return PI_SUCCESS;
954
969
}
955
970
956
- // if devices are already captured in cache, return them from the cache.
957
- for (const pi_device CachedDevice : Platform->PiDevicesCache ) {
958
- *Devices++ = CachedDevice;
971
+ // Return the devices from the cache.
972
+ uint32_t I = 0 ;
973
+ for (const std::unique_ptr<_pi_device> &CachedDevice :
974
+ Platform->PiDevicesCache ) {
975
+ if (I < NumEntries) {
976
+ *Devices++ = CachedDevice.get ();
977
+ I++;
978
+ } else {
979
+ break ;
980
+ }
959
981
}
960
- if (!Platform->PiDevicesCache .empty ()) {
982
+
983
+ return PI_SUCCESS;
984
+ }
985
+
986
+ // Check the device cache and load it if necessary. The PiDevicesCacheMutex must
987
+ // be locked before calling this function to prevent any synchronization issues.
988
+ static pi_result populateDeviceCacheIfNeeded (pi_platform Platform) {
989
+
990
+ if (Platform->DeviceCachePopulated ) {
961
991
return PI_SUCCESS;
962
992
}
963
993
994
+ ze_driver_handle_t ZeDriver = Platform->ZeDriver ;
995
+ uint32_t ZeDeviceCount = 0 ;
996
+ ZE_CALL (zeDeviceGet (ZeDriver, &ZeDeviceCount, nullptr ));
997
+
964
998
try {
965
999
std::vector<ze_device_handle_t > ZeDevices (ZeDeviceCount);
966
1000
ZE_CALL (zeDeviceGet (ZeDriver, &ZeDeviceCount, ZeDevices.data ()));
967
1001
968
1002
for (uint32_t I = 0 ; I < ZeDeviceCount; ++I) {
969
- if (I < NumEntries) {
970
- Devices[I] = new _pi_device (ZeDevices[I], Platform);
971
- pi_result Result = Devices[I]->initialize ();
972
- if (Result != PI_SUCCESS) {
973
- return Result;
974
- }
975
- // save a copy in the cache for future uses.
976
- Platform->PiDevicesCache .push_back (Devices[I]);
1003
+ std::unique_ptr<_pi_device> Device (
1004
+ new _pi_device (ZeDevices[I], Platform));
1005
+ pi_result Result = Device->initialize ();
1006
+ if (Result != PI_SUCCESS) {
1007
+ return Result;
977
1008
}
1009
+ // save a copy in the cache for future uses.
1010
+ Platform->PiDevicesCache .push_back (std::move (Device));
978
1011
}
979
1012
} catch (const std::bad_alloc &) {
980
1013
return PI_OUT_OF_HOST_MEMORY;
981
1014
} catch (...) {
982
1015
return PI_ERROR_UNKNOWN;
983
1016
}
1017
+ Platform->DeviceCachePopulated = true ;
984
1018
return PI_SUCCESS;
985
1019
}
986
1020
@@ -1583,11 +1617,28 @@ pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle,
1583
1617
assert (Device);
1584
1618
assert (Platform);
1585
1619
1586
- // Create PI device from the given Level Zero device handle.
1587
- // TODO: get the device from the devices' cache.
1620
+ std::lock_guard<std::mutex> Lock (Platform->PiDevicesCacheMutex );
1621
+ pi_result Res = populateDeviceCacheIfNeeded (Platform);
1622
+ if (Res != PI_SUCCESS) {
1623
+ return Res;
1624
+ }
1625
+
1588
1626
auto ZeDevice = pi_cast<ze_device_handle_t >(NativeHandle);
1589
- *Device = new _pi_device (ZeDevice, Platform);
1590
- return (*Device)->initialize ();
1627
+
1628
+ // The SYCL spec requires that the set of devices must remain fixed for the
1629
+ // duration of the application's execution. We assume that we found all of the
1630
+ // Level Zero devices when we initialized the device cache, so the
1631
+ // "NativeHandle" must already be in the cache. If it is not, this must not be
1632
+ // a valid Level Zero device.
1633
+ for (const std::unique_ptr<_pi_device> &CachedDevice :
1634
+ Platform->PiDevicesCache ) {
1635
+ if (CachedDevice->ZeDevice == ZeDevice) {
1636
+ *Device = CachedDevice.get ();
1637
+ return PI_SUCCESS;
1638
+ }
1639
+ }
1640
+
1641
+ return PI_INVALID_VALUE;
1591
1642
}
1592
1643
1593
1644
pi_result piContextCreate (const pi_context_properties *Properties,
0 commit comments