Skip to content

Commit 24d1c69

Browse files
[SYCL] Fix bugs regarding Platform Caching (#2485)
Added new Platforms that have been created and initialized to the PiPlatformCache. Created a new function, piCachePlatforms, so that the cached Platforms could be shared accross both piPlatformsGet and piextPlatformCreateWithNativeHandle. Handle platform initialization in piCachePlatforms, so that platforms created with a native handle would also be initialized (Before initialization was not occuring for platforms created in piextPlatformCreateWithNativeHandle).
1 parent 678911a commit 24d1c69

File tree

1 file changed

+91
-71
lines changed

1 file changed

+91
-71
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 91 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,9 @@ static pi_result copyModule(ze_context_handle_t ZeContext,
548548
ze_module_handle_t SrcMod,
549549
ze_module_handle_t *DestMod);
550550

551+
static pi_result getOrCreatePlatform(ze_driver_handle_t ZeDriver,
552+
pi_platform *Platform);
553+
551554
// Forward declarations for mock implementations of Level Zero APIs that
552555
// do not yet work in the driver.
553556
// TODO: Remove these mock definitions when they work in the driver.
@@ -582,31 +585,13 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
582585
return PI_INVALID_VALUE;
583586
}
584587

585-
// Cache pi_platforms for reuse in the future
586-
// It solves two problems;
587-
// 1. sycl::device equality issue; we always return the same pi_device.
588-
// 2. performance; we can save time by immediately return from cache.
589-
static std::vector<pi_platform> PiPlatformsCache;
590-
static std::mutex PiPlatformsCacheMutex;
591-
592-
// This is a good time to initialize Level Zero.
593-
static const char *CommandListCacheSize =
594-
std::getenv("SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE");
595-
static pi_uint32 CommandListCacheSizeValue;
596-
try {
597-
CommandListCacheSizeValue =
598-
CommandListCacheSize ? std::atoi(CommandListCacheSize) : 20000;
599-
} catch (std::exception const &) {
600-
zePrint("SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE: invalid value provided, "
601-
"default set.\n");
602-
CommandListCacheSizeValue = 20000;
603-
}
604-
605588
// TODO: We can still safely recover if something goes wrong during the init.
606589
// Implement handling segfault using sigaction.
607-
// TODO: We should not call zeInit multiples times ever, so
608-
// this code should be changed.
609-
ze_result_t ZeResult = ZE_CALL_NOCHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY));
590+
591+
// We must only initialize the driver once, even if piPlatformsGet() is called
592+
// multiple times. Declaring the return value as "static" ensures it's only
593+
// called once.
594+
static ze_result_t ZeResult = ZE_CALL_NOCHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY));
610595

611596
// Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms.
612597
if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
@@ -634,57 +619,93 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
634619
assert(ZeDriverCount == 1);
635620
ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver));
636621

637-
std::lock_guard<std::mutex> Lock(PiPlatformsCacheMutex);
638-
for (const pi_platform CachedPlatform : PiPlatformsCache) {
639-
if (CachedPlatform->ZeDriver == ZeDriver) {
640-
Platforms[0] = CachedPlatform;
641-
// if the caller sent a valid NumPlatforms pointer, set it here
642-
if (NumPlatforms)
643-
*NumPlatforms = 1;
622+
pi_result Res = getOrCreatePlatform(ZeDriver, Platforms);
623+
if (Res != PI_SUCCESS) {
624+
return Res;
625+
}
626+
}
644627

628+
if (NumPlatforms)
629+
*NumPlatforms = 1;
630+
631+
return PI_SUCCESS;
632+
}
633+
634+
// Retrieve a cached Platform that has a matching driver handle or use the
635+
// driver handle to create and initialize a new Platform.
636+
static pi_result getOrCreatePlatform(ze_driver_handle_t ZeDriver,
637+
pi_platform *Platform) {
638+
639+
// We will retrieve the Max CommandList Cache in this lamda function so that
640+
// it only has to be executed once
641+
static pi_uint32 CommandListCacheSizeValue = ([] {
642+
const char *CommandListCacheSize =
643+
std::getenv("SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE");
644+
pi_uint32 CommandListCacheSizeValue;
645+
try {
646+
CommandListCacheSizeValue =
647+
CommandListCacheSize ? std::stoi(CommandListCacheSize) : 20000;
648+
} catch (std::exception const &) {
649+
zePrint("SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE: invalid value provided, "
650+
"default set.\n");
651+
CommandListCacheSizeValue = 20000;
652+
}
653+
return CommandListCacheSizeValue;
654+
})();
655+
656+
try {
657+
// Cache pi_platforms for reuse in the future
658+
// It solves two problems;
659+
// 1. sycl::device equality issue; we always return the same pi_device.
660+
// 2. performance; we can save time by immediately return from cache.
661+
//
662+
// Note: The memory for "PiPlatformsCache" and "PiPlatformsCacheMutex" is
663+
// intentionally leaked because the application may call into the SYCL
664+
// runtime from a global destructor, and such a call could eventually
665+
// access these variables. Therefore, there is no safe time when
666+
// "PiPlatformsCache" and "PiPlatformsCacheMutex" could be deleted.
667+
static auto PiPlatformsCache = new std::vector<pi_platform>;
668+
static auto PiPlatformsCacheMutex = new std::mutex;
669+
670+
std::lock_guard<std::mutex> Lock(*PiPlatformsCacheMutex);
671+
for (const pi_platform &CachedPlatform : *PiPlatformsCache) {
672+
if (CachedPlatform->ZeDriver == ZeDriver) {
673+
Platform[0] = CachedPlatform;
645674
return PI_SUCCESS;
646675
}
647676
}
648677

649-
try {
650-
// TODO: figure out how/when to release this memory
651-
*Platforms = new _pi_platform(ZeDriver);
652-
653-
// Cache driver properties
654-
ze_driver_properties_t ZeDriverProperties;
655-
ZE_CALL(zeDriverGetProperties(ZeDriver, &ZeDriverProperties));
656-
uint32_t ZeDriverVersion = ZeDriverProperties.driverVersion;
657-
// Intel Level-Zero GPU driver stores version as:
658-
// | 31 - 24 | 23 - 16 | 15 - 0 |
659-
// | Major | Minor | Build |
660-
std::string VersionMajor =
661-
std::to_string((ZeDriverVersion & 0xFF000000) >> 24);
662-
std::string VersionMinor =
663-
std::to_string((ZeDriverVersion & 0x00FF0000) >> 16);
664-
std::string VersionBuild = std::to_string(ZeDriverVersion & 0x0000FFFF);
665-
Platforms[0]->ZeDriverVersion = VersionMajor + std::string(".") +
666-
VersionMinor + std::string(".") +
667-
VersionBuild;
668-
669-
ze_api_version_t ZeApiVersion;
670-
ZE_CALL(zeDriverGetApiVersion(ZeDriver, &ZeApiVersion));
671-
Platforms[0]->ZeDriverApiVersion =
672-
std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + std::string(".") +
673-
std::to_string(ZE_MINOR_VERSION(ZeApiVersion));
674-
675-
// save a copy in the cache for future uses
676-
PiPlatformsCache.push_back(Platforms[0]);
677-
Platforms[0]->ZeMaxCommandListCache = CommandListCacheSizeValue;
678-
} catch (const std::bad_alloc &) {
679-
return PI_OUT_OF_HOST_MEMORY;
680-
} catch (...) {
681-
return PI_ERROR_UNKNOWN;
682-
}
678+
// TODO: figure out how/when to release this memory
679+
*Platform = new _pi_platform(ZeDriver);
680+
681+
// Cache driver properties
682+
ze_driver_properties_t ZeDriverProperties;
683+
ZE_CALL(zeDriverGetProperties(ZeDriver, &ZeDriverProperties));
684+
uint32_t ZeDriverVersion = ZeDriverProperties.driverVersion;
685+
// Intel Level-Zero GPU driver stores version as:
686+
// | 31 - 24 | 23 - 16 | 15 - 0 |
687+
// | Major | Minor | Build |
688+
auto VersionMajor = std::to_string((ZeDriverVersion & 0xFF000000) >> 24);
689+
auto VersionMinor = std::to_string((ZeDriverVersion & 0x00FF0000) >> 16);
690+
auto VersionBuild = std::to_string(ZeDriverVersion & 0x0000FFFF);
691+
Platform[0]->ZeDriverVersion =
692+
VersionMajor + "." + VersionMinor + "." + VersionBuild;
693+
694+
ze_api_version_t ZeApiVersion;
695+
ZE_CALL(zeDriverGetApiVersion(ZeDriver, &ZeApiVersion));
696+
Platform[0]->ZeDriverApiVersion =
697+
std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." +
698+
std::to_string(ZE_MINOR_VERSION(ZeApiVersion));
699+
700+
Platform[0]->ZeMaxCommandListCache = CommandListCacheSizeValue;
701+
// save a copy in the cache for future uses.
702+
PiPlatformsCache->push_back(Platform[0]);
703+
} catch (const std::bad_alloc &) {
704+
return PI_OUT_OF_HOST_MEMORY;
705+
} catch (...) {
706+
return PI_ERROR_UNKNOWN;
683707
}
684708

685-
if (NumPlatforms)
686-
*NumPlatforms = 1;
687-
688709
return PI_SUCCESS;
689710
}
690711

@@ -753,11 +774,10 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle,
753774
assert(NativeHandle);
754775
assert(Platform);
755776

756-
// Create PI platform from the given Level Zero driver handle.
757-
// TODO: get the platform from the platforms' cache.
777+
// Create PI platform from the given Level Zero driver handle or retrieve it
778+
// from the cache.
758779
auto ZeDriver = pi_cast<ze_driver_handle_t>(NativeHandle);
759-
*Platform = new _pi_platform(ZeDriver);
760-
return PI_SUCCESS;
780+
return getOrCreatePlatform(ZeDriver, Platform);
761781
}
762782

763783
// Get the cahched PI device created for the L0 device handle.

0 commit comments

Comments
 (0)