Skip to content

Commit 51ac08c

Browse files
authored
[SYCL] Adding new device descriptors as SYCL extensions. (#2897)
This commit adds new device descriptors as SYCL extensions. These new device descriptors are detailed in llvm/sycl/doc/extensions/IntelGPU/IntelGPUDeviceInfo.md. They provide low-level details about Intel GPU devices. This support is only provided for the Level Zero backend. New aspects have been added to indicate whether the support is available. Signed-off-by: Gail Lyons <[email protected]>
1 parent 50b81c3 commit 51ac08c

File tree

16 files changed

+337
-6
lines changed

16 files changed

+337
-6
lines changed

sycl/doc/EnvironmentVariables.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ subject to change. Do not rely on these variables in production code.
3131
| SYCL_PI_LEVEL_ZERO_BATCH_SIZE | Integer | Sets a preferred number of commands to batch into a command list before executing the command list. A value of 0 causes the batch size to be adjusted dynamically. A value greater than 0 specifies fixed size batching, with the batch size set to the specified value. The default is 0. |
3232
| SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE | Any(\*) | Enables tracing of parallel_for invocations with rounded-up ranges. |
3333
| SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING | Any(\*) | Disables automatic rounding-up of parallel_for invocation ranges. |
34+
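| SYCL_ENABLE_PCI | Integer | When set to 1, enables obtaining the GPU PCI address when using the Level Zero backend (the plugin then sets ZES_ENABLE_SYSMAN=1 during platform discovery). Note: the current implementation only checks whether the variable is defined, so any value, including 0, enables it; leave the variable unset to disable. It is unset (disabled) by default. |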
3435

3536
`(*) Note: Any means this environment variable is effective when set to any non-null value.`
3637

sycl/doc/extensions/IntelGPU/IntelGPUDeviceInfo.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# SYCL(TM) Proposal: Intel's Extensions for Device Information
22

3-
**IMPORTANT**: This specification is a draft.
43

54
**NOTE**: Khronos(R) is a registered trademark and SYCL(TM) is a trademark of the Khronos Group, Inc.
65

@@ -15,13 +14,14 @@ The Feature Test Macro will be defined as:
1514
#define SYCL_EXT_INTEL_DEVICE_INFO 1
1615

1716

18-
1917
# PCI Address #
2018

2119
A new device descriptor will be added which will provide the PCI address in BDF format. BDF format contains the address as: `domain:bus:device.function`.
2220

2321
This new device descriptor is only available for devices in the Level Zero platform, and the matching aspect is only true for those devices. The DPC++ default behavior is to expose GPU devices through the Level Zero platform.
2422

23+
**Note:** The environment variable SYCL\_ENABLE\_PCI must be set to 1 to obtain the PCI address.
24+
2525

2626
## Device Information Descriptors ##
2727

sycl/include/CL/sycl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <CL/sycl/device_selector.hpp>
2727
#include <CL/sycl/event.hpp>
2828
#include <CL/sycl/exception.hpp>
29+
#include <CL/sycl/feature_test.hpp>
2930
#include <CL/sycl/group.hpp>
3031
#include <CL/sycl/handler.hpp>
3132
#include <CL/sycl/id.hpp>

sycl/include/CL/sycl/aspects.hpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,14 @@ enum class aspect {
3030
usm_host_allocations,
3131
usm_shared_allocations,
3232
usm_restricted_shared_allocations,
33-
usm_system_allocator
33+
usm_system_allocator,
34+
ext_intel_pci_address,
35+
ext_intel_gpu_eu_count,
36+
ext_intel_gpu_eu_simd_width,
37+
ext_intel_gpu_slices,
38+
ext_intel_gpu_subslices_per_slice,
39+
ext_intel_gpu_eu_count_per_subslice,
40+
ext_intel_max_mem_bandwidth
3441
};
3542

3643
} // namespace sycl

sycl/include/CL/sycl/detail/pi.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
4343
#define _PI_H_VERSION_STRING \
4444
_PI_CONCAT(_PI_H_VERSION_MAJOR, _PI_H_VERSION_MINOR)
45+
4546
// TODO: we need a mapping of PI to OpenCL somewhere, and this can be done
4647
// elsewhere, e.g. in the pi_opencl, but constants/enums mapping is now
4748
// done here, for efficiency and simplicity.
@@ -264,7 +265,15 @@ typedef enum {
264265
PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT =
265266
CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL,
266267
PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT =
267-
CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL
268+
CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL,
269+
// These are Intel-specific extensions.
270+
PI_DEVICE_INFO_PCI_ADDRESS = 0x10020,
271+
PI_DEVICE_INFO_GPU_EU_COUNT = 0x10021,
272+
PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH = 0x10022,
273+
PI_DEVICE_INFO_GPU_SLICES = 0x10023,
274+
PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE = 0x10024,
275+
PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = 0x10025,
276+
PI_DEVICE_INFO_MAX_MEM_BANDWIDTH = 0x10026
268277
} _pi_device_info;
269278

270279
typedef enum {

sycl/include/CL/sycl/feature_test.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//==---- feature_test.hpp - SYCL Feature Test Definitions -----*- C++ -*---==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#pragma once
9+
10+
__SYCL_INLINE_NAMESPACE(cl) {
11+
namespace sycl {
12+
13+
// Feature test macro definitions
14+
15+
#define SYCL_EXT_INTEL_DEVICE_INFO 1
16+
17+
} // namespace sycl
18+
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/include/CL/sycl/info/device_traits.def

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,10 @@ __SYCL_PARAM_TRAITS_SPEC(device, usm_host_allocations, bool)
8585
__SYCL_PARAM_TRAITS_SPEC(device, usm_shared_allocations, bool)
8686
__SYCL_PARAM_TRAITS_SPEC(device, usm_restricted_shared_allocations, bool)
8787
__SYCL_PARAM_TRAITS_SPEC(device, usm_system_allocator, bool)
88+
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_pci_address, string_class)
89+
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_count, pi_uint32)
90+
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_simd_width, pi_uint32)
91+
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_slices, pi_uint32)
92+
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_subslices_per_slice, pi_uint32)
93+
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_count_per_subslice, pi_uint32)
94+
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_max_mem_bandwidth, pi_uint64)

sycl/include/CL/sycl/info/info_desc.hpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,16 @@ enum class device : cl_device_info {
130130
usm_host_allocations = PI_USM_HOST_SUPPORT,
131131
usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT,
132132
usm_restricted_shared_allocations = PI_USM_CROSS_SHARED_SUPPORT,
133-
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT
133+
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT,
134+
// Intel extensions
135+
ext_intel_pci_address = PI_DEVICE_INFO_PCI_ADDRESS,
136+
ext_intel_gpu_eu_count = PI_DEVICE_INFO_GPU_EU_COUNT,
137+
ext_intel_gpu_eu_simd_width = PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH,
138+
ext_intel_gpu_slices = PI_DEVICE_INFO_GPU_SLICES,
139+
ext_intel_gpu_subslices_per_slice = PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE,
140+
ext_intel_gpu_eu_count_per_subslice =
141+
PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE,
142+
ext_intel_max_mem_bandwidth = PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
134143
};
135144

136145
enum class device_type : pi_uint64 {

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1452,6 +1452,16 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
14521452
return getInfo(param_value_size, param_value, param_value_size_ret, value);
14531453
}
14541454

1455+
// TODO: Investigate if this information is available on CUDA.
1456+
case PI_DEVICE_INFO_PCI_ADDRESS:
1457+
case PI_DEVICE_INFO_GPU_EU_COUNT:
1458+
case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
1459+
case PI_DEVICE_INFO_GPU_SLICES:
1460+
case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
1461+
case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
1462+
case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH:
1463+
return PI_INVALID_VALUE;
1464+
14551465
default:
14561466
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
14571467
}

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717
#include <cstdio>
1818
#include <cstring>
1919
#include <memory>
20+
#include <sstream>
2021
#include <string>
2122
#include <thread>
2223
#include <utility>
2324

25+
#include <level_zero/zes_api.h>
2426
#include <level_zero/zet_api.h>
2527

2628
#include "usm_allocator.hpp"
@@ -786,6 +788,12 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
786788
setEnvVar("ZE_ENABLE_PARAMETER_VALIDATION", "1");
787789
}
788790

791+
// Enable SYSMAN support for obtaining the PCI address
792+
// and maximum memory bandwidth.
793+
if (getenv("SYCL_ENABLE_PCI") != nullptr) {
794+
setEnvVar("ZES_ENABLE_SYSMAN", "1");
795+
}
796+
789797
// TODO: We can still safely recover if something goes wrong during the init.
790798
// Implement handling segfault using sigaction.
791799

@@ -1578,6 +1586,42 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
15781586
}
15791587
return ReturnValue(Supported);
15801588
}
1589+
1590+
// Intel extensions for GPU information
1591+
case PI_DEVICE_INFO_PCI_ADDRESS: {
1592+
if (getenv("ZES_ENABLE_SYSMAN") == nullptr) {
1593+
zePrint("Set SYCL_ENABLE_PCI=1 to obtain PCI data.\n");
1594+
return PI_INVALID_VALUE;
1595+
}
1596+
zes_pci_properties_t ZeDevicePciProperties = {};
1597+
ZE_CALL(zesDevicePciGetProperties(ZeDevice, &ZeDevicePciProperties));
1598+
std::stringstream ss;
1599+
ss << ZeDevicePciProperties.address.domain << ":"
1600+
<< ZeDevicePciProperties.address.bus << ":"
1601+
<< ZeDevicePciProperties.address.device << "."
1602+
<< ZeDevicePciProperties.address.function;
1603+
return ReturnValue(ss.str().c_str());
1604+
}
1605+
case PI_DEVICE_INFO_GPU_EU_COUNT: {
1606+
pi_uint32 count = Device->ZeDeviceProperties.numEUsPerSubslice *
1607+
Device->ZeDeviceProperties.numSubslicesPerSlice *
1608+
Device->ZeDeviceProperties.numSlices;
1609+
return ReturnValue(pi_uint32{count});
1610+
}
1611+
case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
1612+
return ReturnValue(
1613+
pi_uint32{Device->ZeDeviceProperties.physicalEUSimdWidth});
1614+
case PI_DEVICE_INFO_GPU_SLICES:
1615+
return ReturnValue(pi_uint32{Device->ZeDeviceProperties.numSlices});
1616+
case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
1617+
return ReturnValue(
1618+
pi_uint32{Device->ZeDeviceProperties.numSubslicesPerSlice});
1619+
case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
1620+
return ReturnValue(pi_uint32{Device->ZeDeviceProperties.numEUsPerSubslice});
1621+
case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH:
1622+
// Currently not supported in the Level Zero runtime.
1623+
return PI_INVALID_VALUE;
1624+
15811625
default:
15821626
zePrint("Unsupported ParamName in piGetDeviceInfo\n");
15831627
zePrint("ParamName=%d(0x%x)\n", ParamName, ParamName);

0 commit comments

Comments
 (0)