Skip to content

Commit 8b0a2f7

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents 51b93d5 + dcbed11 commit 8b0a2f7

File tree

74 files changed

+1334
-987
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+1334
-987
lines changed

clang/include/clang/Driver/Driver.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,11 @@ class Driver {
801801
/// targets.
802802
mutable llvm::StringMap<StringRef> SYCLUniqueIDList;
803803

804+
/// Vector of Macros that need to be added to the Host compilation in a
805+
/// SYCL based offloading scenario. These macros are gathered during
806+
/// construction of the device compilations.
807+
mutable std::vector<std::string> SYCLTargetMacroArgs;
808+
804809
/// Return the typical executable name for the specified driver \p Mode.
805810
static const char *getExecutableForDriverMode(DriverMode Mode);
806811

@@ -871,6 +876,17 @@ class Driver {
871876
void createAppendedFooterInput(Action *&Input, Compilation &C,
872877
const llvm::opt::ArgList &Args) const;
873878

879+
/// addSYCLTargetMacroArg - Add the given macro to the vector of args to be
880+
/// added to the host compilation step.
881+
void addSYCLTargetMacroArg(const llvm::opt::ArgList &Args,
882+
StringRef Macro) const {
883+
SYCLTargetMacroArgs.push_back(Args.MakeArgString(Macro));
884+
}
885+
/// getSYCLTargetMacroArgs - return the previously gathered macro target args.
886+
llvm::ArrayRef<std::string> getSYCLTargetMacroArgs() const {
887+
return SYCLTargetMacroArgs;
888+
}
889+
874890
/// setSYCLUniqueID - set the Unique ID that is used for all FE invocations
875891
/// when performing compilations for SYCL.
876892
void addSYCLUniqueID(StringRef UniqueID, StringRef FileName) const {

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5115,19 +5115,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
51155115

51165116
// Forward -fsycl-default-sub-group-size if in SYCL mode.
51175117
Args.AddLastArg(CmdArgs, options::OPT_fsycl_default_sub_group_size);
5118-
5119-
// Add any predefined macros associated with intel_gpu* type targets
5120-
// passed in with -fsycl-targets
5121-
if (RawTriple.isSPIR() &&
5122-
RawTriple.getSubArch() == llvm::Triple::SPIRSubArch_gen) {
5123-
StringRef Device = JA.getOffloadingArch();
5124-
if (!Device.empty())
5125-
CmdArgs.push_back(Args.MakeArgString(
5126-
Twine("-D") + SYCL::gen::getGenDeviceMacro(Device)));
5127-
}
5128-
if (RawTriple.isSPIR() &&
5129-
RawTriple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64)
5130-
CmdArgs.push_back("-D__SYCL_TARGET_INTEL_X86_64__");
51315118
}
51325119

51335120
if (IsSYCL) {
@@ -5213,6 +5200,35 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
52135200
}
52145201
}
52155202
}
5203+
// Add any predefined macros associated with intel_gpu* type targets
5204+
// passed in with -fsycl-targets
5205+
// TODO: Macros are populated during device compilations and saved for
5206+
// addition to the host compilation. There is no dependence connection
5207+
// between device and host where we should be able to use the offloading
5208+
// arch to add the macro to the host compile.
5209+
auto addTargetMacros = [&](const llvm::Triple &Triple) {
5210+
if (!Triple.isSPIR())
5211+
return;
5212+
SmallString<64> Macro;
5213+
if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) {
5214+
StringRef Device = JA.getOffloadingArch();
5215+
if (!Device.empty()) {
5216+
Macro = "-D";
5217+
Macro += SYCL::gen::getGenDeviceMacro(Device);
5218+
}
5219+
} else if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64)
5220+
Macro = "-D__SYCL_TARGET_INTEL_X86_64__";
5221+
if (Macro.size()) {
5222+
CmdArgs.push_back(Args.MakeArgString(Macro));
5223+
D.addSYCLTargetMacroArg(Args, Macro);
5224+
}
5225+
};
5226+
if (IsSYCLOffloadDevice)
5227+
addTargetMacros(RawTriple);
5228+
else {
5229+
for (auto &Macro : D.getSYCLTargetMacroArgs())
5230+
CmdArgs.push_back(Args.MakeArgString(Macro));
5231+
}
52165232
}
52175233

52185234
if (IsOpenMPDevice) {

clang/test/Driver/sycl-intel-gpu.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@
8484
// MACRO: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
8585
// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
8686
// DEVICE: ocloc{{.*}} "-device" "[[DEV_STR]]"
87+
// MACRO: clang{{.*}} "-fsycl-is-host"
88+
// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
8789

8890
/// -fsycl-targets=spir64_x86_64 should set a specific macro
8991
// RUN: %clangxx -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \
@@ -92,6 +94,8 @@
9294
// RUN: FileCheck %s --check-prefix=MACRO_X86_64
9395
// MACRO_X86_64: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown"
9496
// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__"
97+
// MACRO_X86_64: clang{{.*}} "-fsycl-is-host"
98+
// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__"
9599

96100
/// test for invalid arch
97101
// RUN: %clangxx -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \

devops/test_configs.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
],
1010
"image": "${{ inputs.intel_drivers_image }}",
1111
"container_options": "-u 1001 --device=/dev/dri --privileged --cap-add SYS_ADMIN",
12-
"check_sycl_all": "level_zero:gpu,host",
12+
"check_sycl_all": "level_zero:gpu",
1313
"cmake_args": ""
1414
},
1515
{
@@ -21,7 +21,7 @@
2121
],
2222
"image": "${{ inputs.intel_drivers_image }}",
2323
"container_options": "-u 1001 --device=/dev/dri --privileged --cap-add SYS_ADMIN",
24-
"check_sycl_all": "opencl:gpu,host",
24+
"check_sycl_all": "opencl:gpu",
2525
"cmake_args": ""
2626
},
2727
{
@@ -33,7 +33,7 @@
3333
],
3434
"image": "${{ inputs.intel_drivers_image }}",
3535
"container_options": "-u 1001",
36-
"check_sycl_all": "opencl:cpu,host",
36+
"check_sycl_all": "opencl:cpu",
3737
"cmake_args": ""
3838
},
3939
{
@@ -57,7 +57,7 @@
5757
],
5858
"image": "${{ inputs.amdgpu_image }}",
5959
"container_options": "--device=/dev/dri --device=/dev/kfd",
60-
"check_sycl_all": "hip:gpu,host",
60+
"check_sycl_all": "hip:gpu",
6161
"cmake_args": "-DHIP_PLATFORM=\"AMD\" -DAMD_ARCH=\"gfx1031\""
6262
},
6363
{
@@ -69,7 +69,7 @@
6969
],
7070
"image": "${{ inputs.cuda_image }}",
7171
"container_options": "--gpus all",
72-
"check_sycl_all": "cuda:gpu,host",
72+
"check_sycl_all": "cuda:gpu",
7373
"cmake_args": ""
7474
}
7575
],
@@ -83,7 +83,7 @@
8383
],
8484
"image": "${{ inputs.cuda_image }}",
8585
"container_options": "--gpus all",
86-
"sycl_device_filter": "ext_oneapi_cuda:gpu,host",
86+
"sycl_device_filter": "ext_oneapi_cuda:gpu",
8787
"cmake_args": "-DDPCPP_TARGET_TRIPLES=nvptx64-nvidia-cuda"
8888
}
8989
]

libclc/ptx-nvidiacl/libspirv/group/collectives.cl

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -426,18 +426,16 @@ __CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, long, 0l)
426426
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, long, 0l)
427427

428428
// half requires additional mangled entry points
429-
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFAddjjDF16_(uint scope, uint op,
430-
half x) {
431-
return __spirv_GroupFAdd(scope, op, x);
432-
}
433-
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFMinjjDF16_(uint scope, uint op,
434-
half x) {
435-
return __spirv_GroupFMin(scope, op, x);
436-
}
437-
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFMaxjjDF16_(uint scope, uint op,
438-
half x) {
439-
return __spirv_GroupFMax(scope, op, x);
440-
}
429+
#define __CLC_GROUP_COLLECTIVE__DF16(MANGLED_NAME, SPIRV_DISPATCH) \
430+
_CLC_DEF _CLC_CONVERGENT half MANGLED_NAME(uint scope, uint op, half x) { \
431+
return SPIRV_DISPATCH(scope, op, x); \
432+
}
433+
__CLC_GROUP_COLLECTIVE__DF16(_Z17__spirv_GroupFAddjjDF16_, __spirv_GroupFAdd)
434+
__CLC_GROUP_COLLECTIVE__DF16(_Z17__spirv_GroupFMinjjDF16_, __spirv_GroupFMin)
435+
__CLC_GROUP_COLLECTIVE__DF16(_Z17__spirv_GroupFMaxjjDF16_, __spirv_GroupFMax)
436+
__CLC_GROUP_COLLECTIVE__DF16(_Z20__spirv_GroupFMulKHRjjDF16_,
437+
__spirv_GroupFMulKHR)
438+
#undef __CLC_GROUP_COLLECTIVE__DF16
441439

442440
#undef __CLC_GROUP_COLLECTIVE_4
443441
#undef __CLC_GROUP_COLLECTIVE_5

sycl/ReleaseNotes.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,22 +48,22 @@ Release notes for commit range [`4043dda3..0f579bae`](https://github.com/intel/l
4848
### Documentation
4949

5050
- Added stateful to stateless memory access conversion
51-
[design document](sycl/doc/design/ESIMDStatelesAccessors.md). [3e03f300]
52-
- Added [`sycl_ext_oneapi_complex`](sycl/doc/extensions/proposed/sycl_ext_oneapi_complex.asciidoc)
51+
[design document](doc/design/ESIMDStatelesAccessors.md). [3e03f300]
52+
- Added [`sycl_ext_oneapi_complex`](doc/extensions/proposed/sycl_ext_oneapi_complex.asciidoc)
5353
extension proposal. [01589da5]
54-
- Updated [`sycl_ext_intel_fpga_device_selector`](sycl/doc/extensions/supported/sycl_ext_intel_fpga_device_selector.asciidoc)
54+
- Updated [`sycl_ext_intel_fpga_device_selector`](doc/extensions/supported/sycl_ext_intel_fpga_device_selector.asciidoc)
5555
extension to add `fpga_simulator_accessor`. [9bef890d]
56-
- Added [`sycl_ext_intel_fpga_kernel_interface_properties`](sycl/doc/extension/proposed/sycl_ext_intel_fpga_kernel_interface_properties.asciidoc) extension proposal. [4b6bd14b]
57-
- Updated [`sycl_ext_oneapi_complex_algorithms`](sycl/doc/extensions/proposed/sycl_ext_oneapi_complex_algorithms.asciidoc)
56+
- Added [`sycl_ext_intel_fpga_kernel_interface_properties`](doc/extension/proposed/sycl_ext_intel_fpga_kernel_interface_properties.asciidoc) extension proposal. [4b6bd14b]
57+
- Updated [`sycl_ext_oneapi_complex_algorithms`](doc/extensions/proposed/sycl_ext_oneapi_complex_algorithms.asciidoc)
5858
extension to include `sycl::complex` as supported type for algorithms. [07c5b48f]
59-
- Clarified sub-group size calculation in [`sycl_ext_oneapi_invoke_simd`](sycl/doc/extensions/experimental/sycl_ext_oneapi_invoke_simd.asciidoc) extension spec. [9b33ad0f]
60-
- Updated [`sycl_ext_oneapi_accessor_properties`](sycl/doc/extensions/supported/sycl_ext_oneapi_accessor_properties.asciidoc)
59+
- Clarified sub-group size calculation in [`sycl_ext_oneapi_invoke_simd`](doc/extensions/experimental/sycl_ext_oneapi_invoke_simd.asciidoc) extension spec. [9b33ad0f]
60+
- Updated [`sycl_ext_oneapi_accessor_properties`](doc/extensions/supported/sycl_ext_oneapi_accessor_properties.asciidoc)
6161
to mark `has_property` API as `noexcept`. [7805aa3f]
62-
- Updated [`sycl_ext_intel_device_info`](sycl/doc/extensions/supported/sycl_ext_intel_device_info.md)
62+
- Updated [`sycl_ext_intel_device_info`](doc/extensions/supported/sycl_ext_intel_device_info.md)
6363
to support querying free device memory. [0eeef2b3]
64-
- Updated [`sycl_ext_oneapi_matrix`](sycl/doc/extensions/experimental/sycl_ext_oneapi_matrix.asciidoc)
64+
- Updated [`sycl_ext_oneapi_matrix`](doc/extensions/experimental/sycl_ext_oneapi_matrix.asciidoc)
6565
with description of new matrix features. [770f540d]
66-
- Moved [`sycl_ext_oneapi_invoke_simd`](sycl/doc/extensions/experimental/sycl_ext_oneapi_invoke_simd.asciidoc)
66+
- Moved [`sycl_ext_oneapi_invoke_simd`](doc/extensions/experimental/sycl_ext_oneapi_invoke_simd.asciidoc)
6767
extensions specification from `proposed` to `experimental` because
6868
implementation is available. [6bee3440]
6969

sycl/doc/EnvironmentVariables.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ fixed order of properties in the pattern.
9090
This environment variable limits the SYCL RT to use only a subset of the system's devices. Setting this environment variable affects all of the device query functions (`platform::get_devices()` and `platform::get_platforms()`) and all of the device selectors.
9191

9292
The value of this environment variable is a comma separated list of filters, where each filter is a triple of the form "`backend`:`device_type`:`device_num`" (without the quotes). Each element of the triple is optional, but each filter must have at least one value. Possible values of `backend` are:
93-
- `host`
93+
- `host` (Deprecated)
9494
- `level_zero`
9595
- `opencl`
9696
- `cuda`
@@ -99,17 +99,17 @@ The value of this environment variable is a comma separated list of filters, whe
9999
- `*`
100100

101101
Possible values of `device_type` are:
102-
- `host`
102+
- `host` (Deprecated)
103103
- `cpu`
104104
- `gpu`
105105
- `acc`
106106
- `*`
107107

108108
`device_num` is an integer that indexes the enumeration of devices from the sycl-ls utility tool, where the first device in that enumeration has index zero in each backend. For example, `SYCL_DEVICE_FILTER=2` will return all devices with index '2' from all different backends. If multiple devices satisfy this device number (e.g., GPU and CPU devices can be assigned device number '2'), then default_selector will choose the device with the highest heuristic point. When `SYCL_DEVICE_ALLOWLIST` is set, it is applied before enumerating devices and affects `device_num` values.
109109

110-
Assuming a filter has all three elements of the triple, it selects only those devices that come from the given backend, have the specified device type, AND have the given device index. If more than one filter is specified, the RT is restricted to the union of devices selected by all filters. The RT does not include the `host` backend and the `host` device automatically unless one of the filters explicitly specifies the `host` device type. Therefore, `SYCL_DEVICE_FILTER=host` should be set to enforce SYCL to use the `host` device only.
110+
Assuming a filter has all three elements of the triple, it selects only those devices that come from the given backend, have the specified device type, AND have the given device index. If more than one filter is specified, the RT is restricted to the union of devices selected by all filters.
111111

112-
Note that all device selectors will throw an exception if the filtered list of devices does not include a device that satisfies the selector. For instance, `SYCL_DEVICE_FILTER=cpu,level_zero` will cause `host_selector()` to throw an exception. `SYCL_DEVICE_FILTER` also limits loading only specified plugins into the SYCL RT. In particular, `SYCL_DEVICE_FILTER=level_zero` will cause the `cpu_selector` to throw an exception since SYCL RT will only load the `level_zero` backend which does not support any CPU devices at this time. When multiple devices satisfy the filter (e..g, `SYCL_DEVICE_FILTER=gpu`), only one of them will be selected.
112+
Note that all device selectors will throw an exception if the filtered list of devices does not include a device that satisfies the selector. For instance, `SYCL_DEVICE_FILTER=cpu` will cause `gpu_selector()` to throw an exception. `SYCL_DEVICE_FILTER` also limits loading only specified plugins into the SYCL RT. In particular, `SYCL_DEVICE_FILTER=level_zero` will cause the `cpu_selector` to throw an exception since SYCL RT will only load the `level_zero` backend which does not support any CPU devices at this time. When multiple devices satisfy the filter (e..g, `SYCL_DEVICE_FILTER=gpu`), only one of them will be selected.
113113

114114
## `SYCL_REDUCTION_PREFERRED_WORKGROUP_SIZE`
115115

sycl/doc/extensions/supported/sycl_ext_intel_fpga_device_selector.asciidoc

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,18 +90,25 @@ supports.
9090

9191
|2
9292
|fpga_simulator_selector added.
93+
94+
|3
95+
|SYCL 2020 selector variants `fpga_selector_v`, `fpga_simulator_selector_v`, and
96+
`fpga_emulator_selector_v` added. Old selectors `fpga_selector`,
97+
`fpga_simulator_selector`, and `fpga_emulator_selector` deprecated.
9398
|===
9499

95100
=== Select FPGA hardware device
96101
....
97102
// select FPGA hardware device
98-
sycl::queue deviceQueue{sycl::ext::intel::fpga_selector{}};
103+
sycl::queue deviceQueue1{sycl::ext::intel::fpga_selector{}}; // Deprecated
104+
sycl::queue deviceQueue2{sycl::ext::intel::fpga_selector_v};
99105
....
100106

101107
=== Select FPGA simulator device
102108
....
103109
// select FPGA simulator device
104-
sycl::queue deviceQueue{sycl::ext::intel::fpga_simulator_selector{}};
110+
sycl::queue deviceQueue1{sycl::ext::intel::fpga_simulator_selector{}}; // Deprecated
111+
sycl::queue deviceQueue2{sycl::ext::intel::fpga_simulator_selector_v};
105112
....
106113

107114
[NOTE]
@@ -112,14 +119,16 @@ Added in version 2 of this extension.
112119
=== Select FPGA emulator device
113120
....
114121
// select FPGA emulator device
115-
sycl::queue deviceQueue{sycl::ext::intel::fpga_emulator_selector{}};
122+
sycl::queue deviceQueue1{sycl::ext::intel::fpga_emulator_selector{}}; // Deprecated
123+
sycl::queue deviceQueue2{sycl::ext::intel::fpga_emulator_selector_v};
116124
....
117125

118126
== Implementation notes
119127

120128
The current implementation has a restriction on the use of
121-
`fpga_simulator_selector`. If an object of `fpga_simulator_selector` is
122-
defined in the application, FPGA hardware devices selected using
123-
`fpga_selector` will select a simulator device. This behaviour is expected to
129+
`fpga_simulator_selector` and `fpga_simulator_selector_v`. If an object of
130+
`fpga_simulator_selector` is defined or `fpga_simulator_selector_v` is used in
131+
the application, FPGA hardware devices selected using fpga_selector` and
132+
`fpga_selector_v` will select a simulator device. This behaviour is expected to
124133
be eliminated in the future.
125134

sycl/include/sycl/backend_types.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ enum class backend : char {
2828
cuda __SYCL2020_DEPRECATED("use 'ext_oneapi_cuda' instead") = ext_oneapi_cuda,
2929
all = 4,
3030
ext_intel_esimd_emulator = 5,
31-
esimd_cpu __SYCL2020_DEPRECATED("use 'ext_oneapi_esimd_emulator' instead") =
31+
esimd_cpu __SYCL2020_DEPRECATED("use 'ext_intel_esimd_emulator' instead") =
3232
ext_intel_esimd_emulator,
3333
ext_oneapi_hip = 6,
3434
hip __SYCL2020_DEPRECATED("use 'ext_oneapi_hip' instead") = ext_oneapi_hip,

0 commit comments

Comments
 (0)