Skip to content

Commit dd64f97

Browse files
committed
Merge branch 'sycl' into fp-model
2 parents cfa3a25 + 82e08b5 commit dd64f97

File tree

112 files changed

+1290
-1205
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+1290
-1205
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12486,9 +12486,8 @@ def err_sycl_function_attribute_mismatch : Error<
1248612486
def err_sycl_x_y_z_arguments_must_be_one : Error<
1248712487
"all %0 attribute arguments must be '1' when the %1 attribute argument is '0'">;
1248812488
def err_sycl_attribute_internal_decl
12489-
: Error<"%0 attribute cannot be applied to a "
12490-
"static %select{function|variable}1 or %select{function|variable}1 "
12491-
"in an anonymous namespace">;
12489+
: Error<"%0 attribute cannot be applied to a %select{function|variable}1"
12490+
" without external linkage">;
1249212491
def err_sycl_attribute_not_device_global
1249312492
: Error<"%0 attribute can only be applied to 'device_global' variables">;
1249412493
def err_fpga_attribute_incorrect_variable

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -938,8 +938,17 @@ void CudaToolChain::addClangTargetOptions(
938938
DeviceOffloadingKind == Action::OFK_Cuda) &&
939939
"Only OpenMP, SYCL or CUDA offloading kinds are supported for NVIDIA GPUs.");
940940

941-
CC1Args.append(
942-
{"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
941+
// If we are compiling SYCL kernels for Nvidia GPUs, we do not support Cuda
942+
// device code compatibility, hence we do not set Cuda mode in that instance.
943+
if (DeviceOffloadingKind == Action::OFK_SYCL) {
944+
toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
945+
CC1Args);
946+
947+
if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt))
948+
CC1Args.push_back("-fcuda-prec-sqrt");
949+
} else {
950+
CC1Args.append(
951+
{"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
943952

944953
// Unsized function arguments used for variadics were introduced in CUDA-9.0
945954
// We still do not support generating code that actually uses variadic
@@ -948,18 +957,10 @@ void CudaToolChain::addClangTargetOptions(
948957
if (CudaInstallation.version() >= CudaVersion::CUDA_90)
949958
CC1Args.push_back("-fcuda-allow-variadic-functions");
950959

951-
if (DriverArgs.hasArg(options::OPT_fsycl)) {
952-
// Add these flags for .cu SYCL compilation.
960+
// Add these flags for .cu SYCL compilation.
961+
if (DeviceOffloadingKind == Action::OFK_Cuda &&
962+
DriverArgs.hasArg(options::OPT_fsycl))
953963
CC1Args.append({"-std=c++17", "-fsycl-is-host"});
954-
}
955-
956-
if (DeviceOffloadingKind == Action::OFK_SYCL) {
957-
toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
958-
CC1Args);
959-
960-
if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt)) {
961-
CC1Args.push_back("-fcuda-prec-sqrt");
962-
}
963964
}
964965

965966
auto NoLibSpirv = DriverArgs.hasArg(options::OPT_fno_sycl_libspirv) ||

clang/test/Driver/sycl.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
// Failing on Windows - temporarily disable
2-
// REQUIRES: system-linux
3-
41
// RUN: %clang -### -fsycl -c %s 2>&1 | FileCheck %s --check-prefix=ENABLED
52
// RUN: %clang -### -fsycl %s 2>&1 | FileCheck %s --check-prefix=ENABLED
63
// RUN: %clang -### -fno-sycl -fsycl %s 2>&1 | FileCheck %s --check-prefix=ENABLED
@@ -107,7 +104,7 @@
107104
// Test with a bad argument is expected to fail
108105
// RUN: not %clang -fsycl-help=foo %s 2>&1 | FileCheck %s --check-prefix=SYCL-HELP-BADARG
109106
// RUN: %clang -### -fsycl-help=gen %s 2>&1 | FileCheck %s --check-prefix=SYCL-HELP-GEN
110-
// RUN: env PATH=%t-sycl-dir %clang -### -fsycl-help=fpga %s 2>&1 | FileCheck %s --check-prefixes=SYCL-HELP-FPGA,SYCL-HELP-FPGA-OUT -DDIR=%t-sycl-dir
107+
// RUN: env "PATH=%t-sycl-dir%{pathsep}%PATH%" %clang -### -fsycl-help=fpga %s 2>&1 | FileCheck %s --check-prefixes=SYCL-HELP-FPGA,SYCL-HELP-FPGA-OUT -DDIR=%t-sycl-dir
111108
// RUN: %clang -### -fsycl-help=x86_64 %s 2>&1 | FileCheck %s --check-prefix=SYCL-HELP-CPU
112109
// RUN: %clang -### -fsycl-help %s 2>&1 | FileCheck %s --check-prefixes=SYCL-HELP-GEN,SYCL-HELP-FPGA,SYCL-HELP-CPU
113110
// SYCL-HELP-BADARG: unsupported argument 'foo' to option '-fsycl-help='

clang/test/Preprocessor/sycl-macro.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
// RUNx: %clang_cc1 %s -fsycl-id-queries-fit-in-int -fsycl-is-device -E -dM -fms-compatibility | FileCheck --check-prefix=CHECK-MSVC %s
77
// RUN: %clang_cc1 -fno-sycl-id-queries-fit-in-int %s -E -dM | FileCheck \
88
// RUN: --check-prefix=CHECK-NO-SYCL_FIT_IN_INT %s
9-
// RUN: %clang_cc1 %s -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-CUDA %s
9+
// RUN: %clang_cc1 %s -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fsycl-is-device -E -dM | FileCheck \
10+
// RUN: --check-prefix=CHECK-CUDA %s -DARCH_CODE=800
11+
// RUN: %clangxx %s -fsycl -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --offload-arch=sm_80 -E -dM | FileCheck \
12+
// RUN: --check-prefix=CHECK-CUDA-SYCL-DRIVER %s
1013
// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-HIP %s
1114

1215
// RUN: %clang_cc1 %s -triple nvptx64-nvidia-cuda -target-cpu sm_90a -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-CUDA-FEATURE %s
@@ -32,8 +35,10 @@
3235
// CHECK-NO-SYCL_FIT_IN_INT-NOT:#define __SYCL_ID_QUERIES_FIT_IN_INT__ 1
3336
// CHECK-SYCL-ID:#define __SYCL_ID_QUERIES_FIT_IN_INT__ 1
3437

35-
// CHECK-CUDA:#define __SYCL_CUDA_ARCH__ 800
36-
// CHECK-CUDA-NOT:#define __CUDA_ARCH__ 800
38+
// CHECK-CUDA:#define __SYCL_CUDA_ARCH__ [[ARCH_CODE]]
39+
// CHECK-CUDA-NOT:#define __CUDA_ARCH__ {{[0-9]+}}
40+
41+
// CHECK-CUDA-SYCL-DRIVER-NOT: #define __CUDA_ARCH__ {{[0-9]+}}
3742

3843
// CHECK-HIP:#define __CUDA_ARCH__ 0
3944

clang/test/SemaSYCL/device-indirectly-callable-attr.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,21 @@ int N;
1212
void
1313
bar() {}
1414

15-
[[intel::device_indirectly_callable]] // expected-error {{'device_indirectly_callable' attribute cannot be applied to a static function or function in an anonymous namespace}}
15+
[[intel::device_indirectly_callable]] // expected-error {{'device_indirectly_callable' attribute cannot be applied to a function without external linkage}}
1616
static void
1717
func1() {}
1818

1919
namespace {
20-
[[intel::device_indirectly_callable]] // expected-error {{'device_indirectly_callable' attribute cannot be applied to a static function or function in an anonymous namespace}}
20+
[[intel::device_indirectly_callable]] // expected-error {{'device_indirectly_callable' attribute cannot be applied to a function without external linkage}}
2121
void
2222
func2() {}
23+
24+
struct UnnX {};
2325
}
2426

27+
[[intel::device_indirectly_callable]] // expected-error {{'device_indirectly_callable' attribute cannot be applied to a function without external linkage}}
28+
void func4(UnnX) {}
29+
2530
class A {
2631
[[intel::device_indirectly_callable]] A() {}
2732

clang/test/SemaSYCL/device_global_external.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
using namespace sycl::ext::oneapi;
66

77
SYCL_EXTERNAL device_global<int> glob;
8-
// expected-error@+1{{'sycl_device' attribute cannot be applied to a static variable or variable in an anonymous namespace}}
8+
// expected-error@+1{{'sycl_device' attribute cannot be applied to a variable without external linkage}}
99
SYCL_EXTERNAL static device_global<float> static_glob;
1010

1111
namespace foo {
@@ -20,10 +20,15 @@ struct RandomStruct {
2020
SYCL_EXTERNAL RandomStruct S;
2121

2222
namespace {
23-
// expected-error@+1{{'sycl_device' attribute cannot be applied to a static variable or variable in an anonymous namespace}}
23+
// expected-error@+1{{'sycl_device' attribute cannot be applied to a variable without external linkage}}
2424
SYCL_EXTERNAL device_global<int> same_name;
25+
26+
struct UnnX {};
2527
} // namespace
2628

29+
// expected-error@+1{{'sycl_device' attribute cannot be applied to a variable without external linkage}}
30+
SYCL_EXTERNAL device_global<UnnX> dg_x;
31+
2732
// expected-error@+1{{'sycl_device' attribute can only be applied to 'device_global' variables}}
2833
SYCL_EXTERNAL int AAA;
2934

clang/test/SemaSYCL/sycl-device.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,19 @@ int N;
1212
__attribute__((sycl_device(3))) // expected-error {{'sycl_device' attribute takes no arguments}}
1313
void bar() {}
1414

15-
__attribute__((sycl_device)) // expected-error {{'sycl_device' attribute cannot be applied to a static function or function in an anonymous namespace}}
15+
__attribute__((sycl_device)) // expected-error {{'sycl_device' attribute cannot be applied to a function without external linkage}}
1616
static void func1() {}
1717

1818
namespace {
19-
__attribute__((sycl_device)) // expected-error {{'sycl_device' attribute cannot be applied to a static function or function in an anonymous namespace}}
19+
__attribute__((sycl_device)) // expected-error {{'sycl_device' attribute cannot be applied to a function without external linkage}}
2020
void func2() {}
21+
22+
struct UnnX {};
2123
}
2224

25+
__attribute__((sycl_device)) // expected-error {{'sycl_device' attribute cannot be applied to a function without external linkage}}
26+
void func4(UnnX) {}
27+
2328
class A {
2429
__attribute__((sycl_device))
2530
A() {}

opencl/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ set(OCL_LOADER_REPO
2020

2121
# Repo tags/hashes
2222

23-
set(OCL_HEADERS_TAG 9ddb236e6eb3cf844f9e2f81677e1045f9bf838e)
24-
set(OCL_LOADER_TAG 9a3e962f16f5097d2054233ad8b6dad51b6f41b7)
23+
set(OCL_HEADERS_TAG 542d7a8f65ecfd88b38de35d8b10aa67b36b33b2)
24+
set(OCL_LOADER_TAG 3d27d7ca04d29fabe608a2372ce693601bcc4e81)
2525

2626
# OpenCL Headers
2727
if(NOT OpenCL_HEADERS)

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
117117
endfunction()
118118

119119
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
120-
# commit 185149248dd257bd37482aac43307a136204c051
121-
# Merge: 2af159d4 d619bcd1
120+
# commit 9ca3ec7a9c1d2f4a362d7e5add103b30271a8a55
121+
# Merge: 7384e2d7 59e5e405
122122
# Author: Piotr Balcer <[email protected]>
123-
# Date: Thu Sep 19 11:02:27 2024 +0200
124-
# Merge pull request #1934 from yingcong-wu/yc/0806-exclude-shadow-from-coredump
125-
# [DeviceSanitizer] Exclude shadow memory from coredump file for CPU device.
126-
set(UNIFIED_RUNTIME_TAG 185149248dd257bd37482aac43307a136204c051)
123+
# Date: Mon Sep 23 10:58:51 2024 +0200
124+
# Merge pull request #2113 from oneapi-src/revert-1698-counter-based-2
125+
# Revert "[L0] Phase 2 of Counter-Based Event Implementation"
126+
set(UNIFIED_RUNTIME_TAG 9ca3ec7a9c1d2f4a362d7e5add103b30271a8a55)
127127

128128
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
129129
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need

sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -401,9 +401,14 @@ of the link:sycl_ext_intel_matrix.asciidoc[sycl_ext_intel_matrix]
401401

402402
Besides the `Group` and the `joint_matrix` arguments,
403403
`joint_matrix_apply` takes a C++ Callable object which is invoked once
404-
for each element of the matrix. This callable object must be invocable
405-
with a single parameter of type `T&`. Commonly, applications pass a
406-
lambda expression.
404+
for each element of the matrix. There are two cases: (1) one matrix is
405+
passed, (2) two matrices are passed.
406+
407+
===== Unary Operation
408+
In this case, `joint_matrix_apply` takes one `joint_matrix`
409+
argument. The callable object must be invocable with a single
410+
parameter of type `T&`. Commonly, applications pass a lambda
411+
expression.
407412

408413
```c++
409414
namespace sycl::ext::oneapi::experimental::matrix {
@@ -427,6 +432,39 @@ joint_matrix_apply(sg, C, [=](T &x) {
427432
});
428433
```
429434

435+
===== Binary Operation
436+
In this case, `joint_matrix_apply` takes two `joint_matrix` arguments:
437+
`jm0` and `jm1` that have the same `use`, number of rows, number of
438+
columns, and `layout`. `jm0` and `jm1` can be read-only, write-only,
439+
or read and write arguments. The callable object must be invocable
440+
with two parameters `x` and `y` of types `T0&` and `T1&`, where `x` is
441+
an element from `jm0` and `y` is an element from `jm1`. Moreover, `x`
442+
and `y` are guaranteed to have identical coordinates in their
443+
respective matrices. Commonly, applications pass a lambda expression.
444+
445+
```c++
446+
namespace sycl::ext::oneapi::experimental::matrix {
447+
448+
template<typename Group, typename T0, typename T1, use Use,
449+
size_t Rows, size_t Cols, layout Layout, typename F>
450+
void joint_matrix_apply(Group g,
451+
joint_matrix<Group, T0, Use, Rows, Cols, Layout>& jm0,
452+
joint_matrix<Group, T1, Use, Rows, Cols, Layout>& jm1,
453+
F&& func);
454+
455+
} // namespace sycl::ext::oneapi::experimental::matrix
456+
```
457+
458+
In the following example, every element `x` of the matrix `C` is
459+
multiplied by `alpha`. The result is stored in the element `y` of
460+
the matrix `D`.
461+
462+
```c++
463+
joint_matrix_apply(sg, C, D, [=](const T &x, T &y) {
464+
y = x * alpha;
465+
});
466+
```
467+
430468
==== Prefetch
431469

432470
```c++

0 commit comments

Comments
 (0)