Skip to content

Commit 4c855cb

Browse files
committed
Merge remote-tracking branch 'origin/sycl' into private/asachkov/track-amount-of-xfailed-tests-without-tracker
2 parents e9e4dd2 + 6ba05b7 commit 4c855cb

File tree

160 files changed

+2283
-305
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

160 files changed

+2283
-305
lines changed

.github/workflows/sycl-linux-run-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ permissions:
151151

152152
jobs:
153153
run:
154-
if: inputs.skip_run == 'false'
154+
if: github.event_name == 'workflow_dispatch' || inputs.skip_run == 'false'
155155
name: ${{ inputs.name }}
156156
runs-on: ${{ fromJSON(inputs.runner) }}
157157
container:

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -946,6 +946,15 @@ void CudaToolChain::addClangTargetOptions(
946946

947947
if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt))
948948
CC1Args.push_back("-fcuda-prec-sqrt");
949+
950+
bool FastRelaxedMath = DriverArgs.hasFlag(
951+
options::OPT_ffast_math, options::OPT_fno_fast_math, false);
952+
bool UnsafeMathOpt =
953+
DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
954+
options::OPT_fno_unsafe_math_optimizations, false);
955+
if (FastRelaxedMath || UnsafeMathOpt)
956+
CC1Args.append({"-mllvm", "--nvptx-prec-divf32=0", "-mllvm",
957+
"--nvptx-prec-sqrtf32=0"});
949958
} else {
950959
CC1Args.append(
951960
{"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});

clang/test/Driver/sycl-device-traits-macros-amdgcn.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,10 @@
5050
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
5151
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx940 \
5252
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
53+
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
5354
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx941 \
5455
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
56+
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
5557
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx942 \
5658
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
5759
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// REQUIRES: nvptx-registered-target
2+
3+
// RUN: %clang -### -nocudalib \
4+
// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
5+
// RUN: | FileCheck --check-prefix=CHECK-DEFAULT %s
6+
7+
// RUN: %clang -### -nocudalib \
8+
// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ffast-math %s 2>&1 \
9+
// RUN: | FileCheck --check-prefix=CHECK-FAST %s
10+
11+
// RUN: %clang -### -nocudalib \
12+
// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda -funsafe-math-optimizations %s 2>&1 \
13+
// RUN: | FileCheck --check-prefix=CHECK-FAST %s
14+
15+
// CHECK-FAST: "-mllvm" "--nvptx-prec-divf32=0" "-mllvm" "--nvptx-prec-sqrtf32=0"
16+
17+
// CHECK-DEFAULT-NOT: "nvptx-prec-divf32=0"
18+
// CHECK-DEFAULT-NOT: "nvptx-prec-sqrtf32=0"

libdevice/nativecpu_utils.cpp

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,13 @@ __spirv_MemoryBarrier(uint32_t Memory, uint32_t Semantics) {
6565
template <> \
6666
__SYCL_CONVERGENT__ DEVICE_EXTERNAL Type \
6767
__spirv_SubgroupBlockReadINTEL<Type>(const OCL_GLOBAL PType *Ptr) noexcept { \
68-
return *Ptr; \
68+
return Ptr[__spirv_SubgroupLocalInvocationId()]; \
6969
} \
7070
template <> \
7171
__SYCL_CONVERGENT__ DEVICE_EXTERNAL void \
7272
__spirv_SubgroupBlockWriteINTEL<Type>(PType OCL_GLOBAL * ptr, \
7373
Type v) noexcept { \
74-
*(Type *)ptr = v; \
74+
((Type*)ptr)[__spirv_SubgroupLocalInvocationId()] = v; \
7575
}
7676

7777
#define DefSubgroupBlockINTEL_vt(Type, VT_name) \
@@ -92,16 +92,19 @@ template <class T> struct vtypes {
9292
DefSubgroupBlockINTEL(uint32_t) DefSubgroupBlockINTEL(uint64_t)
9393
DefSubgroupBlockINTEL(uint8_t) DefSubgroupBlockINTEL(uint16_t)
9494

95-
#define DefineGOp1(spir_sfx, mux_name)\
96-
DEVICE_EXTERN_C bool mux_name(bool);\
95+
#define DefineGOp1(spir_sfx, name)\
96+
DEVICE_EXTERN_C bool __mux_sub_group_##name##_i1(bool);\
97+
DEVICE_EXTERN_C bool __mux_work_group_##name##_i1(uint32_t id, bool val);\
9798
DEVICE_EXTERNAL bool __spirv_Group ## spir_sfx(unsigned g, bool val) {\
9899
if (__spv::Scope::Flag::Subgroup == g)\
99-
return mux_name(val);\
100+
return __mux_sub_group_##name##_i1(val);\
101+
else if (__spv::Scope::Flag::Workgroup == g)\
102+
return __mux_work_group_##name##_i1(0, val);\
100103
return false;\
101104
}
102105

103-
DefineGOp1(Any, __mux_sub_group_any_i1)
104-
DefineGOp1(All, __mux_sub_group_all_i1)
106+
DefineGOp1(Any, any)
107+
DefineGOp1(All, all)
105108

106109

107110
#define DefineGOp(Type, MuxType, spir_sfx, mux_sfx) \
@@ -184,18 +187,6 @@ DefineBitwiseGroupOp(uint64_t, int64_t, i64)
184187

185188
DefineLogicalGroupOp(bool, bool, i1)
186189

187-
#define DefineBroadCastImpl(Type, Sfx, MuxType, IDType) \
188-
DEVICE_EXTERN_C MuxType __mux_work_group_broadcast_##Sfx( \
189-
int32_t id, MuxType val, int64_t lidx, int64_t lidy, int64_t lidz); \
190-
DEVICE_EXTERN_C MuxType __mux_sub_group_broadcast_##Sfx(MuxType val, \
191-
int32_t sg_lid); \
192-
DEVICE_EXTERNAL Type __spirv_GroupBroadcast(uint32_t g, Type v, \
193-
IDType l) { \
194-
if (__spv::Scope::Flag::Subgroup == g) \
195-
return __mux_sub_group_broadcast_##Sfx(v, l); \
196-
return Type(); /*todo: add support for other flags as they are tested*/ \
197-
}
198-
199190
#define DefineBroadcastMuxType(Type, Sfx, MuxType, IDType) \
200191
DEVICE_EXTERN_C MuxType __mux_work_group_broadcast_##Sfx( \
201192
int32_t id, MuxType val, uint64_t lidx, uint64_t lidy, uint64_t lidz); \
@@ -216,7 +207,7 @@ DefineLogicalGroupOp(bool, bool, i1)
216207
if (__spv::Scope::Flag::Subgroup == g) \
217208
return __mux_sub_group_broadcast_##Sfx(v, l[0]); \
218209
else \
219-
return __mux_work_group_broadcast_##Sfx(0, v, l[0], l[0], 0); \
210+
return __mux_work_group_broadcast_##Sfx(0, v, l[0], l[1], 0); \
220211
} \
221212
\
222213
DEVICE_EXTERNAL Type __spirv_GroupBroadcast(uint32_t g, Type v, \

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def AspectExt_oneapi_virtual_mem : Aspect<"ext_oneapi_virtual_mem">;
8585
def AspectExt_oneapi_cuda_cluster_group : Aspect<"ext_oneapi_cuda_cluster_group">;
8686
def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">;
8787
def AspectExt_oneapi_atomic16 : Aspect<"ext_oneapi_atomic16">;
88+
def AspectExt_oneapi_virtual_functions : Aspect<"ext_oneapi_virtual_functions">;
8889
// Deprecated aspects
8990
def AspectInt64_base_atomics : Aspect<"int64_base_atomics">;
9091
def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">;
@@ -148,7 +149,8 @@ def : TargetInfo<"__TestAspectList",
148149
AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_private_alloca,
149150
AspectExt_oneapi_queue_profiling_tag, AspectExt_oneapi_virtual_mem, AspectExt_oneapi_cuda_cluster_group,
150151
AspectExt_intel_fpga_task_sequence,
151-
AspectExt_oneapi_atomic16],
152+
AspectExt_oneapi_atomic16,
153+
AspectExt_oneapi_virtual_functions],
152154
[]>;
153155
// This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT
154156
// match.

llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ getSYCLESIMDSplitStatusFromMetadata(const Module &M) {
3737
assert(MDOp && "Unexpected metadata operand");
3838
const auto &MDConst = MDOp->getOperand(0);
3939
auto *MDVal = mdconst::dyn_extract_or_null<ConstantInt>(MDConst);
40+
assert(MDVal && "Unexpected metadata operand type");
4041
uint8_t Val = MDVal->getZExtValue();
4142
assert(Val < 3 && "Unexpected value for split metadata");
4243
auto AsEnum = static_cast<module_split::SyclEsimdSplitStatus>(Val);

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,14 @@ if(SYCL_UR_USE_FETCH_CONTENT)
117117
endfunction()
118118

119119
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
120-
# commit b79ebe4e98789144bcdf3832088eb6e6b5ae6967
121-
# Merge: 7b4bc761 fbb6e862
122-
# Author: Kenneth Benzie (Benie) <[email protected]>
123-
# Date: Fri Oct 4 16:39:59 2024 +0100
124-
# Merge pull request #2018 from wenju-he/L0-bindless-image-device-query
125-
# [L0] Fix device query of bindless image support
126-
set(UNIFIED_RUNTIME_TAG b79ebe4e98789144bcdf3832088eb6e6b5ae6967)
120+
# commit df6da35d6e67f2383db28dd49ab08c5c0ef541d2
121+
# Merge: 67590533 55bd5636
122+
# Author: aarongreig <[email protected]>
123+
# Date: Mon Oct 7 12:28:07 2024 +0100
124+
# Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize
125+
# [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported
126+
# instead of returning misleading default value
127+
set(UNIFIED_RUNTIME_TAG df6da35d6e67f2383db28dd49ab08c5c0ef541d2)
127128

128129
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
129130
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need

sycl/doc/syclcompat/README.md

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,41 @@ public:
855855
} // syclcompat
856856
```
857857
858+
### ptr_to_int
859+
860+
The following cuda backend specific function is introduced in order to
861+
translate from local memory pointers to `uint32_t` or `size_t` variables that
862+
contain a byte address to the local (local refers to`.shared` in nvptx) memory
863+
state space.
864+
865+
``` c++
866+
namespace syclcompat {
867+
template <typename T>
868+
__syclcompat_inline__
869+
std::enable_if_t<std::is_same_v<T, uint32_t> || std::is_same_v<T, size_t>,
870+
T>
871+
ptr_to_int(void *ptr)
872+
} // namespace syclcompat
873+
```
874+
875+
These variables can be used in inline PTX instructions that take address
876+
operands. Such inline PTX instructions are commonly used in optimized
877+
libraries. A simplified example usage of the above functions is as follows:
878+
879+
``` c++
880+
half *data = syclcompat::local_mem<half[NUM_ELEMENTS]>();
881+
// ...
882+
// ...
883+
T addr =
884+
syclcompat::ptr_to_int<T>(reinterpret_cast<char *>(data) + (id % 8) * 16);
885+
uint32_t fragment;
886+
#if defined(__NVPTX__)
887+
asm volatile("ldmatrix.sync.aligned.m8n8.x1.shared.b16 {%0}, [%1];\n"
888+
: "=r"(fragment)
889+
: "r"(addr));
890+
#endif
891+
```
892+
858893
### Device Information
859894

860895
`sycl::device` properties are encapsulated using the `device_info` helper class.
@@ -1544,10 +1579,10 @@ SYCL spec supported by the current SYCL compiler.
15441579
15451580
The `SYCLCOMPAT_CHECK_ERROR` macro encapsulates an error-handling mechanism for
15461581
expressions that might throw `sycl::exception` and `std::runtime_error`. If no
1547-
exceptions are thrown, it returns `syclcompat::error_code::SUCCESS`. If a
1548-
`sycl::exception` is caught, it returns `syclcompat::error_code::BACKEND_ERROR`.
1582+
exceptions are thrown, it returns `syclcompat::error_code::success`. If a
1583+
`sycl::exception` is caught, it returns `syclcompat::error_code::backend_error`.
15491584
If a `std::runtime_error` exception is caught,
1550-
`syclcompat::error_code::DEFAULT_ERROR` is returned instead. For both cases, it
1585+
`syclcompat::error_code::default_error` is returned instead. For both cases, it
15511586
prints the error message to the standard error stream.
15521587
15531588
``` c++
@@ -1580,7 +1615,7 @@ template <int Arg> class syclcompat_kernel_scalar;
15801615
15811616
15821617
namespace syclcompat {
1583-
enum error_code { SUCCESS = 0, BACKEND_ERROR = 1, DEFAULT_ERROR = 999 };
1618+
enum error_code { success = 0, backend_error = 1, default_error = 999 };
15841619
}
15851620
15861621
#define SYCLCOMPAT_CHECK_ERROR(expr)

sycl/include/CL/__spirv/spirv_ops.hpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1365,7 +1365,15 @@ __clc_BarrierTestWait(int64_t *state, int64_t arrival) noexcept;
13651365
__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
13661366
__clc_BarrierArriveAndWait(int64_t *state) noexcept;
13671367

1368-
#ifdef __SYCL_USE_VARIADIC_SPIRV_OCL_PRINTF__
1368+
#if defined(__SYCL_USE_VARIADIC_SPIRV_OCL_PRINTF__) && \
1369+
!defined(__INTEL_PREVIEW_BREAKING_CHANGES)
1370+
#if defined(__clang__)
1371+
#pragma clang diagnostic push
1372+
#pragma clang diagnostic ignored "-Wpedantic"
1373+
#warning \
1374+
"__SYCL_USE_VARIADIC_SPIRV_OCL_PRINTF__ is deprecated and will be removed in a future release."
1375+
#pragma clang diagnostic pop
1376+
#endif
13691377
extern __DPCPP_SYCL_EXTERNAL int
13701378
__spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format, ...);
13711379
extern __DPCPP_SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, ...);

0 commit comments

Comments
 (0)