Skip to content

Commit cb6c2e2

Browse files
committed
Merge branch 'sycl' of https://github.com/intel/llvm into benchmarking-workflow
2 parents bbb16f4 + e4d65e0 commit cb6c2e2

File tree

123 files changed

+961
-700
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

123 files changed

+961
-700
lines changed

.github/workflows/sycl-nightly.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ jobs:
108108
runner: '["Linux", "arc"]'
109109
image_options: -u 1001
110110
target_devices: opencl:cpu
111+
112+
- name: Preview mode on SPR/PVC
113+
runner: '["Linux", "pvc"]'
114+
image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
115+
target_devices: level_zero:gpu
116+
extra_lit_opts: --param test-preview-mode=True
117+
111118
uses: ./.github/workflows/sycl-linux-run-tests.yml
112119
with:
113120
name: ${{ matrix.name }}

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1056,7 +1056,8 @@ llvm::SmallVector<std::string, 12> ROCMToolChain::getCommonDeviceLibNames(
10561056
bool CorrectSqrt = false;
10571057
if (DeviceOffloadingKind == Action::OFK_SYCL) {
10581058
// When using SYCL, sqrt is only correctly rounded if the flag is specified
1059-
CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt);
1059+
CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt) ||
1060+
DriverArgs.hasArg(options::OPT_foffload_fp32_prec_sqrt);
10601061
} else
10611062
CorrectSqrt = DriverArgs.hasFlag(
10621063
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -965,7 +965,8 @@ void CudaToolChain::addClangTargetOptions(
965965
if (DeviceOffloadingKind == Action::OFK_SYCL) {
966966
SYCLInstallation.addSYCLIncludeArgs(DriverArgs, CC1Args);
967967

968-
if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt))
968+
if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt) ||
969+
DriverArgs.hasArg(options::OPT_foffload_fp32_prec_sqrt))
969970
CC1Args.push_back("-fcuda-prec-sqrt");
970971

971972
bool FastRelaxedMath = DriverArgs.hasFlag(

clang/test/Driver/sycl-amdgcn-sqrt.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@
99
// RUN: %s \
1010
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s
1111

12+
// RUN: %clang -### \
13+
// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa -fno-sycl-libspirv \
14+
// RUN: -Xsycl-target-backend --offload-arch=gfx900 \
15+
// RUN: -foffload-fp32-prec-sqrt \
16+
// RUN: --rocm-path=%S/Inputs/rocm \
17+
// RUN: %s \
18+
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s
19+
1220
// CHECK-CORRECT: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc"
1321

1422
// RUN: %clang -### \
@@ -28,6 +36,14 @@
2836
// RUN: %s \
2937
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CONFLICT %s
3038

39+
// RUN: %clang -### \
40+
// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa -fno-sycl-libspirv \
41+
// RUN: -Xsycl-target-backend --offload-arch=gfx900 \
42+
// RUN: -foffload-fp32-prec-sqrt -fno-hip-fp32-correctly-rounded-divide-sqrt \
43+
// RUN: --rocm-path=%S/Inputs/rocm \
44+
// RUN: %s \
45+
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CONFLICT %s
46+
3147
// CHECK-CONFLICT: warning: argument unused during compilation: '-fno-hip-fp32-correctly-rounded-divide-sqrt'
3248
// CHECK-CONFLICT: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc"
3349

clang/test/Driver/sycl-nvptx-sqrt.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@
66
// RUN: %s \
77
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s
88

9+
// RUN: %clang -### -nocudalib \
10+
// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda \
11+
// RUN: -foffload-fp32-prec-sqrt \
12+
// RUN: %s \
13+
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s
14+
915
// CHECK-CORRECT: "-fcuda-prec-sqrt"
1016

1117
// RUN: %clang -### -nocudalib \

devops/dependencies-igc-dev.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"linux": {
33
"igc_dev": {
4-
"github_tag": "igc-dev-5415e63",
5-
"version": "5415e63",
6-
"updated_at": "2025-02-15T18:40:45Z",
7-
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2597523265/zip",
4+
"github_tag": "igc-dev-e9ad10e",
5+
"version": "e9ad10e",
6+
"updated_at": "2025-02-20T03:00:10Z",
7+
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2620807565/zip",
88
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
99
}
1010
}

libdevice/include/sanitizer_defs.hpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,21 +34,10 @@ enum ADDRESS_SPACE : uint32_t {
3434

3535
#if defined(__SPIR__) || defined(__SPIRV__)
3636

37-
#if defined(__SYCL_DEVICE_ONLY__)
38-
39-
#define __USE_SPIR_BUILTIN__ 1
40-
4137
#ifndef SYCL_EXTERNAL
4238
#define SYCL_EXTERNAL
4339
#endif // SYCL_EXTERNAL
4440

45-
#else // __SYCL_DEVICE_ONLY__
46-
47-
#define __USE_SPIR_BUILTIN__ 0
48-
49-
#endif // __SYCL_DEVICE_ONLY__
50-
51-
#if __USE_SPIR_BUILTIN__
5241
extern SYCL_EXTERNAL int
5342
__spirv_ocl_printf(const __SYCL_CONSTANT__ char *Format, ...);
5443

@@ -63,7 +52,6 @@ extern SYCL_EXTERNAL __attribute__((convergent)) void
6352
__spirv_ControlBarrier(uint32_t Execution, uint32_t Memory, uint32_t Semantics);
6453

6554
extern "C" SYCL_EXTERNAL void __devicelib_exit();
66-
#endif // __USE_SPIR_BUILTIN__
6755

6856
__SYCL_GLOBAL__ void *ToGlobal(void *ptr) {
6957
return __spirv_GenericCastToPtrExplicit_ToGlobal(ptr, 5);

libdevice/sanitizer/msan_rtl.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,27 @@ inline uptr __msan_get_shadow_cpu(uptr addr) {
136136
return addr ^ 0x500000000000ULL;
137137
}
138138

139+
inline uptr __msan_get_shadow_dg2(uptr addr, uint32_t as) {
140+
if (as == ADDRESS_SPACE_GENERIC) {
141+
ConvertGenericPointer(addr, as);
142+
}
143+
144+
if (as != ADDRESS_SPACE_GLOBAL || !(addr & 0xffff'0000'0000'0000ULL))
145+
return (uptr)((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
146+
->CleanShadow;
147+
148+
// Device USM only
149+
auto shadow_begin = ((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
150+
->GlobalShadowOffset;
151+
auto shadow_end = ((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
152+
->GlobalShadowOffsetEnd;
153+
if (addr < shadow_begin) {
154+
return addr + (shadow_begin - 0xffff'8000'0000'0000ULL);
155+
} else {
156+
return addr - (0xffff'ffff'ffff'ffffULL - shadow_end);
157+
}
158+
}
159+
139160
inline uptr __msan_get_shadow_pvc(uptr addr, uint32_t as) {
140161
if (as == ADDRESS_SPACE_GENERIC) {
141162
ConvertGenericPointer(addr, as);
@@ -210,6 +231,8 @@ DEVICE_EXTERN_C_NOINLINE uptr __msan_get_shadow(uptr addr, uint32_t as) {
210231
shadow_ptr = __msan_get_shadow_cpu(addr);
211232
} else if (launch_info->DeviceTy == DeviceType::GPU_PVC) {
212233
shadow_ptr = __msan_get_shadow_pvc(addr, as);
234+
} else if (launch_info->DeviceTy == DeviceType::GPU_DG2) {
235+
shadow_ptr = __msan_get_shadow_dg2(addr, as);
213236
} else {
214237
MSAN_DEBUG(__spirv_ocl_printf(__msan_print_unsupport_device_type,
215238
launch_info->DeviceTy));

llvm/docs/NVPTXUsage.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1049,8 +1049,9 @@ The following sets the ftz flag to 1, and the precise sqrt flag to 1.
10491049

10501050
.. code-block:: llvm
10511051
1052-
!llvm.module.flags = !{!0}
1052+
!llvm.module.flags = !{!0, !1}
10531053
!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
1054+
!1 = !{i32 4, !"nvvm-reflect-prec-sqrt", i32 1}
10541055
10551056
(``i32 4`` indicates that the value set here overrides the value in another
10561057
module we link with. See the `LangRef <LangRef.html#module-flags-metadata>`

sycl/CMakeLists.txt

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -234,25 +234,31 @@ string(REPLACE "${sycl_inc_dir}" "${SYCL_INCLUDE_BUILD_DIR}"
234234
string(REPLACE "${sycl_inc_dir}" "${SYCL_INCLUDE_BUILD_DIR}"
235235
OUT_HEADERS_IN_SYCLCOMPAT_DIR "${HEADERS_IN_SYCLCOMPAT_DIR}")
236236

237+
set(OUT_UR_HEADERS
238+
${SYCL_INCLUDE_BUILD_DIR}/ur_api.h
239+
${SYCL_INCLUDE_BUILD_DIR}/ur_api_funcs.def
240+
${SYCL_INCLUDE_BUILD_DIR}/ur_print.hpp)
241+
set(UR_HEADERS_TO_COPY
242+
${UNIFIED_RUNTIME_INCLUDE_DIR}/ur_api.h
243+
${UNIFIED_RUNTIME_INCLUDE_DIR}/ur_api_funcs.def
244+
${UNIFIED_RUNTIME_INCLUDE_DIR}/ur_print.hpp)
245+
237246
# Copy SYCL headers from sources to build directory
238247
add_custom_target(sycl-headers
239248
DEPENDS ${OUT_HEADERS_IN_SYCL_DIR}
240249
${OUT_HEADERS_IN_CL_DIR}
241250
${OUT_HEADERS_IN_STD_DIR}
242251
${OUT_HEADERS_IN_SYCLCOMPAT_DIR}
252+
${OUT_UR_HEADERS}
243253
sycl-device-aspect-macros-header
244254
boost_mp11-headers)
245255

246-
list(APPEND UR_HEADERS_TO_COPY
247-
${UNIFIED_RUNTIME_INCLUDE_DIR}/ur_api.h
248-
${UNIFIED_RUNTIME_INCLUDE_DIR}/ur_api_funcs.def
249-
${UNIFIED_RUNTIME_INCLUDE_DIR}/ur_print.hpp
250-
)
251256
add_custom_command(
252257
OUTPUT ${OUT_HEADERS_IN_SYCL_DIR}
253258
${OUT_HEADERS_IN_CL_DIR}
254259
${OUT_HEADERS_IN_STD_DIR}
255260
${OUT_HEADERS_IN_SYCLCOMPAT_DIR}
261+
${OUT_UR_HEADERS}
256262
DEPENDS ${HEADERS_IN_SYCL_DIR}
257263
${HEADERS_IN_CL_DIR}
258264
${HEADERS_IN_STD_DIR}

0 commit comments

Comments
 (0)