Skip to content

Commit cb35294

Browse files
committed
Merge remote-tracking branch 'origin/sycl' into uwe/syclcoverage
2 parents 637caec + ba99338 commit cb35294

File tree

31 files changed

+147
-70
lines changed

31 files changed

+147
-70
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,10 +296,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
296296
}
297297

298298
if (TargetTriple.isNVPTX() && IgnoreSingleLibs)
299-
LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc"));
299+
LibraryList.push_back(Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc"));
300300

301301
if (TargetTriple.isAMDGCN() && IgnoreSingleLibs)
302-
LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc"));
302+
LibraryList.push_back(Args.MakeArgString("devicelib-amdgcn-amd-amdhsa.bc"));
303303

304304
if (IgnoreSingleLibs)
305305
return LibraryList;

clang/test/Driver/sycl-device-lib-amdgcn.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@
1010
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
1111
// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s
1212

13-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
13+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
1414
// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, gfx906)
15-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
15+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
1616
// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, gfx906)
17-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
17+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
1818
// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, gfx906)
19-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
19+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
2020
// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, gfx906)
2121

2222
// Check that the -fsycl-device-lib flag has no effect when "all" is specified.
@@ -39,7 +39,7 @@
3939
// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s
4040

4141
// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib='
42-
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
42+
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
4343
// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, gfx906)
4444

4545
// Check that llvm-link uses the "-only-needed" flag.
@@ -48,4 +48,4 @@
4848
// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
4949
// RUN: | FileCheck -check-prefix=CHK-ONLY-NEEDED %s
5050

51-
// CHK-ONLY-NEEDED: llvm-link"{{.*}}"-only-needed"{{.*}}"{{.*}}devicelib--amd.bc"{{.*}}
51+
// CHK-ONLY-NEEDED: llvm-link"{{.*}}"-only-needed"{{.*}}"{{.*}}devicelib-amdgcn-amd-amdhsa.bc"{{.*}}

clang/test/Driver/sycl-device-lib-nvptx.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@
1010
// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
1111
// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s
1212

13-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50)
13+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
1414
// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50)
15-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50)
15+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
1616
// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50)
17-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50)
17+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
1818
// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50)
19-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50)
19+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
2020
// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, sm_50)
2121

2222
// Check that the -fsycl-device-lib flag has no effect when "all" is specified.
@@ -39,12 +39,12 @@
3939
// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s
4040

4141
// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib='
42-
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50)
42+
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
4343
// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, sm_50)
4444

4545
// Check that llvm-link uses the "-only-needed" flag.
4646
// Not using the flag breaks kernel bundles.
4747
// RUN: %clangxx -### -nocudalib -fno-sycl-libspirv --sysroot=%S/Inputs/SYCL -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
4848
// RUN: | FileCheck -check-prefix=CHK-ONLY-NEEDED %s
4949

50-
// CHK-ONLY-NEEDED: llvm-link"{{.*}}"-only-needed"{{.*}}"{{.*}}devicelib--cuda.bc"{{.*}}
50+
// CHK-ONLY-NEEDED: llvm-link"{{.*}}"-only-needed"{{.*}}"{{.*}}devicelib-nvptx64-nvidia-cuda.bc"{{.*}}

clang/test/Driver/sycl-nvptx-link.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939

4040
// CHECK: llvm-link
4141
// CHECK-SAME: -only-needed
42-
// CHECK-SAME: devicelib--cuda.bc
42+
// CHECK-SAME: devicelib-nvptx64-nvidia-cuda.bc
4343
// CHECK-SAME: libspirv-nvptx64-nvidia-cuda.bc
4444
// LIBDEVICE10-SAME: libdevice.10.bc
4545
// LIBDEVICE30-SAME: libdevice.compute_30.10.bc

clang/test/Driver/sycl-offload-amdgcn.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
3838
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
3939
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906)
40-
// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
40+
// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
4141
// CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906)
4242
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906)
4343
// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906)

libdevice/cmake/modules/SYCLLibdevice.cmake

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -71,16 +71,16 @@ endforeach()
7171
# Additional compilation options are needed for compiling each device library.
7272
set(devicelib_arch)
7373
if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
74-
list(APPEND devicelib_arch cuda)
75-
set(compile_opts_cuda "-fsycl-targets=nvptx64-nvidia-cuda"
74+
list(APPEND devicelib_arch nvptx64-nvidia-cuda)
75+
set(compile_opts_nvptx64-nvidia-cuda "-fsycl-targets=nvptx64-nvidia-cuda"
7676
"-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib")
77-
set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false")
77+
set(opt_flags_nvptx64-nvidia-cuda "-O3" "--nvvm-reflect-enable=false")
7878
endif()
7979
if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
80-
list(APPEND devicelib_arch amd)
81-
set(compile_opts_amd "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa"
80+
list(APPEND devicelib_arch amdgcn-amd-amdhsa)
81+
set(compile_opts_amdgcn-amd-amdhsa "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa"
8282
"-Xsycl-target-backend" "--offload-arch=gfx940")
83-
set(opt_flags_amd "-O3" "--amdgpu-oclc-reflect-enable=false")
83+
set(opt_flags_amdgcn-amd-amdhsa "-O3" "--amdgpu-oclc-reflect-enable=false")
8484
endif()
8585

8686

@@ -181,14 +181,14 @@ function(add_devicelibs filename)
181181
endforeach()
182182

183183
foreach(arch IN LISTS devicelib_arch)
184-
compile_lib(${filename}--${arch}
184+
compile_lib(${filename}-${arch}
185185
FILETYPE bc
186186
SRC ${ARG_SRC}
187187
DEPENDENCIES ${ARG_DEPENDENCIES}
188188
EXTRA_OPTS ${ARG_EXTRA_OPTS} ${bc_device_compile_opts}
189189
${compile_opts_${arch}})
190190

191-
append_to_property(${bc_binary_dir}/${filename}--${arch}.bc
191+
append_to_property(${bc_binary_dir}/${filename}-${arch}.bc
192192
PROPERTY_NAME BC_DEVICE_LIBS_${arch})
193193
endforeach()
194194
endfunction()
@@ -471,7 +471,7 @@ foreach(arch IN LISTS devicelib_arch)
471471
endforeach()
472472
endforeach()
473473

474-
# Create one large bitcode file for the CUDA and AMD targets.
474+
# Create one large bitcode file for the NVPTX and AMD targets.
475475
# Use all the files collected in the respective global properties.
476476
foreach(arch IN LISTS devicelib_arch)
477477
get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch})
@@ -486,15 +486,15 @@ foreach(arch IN LISTS devicelib_arch)
486486

487487
# Run the optimizer on the resulting bitcode file and call prepare_builtins
488488
# on it, which strips away debug and arch information.
489-
process_bc(devicelib--${arch}.bc
489+
process_bc(devicelib-${arch}.bc
490490
LIB_TGT builtins_${arch}.opt
491491
IN_FILE ${builtins_link_lib_${arch}}
492492
OUT_DIR ${bc_binary_dir}
493493
OPT_FLAGS ${opt_flags_${arch}}
494494
DEPENDENCIES device_lib_device_${arch})
495-
add_dependencies(libsycldevice-bc prepare-devicelib--${arch}.bc)
495+
add_dependencies(libsycldevice-bc prepare-devicelib-${arch}.bc)
496496
set(complete_${arch}_libdev
497-
$<TARGET_PROPERTY:prepare-devicelib--${arch}.bc,TARGET_FILE>)
497+
$<TARGET_PROPERTY:prepare-devicelib-${arch}.bc,TARGET_FILE>)
498498
install( FILES ${complete_${arch}_libdev}
499499
DESTINATION ${install_dest_bc}
500500
COMPONENT libsycldevice)

llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,13 @@
3030
#include "compiler/utils/work_item_loops_pass.h"
3131
#include "vecz/pass.h"
3232
#include "vecz/vecz_target_info.h"
33+
#include "llvm/IR/PassManager.h"
34+
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
3335
#include "llvm/Transforms/IPO/AlwaysInliner.h"
36+
#include "llvm/Transforms/Scalar/DCE.h"
37+
#include "llvm/Transforms/Scalar/GVN.h"
38+
#include "llvm/Transforms/Scalar/SROA.h"
39+
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
3440
#endif
3541

3642
using namespace llvm;
@@ -65,6 +71,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
6571
MPM.addPass(SpecConstantsPass(SpecConstantsPass::HandlingMode::emulation));
6672
MPM.addPass(ConvertToMuxBuiltinsSYCLNativeCPUPass());
6773
#ifdef NATIVECPU_USE_OCK
74+
MPM.addPass(compiler::utils::PrepareBarriersPass());
6875
MPM.addPass(compiler::utils::TransferKernelMetadataPass());
6976
MPM.addPass(FixABIMuxBuiltinsPass());
7077
// Always enable vectorizer, unless explictly disabled or -O0 is set.
@@ -86,13 +93,20 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
8693
MAM.registerPass(
8794
[QueryFunc] { return vecz::VeczPassOptionsAnalysis(QueryFunc); });
8895
MPM.addPass(vecz::RunVeczPass());
96+
FunctionPassManager FPM;
97+
FPM.addPass(SimplifyCFGPass());
98+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
99+
FPM.addPass(AggressiveInstCombinePass());
100+
FPM.addPass(GVNPass(GVNOptions().setMemDep(true)));
101+
FPM.addPass(DCEPass());
102+
FPM.addPass(SimplifyCFGPass());
103+
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
89104
}
90105
compiler::utils::WorkItemLoopsPassOptions Opts;
91106
Opts.IsDebug = IsDebug;
92107
Opts.ForceNoTail = ForceNoTail;
93108
MAM.registerPass([] { return compiler::utils::BuiltinInfoAnalysis(); });
94109
MAM.registerPass([] { return compiler::utils::SubgroupAnalysis(); });
95-
MPM.addPass(compiler::utils::PrepareBarriersPass());
96110
MPM.addPass(compiler::utils::WorkItemLoopsPass(Opts));
97111
MPM.addPass(compiler::utils::ReplaceLocalModuleScopeVariablesPass());
98112
MPM.addPass(AlwaysInlinerPass());

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,19 +1506,49 @@ static bool isUnsupportedAMDGPUAddrspace(Value *Addr) {
15061506
return false;
15071507
}
15081508

1509-
static bool isUnsupportedSPIRAccess(Value *Addr, Function *Func) {
1509+
static bool containsTargetExtType(const Type *Ty) {
1510+
if (isa<TargetExtType>(Ty))
1511+
return true;
1512+
1513+
if (Ty->isVectorTy())
1514+
return containsTargetExtType(Ty->getScalarType());
1515+
1516+
if (Ty->isArrayTy())
1517+
return containsTargetExtType(Ty->getArrayElementType());
1518+
1519+
if (auto *STy = dyn_cast<StructType>(Ty)) {
1520+
for (unsigned int i = 0; i < STy->getNumElements(); i++)
1521+
if (containsTargetExtType(STy->getElementType(i)))
1522+
return true;
1523+
return false;
1524+
}
1525+
1526+
return false;
1527+
}
1528+
1529+
static bool isUnsupportedSPIRAccess(Value *Addr, Instruction *Inst) {
15101530
// Skip SPIR-V built-in varibles
15111531
auto *OrigValue = Addr->stripInBoundsOffsets();
15121532
if (OrigValue->getName().starts_with("__spirv_BuiltIn"))
15131533
return true;
15141534

1535+
// Ignore load/store for target ext type since we can't know exactly what size
1536+
// it is.
1537+
if (isa<StoreInst>(Inst) &&
1538+
containsTargetExtType(
1539+
cast<StoreInst>(Inst)->getValueOperand()->getType()))
1540+
return true;
1541+
1542+
if (isa<LoadInst>(Inst) && containsTargetExtType(Inst->getType()))
1543+
return true;
1544+
15151545
Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
15161546
switch (PtrTy->getPointerAddressSpace()) {
15171547
case kSpirOffloadPrivateAS: {
15181548
if (!ClSpirOffloadPrivates)
15191549
return true;
15201550
// Skip kernel arguments
1521-
return Func->getCallingConv() == CallingConv::SPIR_KERNEL &&
1551+
return Inst->getFunction()->getCallingConv() == CallingConv::SPIR_KERNEL &&
15221552
isa<Argument>(Addr);
15231553
}
15241554
case kSpirOffloadGlobalAS: {
@@ -1756,7 +1786,10 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
17561786
// swifterror allocas are register promoted by ISel
17571787
!AI.isSwiftError() &&
17581788
// safe allocas are not interesting
1759-
!(SSGI && SSGI->isSafe(AI)));
1789+
!(SSGI && SSGI->isSafe(AI)) &&
1790+
// ignore alloc contains target ext type since we can't know exactly what
1791+
// size it is.
1792+
!containsTargetExtType(AI.getAllocatedType()));
17601793

17611794
ProcessedAllocas[&AI] = IsInteresting;
17621795
return IsInteresting;
@@ -1765,7 +1798,7 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
17651798
bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
17661799
// SPIR has its own rules to filter the instrument accesses
17671800
if (TargetTriple.isSPIROrSPIRV()) {
1768-
if (isUnsupportedSPIRAccess(Ptr, Inst->getFunction()))
1801+
if (isUnsupportedSPIRAccess(Ptr, Inst))
17691802
return true;
17701803
} else {
17711804
// Instrument accesses from different address spaces only for AMDGPU.

0 commit comments

Comments
 (0)