Skip to content

Commit 4470fb1

Browse files
authored
Merge branch 'sycl' into fabio/binary_update_fix
2 parents b1fe8a3 + ce0dc32 commit 4470fb1

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

59 files changed

+1185
-179
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6350,7 +6350,7 @@ class OffloadingActionBuilder final {
63506350
if (GpuInitHasErrors)
63516351
return true;
63526352

6353-
int I = 0;
6353+
int GenIndex = 0;
63546354
// Fill SYCLTargetInfoList
63556355
for (auto &TT : SYCLTripleList) {
63566356
auto TCIt = llvm::find_if(
@@ -6363,10 +6363,21 @@ class OffloadingActionBuilder final {
63636363
// is the target device.
63646364
if (TT.isSPIR() &&
63656365
TT.getSubArch() == llvm::Triple::SPIRSubArch_gen) {
6366-
StringRef Device(GpuArchList[I].second);
6366+
// Multiple spir64_gen targets are allowed to be used via the
6367+
// -fsycl-targets=spir64_gen and -fsycl-targets=intel_gpu_*
6368+
// specifiers. GenIndex walks forward through GpuArchList to the
6369+
// next entry whose triple matches this target, so each such
6370+
// target gets its own device setting and no entry is re-used.
6371+
while (TT != GpuArchList[GenIndex].first &&
6372+
GenIndex < GpuArchList.size())
6373+
++GenIndex;
6374+
if (GpuArchList[GenIndex].first != TT)
6375+
// No match.
6376+
continue;
6377+
StringRef Device(GpuArchList[GenIndex].second);
63676378
SYCLTargetInfoList.emplace_back(
63686379
*TCIt, Device.empty() ? nullptr : Device.data());
6369-
++I;
6380+
++GenIndex;
63706381
continue;
63716382
}
63726383
SYCLTargetInfoList.emplace_back(*TCIt, nullptr);
@@ -6380,7 +6391,6 @@ class OffloadingActionBuilder final {
63806391
}
63816392
assert(OffloadArch && "Failed to find matching arch.");
63826393
SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch);
6383-
++I;
63846394
}
63856395
}
63866396
}

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,7 @@ const char *SYCL::Linker::constructLLVMLinkCommand(
579579
auto isSYCLDeviceLib = [&](const InputInfo &II) {
580580
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
581581
const bool IsNVPTX = this->getToolChain().getTriple().isNVPTX();
582+
const bool IsAMDGCN = this->getToolChain().getTriple().isAMDGCN();
582583
const bool IsFPGA = this->getToolChain().getTriple().isSPIR() &&
583584
this->getToolChain().getTriple().getSubArch() ==
584585
llvm::Triple::SPIRSubArch_fpga;
@@ -598,6 +599,9 @@ const char *SYCL::Linker::constructLLVMLinkCommand(
598599
// NativeCPU links against libclc (libspirv)
599600
if (IsSYCLNativeCPU && InputFilename.contains("libspirv"))
600601
return true;
602+
// AMDGCN links against our libdevice (devicelib)
603+
if (IsAMDGCN && InputFilename.starts_with("devicelib-"))
604+
return true;
601605
// NVPTX links against our libclc (libspirv), our libdevice (devicelib),
602606
// and the CUDA libdevice
603607
if (IsNVPTX && (InputFilename.starts_with("devicelib-") ||

clang/test/Driver/sycl-device-lib-amdgcn.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,10 @@
4242
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
4343
// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, gfx906)
4444

45+
// Check that llvm-link uses the "-only-needed" flag.
46+
// Not using the flag breaks kernel bundles.
47+
// RUN: %clangxx -### -nogpulib --sysroot=%S/Inputs/SYCL \
48+
// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
49+
// RUN: | FileCheck -check-prefix=CHK-ONLY-NEEDED %s
50+
51+
// CHK-ONLY-NEEDED: llvm-link"{{.*}}"-only-needed"{{.*}}"{{.*}}devicelib--amd.bc"{{.*}}

clang/test/Driver/sycl-device-lib-nvptx.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,9 @@
4242
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50)
4343
// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, sm_50)
4444

45+
// Check that llvm-link uses the "-only-needed" flag.
46+
// Not using the flag breaks kernel bundles.
47+
// RUN: %clangxx -### --sysroot=%S/Inputs/SYCL -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
48+
// RUN: | FileCheck -check-prefix=CHK-ONLY-NEEDED %s
49+
50+
// CHK-ONLY-NEEDED: llvm-link"{{.*}}"-only-needed"{{.*}}"{{.*}}devicelib--cuda.bc"{{.*}}

clang/test/Driver/sycl-offload-old-model.c

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,44 @@
622622
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 28: offload, "device-sycl (spir64-unknown-unknown)" {27}, object
623623
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 29: linker, {8, 21, 28}, image, (host-sycl)
624624

625+
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl \
626+
// RUN: -fno-sycl-instrument-device-code -fno-sycl-device-lib=all \
627+
// RUN: -fsycl-targets=nvptx64-nvidia-cuda,spir64_gen \
628+
// RUN: -Xsycl-target-backend=spir64_gen "-device skl" \
629+
// RUN: -ccc-print-phases %s 2>&1 \
630+
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH2 %s
631+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl)
632+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
633+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 2: input, "[[INPUT]]", c++, (device-sycl)
634+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: preprocessor, {2}, c++-cpp-output, (device-sycl)
635+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: compiler, {3}, ir, (device-sycl)
636+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (spir64_gen-unknown-unknown)" {4}, c++-cpp-output
637+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: compiler, {5}, ir, (host-sycl)
638+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: backend, {6}, assembler, (host-sycl)
639+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: assembler, {7}, object, (host-sycl)
640+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: input, "[[INPUT]]", c++, (device-sycl, sm_50)
641+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: preprocessor, {9}, c++-cpp-output, (device-sycl, sm_50)
642+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: compiler, {10}, ir, (device-sycl, sm_50)
643+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: linker, {11}, ir, (device-sycl, sm_50)
644+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 13: sycl-post-link, {12}, ir, (device-sycl, sm_50)
645+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 14: file-table-tform, {13}, ir, (device-sycl, sm_50)
646+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 15: backend, {14}, assembler, (device-sycl, sm_50)
647+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 16: assembler, {15}, object, (device-sycl, sm_50)
648+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 17: linker, {15, 16}, cuda-fatbin, (device-sycl, sm_50)
649+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 18: foreach, {14, 17}, cuda-fatbin, (device-sycl, sm_50)
650+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 19: file-table-tform, {13, 18}, tempfiletable, (device-sycl, sm_50)
651+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 20: clang-offload-wrapper, {19}, object, (device-sycl, sm_50)
652+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 21: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {20}, object
653+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 22: linker, {4}, ir, (device-sycl)
654+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 23: sycl-post-link, {22}, tempfiletable, (device-sycl)
655+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 24: file-table-tform, {23}, tempfilelist, (device-sycl)
656+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 25: llvm-spirv, {24}, tempfilelist, (device-sycl)
657+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 26: backend-compiler, {25}, image, (device-sycl)
658+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 27: file-table-tform, {23, 26}, tempfiletable, (device-sycl)
659+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 28: clang-offload-wrapper, {27}, object, (device-sycl)
660+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 29: offload, "device-sycl (spir64_gen-unknown-unknown)" {28}, object
661+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 30: linker, {8, 21, 29}, image, (host-sycl)
662+
625663
/// Check the behaviour when the -fsycl-targets order is swapped
626664
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all -fsycl-targets=spir64,nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \
627665
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH-FLIPPED %s

clang/test/Driver/sycl-offload.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,31 @@
394394
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 16: assembler, {15}, object, (host-sycl)
395395
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 17: clang-linker-wrapper, {16}, image, (host-sycl)
396396

397+
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --offload-new-driver \
398+
// RUN: -fno-sycl-instrument-device-code -fno-sycl-device-lib=all \
399+
// RUN: -fsycl-targets=nvptx64-nvidia-cuda,spir64_gen \
400+
// RUN: -Xsycl-target-backend=spir64_gen "-device skl" \
401+
// RUN: -ccc-print-phases %s 2>&1 \
402+
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH2 %s
403+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl)
404+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
405+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 2: compiler, {1}, ir, (host-sycl)
406+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: input, "[[INPUT]]", c++, (device-sycl, skl)
407+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, skl)
408+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: compiler, {4}, ir, (device-sycl, skl)
409+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: backend, {5}, ir, (device-sycl, skl)
410+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: offload, "device-sycl (spir64_gen-unknown-unknown:skl)" {6}, ir
411+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: input, "[[INPUT]]", c++, (device-sycl, sm_50)
412+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: preprocessor, {8}, c++-cpp-output, (device-sycl, sm_50)
413+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: compiler, {9}, ir, (device-sycl, sm_50)
414+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: backend, {10}, ir, (device-sycl, sm_50)
415+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {11}, ir
416+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 13: clang-offload-packager, {7, 12}, image, (device-sycl)
417+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 14: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (x86_64-unknown-linux-gnu)" {13}, ir
418+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 15: backend, {14}, assembler, (host-sycl)
419+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 16: assembler, {15}, object, (host-sycl)
420+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 17: clang-linker-wrapper, {16}, image, (host-sycl)
421+
397422
/// ###########################################################################
398423

399424
// Check if valid bound arch behaviour occurs when compiling for spir-v,nvidia-gpu, and amd-gpu

llvm-spirv/include/LLVMSPIRVExtensions.inc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ EXT(SPV_KHR_subgroup_rotate)
1717
EXT(SPV_KHR_non_semantic_info)
1818
EXT(SPV_KHR_shader_clock)
1919
EXT(SPV_KHR_cooperative_matrix)
20+
EXT(SPV_KHR_untyped_pointers)
2021
EXT(SPV_INTEL_subgroups)
2122
EXT(SPV_INTEL_media_block_io)
2223
EXT(SPV_INTEL_device_side_avc_motion_estimation)

llvm-spirv/lib/SPIRV/SPIRVReader.cpp

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,11 @@ Type *SPIRVToLLVM::transType(SPIRVType *T, bool UseTPT) {
357357
return TypedPointerType::get(ElementTy, AS);
358358
return mapType(T, PointerType::get(ElementTy, AS));
359359
}
360+
case OpTypeUntypedPointerKHR: {
361+
const unsigned AS =
362+
SPIRSPIRVAddrSpaceMap::rmap(T->getPointerStorageClass());
363+
return mapType(T, PointerType::get(*Context, AS));
364+
}
360365
case OpTypeVector:
361366
return mapType(T,
362367
FixedVectorType::get(transType(T->getVectorComponentType()),
@@ -560,6 +565,8 @@ std::string SPIRVToLLVM::transTypeToOCLTypeName(SPIRVType *T, bool IsSigned) {
560565
}
561566
return transTypeToOCLTypeName(ET) + "*";
562567
}
568+
case OpTypeUntypedPointerKHR:
569+
return "int*";
563570
case OpTypeVector:
564571
return transTypeToOCLTypeName(T->getVectorComponentType()) +
565572
T->getVectorComponentCount();
@@ -1522,9 +1529,15 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F,
15221529
case OpUndef:
15231530
return mapValue(BV, UndefValue::get(transType(BV->getType())));
15241531

1525-
case OpVariable: {
1526-
auto *BVar = static_cast<SPIRVVariable *>(BV);
1527-
auto *PreTransTy = BVar->getType()->getPointerElementType();
1532+
case OpVariable:
1533+
case OpUntypedVariableKHR: {
1534+
auto *BVar = static_cast<SPIRVVariableBase *>(BV);
1535+
SPIRVType *PreTransTy = BVar->getType()->getPointerElementType();
1536+
if (BVar->getType()->isTypeUntypedPointerKHR()) {
1537+
auto *UntypedVar = static_cast<SPIRVUntypedVariableKHR *>(BVar);
1538+
if (SPIRVType *DT = UntypedVar->getDataType())
1539+
PreTransTy = DT;
1540+
}
15281541
auto *Ty = transType(PreTransTy);
15291542
bool IsConst = BVar->isConstant();
15301543
llvm::GlobalValue::LinkageTypes LinkageTy = transLinkageType(BVar);
@@ -4055,7 +4068,7 @@ bool SPIRVToLLVM::transDecoration(SPIRVValue *BV, Value *V) {
40554068
return true;
40564069
}
40574070

4058-
void SPIRVToLLVM::transGlobalCtorDtors(SPIRVVariable *BV) {
4071+
void SPIRVToLLVM::transGlobalCtorDtors(SPIRVVariableBase *BV) {
40594072
if (BV->getName() != "llvm.global_ctors" &&
40604073
BV->getName() != "llvm.global_dtors")
40614074
return;
@@ -4900,15 +4913,17 @@ SPIRVToLLVM::transLinkageType(const SPIRVValue *V) {
49004913
return GlobalValue::ExternalLinkage;
49014914
}
49024915
// Variable declaration
4903-
if (V->getOpCode() == OpVariable) {
4904-
if (static_cast<const SPIRVVariable *>(V)->getInitializer() == 0)
4916+
if (V->getOpCode() == OpVariable ||
4917+
V->getOpCode() == OpUntypedVariableKHR) {
4918+
if (static_cast<const SPIRVVariableBase *>(V)->getInitializer() == 0)
49054919
return GlobalValue::ExternalLinkage;
49064920
}
49074921
// Definition
49084922
return GlobalValue::AvailableExternallyLinkage;
49094923
case LinkageTypeExport:
4910-
if (V->getOpCode() == OpVariable) {
4911-
if (static_cast<const SPIRVVariable *>(V)->getInitializer() == 0)
4924+
if (V->getOpCode() == OpVariable ||
4925+
V->getOpCode() == OpUntypedVariableKHR) {
4926+
if (static_cast<const SPIRVVariableBase *>(V)->getInitializer() == 0)
49124927
// Tentative definition
49134928
return GlobalValue::CommonLinkage;
49144929
}

llvm-spirv/lib/SPIRV/SPIRVReader.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ class SPIRVToLLVM : private BuiltinCallHelper {
251251

252252
void transUserSemantic(SPIRV::SPIRVFunction *Fun);
253253
void transGlobalAnnotations();
254-
void transGlobalCtorDtors(SPIRVVariable *BV);
254+
void transGlobalCtorDtors(SPIRVVariableBase *BV);
255255
void createCXXStructor(const char *ListName,
256256
SmallVectorImpl<Function *> &Funcs);
257257
void transIntelFPGADecorations(SPIRVValue *BV, Value *V);

0 commit comments

Comments
 (0)