Skip to content

Commit c1fea5a

Browse files
authored
Merge branch 'release/20.x' into issue115744
2 parents a32a941 + a69568e commit c1fea5a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1030
-1254
lines changed

.github/workflows/release-binaries.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ jobs:
142142
# 2-3 hours to build on macOS, much slower than on Linux.
143143
# The long build time causes the release build to time out on x86_64,
144144
# so we need to disable flang there.
145-
target_cmake_flags="$target_cmake_flags -DLLVM_RELEASE_ENABLE_PROJECTS='clang;lld;lldb;clang-tools-extra;bolt;polly;mlir'"
145+
target_cmake_flags="$target_cmake_flags -DLLVM_RELEASE_ENABLE_PROJECTS='clang;lld;lldb;clang-tools-extra;polly;mlir'"
146146
fi
147147
target_cmake_flags="$target_cmake_flags -DBOOTSTRAP_DARWIN_osx_ARCHS=$arches -DBOOTSTRAP_DARWIN_osx_BUILTIN_ARCHS=$arches"
148148
fi

clang/cmake/caches/Release.cmake

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,21 @@ endfunction()
2929
# cache file to CMake via -C. e.g.
3030
#
3131
# cmake -D LLVM_RELEASE_ENABLE_PGO=ON -C Release.cmake
32+
33+
set (DEFAULT_PROJECTS "clang;lld;lldb;clang-tools-extra;polly;mlir;flang")
34+
# bolt only supports ELF, so only enable it for Linux.
35+
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
36+
list(APPEND DEFAULT_PROJECTS "bolt")
37+
endif()
38+
3239
set (DEFAULT_RUNTIMES "compiler-rt;libcxx")
3340
if (NOT WIN32)
3441
list(APPEND DEFAULT_RUNTIMES "libcxxabi" "libunwind")
3542
endif()
3643
set(LLVM_RELEASE_ENABLE_LTO THIN CACHE STRING "")
3744
set(LLVM_RELEASE_ENABLE_PGO ON CACHE BOOL "")
3845
set(LLVM_RELEASE_ENABLE_RUNTIMES ${DEFAULT_RUNTIMES} CACHE STRING "")
39-
set(LLVM_RELEASE_ENABLE_PROJECTS "clang;lld;lldb;clang-tools-extra;bolt;polly;mlir;flang" CACHE STRING "")
46+
set(LLVM_RELEASE_ENABLE_PROJECTS ${DEFAULT_PROJECTS} CACHE STRING "")
4047
# Note we don't need to add install here, since it is one of the pre-defined
4148
# steps.
4249
set(LLVM_RELEASE_FINAL_STAGE_TARGETS "clang;package;check-all;check-llvm;check-clang" CACHE STRING "")
@@ -118,16 +125,22 @@ if(NOT ${CMAKE_HOST_SYSTEM_NAME} MATCHES "Darwin")
118125
set(RELEASE_LINKER_FLAGS "${RELEASE_LINKER_FLAGS} -static-libgcc")
119126
endif()
120127

128+
# Set flags for bolt
129+
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
130+
set(RELEASE_LINKER_FLAGS "${RELEASE_LINKER_FLAGS} -Wl,--emit-relocs,-znow")
131+
endif()
132+
121133
set_instrument_and_final_stage_var(CMAKE_EXE_LINKER_FLAGS ${RELEASE_LINKER_FLAGS} STRING)
122134
set_instrument_and_final_stage_var(CMAKE_SHARED_LINKER_FLAGS ${RELEASE_LINKER_FLAGS} STRING)
123135
set_instrument_and_final_stage_var(CMAKE_MODULE_LINKER_FLAGS ${RELEASE_LINKER_FLAGS} STRING)
124136

125137
# Final Stage Config (stage2)
126138
set_final_stage_var(LLVM_ENABLE_RUNTIMES "${LLVM_RELEASE_ENABLE_RUNTIMES}" STRING)
127139
set_final_stage_var(LLVM_ENABLE_PROJECTS "${LLVM_RELEASE_ENABLE_PROJECTS}" STRING)
140+
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
141+
set_final_stage_var(CLANG_BOLT "INSTRUMENT" STRING)
142+
endif()
128143
set_final_stage_var(CPACK_GENERATOR "TXZ" STRING)
129144
set_final_stage_var(CPACK_ARCHIVE_THREADS "0" STRING)
130145

131-
if(${CMAKE_HOST_SYSTEM_NAME} MATCHES "Darwin")
132-
set_final_stage_var(LLVM_USE_STATIC_ZSTD "ON" BOOL)
133-
endif()
146+
set_final_stage_var(LLVM_USE_STATIC_ZSTD "ON" BOOL)

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,8 @@ CUDA Support
12691269

12701270
AIX Support
12711271
^^^^^^^^^^^
1272+
- Fixed the ``-print-runtime-dir`` option.
1273+
- Enable continuous profile syncing feature on AIX.
12721274

12731275
NetBSD Support
12741276
^^^^^^^^^^^^^^

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,13 +1522,13 @@ enum DeclCode {
15221522
/// An ImplicitConceptSpecializationDecl record.
15231523
DECL_IMPLICIT_CONCEPT_SPECIALIZATION,
15241524

1525-
// A decls specilization record.
1525+
// A decls specialization record.
15261526
DECL_SPECIALIZATIONS,
15271527

1528-
// A decls specilization record.
1528+
// A decls partial specialization record.
15291529
DECL_PARTIAL_SPECIALIZATIONS,
15301530

1531-
DECL_LAST = DECL_IMPLICIT_CONCEPT_SPECIALIZATION
1531+
DECL_LAST = DECL_PARTIAL_SPECIALIZATIONS
15321532
};
15331533

15341534
/// Record codes for each kind of statement or expression.

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
261261
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
262262
TargetInfo::adjust(Diags, Opts);
263263
// ToDo: There are still a few places using default address space as private
264-
// address space in OpenCL, which needs to be cleaned up, then the references
265-
// to OpenCL can be removed from the following line.
266-
setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
264+
// address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
265+
// can be removed from the following line.
266+
setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
267267
!isAMDGCN(getTriple()));
268268
}
269269

clang/lib/CodeGen/CGBlocks.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1396,8 +1396,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
13961396
DI->setLocation(D->getLocation());
13971397
DI->EmitDeclareOfBlockLiteralArgVariable(
13981398
*BlockInfo, D->getName(), argNum,
1399-
cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
1400-
Builder);
1399+
cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
14011400
}
14021401
}
14031402

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6092,13 +6092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
60926092
/*IndexTypeQuals=*/0);
60936093
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
60946094
llvm::Value *TmpPtr = Tmp.getPointer();
6095-
// The EmitLifetime* pair expect a naked Alloca as their last argument,
6096-
// however for cases where the default AS is not the Alloca AS, Tmp is
6097-
// actually the Alloca ascasted to the default AS, hence the
6098-
// stripPointerCasts()
6099-
llvm::Value *Alloca = TmpPtr->stripPointerCasts();
61006095
llvm::Value *TmpSize = EmitLifetimeStart(
6101-
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
6096+
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
61026097
llvm::Value *ElemPtr;
61036098
// Each of the following arguments specifies the size of the corresponding
61046099
// argument passed to the enqueued block.
@@ -6114,9 +6109,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
61146109
Builder.CreateAlignedStore(
61156110
V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
61166111
}
6117-
// Return the Alloca itself rather than a potential ascast as this is only
6118-
// used by the paired EmitLifetimeEnd.
6119-
return std::tie(ElemPtr, TmpSize, Alloca);
6112+
return std::tie(ElemPtr, TmpSize, TmpPtr);
61206113
};
61216114

61226115
// Could have events and/or varargs.

clang/lib/Sema/SemaConcept.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -711,9 +711,32 @@ bool Sema::addInstantiatedCapturesToScope(
711711

712712
unsigned Instantiated = 0;
713713

714+
// FIXME: This is a workaround for not having deferred lambda body
715+
// instantiation.
716+
// When transforming a lambda's body, if we encounter another call to a
717+
// nested lambda that contains a constraint expression, we add all of the
718+
// outer lambda's instantiated captures to the current instantiation scope to
719+
// facilitate constraint evaluation. However, these captures don't appear in
720+
// the CXXRecordDecl until after the lambda expression is rebuilt, so we
721+
// pull them out from the corresponding LSI.
722+
LambdaScopeInfo *InstantiatingScope = nullptr;
723+
if (LambdaPattern->capture_size() && !LambdaClass->capture_size()) {
724+
for (FunctionScopeInfo *Scope : llvm::reverse(FunctionScopes)) {
725+
auto *LSI = dyn_cast<LambdaScopeInfo>(Scope);
726+
if (!LSI ||
727+
LSI->CallOperator->getTemplateInstantiationPattern() != PatternDecl)
728+
continue;
729+
InstantiatingScope = LSI;
730+
break;
731+
}
732+
assert(InstantiatingScope);
733+
}
734+
714735
auto AddSingleCapture = [&](const ValueDecl *CapturedPattern,
715736
unsigned Index) {
716-
ValueDecl *CapturedVar = LambdaClass->getCapture(Index)->getCapturedVar();
737+
ValueDecl *CapturedVar =
738+
InstantiatingScope ? InstantiatingScope->Captures[Index].getVariable()
739+
: LambdaClass->getCapture(Index)->getCapturedVar();
717740
assert(CapturedVar->isInitCapture());
718741
Scope.InstantiatedLocal(CapturedPattern, CapturedVar);
719742
};

clang/lib/Sema/SemaDecl.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4803,7 +4803,8 @@ bool Sema::checkVarDeclRedefinition(VarDecl *Old, VarDecl *New) {
48034803
(New->getFormalLinkage() == Linkage::Internal || New->isInline() ||
48044804
isa<VarTemplateSpecializationDecl>(New) ||
48054805
New->getDescribedVarTemplate() || New->getNumTemplateParameterLists() ||
4806-
New->getDeclContext()->isDependentContext())) {
4806+
New->getDeclContext()->isDependentContext() ||
4807+
New->hasAttr<SelectAnyAttr>())) {
48074808
// The previous definition is hidden, and multiple definitions are
48084809
// permitted (in separate TUs). Demote this to a declaration.
48094810
New->demoteThisDefinitionToDeclaration();

clang/test/CodeGen/scoped-fence-ops.c

Lines changed: 120 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
22
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
3-
// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
3+
// RUN: -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL12 %s
44
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
5-
// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
5+
// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL20 %s
66
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
77
// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
88
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-unknown-linux-gnu -ffreestanding \
@@ -30,34 +30,62 @@ void fe1a() {
3030
__scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
3131
}
3232

33-
// AMDGCN-LABEL: define hidden void @fe1b(
34-
// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
35-
// AMDGCN-NEXT: [[ENTRY:.*:]]
36-
// AMDGCN-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
37-
// AMDGCN-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
38-
// AMDGCN-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
39-
// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
40-
// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
41-
// AMDGCN-NEXT: i32 1, label %[[ACQUIRE:.*]]
42-
// AMDGCN-NEXT: i32 2, label %[[ACQUIRE]]
43-
// AMDGCN-NEXT: i32 3, label %[[RELEASE:.*]]
44-
// AMDGCN-NEXT: i32 4, label %[[ACQREL:.*]]
45-
// AMDGCN-NEXT: i32 5, label %[[SEQCST:.*]]
46-
// AMDGCN-NEXT: ]
47-
// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
48-
// AMDGCN-NEXT: ret void
49-
// AMDGCN: [[ACQUIRE]]:
50-
// AMDGCN-NEXT: fence syncscope("workgroup") acquire
51-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
52-
// AMDGCN: [[RELEASE]]:
53-
// AMDGCN-NEXT: fence syncscope("workgroup") release
54-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
55-
// AMDGCN: [[ACQREL]]:
56-
// AMDGCN-NEXT: fence syncscope("workgroup") acq_rel
57-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
58-
// AMDGCN: [[SEQCST]]:
59-
// AMDGCN-NEXT: fence syncscope("workgroup") seq_cst
60-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
33+
// AMDGCN-CL12-LABEL: define hidden void @fe1b(
34+
// AMDGCN-CL12-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
35+
// AMDGCN-CL12-NEXT: [[ENTRY:.*:]]
36+
// AMDGCN-CL12-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
37+
// AMDGCN-CL12-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
38+
// AMDGCN-CL12-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
39+
// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
40+
// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
41+
// AMDGCN-CL12-NEXT: i32 1, label %[[ACQUIRE:.*]]
42+
// AMDGCN-CL12-NEXT: i32 2, label %[[ACQUIRE]]
43+
// AMDGCN-CL12-NEXT: i32 3, label %[[RELEASE:.*]]
44+
// AMDGCN-CL12-NEXT: i32 4, label %[[ACQREL:.*]]
45+
// AMDGCN-CL12-NEXT: i32 5, label %[[SEQCST:.*]]
46+
// AMDGCN-CL12-NEXT: ]
47+
// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]:
48+
// AMDGCN-CL12-NEXT: ret void
49+
// AMDGCN-CL12: [[ACQUIRE]]:
50+
// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acquire
51+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
52+
// AMDGCN-CL12: [[RELEASE]]:
53+
// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release
54+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
55+
// AMDGCN-CL12: [[ACQREL]]:
56+
// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acq_rel
57+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
58+
// AMDGCN-CL12: [[SEQCST]]:
59+
// AMDGCN-CL12-NEXT: fence syncscope("workgroup") seq_cst
60+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
61+
//
62+
// AMDGCN-CL20-LABEL: define hidden void @fe1b(
63+
// AMDGCN-CL20-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
64+
// AMDGCN-CL20-NEXT: [[ENTRY:.*:]]
65+
// AMDGCN-CL20-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
66+
// AMDGCN-CL20-NEXT: store i32 [[ORD]], ptr addrspace(5) [[ORD_ADDR]], align 4
67+
// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[ORD_ADDR]], align 4
68+
// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
69+
// AMDGCN-CL20-NEXT: i32 1, label %[[ACQUIRE:.*]]
70+
// AMDGCN-CL20-NEXT: i32 2, label %[[ACQUIRE]]
71+
// AMDGCN-CL20-NEXT: i32 3, label %[[RELEASE:.*]]
72+
// AMDGCN-CL20-NEXT: i32 4, label %[[ACQREL:.*]]
73+
// AMDGCN-CL20-NEXT: i32 5, label %[[SEQCST:.*]]
74+
// AMDGCN-CL20-NEXT: ]
75+
// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]:
76+
// AMDGCN-CL20-NEXT: ret void
77+
// AMDGCN-CL20: [[ACQUIRE]]:
78+
// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acquire
79+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
80+
// AMDGCN-CL20: [[RELEASE]]:
81+
// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release
82+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
83+
// AMDGCN-CL20: [[ACQREL]]:
84+
// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acq_rel
85+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
86+
// AMDGCN-CL20: [[SEQCST]]:
87+
// AMDGCN-CL20-NEXT: fence syncscope("workgroup") seq_cst
88+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
6189
//
6290
// SPIRV-LABEL: define hidden spir_func void @fe1b(
6391
// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
@@ -119,37 +147,68 @@ void fe1b(int ord) {
119147
__scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
120148
}
121149

122-
// AMDGCN-LABEL: define hidden void @fe1c(
123-
// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
124-
// AMDGCN-NEXT: [[ENTRY:.*:]]
125-
// AMDGCN-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
126-
// AMDGCN-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
127-
// AMDGCN-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
128-
// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
129-
// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
130-
// AMDGCN-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
131-
// AMDGCN-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
132-
// AMDGCN-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
133-
// AMDGCN-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
134-
// AMDGCN-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
135-
// AMDGCN-NEXT: ]
136-
// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
137-
// AMDGCN-NEXT: ret void
138-
// AMDGCN: [[DEVICE_SCOPE]]:
139-
// AMDGCN-NEXT: fence syncscope("agent") release
140-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
141-
// AMDGCN: [[SYSTEM_SCOPE]]:
142-
// AMDGCN-NEXT: fence release
143-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
144-
// AMDGCN: [[WORKGROUP_SCOPE]]:
145-
// AMDGCN-NEXT: fence syncscope("workgroup") release
146-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
147-
// AMDGCN: [[WAVEFRONT_SCOPE]]:
148-
// AMDGCN-NEXT: fence syncscope("wavefront") release
149-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
150-
// AMDGCN: [[SINGLE_SCOPE]]:
151-
// AMDGCN-NEXT: fence syncscope("singlethread") release
152-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
150+
// AMDGCN-CL12-LABEL: define hidden void @fe1c(
151+
// AMDGCN-CL12-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
152+
// AMDGCN-CL12-NEXT: [[ENTRY:.*:]]
153+
// AMDGCN-CL12-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
154+
// AMDGCN-CL12-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
155+
// AMDGCN-CL12-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
156+
// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
157+
// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
158+
// AMDGCN-CL12-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
159+
// AMDGCN-CL12-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
160+
// AMDGCN-CL12-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
161+
// AMDGCN-CL12-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
162+
// AMDGCN-CL12-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
163+
// AMDGCN-CL12-NEXT: ]
164+
// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]:
165+
// AMDGCN-CL12-NEXT: ret void
166+
// AMDGCN-CL12: [[DEVICE_SCOPE]]:
167+
// AMDGCN-CL12-NEXT: fence syncscope("agent") release
168+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
169+
// AMDGCN-CL12: [[SYSTEM_SCOPE]]:
170+
// AMDGCN-CL12-NEXT: fence release
171+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
172+
// AMDGCN-CL12: [[WORKGROUP_SCOPE]]:
173+
// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release
174+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
175+
// AMDGCN-CL12: [[WAVEFRONT_SCOPE]]:
176+
// AMDGCN-CL12-NEXT: fence syncscope("wavefront") release
177+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
178+
// AMDGCN-CL12: [[SINGLE_SCOPE]]:
179+
// AMDGCN-CL12-NEXT: fence syncscope("singlethread") release
180+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
181+
//
182+
// AMDGCN-CL20-LABEL: define hidden void @fe1c(
183+
// AMDGCN-CL20-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
184+
// AMDGCN-CL20-NEXT: [[ENTRY:.*:]]
185+
// AMDGCN-CL20-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
186+
// AMDGCN-CL20-NEXT: store i32 [[SCOPE]], ptr addrspace(5) [[SCOPE_ADDR]], align 4
187+
// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[SCOPE_ADDR]], align 4
188+
// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
189+
// AMDGCN-CL20-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
190+
// AMDGCN-CL20-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
191+
// AMDGCN-CL20-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
192+
// AMDGCN-CL20-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
193+
// AMDGCN-CL20-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
194+
// AMDGCN-CL20-NEXT: ]
195+
// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]:
196+
// AMDGCN-CL20-NEXT: ret void
197+
// AMDGCN-CL20: [[DEVICE_SCOPE]]:
198+
// AMDGCN-CL20-NEXT: fence syncscope("agent") release
199+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
200+
// AMDGCN-CL20: [[SYSTEM_SCOPE]]:
201+
// AMDGCN-CL20-NEXT: fence release
202+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
203+
// AMDGCN-CL20: [[WORKGROUP_SCOPE]]:
204+
// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release
205+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
206+
// AMDGCN-CL20: [[WAVEFRONT_SCOPE]]:
207+
// AMDGCN-CL20-NEXT: fence syncscope("wavefront") release
208+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
209+
// AMDGCN-CL20: [[SINGLE_SCOPE]]:
210+
// AMDGCN-CL20-NEXT: fence syncscope("singlethread") release
211+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
153212
//
154213
// SPIRV-LABEL: define hidden spir_func void @fe1c(
155214
// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {

0 commit comments

Comments
 (0)