Skip to content

Commit 4702c1d

Browse files
committed
Merge branch 'main' of github.com:llvm/llvm-project into loop-vectorize/evl-exit-cond-avlnext-zero
2 parents f727031 + 60ee056 commit 4702c1d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+955
-90
lines changed

clang/lib/CodeGen/CGDecl.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1563,11 +1563,10 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
15631563
// The named return value optimization: allocate this variable in the
15641564
// return slot, so that we can elide the copy when returning this
15651565
// variable (C++0x [class.copy]p34).
1566-
address = ReturnValue;
15671566
AllocaAddr =
15681567
RawAddress(ReturnValue.emitRawPointer(*this),
15691568
ReturnValue.getElementType(), ReturnValue.getAlignment());
1570-
;
1569+
address = MaybeCastStackAddressSpace(AllocaAddr, Ty.getAddressSpace());
15711570

15721571
if (const RecordType *RecordTy = Ty->getAs<RecordType>()) {
15731572
const auto *RD = RecordTy->getOriginalDecl()->getDefinitionOrSelf();

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,10 @@ CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits Align,
108108
return RawAddress(Alloca, Ty, Align, KnownNonNull);
109109
}
110110

111-
RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
112-
CharUnits Align, const Twine &Name,
113-
llvm::Value *ArraySize,
114-
RawAddress *AllocaAddr) {
115-
RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
116-
if (AllocaAddr)
117-
*AllocaAddr = Alloca;
111+
RawAddress CodeGenFunction::MaybeCastStackAddressSpace(RawAddress Alloca,
112+
LangAS DestLangAS,
113+
llvm::Value *ArraySize) {
114+
118115
llvm::Value *V = Alloca.getPointer();
119116
// Alloca always returns a pointer in alloca address space, which may
120117
// be different from the type defined by the language. For example,
@@ -134,7 +131,18 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
134131
/*IsNonNull=*/true);
135132
}
136133

137-
return RawAddress(V, Ty, Align, KnownNonNull);
134+
return RawAddress(V, Alloca.getElementType(), Alloca.getAlignment(),
135+
KnownNonNull);
136+
}
137+
138+
RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
139+
CharUnits Align, const Twine &Name,
140+
llvm::Value *ArraySize,
141+
RawAddress *AllocaAddr) {
142+
RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
143+
if (AllocaAddr)
144+
*AllocaAddr = Alloca;
145+
return MaybeCastStackAddressSpace(Alloca, DestLangAS, ArraySize);
138146
}
139147

140148
/// CreateTempAlloca - This creates an alloca and inserts it into the entry

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2804,6 +2804,13 @@ class CodeGenFunction : public CodeGenTypeCache {
28042804
AllocaTracker Tracker;
28052805
};
28062806

2807+
private:
2808+
/// If \p Alloca is not in the same address space as \p DestLangAS, insert an
2809+
/// address space cast and return a new RawAddress based on this value.
2810+
RawAddress MaybeCastStackAddressSpace(RawAddress Alloca, LangAS DestLangAS,
2811+
llvm::Value *ArraySize = nullptr);
2812+
2813+
public:
28072814
/// CreateTempAlloca - This creates an alloca and inserts it into the entry
28082815
/// block if \p ArraySize is nullptr, otherwise inserts it at the current
28092816
/// insertion point of the builder. The caller is responsible for setting an

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,8 @@ __m512 test_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
778778
__m512 test_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
779779
// CHECK-LABEL: test_mm512_mask_fmadd_ps
780780
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
781+
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
782+
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
781783
return _mm512_mask_fmadd_ps(__A, __U, __B, __C);
782784
}
783785
__m512 test_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {

clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,15 @@ struct X { int z[17]; };
1010
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
1111
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
1212
// CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr
13+
// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
1314
// CHECK-NEXT: store i8 [[X]], ptr [[X_ADDR_ASCAST]], align 1
1415
// CHECK-NEXT: store i8 [[Y]], ptr [[Y_ADDR_ASCAST]], align 1
1516
// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[X_ADDR_ASCAST]], align 1
16-
// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
1717
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST]], i64 1
1818
// CHECK-NEXT: store i8 [[TMP0]], ptr [[ADD_PTR]], align 1
1919
// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[Y_ADDR_ASCAST]], align 1
20-
// CHECK-NEXT: [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
21-
// CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST1]], i64 2
22-
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ADD_PTR2]], align 1
20+
// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST]], i64 2
21+
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ADD_PTR1]], align 1
2322
// CHECK-NEXT: ret void
2423
//
2524
X foo(char x, char y) {
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "bar" --version 5
2+
// REQUIRES: amdgpu-registered-target
3+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \
4+
// RUN: -o - %s | FileCheck --check-prefix=AMDGCN --enable-var-scope %s
5+
6+
struct Foo {
7+
unsigned long long val;
8+
//
9+
__attribute__((device)) inline Foo() { val = 0; }
10+
__attribute__((device)) inline Foo(const Foo &src) { val = src.val; }
11+
__attribute__((device)) inline Foo(const volatile Foo &src) { val = src.val; }
12+
};
13+
14+
// AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo(
15+
// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
16+
// AMDGCN-NEXT: [[ENTRY:.*:]]
17+
// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
18+
// AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
19+
// AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5)
20+
// AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr
21+
// AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr
22+
// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
23+
// AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr
24+
// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4
25+
// AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8
26+
// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]]
27+
// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST]], ptr [[DST_ASCAST]], align 8
28+
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8
29+
// AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0
30+
// AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8
31+
// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_ASCAST]], align 8
32+
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 0
33+
// AMDGCN-NEXT: store i64 [[TMP1]], ptr [[ARRAYIDX]], align 8
34+
// AMDGCN-NEXT: ret void
35+
//
36+
__attribute__((device)) Foo bar(const Foo *const src_ptr) {
37+
Foo result;
38+
39+
union {
40+
Foo* const ptr;
41+
unsigned long long * const ptr64;
42+
} dst = {&result};
43+
44+
dst.ptr64[0] = src_ptr->val;
45+
return result;
46+
}

clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
647647
// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
648648
// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
649649
// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
650+
// AMDGCN20-NEXT: [[RETVAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[RETVAL_ASCAST]] to ptr addrspace(5)
650651
// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
651652
// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
652653
// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4

clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
121121
// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
122122
// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
123123
// AMDGCN-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
124+
// AMDGCN-NEXT: [[RETVAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[RETVAL_ASCAST]] to ptr addrspace(5)
124125
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
125126
// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
126127
// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4

clang/test/SemaTemplate/dedup-types-builtin.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 %s -verify
1+
// RUN: %clang_cc1 %s -verify -Wno-c++20-extensions
22
template <typename...> struct TypeList;
33

44
// === Check results of the builtin.
@@ -211,8 +211,7 @@ InUsingDecl<WithFunc1, WithFunc2> iu2; // expected-note {{in instantiation of te
211211
template <class ...T>
212212
struct LambdaInitCaptures {
213213
static constexpr int test() {
214-
[...foos=__builtin_dedup_pack<T...>()]{}; // expected-warning {{initialized lambda pack captures are a C++20 extension}} \
215-
// expected-error 2{{expansions of '__builtin_dedup_pack' are not supported here.}}
214+
[...foos=__builtin_dedup_pack<T...>()]{}; // expected-error 2{{expansions of '__builtin_dedup_pack' are not supported here.}}
216215
return 3;
217216
}
218217
};

flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1900,8 +1900,7 @@ hlfir::ShapeOfOp::canonicalize(ShapeOfOp shapeOf,
19001900
// shape information is not available at compile time
19011901
return llvm::LogicalResult::failure();
19021902

1903-
rewriter.replaceAllUsesWith(shapeOf.getResult(), shape);
1904-
rewriter.eraseOp(shapeOf);
1903+
rewriter.replaceOp(shapeOf, shape);
19051904
return llvm::LogicalResult::success();
19061905
}
19071906

0 commit comments

Comments
 (0)