Skip to content

Commit 74389a0

Browse files
Merge branch 'main' into setrounding
2 parents e479ba7 + 6ff97d0 commit 74389a0

36 files changed

+3978
-7679
lines changed

clang/lib/Headers/cpuid.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ static __inline int __get_cpuid_count (unsigned int __leaf,
348348
// In some cases, offloading will set the host as the aux triple and define the
349349
// builtin. Given __has_builtin does not detect builtins on aux triples, we need
350350
// to explicitly check for some offloading cases.
351-
#ifndef __NVPTX__
351+
#if !defined(__NVPTX__) && !defined(__AMDGPU__) && !defined(__SPIRV__)
352352
static __inline void __cpuidex(int __cpu_info[4], int __leaf, int __subleaf) {
353353
__cpuid_count(__leaf, __subleaf, __cpu_info[0], __cpu_info[1], __cpu_info[2],
354354
__cpu_info[3]);

clang/test/CodeGen/union-tbaa1.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,16 @@ void bar(vect32 p[][2]);
1919
// CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP1]], [[NUM]]
2020
// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP0]]
2121
// CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA6:![0-9]+]]
22-
// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARR]], i32 [[TMP0]], i32 1
22+
// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i32 4
2323
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !tbaa [[TBAA2]]
2424
// CHECK-NEXT: [[MUL6:%.*]] = mul i32 [[TMP2]], [[NUM]]
25-
// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP0]], i32 1
25+
// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i32 4
2626
// CHECK-NEXT: store i32 [[MUL6]], ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA6]]
2727
// CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[MUL]], 16
2828
// CHECK-NEXT: store i32 [[TMP3]], ptr [[VEC]], align 4, !tbaa [[TBAA2]]
2929
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INDEX]], align 4, !tbaa [[TBAA2]]
30-
// CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP4]], i32 1
31-
// CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX14]], i32 2
30+
// CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP4]]
31+
// CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX13]], i32 6
3232
// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2, !tbaa [[TBAA6]]
3333
// CHECK-NEXT: [[CONV16:%.*]] = zext i16 [[TMP5]] to i32
3434
// CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i32 4

clang/test/Headers/__cpuidex_conflict.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
// Ensure that we do not run into conflicts when offloading.
77
// RUN: %clang_cc1 %s -DIS_STATIC=static -ffreestanding -fopenmp -fopenmp-is-target-device -aux-triple x86_64-unknown-linux-gnu
88
// RUN: %clang_cc1 -DIS_STATIC="" -triple nvptx64-nvidia-cuda -aux-triple x86_64-unknown-linux-gnu -aux-target-cpu x86-64 -fcuda-is-device -x cuda %s -o -
9+
// RUN: %clang_cc1 -DIS_STATIC="" -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -aux-target-cpu x86-64 -fcuda-is-device -x cuda %s -o -
10+
// RUN: %clang_cc1 -DIS_STATIC="" -triple spirv64 -aux-triple x86_64-unknown-linux-gnu -aux-target-cpu x86-64 -fcuda-is-device -x cuda %s -o -
11+
// RUN: %clang_cc1 -DIS_STATIC="" -triple spirv64 -aux-triple x86_64-unknown-linux-gnu -aux-target-cpu x86-64 -fsycl-is-device %s -o -
912

1013
typedef __SIZE_TYPE__ size_t;
1114

llvm/lib/Analysis/ConstantFolding.cpp

100644100755
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1652,6 +1652,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
16521652
case Intrinsic::amdgcn_perm:
16531653
case Intrinsic::amdgcn_wave_reduce_umin:
16541654
case Intrinsic::amdgcn_wave_reduce_umax:
1655+
case Intrinsic::amdgcn_wave_reduce_max:
1656+
case Intrinsic::amdgcn_wave_reduce_min:
1657+
case Intrinsic::amdgcn_wave_reduce_add:
1658+
case Intrinsic::amdgcn_wave_reduce_sub:
1659+
case Intrinsic::amdgcn_wave_reduce_and:
1660+
case Intrinsic::amdgcn_wave_reduce_or:
1661+
case Intrinsic::amdgcn_wave_reduce_xor:
16551662
case Intrinsic::amdgcn_s_wqm:
16561663
case Intrinsic::amdgcn_s_quadmask:
16571664
case Intrinsic::amdgcn_s_bitreplicate:
@@ -3672,6 +3679,13 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
36723679
return ConstantInt::get(Ty, C0->abs());
36733680
case Intrinsic::amdgcn_wave_reduce_umin:
36743681
case Intrinsic::amdgcn_wave_reduce_umax:
3682+
case Intrinsic::amdgcn_wave_reduce_max:
3683+
case Intrinsic::amdgcn_wave_reduce_min:
3684+
case Intrinsic::amdgcn_wave_reduce_add:
3685+
case Intrinsic::amdgcn_wave_reduce_sub:
3686+
case Intrinsic::amdgcn_wave_reduce_and:
3687+
case Intrinsic::amdgcn_wave_reduce_or:
3688+
case Intrinsic::amdgcn_wave_reduce_xor:
36753689
return dyn_cast<Constant>(Operands[0]);
36763690
}
36773691

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 39 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -2744,125 +2744,53 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
27442744
if (auto *I = combineConstantOffsets(GEP, *this))
27452745
return I;
27462746

2747-
// For constant GEPs, use a more general offset-based folding approach.
2748-
Type *PtrTy = Src->getType()->getScalarType();
2749-
if (GEP.hasAllConstantIndices() &&
2750-
(Src->hasOneUse() || Src->hasAllConstantIndices())) {
2751-
// Split Src into a variable part and a constant suffix.
2752-
gep_type_iterator GTI = gep_type_begin(*Src);
2753-
Type *BaseType = GTI.getIndexedType();
2754-
bool IsFirstType = true;
2755-
unsigned NumVarIndices = 0;
2756-
for (auto Pair : enumerate(Src->indices())) {
2757-
if (!isa<ConstantInt>(Pair.value())) {
2758-
BaseType = GTI.getIndexedType();
2759-
IsFirstType = false;
2760-
NumVarIndices = Pair.index() + 1;
2761-
}
2762-
++GTI;
2763-
}
2764-
2765-
// Determine the offset for the constant suffix of Src.
2766-
APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
2767-
if (NumVarIndices != Src->getNumIndices()) {
2768-
// FIXME: getIndexedOffsetInType() does not handled scalable vectors.
2769-
if (BaseType->isScalableTy())
2770-
return nullptr;
2771-
2772-
SmallVector<Value *> ConstantIndices;
2773-
if (!IsFirstType)
2774-
ConstantIndices.push_back(
2775-
Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
2776-
append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2777-
Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2778-
}
2779-
2780-
// Add the offset for GEP (which is fully constant).
2781-
if (!GEP.accumulateConstantOffset(DL, Offset))
2782-
return nullptr;
2783-
2784-
// Convert the total offset back into indices.
2785-
SmallVector<APInt> ConstIndices =
2786-
DL.getGEPIndicesForOffset(BaseType, Offset);
2787-
if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero()))
2788-
return nullptr;
2789-
2790-
GEPNoWrapFlags NW = getMergedGEPNoWrapFlags(*Src, *cast<GEPOperator>(&GEP));
2791-
SmallVector<Value *> Indices(
2792-
drop_end(Src->indices(), Src->getNumIndices() - NumVarIndices));
2793-
for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
2794-
Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2795-
// Even if the total offset is inbounds, we may end up representing it
2796-
// by first performing a larger negative offset, and then a smaller
2797-
// positive one. The large negative offset might go out of bounds. Only
2798-
// preserve inbounds if all signs are the same.
2799-
if (Idx.isNonNegative() != ConstIndices[0].isNonNegative())
2800-
NW = NW.withoutNoUnsignedSignedWrap();
2801-
if (!Idx.isNonNegative())
2802-
NW = NW.withoutNoUnsignedWrap();
2803-
}
2804-
2805-
return replaceInstUsesWith(
2806-
GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
2807-
Indices, "", NW));
2808-
}
2809-
28102747
if (Src->getResultElementType() != GEP.getSourceElementType())
28112748
return nullptr;
28122749

2813-
SmallVector<Value*, 8> Indices;
2814-
28152750
// Find out whether the last index in the source GEP is a sequential idx.
28162751
bool EndsWithSequential = false;
28172752
for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
28182753
I != E; ++I)
28192754
EndsWithSequential = I.isSequential();
2755+
if (!EndsWithSequential)
2756+
return nullptr;
28202757

2821-
// Can we combine the two pointer arithmetics offsets?
2822-
if (EndsWithSequential) {
2823-
// Replace: gep (gep %P, long B), long A, ...
2824-
// With: T = long A+B; gep %P, T, ...
2825-
Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2826-
Value *GO1 = GEP.getOperand(1);
2827-
2828-
// If they aren't the same type, then the input hasn't been processed
2829-
// by the loop above yet (which canonicalizes sequential index types to
2830-
// intptr_t). Just avoid transforming this until the input has been
2831-
// normalized.
2832-
if (SO1->getType() != GO1->getType())
2833-
return nullptr;
2758+
// Replace: gep (gep %P, long B), long A, ...
2759+
// With: T = long A+B; gep %P, T, ...
2760+
Value *SO1 = Src->getOperand(Src->getNumOperands() - 1);
2761+
Value *GO1 = GEP.getOperand(1);
28342762

2835-
Value *Sum =
2836-
simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2837-
// Only do the combine when we are sure the cost after the
2838-
// merge is never more than that before the merge.
2839-
if (Sum == nullptr)
2840-
return nullptr;
2763+
// If they aren't the same type, then the input hasn't been processed
2764+
// by the loop above yet (which canonicalizes sequential index types to
2765+
// intptr_t). Just avoid transforming this until the input has been
2766+
// normalized.
2767+
if (SO1->getType() != GO1->getType())
2768+
return nullptr;
28412769

2842-
Indices.append(Src->op_begin()+1, Src->op_end()-1);
2843-
Indices.push_back(Sum);
2844-
Indices.append(GEP.op_begin()+2, GEP.op_end());
2845-
} else if (isa<Constant>(*GEP.idx_begin()) &&
2846-
cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2847-
Src->getNumOperands() != 1) {
2848-
// Otherwise we can do the fold if the first index of the GEP is a zero
2849-
Indices.append(Src->op_begin()+1, Src->op_end());
2850-
Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2851-
}
2852-
2853-
// Don't create GEPs with more than one variable index.
2854-
unsigned NumVarIndices =
2855-
count_if(Indices, [](Value *Idx) { return !isa<Constant>(Idx); });
2856-
if (NumVarIndices > 1)
2770+
Value *Sum =
2771+
simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2772+
// Only do the combine when we are sure the cost after the
2773+
// merge is never more than that before the merge.
2774+
if (Sum == nullptr)
28572775
return nullptr;
28582776

2859-
if (!Indices.empty())
2860-
return replaceInstUsesWith(
2861-
GEP, Builder.CreateGEP(
2862-
Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2863-
getMergedGEPNoWrapFlags(*Src, *cast<GEPOperator>(&GEP))));
2777+
SmallVector<Value *, 8> Indices;
2778+
Indices.append(Src->op_begin() + 1, Src->op_end() - 1);
2779+
Indices.push_back(Sum);
2780+
Indices.append(GEP.op_begin() + 2, GEP.op_end());
28642781

2865-
return nullptr;
2782+
// Don't create GEPs with more than one non-zero index.
2783+
unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) {
2784+
auto *C = dyn_cast<Constant>(Idx);
2785+
return !C || !C->isNullValue();
2786+
});
2787+
if (NumNonZeroIndices > 1)
2788+
return nullptr;
2789+
2790+
return replaceInstUsesWith(
2791+
GEP, Builder.CreateGEP(
2792+
Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2793+
getMergedGEPNoWrapFlags(*Src, *cast<GEPOperator>(&GEP))));
28662794
}
28672795

28682796
Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
@@ -3334,17 +3262,18 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
33343262
return replaceInstUsesWith(GEP, Res);
33353263
}
33363264

3337-
bool SeenVarIndex = false;
3265+
bool SeenNonZeroIndex = false;
33383266
for (auto [IdxNum, Idx] : enumerate(Indices)) {
3339-
if (isa<Constant>(Idx))
3267+
auto *C = dyn_cast<Constant>(Idx);
3268+
if (C && C->isNullValue())
33403269
continue;
33413270

3342-
if (!SeenVarIndex) {
3343-
SeenVarIndex = true;
3271+
if (!SeenNonZeroIndex) {
3272+
SeenNonZeroIndex = true;
33443273
continue;
33453274
}
33463275

3347-
// GEP has multiple variable indices: Split it.
3276+
// GEP has multiple non-zero indices: Split it.
33483277
ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum);
33493278
Value *FrontGEP =
33503279
Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices,

llvm/test/Analysis/BasicAA/featuretest.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,14 @@ define i32 @gep_distance_test(ptr %A) {
102102
; cannot alias, even if there is a variable offset between them...
103103
define i32 @gep_distance_test2(ptr %A, i64 %distance) {
104104
; NO_ASSUME-LABEL: @gep_distance_test2(
105-
; NO_ASSUME-NEXT: [[B:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 [[DISTANCE:%.*]], i32 1
105+
; NO_ASSUME-NEXT: [[B_SPLIT:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 [[DISTANCE:%.*]]
106+
; NO_ASSUME-NEXT: [[B:%.*]] = getelementptr i8, ptr [[B_SPLIT]], i64 4
106107
; NO_ASSUME-NEXT: store i32 7, ptr [[B]], align 4
107108
; NO_ASSUME-NEXT: ret i32 0
108109
;
109110
; USE_ASSUME-LABEL: @gep_distance_test2(
110-
; USE_ASSUME-NEXT: [[B:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 [[DISTANCE:%.*]], i32 1
111+
; USE_ASSUME-NEXT: [[B_SPLIT:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 [[DISTANCE:%.*]]
112+
; USE_ASSUME-NEXT: [[B:%.*]] = getelementptr i8, ptr [[B_SPLIT]], i64 4
111113
; USE_ASSUME-NEXT: store i32 7, ptr [[B]], align 4
112114
; USE_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 4), "nonnull"(ptr [[A]]), "align"(ptr [[A]], i64 4) ]
113115
; USE_ASSUME-NEXT: ret i32 0

0 commit comments

Comments (0)