Skip to content

Commit c2a88cf

Browse files
committed
Merge remote-tracking branch 'upstream/main' into fix_vector_combine
2 parents 91f0c8d + 7f27482 commit c2a88cf

File tree

39 files changed

+1964
-492
lines changed

39 files changed

+1964
-492
lines changed

.github/workflows/llvm-project-tests.yml

Lines changed: 0 additions & 149 deletions
This file was deleted.

.github/workflows/llvm-project-workflow-tests.yml

Lines changed: 0 additions & 32 deletions
This file was deleted.

clang/lib/Parse/ParseDecl.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6224,7 +6224,6 @@ void Parser::ParseTypeQualifierListOpt(
62246224
case tok::kw___funcref:
62256225
ParseWebAssemblyFuncrefTypeAttribute(DS.getAttributes());
62266226
continue;
6227-
goto DoneWithTypeQuals;
62286227

62296228
case tok::kw___pascal:
62306229
if (AttrReqs & AR_VendorAttributesParsed) {

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,9 @@ class LoopVectorizationLegality {
493493
/// and we only need to check individual instructions.
494494
bool canVectorizeInstrs();
495495

496+
/// Check if an individual instruction is vectorizable.
497+
bool canVectorizeInstr(Instruction &I);
498+
496499
/// When we vectorize loops we may change the order in which
497500
/// we read and write from memory. This method checks if it is
498501
/// legal to vectorize the code, considering only memory constraints.

llvm/lib/Analysis/DemandedBits.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,26 @@ void DemandedBits::determineLiveOperandBits(
7676
computeKnownBits(V2, Known2, DL, &AC, UserI, &DT);
7777
}
7878
};
79+
auto GetShiftedRange = [&](uint64_t Min, uint64_t Max, bool ShiftLeft) {
80+
auto ShiftF = [ShiftLeft](const APInt &Mask, unsigned ShiftAmnt) {
81+
return ShiftLeft ? Mask.shl(ShiftAmnt) : Mask.lshr(ShiftAmnt);
82+
};
83+
AB = APInt::getZero(BitWidth);
84+
uint64_t LoopRange = Max - Min;
85+
APInt Mask = AOut;
86+
APInt Shifted = AOut; // AOut | (AOut << 1) | ... | (AOut << (ShiftAmnt - 1))
87+
for (unsigned ShiftAmnt = 1; ShiftAmnt <= LoopRange; ShiftAmnt <<= 1) {
88+
if (LoopRange & ShiftAmnt) {
89+
// Account for (LoopRange - ShiftAmnt, LoopRange]
90+
Mask |= ShiftF(Shifted, LoopRange - ShiftAmnt + 1);
91+
// Clears the low bit.
92+
LoopRange -= ShiftAmnt;
93+
}
94+
// [0, ShiftAmnt) -> [0, ShiftAmnt * 2)
95+
Shifted |= ShiftF(Shifted, ShiftAmnt);
96+
}
97+
AB = ShiftF(Mask, Min);
98+
};
7999

80100
switch (UserI->getOpcode()) {
81101
default: break;
@@ -183,6 +203,17 @@ void DemandedBits::determineLiveOperandBits(
183203
AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
184204
else if (S->hasNoUnsignedWrap())
185205
AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
206+
} else {
207+
ComputeKnownBits(BitWidth, UserI->getOperand(1), nullptr);
208+
uint64_t Min = Known.getMinValue().getLimitedValue(BitWidth - 1);
209+
uint64_t Max = Known.getMaxValue().getLimitedValue(BitWidth - 1);
210+
// Similar to the LShr case, but the range mask is shifted right.
211+
GetShiftedRange(Min, Max, /*ShiftLeft=*/false);
212+
const auto *S = cast<ShlOperator>(UserI);
213+
if (S->hasNoSignedWrap())
214+
AB |= APInt::getHighBitsSet(BitWidth, Max + 1);
215+
else if (S->hasNoUnsignedWrap())
216+
AB |= APInt::getHighBitsSet(BitWidth, Max);
186217
}
187218
}
188219
break;
@@ -197,6 +228,24 @@ void DemandedBits::determineLiveOperandBits(
197228
// (they must be zero).
198229
if (cast<LShrOperator>(UserI)->isExact())
199230
AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
231+
} else {
232+
ComputeKnownBits(BitWidth, UserI->getOperand(1), nullptr);
233+
uint64_t Min = Known.getMinValue().getLimitedValue(BitWidth - 1);
234+
uint64_t Max = Known.getMaxValue().getLimitedValue(BitWidth - 1);
235+
// Suppose AOut == 0b0000 0001
236+
// [min, max] = [1, 3]
237+
// The helper first builds the mask for shifts in [0, max - min]:
238+
// shifts by 0, 1 and 2 of AOut give 0b0000 0111.
239+
// The loop doubles shiftAmnt and stops once it exceeds the remaining range.
240+
//
241+
// After the iterations we need one more shift by min,
242+
// to move from 0b0000 0111 to 0b0000 1110.
243+
// The loop populates the mask relative to (0,...,max-min),
244+
// but we need coverage from (min, max).
245+
// This is why the shift by min is needed.
246+
GetShiftedRange(Min, Max, /*ShiftLeft=*/true);
247+
if (cast<LShrOperator>(UserI)->isExact())
248+
AB |= APInt::getLowBitsSet(BitWidth, Max);
200249
}
201250
}
202251
break;
@@ -217,6 +266,26 @@ void DemandedBits::determineLiveOperandBits(
217266
// (they must be zero).
218267
if (cast<AShrOperator>(UserI)->isExact())
219268
AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
269+
} else {
270+
ComputeKnownBits(BitWidth, UserI->getOperand(1), nullptr);
271+
uint64_t Min = Known.getMinValue().getLimitedValue(BitWidth - 1);
272+
uint64_t Max = Known.getMaxValue().getLimitedValue(BitWidth - 1);
273+
GetShiftedRange(Min, Max, /*ShiftLeft=*/true);
274+
if (Max &&
275+
(AOut & APInt::getHighBitsSet(BitWidth, Max)).getBoolValue()) {
276+
// Suppose AOut = 0011 1100
277+
// [min, max] = [1, 3]
278+
// ShiftAmount = 1 : Mask is 1000 0000
279+
// ShiftAmount = 2 : Mask is 1100 0000
280+
// ShiftAmount = 3 : Mask is 1110 0000
281+
// The Mask with Max covers every case in [min, max],
282+
// so we are done
283+
AB.setSignBit();
284+
}
285+
// If the shift is exact, then the low bits are not dead
286+
// (they must be zero).
287+
if (cast<AShrOperator>(UserI)->isExact())
288+
AB |= APInt::getLowBitsSet(BitWidth, Max);
220289
}
221290
}
222291
break;

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2366,8 +2366,12 @@ static bool containsBufferFatPointers(const Function &F,
23662366
BufferFatPtrToStructTypeMap *TypeMap) {
23672367
bool HasFatPointers = false;
23682368
for (const BasicBlock &BB : F)
2369-
for (const Instruction &I : BB)
2369+
for (const Instruction &I : BB) {
23702370
HasFatPointers |= (I.getType() != TypeMap->remapType(I.getType()));
2371+
// Catch null pointer constants in loads, stores, etc.
2372+
for (const Value *V : I.operand_values())
2373+
HasFatPointers |= (V->getType() != TypeMap->remapType(V->getType()));
2374+
}
23712375
return HasFatPointers;
23722376
}
23732377

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,9 @@ struct ConstantComparesGatherer {
565565
/// Number of comparisons matched in the and/or chain
566566
unsigned UsedICmps = 0;
567567

568+
/// Whether the elements in Vals match the comparisons: true when gathered from a logical-or chain, false for a logical-and chain.
569+
bool IsEq = false;
570+
568571
/// Construct and compute the result for the comparison instruction Cond
569572
ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
570573
gather(Cond);
@@ -736,23 +739,23 @@ struct ConstantComparesGatherer {
736739
/// vector.
737740
/// One "Extra" case is allowed to differ from the other.
738741
void gather(Value *V) {
739-
bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
740-
742+
Value *Op0, *Op1;
743+
if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
744+
IsEq = true;
745+
else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
746+
IsEq = false;
747+
else
748+
return;
741749
// Keep a stack (SmallVector for efficiency) for depth-first traversal
742-
SmallVector<Value *, 8> DFT;
743-
SmallPtrSet<Value *, 8> Visited;
744-
745-
// Initialize
746-
Visited.insert(V);
747-
DFT.push_back(V);
750+
SmallVector<Value *, 8> DFT{Op0, Op1};
751+
SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
748752

749753
while (!DFT.empty()) {
750754
V = DFT.pop_back_val();
751755

752756
if (Instruction *I = dyn_cast<Instruction>(V)) {
753757
// If it is a || (or && depending on IsEq), process the operands.
754-
Value *Op0, *Op1;
755-
if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
758+
if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
756759
: match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
757760
if (Visited.insert(Op1).second)
758761
DFT.push_back(Op1);
@@ -763,7 +766,7 @@ struct ConstantComparesGatherer {
763766
}
764767

765768
// Try to match the current instruction
766-
if (matchInstruction(I, isEQ))
769+
if (matchInstruction(I, IsEq))
767770
// Match succeed, continue the loop
768771
continue;
769772
}
@@ -5103,6 +5106,7 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
51035106
Value *CompVal = ConstantCompare.CompValue;
51045107
unsigned UsedICmps = ConstantCompare.UsedICmps;
51055108
Value *ExtraCase = ConstantCompare.Extra;
5109+
bool TrueWhenEqual = ConstantCompare.IsEq;
51065110

51075111
// If we didn't find a value that is compared multiple times, fail.
51085112
if (!CompVal)
@@ -5112,8 +5116,6 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
51125116
if (UsedICmps <= 1)
51135117
return false;
51145118

5115-
bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
5116-
51175119
// There might be duplicate constants in the list, which the switch
51185120
// instruction can't handle, remove them now.
51195121
array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);

0 commit comments

Comments
 (0)