Skip to content

Commit e158f69

Browse files
author
z1.cciauto
committed
merge main into amd-staging
2 parents 1b6da0d + c639475 commit e158f69

File tree

24 files changed

+629
-127
lines changed

24 files changed

+629
-127
lines changed

clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -438,8 +438,7 @@ void AggExprEmitter::visitCXXParenListOrInitListExpr(
438438
// Push a destructor if necessary.
439439
// FIXME: if we have an array of structures, all explicitly
440440
// initialized, we can end up pushing a linear number of cleanups.
441-
if (QualType::DestructionKind dtorKind =
442-
field->getType().isDestructedType()) {
441+
if (field->getType().isDestructedType()) {
443442
cgf.cgm.errorNYI(e->getSourceRange(),
444443
"visitCXXParenListOrInitListExpr destructor");
445444
return;

llvm/lib/MC/MCExpr.cpp

Lines changed: 11 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -346,17 +346,16 @@ static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
346346
Displacement *= -1;
347347
}
348348

349-
// Track whether B is before a relaxable instruction and whether A is after
350-
// a relaxable instruction. If SA and SB are separated by a linker-relaxable
351-
// instruction, the difference cannot be resolved as it may be changed by
352-
// the linker.
349+
// Track whether B is before a relaxable instruction/alignment and whether A
350+
// is after a relaxable instruction/alignment. If SA and SB are separated by
351+
// a linker-relaxable instruction/alignment, the difference cannot be
352+
// resolved as it may be changed by the linker.
353353
bool BBeforeRelax = false, AAfterRelax = false;
354354
for (auto F = FB; F; F = F->getNext()) {
355-
auto DF = F->getKind() == MCFragment::FT_Data ? F : nullptr;
356-
if (DF && DF->isLinkerRelaxable()) {
357-
if (&*F != FB || SBOffset != DF->getContents().size())
355+
if (F && F->isLinkerRelaxable()) {
356+
if (&*F != FB || SBOffset != F->getSize())
358357
BBeforeRelax = true;
359-
if (&*F != FA || SAOffset == DF->getContents().size())
358+
if (&*F != FA || SAOffset == F->getSize())
360359
AAfterRelax = true;
361360
if (BBeforeRelax && AAfterRelax)
362361
return;
@@ -370,17 +369,15 @@ static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
370369
}
371370

372371
int64_t Num;
373-
if (DF) {
374-
Displacement += DF->getContents().size();
375-
} else if (F->getKind() == MCFragment::FT_Relaxable &&
372+
if (F->getKind() == MCFragment::FT_Data) {
373+
Displacement += F->getFixedSize();
374+
} else if ((F->getKind() == MCFragment::FT_Relaxable ||
375+
F->getKind() == MCFragment::FT_Align) &&
376376
Asm->hasFinalLayout()) {
377377
// Before finishLayout, a relaxable fragment's size is indeterminate.
378378
// After layout, during relocation generation, it can be treated as a
379379
// data fragment.
380380
Displacement += F->getSize();
381-
} else if (F->getKind() == MCFragment::FT_Align && Layout &&
382-
F->isLinkerRelaxable()) {
383-
Displacement += Asm->computeFragmentSize(*F);
384381
} else if (auto *FF = dyn_cast<MCFillFragment>(F);
385382
FF && FF->getNumValues().evaluateAsAbsolute(Num)) {
386383
Displacement += Num * FF->getValueSize();

llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -254,6 +254,7 @@ bool LoongArchAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
254254
MCFixup Fixup =
255255
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN);
256256
F.setVarFixups({Fixup});
257+
F.setLinkerRelaxable();
257258
F.getParent()->setLinkerRelaxable();
258259
return true;
259260
}

llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -320,6 +320,7 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
320320
MCFixup Fixup =
321321
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN);
322322
F.setVarFixups({Fixup});
323+
F.setLinkerRelaxable();
323324
F.getParent()->setLinkerRelaxable();
324325
return true;
325326
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -10076,12 +10076,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1007610076
// Get user vectorization factor and interleave count.
1007710077
ElementCount UserVF = Hints.getWidth();
1007810078
unsigned UserIC = Hints.getInterleave();
10079-
if (LVL.hasUncountableEarlyExit() && UserIC != 1) {
10080-
UserIC = 1;
10081-
reportVectorizationInfo("Interleaving not supported for loops "
10082-
"with uncountable early exits",
10083-
"InterleaveEarlyExitDisabled", ORE, L);
10084-
}
1008510079

1008610080
// Plan how to best vectorize.
1008710081
LVP.plan(UserVF, UserIC);

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1012,6 +1012,10 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
10121012
ReductionStartVector,
10131013
// Creates a step vector starting from 0 to VF with a step of 1.
10141014
StepVector,
1015+
/// Extracts a single lane (first operand) from a set of vector operands.
1016+
/// The lane specifies an index into a vector formed by combining all vector
1017+
/// operands (all operands after the first one).
1018+
ExtractLane,
10151019

10161020
};
10171021

@@ -2304,14 +2308,15 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
23042308
/// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
23052309
/// be omitted (implied by passing an odd number of operands) in which case
23062310
/// all other incoming values are merged into it.
2307-
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
2308-
: VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2311+
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands, DebugLoc DL)
2312+
: VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
23092313
assert(Operands.size() > 0 && "Expected at least one operand!");
23102314
}
23112315

23122316
VPBlendRecipe *clone() override {
23132317
SmallVector<VPValue *> Ops(operands());
2314-
return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2318+
return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()), Ops,
2319+
getDebugLoc());
23152320
}
23162321

23172322
VP_CLASSOF_IMPL(VPDef::VPBlendSC)

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
110110
case VPInstruction::BuildStructVector:
111111
case VPInstruction::BuildVector:
112112
return SetResultTyFromOp();
113+
case VPInstruction::ExtractLane:
114+
return inferScalarType(R->getOperand(1));
113115
case VPInstruction::FirstActiveLane:
114116
return Type::getIntNTy(Ctx, 64);
115117
case VPInstruction::ExtractLastElement:

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 12 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -466,28 +466,27 @@ void VPlanTransforms::prepareForVectorization(
466466
VPDominatorTree VPDT;
467467
VPDT.recalculate(Plan);
468468

469-
VPBlockBase *HeaderVPB = Plan.getEntry()->getSingleSuccessor();
470-
canonicalHeaderAndLatch(HeaderVPB, VPDT);
471-
VPBlockBase *LatchVPB = HeaderVPB->getPredecessors()[1];
469+
auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
470+
canonicalHeaderAndLatch(HeaderVPBB, VPDT);
471+
auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
472472

473473
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
474474
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
475475

476476
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
477-
// The canonical LatchVPB has the header block as last successor. If it has
477+
// The canonical LatchVPBB has the header block as last successor. If it has
478478
// another successor, this successor is an exit block - insert middle block on
479479
// its edge. Otherwise, add middle block as another successor retaining header
480480
// as last.
481-
if (LatchVPB->getNumSuccessors() == 2) {
482-
VPBlockBase *LatchExitVPB = LatchVPB->getSuccessors()[0];
483-
VPBlockUtils::insertOnEdge(LatchVPB, LatchExitVPB, MiddleVPBB);
481+
if (LatchVPBB->getNumSuccessors() == 2) {
482+
VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
483+
VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
484484
} else {
485-
VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
486-
LatchVPB->swapSuccessors();
485+
VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
486+
LatchVPBB->swapSuccessors();
487487
}
488488

489-
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
490-
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
489+
addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
491490

492491
[[maybe_unused]] bool HandledUncountableEarlyExit = false;
493492
// Disconnect all early exits from the loop leaving it with a single exit from
@@ -503,8 +502,7 @@ void VPlanTransforms::prepareForVectorization(
503502
assert(!HandledUncountableEarlyExit &&
504503
"can handle exactly one uncountable early exit");
505504
handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
506-
cast<VPBasicBlock>(HeaderVPB),
507-
cast<VPBasicBlock>(LatchVPB), Range);
505+
HeaderVPBB, LatchVPBB, Range);
508506
HandledUncountableEarlyExit = true;
509507
} else {
510508
for (VPRecipeBase &R : EB->phis())
@@ -568,7 +566,7 @@ void VPlanTransforms::prepareForVectorization(
568566
// the corresponding compare because they may have ended up with different
569567
// line numbers and we want to avoid awkward line stepping while debugging.
570568
// E.g., if the compare has got a line number inside the loop.
571-
DebugLoc LatchDL = TheLoop->getLoopLatch()->getTerminator()->getDebugLoc();
569+
DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
572570
VPBuilder Builder(MiddleVPBB);
573571
VPValue *Cmp;
574572
if (!RequiresScalarEpilogueCheck)

llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -251,8 +251,9 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
251251
}
252252
OperandsWithMask.push_back(EdgeMask);
253253
}
254-
PHINode *IRPhi = cast<PHINode>(PhiR->getUnderlyingValue());
255-
auto *Blend = new VPBlendRecipe(IRPhi, OperandsWithMask);
254+
PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
255+
auto *Blend =
256+
new VPBlendRecipe(IRPhi, OperandsWithMask, PhiR->getDebugLoc());
256257
Builder.insert(Blend);
257258
PhiR->replaceAllUsesWith(Blend);
258259
PhiR->eraseFromParent();

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 34 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -863,6 +863,31 @@ Value *VPInstruction::generate(VPTransformState &State) {
863863
Res = Builder.CreateOr(Res, State.get(Op));
864864
return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
865865
}
866+
case VPInstruction::ExtractLane: {
867+
Value *LaneToExtract = State.get(getOperand(0), true);
868+
Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
869+
Value *Res = nullptr;
870+
Value *RuntimeVF = getRuntimeVF(State.Builder, IdxTy, State.VF);
871+
872+
for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
873+
Value *VectorStart =
874+
Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
875+
Value *VectorIdx = Idx == 1
876+
? LaneToExtract
877+
: Builder.CreateSub(LaneToExtract, VectorStart);
878+
Value *Ext = State.VF.isScalar()
879+
? State.get(getOperand(Idx))
880+
: Builder.CreateExtractElement(
881+
State.get(getOperand(Idx)), VectorIdx);
882+
if (Res) {
883+
Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
884+
Res = Builder.CreateSelect(Cmp, Ext, Res);
885+
} else {
886+
Res = Ext;
887+
}
888+
}
889+
return Res;
890+
}
866891
case VPInstruction::FirstActiveLane: {
867892
if (getNumOperands() == 1) {
868893
Value *Mask = State.get(getOperand(0));
@@ -921,7 +946,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
921946
}
922947

923948
switch (getOpcode()) {
924-
case Instruction::ExtractElement: {
949+
case Instruction::ExtractElement:
950+
case VPInstruction::ExtractLane: {
925951
// Add on the cost of extracting the element.
926952
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
927953
return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
@@ -983,6 +1009,7 @@ bool VPInstruction::isVectorToScalar() const {
9831009
return getOpcode() == VPInstruction::ExtractLastElement ||
9841010
getOpcode() == VPInstruction::ExtractPenultimateElement ||
9851011
getOpcode() == Instruction::ExtractElement ||
1012+
getOpcode() == VPInstruction::ExtractLane ||
9861013
getOpcode() == VPInstruction::FirstActiveLane ||
9871014
getOpcode() == VPInstruction::ComputeAnyOfResult ||
9881015
getOpcode() == VPInstruction::ComputeFindIVResult ||
@@ -1048,6 +1075,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
10481075
case VPInstruction::BuildVector:
10491076
case VPInstruction::CalculateTripCountMinusVF:
10501077
case VPInstruction::CanonicalIVIncrementForPart:
1078+
case VPInstruction::ExtractLane:
10511079
case VPInstruction::ExtractLastElement:
10521080
case VPInstruction::ExtractPenultimateElement:
10531081
case VPInstruction::FirstActiveLane:
@@ -1097,6 +1125,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
10971125
case VPInstruction::ComputeAnyOfResult:
10981126
case VPInstruction::ComputeFindIVResult:
10991127
return Op == getOperand(1);
1128+
case VPInstruction::ExtractLane:
1129+
return Op == getOperand(0);
11001130
};
11011131
llvm_unreachable("switch should return");
11021132
}
@@ -1176,6 +1206,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
11761206
case VPInstruction::BuildVector:
11771207
O << "buildvector";
11781208
break;
1209+
case VPInstruction::ExtractLane:
1210+
O << "extract-lane";
1211+
break;
11791212
case VPInstruction::ExtractLastElement:
11801213
O << "extract-last-element";
11811214
break;

0 commit comments

Comments
 (0)