Skip to content

Commit cfd5358

Browse files
authored
Merge branch 'release/rocm-rel-7.0' into amd/dev/kchoi/numbafix_rel7.0
2 parents b4fc5f3 + a74b2f2 commit cfd5358

File tree

6 files changed

+67
-90
lines changed

6 files changed

+67
-90
lines changed

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,15 @@ bool PointerReplacer::collectUsers() {
278278
Worklist.emplace_back(I);
279279
};
280280

281+
auto TryPushInstOperand = [&](Instruction *InstOp) {
282+
if (!UsersToReplace.contains(InstOp)) {
283+
if (!ValuesToRevisit.insert(InstOp))
284+
return false;
285+
Worklist.emplace_back(InstOp);
286+
}
287+
return true;
288+
};
289+
281290
PushUsersToWorklist(&Root);
282291
while (!Worklist.empty()) {
283292
Instruction *Inst = Worklist.pop_back_val();
@@ -310,21 +319,26 @@ bool PointerReplacer::collectUsers() {
310319
// incoming values.
311320
Worklist.emplace_back(PHI);
312321
for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) {
313-
auto *IncomingValue = cast<Instruction>(PHI->getIncomingValue(Idx));
314-
if (UsersToReplace.contains(IncomingValue))
315-
continue;
316-
if (!ValuesToRevisit.insert(IncomingValue))
322+
if (!TryPushInstOperand(cast<Instruction>(PHI->getIncomingValue(Idx))))
317323
return false;
318-
Worklist.emplace_back(IncomingValue);
319324
}
320325
} else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
321326
auto *TrueInst = dyn_cast<Instruction>(SI->getTrueValue());
322327
auto *FalseInst = dyn_cast<Instruction>(SI->getFalseValue());
323328
if (!TrueInst || !FalseInst)
324329
return false;
325330

326-
UsersToReplace.insert(SI);
327-
PushUsersToWorklist(SI);
331+
if (isAvailable(TrueInst) && isAvailable(FalseInst)) {
332+
UsersToReplace.insert(SI);
333+
PushUsersToWorklist(SI);
334+
continue;
335+
}
336+
337+
// Push select back onto the stack, followed by unavailable true/false
338+
// value.
339+
Worklist.emplace_back(SI);
340+
if (!TryPushInstOperand(TrueInst) || !TryPushInstOperand(FalseInst))
341+
return false;
328342
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
329343
UsersToReplace.insert(GEP);
330344
PushUsersToWorklist(GEP);

llvm/lib/Transforms/Scalar/ConstraintElimination.cpp

Lines changed: 7 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,7 @@ class ConstraintInfo {
313313
/// New variables that need to be added to the system are collected in
314314
/// \p NewVariables.
315315
ConstraintTy getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
316-
SmallVectorImpl<Value *> &NewVariables,
317-
bool ForceSignedSystem = false) const;
316+
SmallVectorImpl<Value *> &NewVariables) const;
318317

319318
/// Turns a comparison of the form \p Op0 \p Pred \p Op1 into a vector of
320319
/// constraints using getConstraint. Returns an empty constraint if the result
@@ -331,14 +330,6 @@ class ConstraintInfo {
331330
void transferToOtherSystem(CmpInst::Predicate Pred, Value *A, Value *B,
332331
unsigned NumIn, unsigned NumOut,
333332
SmallVectorImpl<StackEntry> &DFSInStack);
334-
335-
private:
336-
/// Adds facts into constraint system. \p ForceSignedSystem can be set when
337-
/// the \p Pred is eq/ne, and signed constraint system is used when it's
338-
/// specified.
339-
void addFactImpl(CmpInst::Predicate Pred, Value *A, Value *B, unsigned NumIn,
340-
unsigned NumOut, SmallVectorImpl<StackEntry> &DFSInStack,
341-
bool ForceSignedSystem);
342333
};
343334

344335
/// Represents a (Coefficient * Variable) entry after IR decomposition.
@@ -645,12 +636,8 @@ static Decomposition decompose(Value *V,
645636

646637
ConstraintTy
647638
ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
648-
SmallVectorImpl<Value *> &NewVariables,
649-
bool ForceSignedSystem) const {
639+
SmallVectorImpl<Value *> &NewVariables) const {
650640
assert(NewVariables.empty() && "NewVariables must be empty when passed in");
651-
assert((!ForceSignedSystem || CmpInst::isEquality(Pred)) &&
652-
"signed system can only be forced on eq/ne");
653-
654641
bool IsEq = false;
655642
bool IsNe = false;
656643

@@ -665,15 +652,15 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
665652
break;
666653
}
667654
case CmpInst::ICMP_EQ:
668-
if (!ForceSignedSystem && match(Op1, m_Zero())) {
655+
if (match(Op1, m_Zero())) {
669656
Pred = CmpInst::ICMP_ULE;
670657
} else {
671658
IsEq = true;
672659
Pred = CmpInst::ICMP_ULE;
673660
}
674661
break;
675662
case CmpInst::ICMP_NE:
676-
if (!ForceSignedSystem && match(Op1, m_Zero())) {
663+
if (match(Op1, m_Zero())) {
677664
Pred = CmpInst::getSwappedPredicate(CmpInst::ICMP_UGT);
678665
std::swap(Op0, Op1);
679666
} else {
@@ -690,7 +677,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
690677
return {};
691678

692679
SmallVector<ConditionTy, 4> Preconditions;
693-
bool IsSigned = ForceSignedSystem || CmpInst::isSigned(Pred);
680+
bool IsSigned = CmpInst::isSigned(Pred);
694681
auto &Value2Index = getValue2Index(IsSigned);
695682
auto ADec = decompose(Op0->stripPointerCastsSameRepresentation(),
696683
Preconditions, IsSigned, DL);
@@ -750,7 +737,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
750737
int64_t OffsetSum;
751738
if (AddOverflow(Offset1, Offset2, OffsetSum))
752739
return {};
753-
if (Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT)
740+
if (Pred == (IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT))
754741
if (AddOverflow(OffsetSum, int64_t(-1), OffsetSum))
755742
return {};
756743
R[0] = OffsetSum;
@@ -1593,20 +1580,10 @@ static bool checkOrAndOpImpliedByOther(
15931580
void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B,
15941581
unsigned NumIn, unsigned NumOut,
15951582
SmallVectorImpl<StackEntry> &DFSInStack) {
1596-
addFactImpl(Pred, A, B, NumIn, NumOut, DFSInStack, false);
1597-
// If the Pred is eq/ne, also add the fact to signed system.
1598-
if (CmpInst::isEquality(Pred))
1599-
addFactImpl(Pred, A, B, NumIn, NumOut, DFSInStack, true);
1600-
}
1601-
1602-
void ConstraintInfo::addFactImpl(CmpInst::Predicate Pred, Value *A, Value *B,
1603-
unsigned NumIn, unsigned NumOut,
1604-
SmallVectorImpl<StackEntry> &DFSInStack,
1605-
bool ForceSignedSystem) {
16061583
// If the constraint has a pre-condition, skip the constraint if it does not
16071584
// hold.
16081585
SmallVector<Value *> NewVariables;
1609-
auto R = getConstraint(Pred, A, B, NewVariables, ForceSignedSystem);
1586+
auto R = getConstraint(Pred, A, B, NewVariables);
16101587

16111588
// TODO: Support non-equality for facts as well.
16121589
if (!R.isValid(*this) || R.isNe())

llvm/test/Transforms/ConstraintElimination/eq.ll

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -424,53 +424,3 @@ bc_equal:
424424
not_eq:
425425
ret i1 false
426426
}
427-
428-
define i1 @test_eq_for_signed_cmp(i32 noundef %v0, i32 noundef %v1, i32 noundef %v2) {
429-
; CHECK-LABEL: @test_eq_for_signed_cmp(
430-
; CHECK-NEXT: entry:
431-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V2:%.*]], [[V0:%.*]]
432-
; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i32 [[V0]], [[V1:%.*]]
433-
; CHECK-NEXT: [[AND0:%.*]] = and i1 [[CMP1]], [[CMP]]
434-
; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[V1]], [[V2]]
435-
; CHECK-NEXT: [[AND1:%.*]] = and i1 false, [[AND0]]
436-
; CHECK-NEXT: ret i1 [[AND1]]
437-
;
438-
entry:
439-
%cmp = icmp eq i32 %v2, %v0
440-
%cmp1 = icmp sge i32 %v0, %v1
441-
%and0 = and i1 %cmp1, %cmp
442-
%cmp4 = icmp sgt i32 %v1, %v2
443-
%and1 = and i1 %cmp4, %and0
444-
ret i1 %and1
445-
}
446-
447-
define i1 @test_eq_for_signed_cmp_with_decompsition(i32 noundef %v0, i32 noundef %v1, i32 noundef %v2, i32 noundef %addend0, i32 noundef %addend1) {
448-
; CHECK-LABEL: @test_eq_for_signed_cmp_with_decompsition(
449-
; CHECK-NEXT: entry:
450-
; CHECK-NEXT: [[V0ADD:%.*]] = add nsw i32 [[V0:%.*]], [[ADDEND0:%.*]]
451-
; CHECK-NEXT: [[V1ADD:%.*]] = add nsw i32 [[V1:%.*]], [[ADDEND1:%.*]]
452-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V2:%.*]], [[V0ADD]]
453-
; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i32 [[V0ADD]], [[V1ADD]]
454-
; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[ADDEND0]], 0
455-
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[ADDEND0]], [[ADDEND1]]
456-
; CHECK-NEXT: [[AND0:%.*]] = and i1 [[CMP1]], [[CMP]]
457-
; CHECK-NEXT: [[AND1:%.*]] = and i1 [[AND0]], [[CMP2]]
458-
; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[CMP3]]
459-
; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[V1]], [[V2]]
460-
; CHECK-NEXT: [[AND3:%.*]] = and i1 false, [[AND2]]
461-
; CHECK-NEXT: ret i1 [[AND3]]
462-
;
463-
entry:
464-
%v0add = add nsw i32 %v0, %addend0
465-
%v1add = add nsw i32 %v1, %addend1
466-
%cmp = icmp eq i32 %v2, %v0add
467-
%cmp1 = icmp sge i32 %v0add, %v1add
468-
%cmp2 = icmp sge i32 %addend0, 0
469-
%cmp3 = icmp slt i32 %addend0, %addend1
470-
%and0 = and i1 %cmp1, %cmp
471-
%and1 = and i1 %and0, %cmp2
472-
%and2 = and i1 %and1, %cmp3
473-
%cmp4 = icmp sgt i32 %v1, %v2
474-
%and3 = and i1 %cmp4, %and2
475-
ret i1 %and3
476-
}

llvm/test/Transforms/ConstraintElimination/ne.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ define i1 @test_ne_eq_0(i8 %a, i8 %b) {
7171
; CHECK-NEXT: [[RES_13:%.*]] = xor i1 [[RES_12]], false
7272
; CHECK-NEXT: [[RES_14:%.*]] = xor i1 [[RES_13]], false
7373
; CHECK-NEXT: [[RES_15:%.*]] = xor i1 [[RES_14]], false
74-
; CHECK-NEXT: [[RES_16:%.*]] = xor i1 [[RES_15]], false
74+
; CHECK-NEXT: [[C_12:%.*]] = icmp sgt i8 [[A]], 0
75+
; CHECK-NEXT: [[RES_16:%.*]] = xor i1 [[RES_15]], [[C_12]]
7576
; CHECK-NEXT: ret i1 [[RES_16]]
7677
;
7778
entry:
@@ -208,7 +209,8 @@ define i1 @test_ne_eq_1(i8 %a, i8 %b) {
208209
; CHECK-NEXT: [[RES_13:%.*]] = xor i1 [[RES_12]], true
209210
; CHECK-NEXT: [[RES_14:%.*]] = xor i1 [[RES_13]], true
210211
; CHECK-NEXT: [[RES_15:%.*]] = xor i1 [[RES_14]], false
211-
; CHECK-NEXT: [[RES_16:%.*]] = xor i1 [[RES_15]], true
212+
; CHECK-NEXT: [[C_12:%.*]] = icmp sgt i8 [[A]], 0
213+
; CHECK-NEXT: [[RES_16:%.*]] = xor i1 [[RES_15]], [[C_12]]
212214
; CHECK-NEXT: ret i1 [[RES_16]]
213215
;
214216
entry:

llvm/test/Transforms/ConstraintElimination/pr105785.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ define void @pr105785(ptr %p) {
1515
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[FOR_IND2]], 3
1616
; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY3]], label %[[FOR_COND]]
1717
; CHECK: [[FOR_BODY3]]:
18-
; CHECK-NEXT: store i32 -1, ptr [[P]], align 4
18+
; CHECK-NEXT: [[SCMP:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[FOR_IND]], i32 1)
19+
; CHECK-NEXT: store i32 [[SCMP]], ptr [[P]], align 4
1920
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[FOR_IND2]], 1
2021
; CHECK-NEXT: br label %[[FOR_COND1]]
2122
; CHECK: [[FOR_END6]]:

llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,37 @@ sink:
7676
ret <2 x i64> %val.sink
7777
}
7878

79+
; Crashed in IC PtrReplacer because an invalid select was generated with addrspace(4) and addrspace(5)
80+
; operands.
81+
define amdgpu_kernel void @select_addr4_addr5(ptr addrspace(4) byref([12 x i8]) align 16 %arg) {
82+
; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5(
83+
; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) {
84+
; CHECK-NEXT: [[BB:.*:]]
85+
; CHECK-NEXT: ret void
86+
;
87+
bb:
88+
%alloca = alloca i32, i32 0, align 8, addrspace(5)
89+
%alloca1 = alloca [12 x i8], align 16, addrspace(5)
90+
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false)
91+
%select = select i1 false, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca
92+
call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false)
93+
ret void
94+
}
95+
96+
; Same as above but with swapped operands on the select.
97+
define amdgpu_kernel void @select_addr4_addr5_swapped(ptr addrspace(4) byref([12 x i8]) align 16 %arg) {
98+
; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5_swapped(
99+
; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) {
100+
; CHECK-NEXT: [[BB:.*:]]
101+
; CHECK-NEXT: ret void
102+
;
103+
bb:
104+
%alloca = alloca i32, i32 0, align 8, addrspace(5)
105+
%alloca1 = alloca [12 x i8], align 16, addrspace(5)
106+
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false)
107+
%select = select i1 false, ptr addrspace(5) %alloca, ptr addrspace(5) %alloca1
108+
call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false)
109+
ret void
110+
}
111+
79112
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0

0 commit comments

Comments
 (0)