Skip to content

Commit 9049ac3

Browse files
committed
Fix AMDGPU fcanonicalize selection
1 parent d7cbb38 commit 9049ac3

File tree

5 files changed

+16
-58
lines changed

5 files changed

+16
-58
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14306,14 +14306,15 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
1430614306
}
1430714307

1430814308
bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
14309+
SDNodeFlags UserFlags,
1430914310
unsigned MaxDepth) const {
1431014311
unsigned Opcode = Op.getOpcode();
1431114312
if (Opcode == ISD::FCANONICALIZE)
1431214313
return true;
1431314314

1431414315
if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1431514316
const auto &F = CFP->getValueAPF();
14316-
if (F.isNaN() && F.isSignaling())
14317+
if ((UserFlags.hasNoNaNs() || F.isNaN()) && F.isSignaling())
1431714318
return false;
1431814319
if (!F.isDenormal())
1431914320
return true;
@@ -14505,7 +14506,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
1450514506

1450614507
// FIXME: denormalsEnabledForType is broken for the dynamic denormal mode.
1450714508
return denormalsEnabledForType(DAG, Op.getValueType()) &&
14508-
DAG.isKnownNeverSNaN(Op);
14509+
(UserFlags.hasNoNaNs() || DAG.isKnownNeverSNaN(Op));
1450914510
}
1451014511

1451114512
bool SITargetLowering::isCanonicalized(Register Reg, const MachineFunction &MF,

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
555555
Register N1) const override;
556556

557557
bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
558-
unsigned MaxDepth = 5) const;
558+
SDNodeFlags UserFlags = {}, unsigned MaxDepth = 5) const;
559559
bool isCanonicalized(Register Reg, const MachineFunction &MF,
560560
unsigned MaxDepth = 5) const;
561561
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,11 +1001,13 @@ def MFMALdScaleXForm : SDNodeXForm<timm, [{
10011001
return CurDAG->getTargetConstant(New, SDLoc(N), MVT::i32);
10021002
}]>;
10031003

1004-
def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
1005-
const SITargetLowering &Lowering =
1004+
def fcanonicalize_canonicalized
1005+
: PatFrag<(ops node:$op), (fcanonicalize node:$op), [{
1006+
const SITargetLowering &Lowering =
10061007
*static_cast<const SITargetLowering *>(getTargetLowering());
1007-
return Lowering.isCanonicalized(*CurDAG, Op);
1008+
return Lowering.isCanonicalized(*CurDAG, Op->getOperand(0), N->getFlags());
10081009
}]> {
1010+
// FIXME: The GlobalISel predicate code below is dead code.
10091011
let GISelPredicateCode = [{
10101012
const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
10111013
MF.getSubtarget().getTargetLowering());

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3459,10 +3459,7 @@ def : GCNPat<
34593459
// If fcanonicalize's operand is implicitly canonicalized, we only need a copy.
34603460
let AddedComplexity = 8 in {
34613461
foreach vt = [f16, v2f16, f32, v2f32, f64] in {
3462-
def : GCNPat<
3463-
(fcanonicalize (vt is_canonicalized:$src)),
3464-
(COPY vt:$src)
3465-
>;
3462+
def : GCNPat<(fcanonicalize_canonicalized vt:$src), (COPY vt:$src)>;
34663463
}
34673464
}
34683465

llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll

Lines changed: 6 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,6 @@ define i1 @test70_nnan(float %arg1, float %arg2, float %arg3) {
11301130
; GCN-LABEL: test70_nnan:
11311131
; GCN: ; %bb.0:
11321132
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1133-
; GCN-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
11341133
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
11351134
; GCN-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
11361135
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1200,7 +1199,6 @@ define i1 @test73_nnan(float %arg1, float %arg2, float %arg3) {
12001199
; GCN-LABEL: test73_nnan:
12011200
; GCN: ; %bb.0:
12021201
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203-
; GCN-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
12041202
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
12051203
; GCN-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v2
12061204
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1270,7 +1268,6 @@ define i1 @test75_nnan(float %arg1, float %arg2, float %arg3) {
12701268
; GCN-LABEL: test75_nnan:
12711269
; GCN: ; %bb.0:
12721270
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273-
; GCN-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
12741271
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
12751272
; GCN-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v2
12761273
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1304,7 +1301,6 @@ define i1 @test76_nnan(float %arg1, float %arg2, float %arg3) {
13041301
; GCN-LABEL: test76_nnan:
13051302
; GCN: ; %bb.0:
13061303
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1307-
; GCN-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
13081304
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
13091305
; GCN-NEXT: v_cmp_le_f32_e32 vcc_lo, v0, v2
13101306
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1516,18 +1512,14 @@ define i1 @test84_nnan(half %arg1, half %arg2, half %arg3) {
15161512
; GFX11-TRUE16-LABEL: test84_nnan:
15171513
; GFX11-TRUE16: ; %bb.0:
15181514
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1519-
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
1520-
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
1521-
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
1515+
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v1.l
15221516
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v0.l, v2.l
15231517
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
15241518
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
15251519
;
15261520
; GFX11-FAKE16-LABEL: test84_nnan:
15271521
; GFX11-FAKE16: ; %bb.0:
15281522
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1529-
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
1530-
; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
15311523
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
15321524
; GFX11-FAKE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v0, v2
15331525
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1578,8 +1570,6 @@ define <2 x i1> @test85_nnan(<2 x half> %arg1, <2 x half> %arg2, <2 x half> %arg
15781570
; GFX11-TRUE16-LABEL: test85_nnan:
15791571
; GFX11-TRUE16: ; %bb.0:
15801572
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1581-
; GFX11-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0
1582-
; GFX11-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1
15831573
; GFX11-TRUE16-NEXT: v_pk_min_f16 v1, v0, v1
15841574
; GFX11-TRUE16-NEXT: v_cmp_le_f16_e32 vcc_lo, v1.l, v2.l
15851575
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1590,8 +1580,6 @@ define <2 x i1> @test85_nnan(<2 x half> %arg1, <2 x half> %arg2, <2 x half> %arg
15901580
; GFX11-FAKE16-LABEL: test85_nnan:
15911581
; GFX11-FAKE16: ; %bb.0:
15921582
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1593-
; GFX11-FAKE16-NEXT: v_pk_max_f16 v0, v0, v0
1594-
; GFX11-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1
15951583
; GFX11-FAKE16-NEXT: v_pk_min_f16 v0, v0, v1
15961584
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
15971585
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
@@ -1646,8 +1634,6 @@ define <2 x i1> @test86_nnan(<2 x half> %arg1, <2 x half> %arg2, <2 x half> %arg
16461634
; GFX11-TRUE16-LABEL: test86_nnan:
16471635
; GFX11-TRUE16: ; %bb.0:
16481636
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1649-
; GFX11-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0
1650-
; GFX11-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1
16511637
; GFX11-TRUE16-NEXT: v_pk_max_f16 v1, v0, v1
16521638
; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, v1.l, v2.l
16531639
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1658,8 +1644,6 @@ define <2 x i1> @test86_nnan(<2 x half> %arg1, <2 x half> %arg2, <2 x half> %arg
16581644
; GFX11-FAKE16-LABEL: test86_nnan:
16591645
; GFX11-FAKE16: ; %bb.0:
16601646
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1661-
; GFX11-FAKE16-NEXT: v_pk_max_f16 v0, v0, v0
1662-
; GFX11-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1
16631647
; GFX11-FAKE16-NEXT: v_pk_max_f16 v0, v0, v1
16641648
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
16651649
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
@@ -1708,18 +1692,14 @@ define i1 @test87_nnan(half %arg1, half %arg2, half %arg3) {
17081692
; GFX11-TRUE16-LABEL: test87_nnan:
17091693
; GFX11-TRUE16: ; %bb.0:
17101694
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1711-
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
1712-
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
1713-
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
1695+
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v1.l
17141696
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, v0.l, v2.l
17151697
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
17161698
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
17171699
;
17181700
; GFX11-FAKE16-LABEL: test87_nnan:
17191701
; GFX11-FAKE16: ; %bb.0:
17201702
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1721-
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
1722-
; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
17231703
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
17241704
; GFX11-FAKE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, v0, v2
17251705
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1770,8 +1750,6 @@ define <2 x i1> @test88_nnan(<2 x half> %arg1, <2 x half> %arg2, <2 x half> %arg
17701750
; GFX11-TRUE16-LABEL: test88_nnan:
17711751
; GFX11-TRUE16: ; %bb.0:
17721752
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1773-
; GFX11-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0
1774-
; GFX11-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1
17751753
; GFX11-TRUE16-NEXT: v_pk_min_f16 v1, v0, v1
17761754
; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, v1.l, v2.l
17771755
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1782,8 +1760,6 @@ define <2 x i1> @test88_nnan(<2 x half> %arg1, <2 x half> %arg2, <2 x half> %arg
17821760
; GFX11-FAKE16-LABEL: test88_nnan:
17831761
; GFX11-FAKE16: ; %bb.0:
17841762
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1785-
; GFX11-FAKE16-NEXT: v_pk_max_f16 v0, v0, v0
1786-
; GFX11-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1
17871763
; GFX11-FAKE16-NEXT: v_pk_min_f16 v0, v0, v1
17881764
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
17891765
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
@@ -1832,18 +1808,14 @@ define i1 @test89_nnan(half %arg1, half %arg2, half %arg3) {
18321808
; GFX11-TRUE16-LABEL: test89_nnan:
18331809
; GFX11-TRUE16: ; %bb.0:
18341810
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1835-
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
1836-
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
1837-
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h
1811+
; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v1.l
18381812
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, v0.l, v2.l
18391813
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
18401814
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
18411815
;
18421816
; GFX11-FAKE16-LABEL: test89_nnan:
18431817
; GFX11-FAKE16: ; %bb.0:
18441818
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1845-
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
1846-
; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
18471819
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
18481820
; GFX11-FAKE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, v0, v2
18491821
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1888,18 +1860,14 @@ define i1 @test90_nnan(half %arg1, half %arg2, half %arg3) {
18881860
; GFX11-TRUE16-LABEL: test90_nnan:
18891861
; GFX11-TRUE16: ; %bb.0:
18901862
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1891-
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
1892-
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
1893-
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.h
1863+
; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v1.l
18941864
; GFX11-TRUE16-NEXT: v_cmp_le_f16_e32 vcc_lo, v0.l, v2.l
18951865
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
18961866
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
18971867
;
18981868
; GFX11-FAKE16-LABEL: test90_nnan:
18991869
; GFX11-FAKE16: ; %bb.0:
19001870
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1901-
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
1902-
; GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
19031871
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
19041872
; GFX11-FAKE16-NEXT: v_cmp_le_f16_e32 vcc_lo, v0, v2
19051873
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1950,8 +1918,6 @@ define <2 x i1> @test91_nnan(<2 x half> %arg1, <2 x half> %arg2, <2 x half> %arg
19501918
; GFX11-TRUE16-LABEL: test91_nnan:
19511919
; GFX11-TRUE16: ; %bb.0:
19521920
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1953-
; GFX11-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0
1954-
; GFX11-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1
19551921
; GFX11-TRUE16-NEXT: v_pk_max_f16 v1, v0, v1
19561922
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v1.l, v2.l
19571923
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -1962,8 +1928,6 @@ define <2 x i1> @test91_nnan(<2 x half> %arg1, <2 x half> %arg2, <2 x half> %arg
19621928
; GFX11-FAKE16-LABEL: test91_nnan:
19631929
; GFX11-FAKE16: ; %bb.0:
19641930
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1965-
; GFX11-FAKE16-NEXT: v_pk_max_f16 v0, v0, v0
1966-
; GFX11-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1
19671931
; GFX11-FAKE16-NEXT: v_pk_max_f16 v0, v0, v1
19681932
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
19691933
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
@@ -2712,12 +2676,11 @@ define i1 @test115_nnan(float %arg1, float %arg2, float %arg3, float %arg4, floa
27122676
; GCN-LABEL: test115_nnan:
27132677
; GCN: ; %bb.0:
27142678
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2715-
; GCN-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
2679+
; GCN-NEXT: v_max_f32_e32 v2, v2, v3
27162680
; GCN-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
27172681
; GCN-NEXT: v_cmp_lt_f32_e64 s0, v1, v4
2718-
; GCN-NEXT: v_max_f32_e32 v2, v2, v3
2719-
; GCN-NEXT: s_or_b32 s0, vcc_lo, s0
27202682
; GCN-NEXT: v_cmp_lt_f32_e64 s1, v2, v4
2683+
; GCN-NEXT: s_or_b32 s0, vcc_lo, s0
27212684
; GCN-NEXT: s_or_b32 s0, s0, s1
27222685
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
27232686
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -3370,7 +3333,6 @@ define i1 @test137_nnan(float %arg1, float %arg2, float %arg3) {
33703333
; GCN-LABEL: test137_nnan:
33713334
; GCN: ; %bb.0:
33723335
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3373-
; GCN-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
33743336
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
33753337
; GCN-NEXT: v_cmp_le_f32_e32 vcc_lo, v0, v2
33763338
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -3645,7 +3607,6 @@ define i1 @test146_nnan(float %arg1, float %arg2, float %arg3) {
36453607
; GCN-LABEL: test146_nnan:
36463608
; GCN: ; %bb.0:
36473609
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3648-
; GCN-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
36493610
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
36503611
; GCN-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
36513612
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -3754,7 +3715,6 @@ define i1 @test149_nnan(float %arg1, float %arg2, float %arg3) {
37543715
; GCN-LABEL: test149_nnan:
37553716
; GCN: ; %bb.0:
37563717
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3757-
; GCN-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
37583718
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
37593719
; GCN-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v2
37603720
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -3826,7 +3786,6 @@ define i1 @test151_nnan(float %arg1, float %arg2, float %arg3) {
38263786
; GCN-LABEL: test151_nnan:
38273787
; GCN: ; %bb.0:
38283788
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3829-
; GCN-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
38303789
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
38313790
; GCN-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v2
38323791
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -3861,7 +3820,6 @@ define i1 @test152_nnan(float %arg1, float %arg2, float %arg3) {
38613820
; GCN-LABEL: test152_nnan:
38623821
; GCN: ; %bb.0:
38633822
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3864-
; GCN-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
38653823
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
38663824
; GCN-NEXT: v_cmp_le_f32_e32 vcc_lo, v0, v2
38673825
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo

0 commit comments

Comments
 (0)