Skip to content

Commit 3d1ae88

Browse files
Ana Mihajlovic
authored and committed
added float instructions
1 parent e7e3462 commit 3d1ae88

File tree

10 files changed

+3595
-1643
lines changed

10 files changed

+3595
-1643
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ class SIFoldOperandsImpl {
107107

108108
unsigned getInverseCompareOpcode(MachineInstr &MI) const {
109109
switch (MI.getOpcode()) {
110+
// unsigned 32
110111
case AMDGPU::V_CMP_EQ_U32_e64:
111112
return AMDGPU::V_CMP_NE_U32_e64;
112113
case AMDGPU::V_CMP_NE_U32_e64:
@@ -119,27 +120,26 @@ class SIFoldOperandsImpl {
119120
return AMDGPU::V_CMP_LE_U32_e64;
120121
case AMDGPU::V_CMP_LT_U32_e64:
121122
return AMDGPU::V_CMP_GE_U32_e64;
122-
123-
// case AMDGPU::V_CMP_EQ_U32_e64:
124-
// return AMDGPU::V_CMP_NE_U32_e64;
125-
// case AMDGPU::V_CMP_NE_U32_e64:
126-
// return AMDGPU::V_CMP_EQ_U32_e64;
127-
// case AMDGPU::V_CMP_GE_U32_e64:
128-
// return AMDGPU::V_CMP_LT_U32_e64;
129-
// case AMDGPU::V_CMP_LE_U32_e64:
130-
// return AMDGPU::V_CMP_GT_U32_e64;
131-
// case AMDGPU::V_CMP_GT_U32_e64:
132-
// return AMDGPU::V_CMP_LE_U32_e64;
133-
// case AMDGPU::V_CMP_LT_U32_e64:
134-
// return AMDGPU::V_CMP_GE_U32_e64;
123+
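// float 32 -- NOTE(review): EQ<->NEQ is an exact complement, but the ordered GE/LT and LE/GT pairs below are not when an operand is NaN (both sides compare false); the exact complements would be the V_CMP_N{GE,LT,LE,GT}_F32 forms -- confirm NaNs are excluded before relying on this table.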
124+
case AMDGPU::V_CMP_EQ_F32_e64:
125+
return AMDGPU::V_CMP_NEQ_F32_e64;
126+
case AMDGPU::V_CMP_NEQ_F32_e64:
127+
return AMDGPU::V_CMP_EQ_F32_e64;
128+
case AMDGPU::V_CMP_GE_F32_e64:
129+
return AMDGPU::V_CMP_LT_F32_e64;
130+
case AMDGPU::V_CMP_LE_F32_e64:
131+
return AMDGPU::V_CMP_GT_F32_e64;
132+
case AMDGPU::V_CMP_GT_F32_e64:
133+
return AMDGPU::V_CMP_LE_F32_e64;
134+
case AMDGPU::V_CMP_LT_F32_e64:
135+
return AMDGPU::V_CMP_GE_F32_e64;
135136
default:
136137
return 0;
137138
}
138139
}
139140

140141
bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,
141142
MachineInstr &MI) const;
142-
143143
bool updateOperand(FoldCandidate &Fold) const;
144144

145145
bool canUseImmWithOpSel(FoldCandidate &Fold) const;
@@ -1541,11 +1541,17 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI, Register *RegVCC,
15411541
auto cmpDL = DefMI->getDebugLoc();
15421542
*NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
15431543
*RegVCC = Reg;
1544-
MachineInstrBuilder inverseCompare = BuildMI(
1544+
MachineInstrBuilder InverseCompare = BuildMI(
15451545
*DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
1546-
1547-
inverseCompare.add(DefMI->getOperand(1));
1548-
inverseCompare.add(DefMI->getOperand(2));
1546+
InverseCompare->setFlags(DefMI->getFlags());
1547+
1548+
unsigned OpNum = DefMI->getNumExplicitOperands();
1549+
for (unsigned i = 1; i < OpNum; i++) {
1550+
MachineOperand Op = DefMI->getOperand(i);
1551+
InverseCompare.add(Op);
1552+
if (Op.isReg() && Op.isKill())
1553+
InverseCompare->getOperand(i).setIsKill(false);
1554+
}
15491555
}
15501556
}
15511557
}

llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll

Lines changed: 135 additions & 135 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,14 @@ define float @v_powi_f32(float %l, i32 %r) {
7979
; GFX7: ; %bb.0:
8080
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8181
; GFX7-NEXT: v_mov_b32_e32 v2, 0x800000
82-
; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
83-
; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
82+
; GFX7-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
83+
; GFX7-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
8484
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 5, v2
8585
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v2
8686
; GFX7-NEXT: v_log_f32_e32 v0, v0
8787
; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1
8888
; GFX7-NEXT: v_mov_b32_e32 v2, 0x42000000
89-
; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
89+
; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
9090
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2
9191
; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
9292
; GFX7-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -104,14 +104,14 @@ define float @v_powi_f32(float %l, i32 %r) {
104104
; GFX8: ; %bb.0:
105105
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106106
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
107-
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
108-
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
107+
; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
108+
; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
109109
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
110110
; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
111111
; GFX8-NEXT: v_log_f32_e32 v0, v0
112112
; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
113113
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
114-
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
114+
; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
115115
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
116116
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
117117
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -128,13 +128,13 @@ define float @v_powi_f32(float %l, i32 %r) {
128128
; GFX11-LABEL: v_powi_f32:
129129
; GFX11: ; %bb.0:
130130
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131-
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
131+
; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
132132
; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
133-
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
133+
; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
134134
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
135135
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
136136
; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
137-
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
137+
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
138138
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
139139
; GFX11-NEXT: v_log_f32_e32 v0, v0
140140
; GFX11-NEXT: s_waitcnt_depctr 0xfff

llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ define float @test_s32(float %a) #0 {
1818
; GCN-LABEL: test_s32:
1919
; GCN: ; %bb.0: ; %entry
2020
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21-
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
22-
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
21+
; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0
22+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
2323
; GCN-NEXT: s_setpc_b64 s[30:31]
2424
entry:
2525
%fcmp = fcmp olt float %a, 0.0
@@ -111,10 +111,10 @@ define <2 x float> @test_v2s32(<2 x float> %a) #0 {
111111
; GCN-LABEL: test_v2s32:
112112
; GCN: ; %bb.0: ; %entry
113113
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114-
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
115-
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
116-
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1
117-
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
114+
; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0
115+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
116+
; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v1
117+
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
118118
; GCN-NEXT: s_setpc_b64 s[30:31]
119119
entry:
120120
%fcmp = fcmp olt <2 x float> %a, zeroinitializer
@@ -126,14 +126,14 @@ define <4 x float> @test_v4s32(<4 x float> %a) #0 {
126126
; GCN-LABEL: test_v4s32:
127127
; GCN: ; %bb.0: ; %entry
128128
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129-
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
130-
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
131-
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1
132-
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
133-
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v2
134-
; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
135-
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v3
136-
; GCN-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
129+
; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0
130+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
131+
; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v1
132+
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
133+
; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v2
134+
; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
135+
; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v3
136+
; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
137137
; GCN-NEXT: s_setpc_b64 s[30:31]
138138
entry:
139139
%fcmp = fcmp olt <4 x float> %a, zeroinitializer

0 commit comments

Comments
 (0)