Skip to content

Commit 41b069e

Browse files
Ana Mihajlovic
authored and committed
add negative tests
1 parent 47b2325 commit 41b069e

File tree

2 files changed

+68
-6
lines changed

2 files changed

+68
-6
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,7 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
877877
return AMDGPU::V_CMP_LE_U32_e64;
878878
case AMDGPU::V_CMP_LT_U32_e64:
879879
return AMDGPU::V_CMP_GE_U32_e64;
880-
// unsigned 64
880+
// unsigned 64
881881
case AMDGPU::V_CMP_EQ_U64_e64:
882882
return AMDGPU::V_CMP_NE_U64_e64;
883883
case AMDGPU::V_CMP_NE_U64_e64:
@@ -956,7 +956,7 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
956956
bool SIShrinkInstructions::shouldSwapCndOperands(
957957
MachineInstr &MI, SmallVector<MachineOperand *, 4> &UsesToProcess) const {
958958
auto AllUses = MRI->use_nodbg_operands(MI.getOperand(0).getReg());
959-
unsigned Swap = 0, SwapNot = 0;
959+
int InstsToSwap = 0;
960960

961961
for (auto &Use : AllUses) {
962962
MachineInstr *UseInst = Use.getParent();
@@ -972,12 +972,12 @@ bool SIShrinkInstructions::shouldSwapCndOperands(
972972
bool Src1Imm = Src1.isImm();
973973

974974
if (!Src1Imm && Src0Imm)
975-
SwapNot++;
975+
InstsToSwap--;
976976
else if (Src1Imm && !Src0Imm &&
977977
UseInst->getOperand(1).getImm() == SISrcMods::NONE)
978-
Swap++;
978+
InstsToSwap++;
979979
}
980-
return (Swap > SwapNot);
980+
return (InstsToSwap > 0);
981981
}
982982

983983
static void swapCndOperands(MachineInstr &MI) {
@@ -994,7 +994,9 @@ static void swapCndOperands(MachineInstr &MI) {
994994
}
995995

996996
if (Op4.isReg()) {
997-
Op2.setReg(Op4.getReg());
997+
Op2.ChangeToRegister(Op4.getReg(), Op4.isDef(), Op4.isImplicit(),
998+
Op4.isKill(), Op4.isDead(), Op4.isUndef(),
999+
Op4.isDebug());
9981000
Op2.setSubReg(Op4.getSubReg());
9991001
} else if (Op4.isImm()) {
10001002
Op2.ChangeToImmediate(Op4.getImm());

llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,47 @@ define amdgpu_cs void @test_u32_eq(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out
162162
ret void
163163
}
164164

165+
define amdgpu_cs void @test_negative_case(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
166+
; GCN-LABEL: test_negative_case:
167+
; GCN: ; %bb.0: ; %.entry
168+
; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
169+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
170+
; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
171+
; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
172+
; GCN-NEXT: s_endpgm
173+
.entry:
174+
%vcc = icmp eq i32 %a, -1
175+
%val1 = select i1 %vcc, i32 %p, i32 0
176+
%val2 = select i1 %vcc, i32 0, i32 %q
177+
%ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
178+
%ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
179+
store <2 x i32> %ret1, ptr addrspace(1) %out
180+
ret void
181+
}
182+
183+
define amdgpu_cs void @test_mixed(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
184+
; GCN-LABEL: test_mixed:
185+
; GCN: ; %bb.0: ; %.entry
186+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0
187+
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
188+
; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
189+
; GCN-NEXT: v_dual_cndmask_b32 v2, 0, v3 :: v_dual_cndmask_b32 v3, 0, v4
190+
; GCN-NEXT: global_store_b128 v[5:6], v[0:3], off
191+
; GCN-NEXT: s_endpgm
192+
.entry:
193+
%vcc = icmp eq i32 -1, %a
194+
%val1 = select i1 %vcc, i32 0, i32 %p
195+
%val2 = select i1 %vcc, i32 %q, i32 0
196+
%val3 = select i1 %vcc, i32 0, i32 %r
197+
%val4 = select i1 %vcc, i32 0, i32 %s
198+
%ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
199+
%ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
200+
%ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
201+
%ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
202+
store <4 x i32> %ret3, ptr addrspace(1) %out
203+
ret void
204+
}
205+
165206
define amdgpu_cs void @test_u32_ne(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
166207
; GCN-LABEL: test_u32_ne:
167208
; GCN: ; %bb.0: ; %.entry
@@ -374,6 +415,25 @@ define amdgpu_cs void @test_f32_oeq(float %a, float %p, float %q, ptr addrspace(
374415
ret void
375416
}
376417

418+
define amdgpu_cs void @test_f32_negative_modifiers(float %a, float %p, float %q, ptr addrspace(1) %out) {
419+
; GCN-LABEL: test_f32_negative_modifiers:
420+
; GCN: ; %bb.0: ; %.entry
421+
; GCN-NEXT: v_cmp_eq_f32_e32 vcc_lo, 2.0, v0
422+
; GCN-NEXT: v_cndmask_b32_e64 v0, -v1, 0, vcc_lo
423+
; GCN-NEXT: v_cndmask_b32_e64 v1, -v2, 0, vcc_lo
424+
; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
425+
; GCN-NEXT: s_endpgm
426+
.entry:
427+
%r = fneg float %p
428+
%s = fneg float %q
429+
%vcc = fcmp oeq float 2.0, %a
430+
%val1 = select i1 %vcc, float 0.0, float %r
431+
%val2 = select i1 %vcc, float 0.0, float %s
432+
%ret0 = insertelement <2 x float> poison, float %val1, i32 0
433+
%ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
434+
store <2 x float> %ret1, ptr addrspace(1) %out
435+
ret void
436+
}
377437

378438
define amdgpu_cs void @test_f32_one(float %a, float %p, float %q, ptr addrspace(1) %out) {
379439
; GCN-LABEL: test_f32_one:

0 commit comments

Comments
 (0)