Skip to content

Commit c32be88

Browse files
committed
Respond to additional comments and add more tests
1 parent ae65bdc commit c32be88

File tree

2 files changed

+119
-8
lines changed

2 files changed

+119
-8
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3053,19 +3053,16 @@ bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
30533053
return true;
30543054

30553055
// Recognise (xor a, 0x80000000) as NEG SrcMod.
3056+
// Recognise (and a, 0x7fffffff) as ABS SrcMod.
3057+
// Recognise (or a, 0x80000000) as NEG+ABS SrcModifiers.
30563058
if (Opc == ISD::XOR && CRHS->getAPIntValue().isSignMask()) {
30573059
Mods |= SISrcMods::NEG;
30583060
Src = Src.getOperand(0);
3059-
3060-
}
3061-
// Recognise (and a, 0x7fffffff) as ABS SrcMod.
3062-
else if (Opc == ISD::AND && AllowAbs &&
3063-
CRHS->getAPIntValue().isMaxSignedValue()) {
3061+
} else if (Opc == ISD::AND && AllowAbs &&
3062+
CRHS->getAPIntValue().isMaxSignedValue()) {
30643063
Mods |= SISrcMods::ABS;
30653064
Src = Src.getOperand(0);
3066-
}
3067-
// Recognise (or a, 0x80000000) as NEG+ABS SrcModifiers.
3068-
else if (Opc == ISD::OR && AllowAbs && CRHS->getAPIntValue().isSignMask()) {
3065+
} else if (Opc == ISD::OR && AllowAbs && CRHS->getAPIntValue().isSignMask()) {
30693066
Mods |= SISrcMods::ABS;
30703067
Mods |= SISrcMods::NEG;
30713068
Src = Src.getOperand(0);

llvm/test/CodeGen/AMDGPU/integer-select-src-modifiers.ll

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,55 @@ define i32 @fneg_1_fabs_2_select_i32(i32 %cond, i32 %a, i32 %b) {
8686
ret i32 %select
8787
}
8888

89+
define i32 @s_fneg_select_i32_1(i32 inreg %cond, i32 inreg %a, i32 inreg %b) {
90+
; GCN-LABEL: s_fneg_select_i32_1:
91+
; GCN: ; %bb.0:
92+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93+
; GCN-NEXT: s_xor_b32 s4, s17, 0x80000000
94+
; GCN-NEXT: s_cmp_eq_u32 s16, 0
95+
; GCN-NEXT: s_cselect_b32 s4, s4, s18
96+
; GCN-NEXT: v_mov_b32_e32 v0, s4
97+
; GCN-NEXT: s_setpc_b64 s[30:31]
98+
;
99+
; GFX11-LABEL: s_fneg_select_i32_1:
100+
; GFX11: ; %bb.0:
101+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; GFX11-NEXT: s_xor_b32 s1, s1, 0x80000000
103+
; GFX11-NEXT: s_cmp_eq_u32 s0, 0
104+
; GFX11-NEXT: s_cselect_b32 s0, s1, s2
105+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
106+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
107+
; GFX11-NEXT: s_setpc_b64 s[30:31]
108+
%neg.a = xor i32 %a, u0x80000000
109+
%cmp = icmp eq i32 %cond, zeroinitializer
110+
%select = select i1 %cmp, i32 %neg.a, i32 %b
111+
ret i32 %select
112+
}
113+
114+
define i32 @s_fneg_1_fabs_2_select_i32(i32 inreg %cond, i32 %a, i32 %b) {
115+
; GCN-LABEL: s_fneg_1_fabs_2_select_i32:
116+
; GCN: ; %bb.0:
117+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118+
; GCN-NEXT: s_cmp_eq_u32 s16, 0
119+
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
120+
; GCN-NEXT: v_cndmask_b32_e64 v0, |v0|, -v0, s[4:5]
121+
; GCN-NEXT: s_setpc_b64 s[30:31]
122+
;
123+
; GFX11-LABEL: s_fneg_1_fabs_2_select_i32:
124+
; GFX11: ; %bb.0:
125+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126+
; GFX11-NEXT: s_cmp_eq_u32 s0, 0
127+
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
128+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
129+
; GFX11-NEXT: v_cndmask_b32_e64 v0, |v0|, -v0, s0
130+
; GFX11-NEXT: s_setpc_b64 s[30:31]
131+
%neg.a = xor i32 %a, u0x80000000
132+
%abs.b = and i32 %a, u0x7fffffff
133+
%cmp = icmp eq i32 %cond, zeroinitializer
134+
%select = select i1 %cmp, i32 %neg.a, i32 %abs.b
135+
ret i32 %select
136+
}
137+
89138
define <2 x i32> @fneg_select_v2i32_1(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
90139
; GCN-LABEL: fneg_select_v2i32_1:
91140
; GCN: ; %bb.0:
@@ -287,6 +336,71 @@ define <2 x i32> @fneg_fabs_select_v2i32_2(<2 x i32> %cond, <2 x i32> %a, <2 x i
287336
ret <2 x i32> %select
288337
}
289338

339+
340+
define <2 x i32> @s_fneg_select_v2i32_1(<2 x i32> inreg %cond, <2 x i32> inreg %a, <2 x i32> inreg %b) {
341+
; GCN-LABEL: s_fneg_select_v2i32_1:
342+
; GCN: ; %bb.0:
343+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344+
; GCN-NEXT: s_xor_b32 s4, s19, 0x80000000
345+
; GCN-NEXT: s_xor_b32 s5, s18, 0x80000000
346+
; GCN-NEXT: s_cmp_eq_u32 s16, 0
347+
; GCN-NEXT: s_cselect_b32 s5, s5, s20
348+
; GCN-NEXT: s_cmp_eq_u32 s17, 0
349+
; GCN-NEXT: s_cselect_b32 s4, s4, s21
350+
; GCN-NEXT: v_mov_b32_e32 v0, s5
351+
; GCN-NEXT: v_mov_b32_e32 v1, s4
352+
; GCN-NEXT: s_setpc_b64 s[30:31]
353+
;
354+
; GFX11-LABEL: s_fneg_select_v2i32_1:
355+
; GFX11: ; %bb.0:
356+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357+
; GFX11-NEXT: s_xor_b32 s3, s3, 0x80000000
358+
; GFX11-NEXT: s_xor_b32 s2, s2, 0x80000000
359+
; GFX11-NEXT: s_cmp_eq_u32 s0, 0
360+
; GFX11-NEXT: s_cselect_b32 s0, s2, s16
361+
; GFX11-NEXT: s_cmp_eq_u32 s1, 0
362+
; GFX11-NEXT: s_cselect_b32 s1, s3, s17
363+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
364+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
365+
; GFX11-NEXT: s_setpc_b64 s[30:31]
366+
%neg.a = xor <2 x i32> %a, splat (i32 u0x80000000)
367+
%cmp = icmp eq <2 x i32> %cond, zeroinitializer
368+
%select = select <2 x i1> %cmp, <2 x i32> %neg.a, <2 x i32> %b
369+
ret <2 x i32> %select
370+
}
371+
372+
define <2 x i32> @s_fneg_fabs_select_v2i32_2(<2 x i32> inreg %cond, <2 x i32> inreg %a, <2 x i32> inreg %b) {
373+
; GCN-LABEL: s_fneg_fabs_select_v2i32_2:
374+
; GCN: ; %bb.0:
375+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
376+
; GCN-NEXT: s_bitset1_b32 s19, 31
377+
; GCN-NEXT: s_bitset1_b32 s18, 31
378+
; GCN-NEXT: s_cmp_eq_u32 s16, 0
379+
; GCN-NEXT: s_cselect_b32 s4, s20, s18
380+
; GCN-NEXT: s_cmp_eq_u32 s17, 0
381+
; GCN-NEXT: s_cselect_b32 s5, s21, s19
382+
; GCN-NEXT: v_mov_b32_e32 v0, s4
383+
; GCN-NEXT: v_mov_b32_e32 v1, s5
384+
; GCN-NEXT: s_setpc_b64 s[30:31]
385+
;
386+
; GFX11-LABEL: s_fneg_fabs_select_v2i32_2:
387+
; GFX11: ; %bb.0:
388+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389+
; GFX11-NEXT: s_bitset1_b32 s3, 31
390+
; GFX11-NEXT: s_bitset1_b32 s2, 31
391+
; GFX11-NEXT: s_cmp_eq_u32 s0, 0
392+
; GFX11-NEXT: s_cselect_b32 s0, s16, s2
393+
; GFX11-NEXT: s_cmp_eq_u32 s1, 0
394+
; GFX11-NEXT: s_cselect_b32 s1, s17, s3
395+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
396+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
397+
; GFX11-NEXT: s_setpc_b64 s[30:31]
398+
%neg.a = or <2 x i32> %a, splat (i32 u0x80000000)
399+
%cmp = icmp eq <2 x i32> %cond, zeroinitializer
400+
%select = select <2 x i1> %cmp, <2 x i32> %b, <2 x i32> %neg.a
401+
ret <2 x i32> %select
402+
}
403+
290404
define i64 @fneg_select_i64_1(i64 %cond, i64 %a, i64 %b) {
291405
; GCN-LABEL: fneg_select_i64_1:
292406
; GCN: ; %bb.0:

0 commit comments

Comments
 (0)