Skip to content

Commit 08280c0

Browse files
committed
[AMDGPU] Recognise bitmask operations as srcmods on select
Extend the VOP patterns to recognise when an or/xor/and modifies only the sign bit, and replace the operation with the appropriate srcmod.
1 parent 1600450 commit 08280c0

File tree

5 files changed

+1232
-282
lines changed

5 files changed

+1232
-282
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3036,6 +3036,38 @@ bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
30363036
Src = Src.getOperand(0);
30373037
}
30383038

3039+
// Convert various sign-bit masks to src mods. Currently disabled for 16-bit
3040+
// types as the codegen replaces the operand without adding a srcmod.
3041+
// This is intentionally finding the cases where we are performing float neg
3042+
// and abs on int types; the goal is not to obtain two's complement neg or
3043+
// abs.
3044+
// TODO: Add 16-bit support.
3045+
unsigned Opc = Src->getOpcode();
3046+
EVT VT = Src.getValueType();
3047+
if ((Opc != ISD::AND && Opc != ISD::OR && Opc != ISD::XOR) ||
3048+
(VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
3049+
return true;
3050+
3051+
ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Src->getOperand(1));
3052+
if (!CRHS)
3053+
return true;
3054+
3055+
// Recognise (xor a, 0x80000000) as NEG SrcMod.
3056+
// Recognise (and a, 0x7fffffff) as ABS SrcMod.
3057+
// Recognise (or a, 0x80000000) as NEG+ABS SrcMods.
3058+
if (Opc == ISD::XOR && CRHS->getAPIntValue().isSignMask()) {
3059+
Mods |= SISrcMods::NEG;
3060+
Src = Src.getOperand(0);
3061+
} else if (Opc == ISD::AND && AllowAbs &&
3062+
CRHS->getAPIntValue().isMaxSignedValue()) {
3063+
Mods |= SISrcMods::ABS;
3064+
Src = Src.getOperand(0);
3065+
} else if (Opc == ISD::OR && AllowAbs && CRHS->getAPIntValue().isSignMask()) {
3066+
Mods |= SISrcMods::ABS;
3067+
Mods |= SISrcMods::NEG;
3068+
Src = Src.getOperand(0);
3069+
}
3070+
30393071
return true;
30403072
}
30413073

llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -349,29 +349,24 @@ define i32 @select_fneg_xor_select_i32(i1 %cond0, i1 %cond1, i32 %arg0, i32 %arg
349349
; GCN: ; %bb.0:
350350
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351351
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
352-
; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
353-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
354352
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
355-
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
356-
; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v0
353+
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
354+
; GCN-NEXT: v_cndmask_b32_e64 v0, -v2, v3, vcc
357355
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
358-
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
356+
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc
359357
; GCN-NEXT: s_setpc_b64 s[30:31]
360358
;
361359
; GFX11-LABEL: select_fneg_xor_select_i32:
362360
; GFX11: ; %bb.0:
363361
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364362
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
365-
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
366363
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
367-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
364+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
368365
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
369-
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
370-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
366+
; GFX11-NEXT: v_cndmask_b32_e64 v0, -v2, v3, vcc_lo
371367
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
372-
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v0
373-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
374-
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
368+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
369+
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc_lo
375370
; GFX11-NEXT: s_setpc_b64 s[30:31]
376371
%fneg0 = xor i32 %arg0, -2147483648
377372
%select0 = select i1 %cond0, i32 %arg1, i32 %fneg0
@@ -550,31 +545,25 @@ define i64 @select_fneg_xor_select_i64(i1 %cond0, i1 %cond1, i64 %arg0, i64 %arg
550545
; GCN: ; %bb.0:
551546
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552547
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
553-
; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
554-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
555548
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
549+
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
556550
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
557-
; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc
558-
; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
551+
; GCN-NEXT: v_cndmask_b32_e64 v2, -v3, v5, vcc
559552
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
560-
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
553+
; GCN-NEXT: v_cndmask_b32_e64 v1, v2, -v2, vcc
561554
; GCN-NEXT: s_setpc_b64 s[30:31]
562555
;
563556
; GFX11-LABEL: select_fneg_xor_select_i64:
564557
; GFX11: ; %bb.0:
565558
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566559
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
567-
; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
568-
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
569-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
560+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
570561
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
571-
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo
572-
; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo
573-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
562+
; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v4 :: v_dual_and_b32 v1, 1, v1
563+
; GFX11-NEXT: v_cndmask_b32_e64 v2, -v3, v5, vcc_lo
574564
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
575-
; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
576-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
577-
; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
565+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
566+
; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, -v2, vcc_lo
578567
; GFX11-NEXT: s_setpc_b64 s[30:31]
579568
%fneg0 = xor i64 %arg0, 9223372036854775808
580569
%select0 = select i1 %cond0, i64 %arg1, i64 %fneg0

0 commit comments

Comments
 (0)