Skip to content

Commit 2ece2eb

Browse files
committed
[AMDGPU] Recognise bitmask operations as srcmods on select
Extend the VOP patterns to recognise when an or/xor/and modifies only the sign bit, and replace it with the appropriate srcmod.
1 parent c4f6d34 commit 2ece2eb

File tree

5 files changed

+1235
-282
lines changed

5 files changed

+1235
-282
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3212,6 +3212,41 @@ bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
32123212
Src = Src.getOperand(0);
32133213
}
32143214

3215+
// Convert various sign-bit masks on integers to src mods. Currently disabled
3216+
// for 16-bit types as the codegen replaces the operand without adding a
3217+
// srcmod. This is intentionally finding the cases where we are performing
3218+
// float neg and abs on int types; the goal is not to obtain two's complement
3219+
// neg or abs. Hence, this is non-canonicalizing.
3220+
// TODO: Add 16-bit support.
3221+
if (IsCanonicalizing)
3222+
return true;
3223+
3224+
unsigned Opc = Src->getOpcode();
3225+
EVT VT = Src.getValueType();
3226+
if ((Opc != ISD::AND && Opc != ISD::OR && Opc != ISD::XOR) ||
3227+
(VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
3228+
return true;
3229+
3230+
ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Src->getOperand(1));
3231+
if (!CRHS)
3232+
return true;
3233+
3234+
// Recognise (xor a, 0x80000000) as NEG SrcMod.
3235+
// Recognise (and a, 0x7fffffff) as ABS SrcMod.
3236+
// Recognise (or a, 0x80000000) as NEG+ABS SrcMods.
3237+
if (Opc == ISD::XOR && CRHS->getAPIntValue().isSignMask()) {
3238+
Mods |= SISrcMods::NEG;
3239+
Src = Src.getOperand(0);
3240+
} else if (Opc == ISD::AND && AllowAbs &&
3241+
CRHS->getAPIntValue().isMaxSignedValue()) {
3242+
Mods |= SISrcMods::ABS;
3243+
Src = Src.getOperand(0);
3244+
} else if (Opc == ISD::OR && AllowAbs && CRHS->getAPIntValue().isSignMask()) {
3245+
Mods |= SISrcMods::ABS;
3246+
Mods |= SISrcMods::NEG;
3247+
Src = Src.getOperand(0);
3248+
}
3249+
32153250
return true;
32163251
}
32173252

llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -349,29 +349,24 @@ define i32 @select_fneg_xor_select_i32(i1 %cond0, i1 %cond1, i32 %arg0, i32 %arg
349349
; GCN: ; %bb.0:
350350
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351351
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
352-
; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
353-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
354352
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
355-
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
356-
; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v0
353+
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
354+
; GCN-NEXT: v_cndmask_b32_e64 v0, -v2, v3, vcc
357355
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
358-
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
356+
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc
359357
; GCN-NEXT: s_setpc_b64 s[30:31]
360358
;
361359
; GFX11-LABEL: select_fneg_xor_select_i32:
362360
; GFX11: ; %bb.0:
363361
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364362
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
365-
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
366363
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
367-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
364+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
368365
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
369-
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
370-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
366+
; GFX11-NEXT: v_cndmask_b32_e64 v0, -v2, v3, vcc_lo
371367
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
372-
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v0
373-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
374-
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
368+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
369+
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc_lo
375370
; GFX11-NEXT: s_setpc_b64 s[30:31]
376371
%fneg0 = xor i32 %arg0, -2147483648
377372
%select0 = select i1 %cond0, i32 %arg1, i32 %fneg0
@@ -550,31 +545,25 @@ define i64 @select_fneg_xor_select_i64(i1 %cond0, i1 %cond1, i64 %arg0, i64 %arg
550545
; GCN: ; %bb.0:
551546
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552547
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
553-
; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
554-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
555548
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
549+
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
556550
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
557-
; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc
558-
; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
551+
; GCN-NEXT: v_cndmask_b32_e64 v2, -v3, v5, vcc
559552
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
560-
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
553+
; GCN-NEXT: v_cndmask_b32_e64 v1, v2, -v2, vcc
561554
; GCN-NEXT: s_setpc_b64 s[30:31]
562555
;
563556
; GFX11-LABEL: select_fneg_xor_select_i64:
564557
; GFX11: ; %bb.0:
565558
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566559
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
567-
; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
568-
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
569-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
560+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
570561
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
571-
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo
572-
; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo
573-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
562+
; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v4 :: v_dual_and_b32 v1, 1, v1
563+
; GFX11-NEXT: v_cndmask_b32_e64 v2, -v3, v5, vcc_lo
574564
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
575-
; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
576-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
577-
; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
565+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
566+
; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, -v2, vcc_lo
578567
; GFX11-NEXT: s_setpc_b64 s[30:31]
579568
%fneg0 = xor i64 %arg0, 9223372036854775808
580569
%select0 = select i1 %cond0, i64 %arg1, i64 %fneg0

0 commit comments

Comments
 (0)