Skip to content

Commit 9fdab97

Browse files
committed
[X86] Improve transform for add-like nodes to add
Remove bespoke logic and use `isADDLike`.
1 parent 5535716 commit 9fdab97

File tree

10 files changed

+155
-150
lines changed

10 files changed

+155
-150
lines changed

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,21 +1560,40 @@ let Predicates = [HasNDD] in {
15601560
}
15611561

15621562
// Depositing value to 8/16 bit subreg:
1563-
def : Pat<(or (and GR64:$dst, -256),
1563+
def : Pat<(or (and GR64:$dst, -256),
15641564
(i64 (zextloadi8 addr:$src))),
1565-
(INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
1565+
(INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
15661566

1567-
def : Pat<(or (and GR32:$dst, -256),
1567+
def : Pat<(or (and GR32:$dst, -256),
15681568
(i32 (zextloadi8 addr:$src))),
1569-
(INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
1569+
(INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
15701570

1571-
def : Pat<(or (and GR64:$dst, -65536),
1571+
def : Pat<(or (and GR64:$dst, -65536),
15721572
(i64 (zextloadi16 addr:$src))),
15731573
(INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
15741574

1575-
def : Pat<(or (and GR32:$dst, -65536),
1575+
def : Pat<(or (and GR32:$dst, -65536),
15761576
(i32 (zextloadi16 addr:$src))),
1577-
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
1577+
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
1578+
1579+
// Same pattern as above but supporting `add` as the join
1580+
// operator. Need to support `add` as well, as we can convert `or` ->
1581+
// `add` when the `or` is `disjoint` (as in this pattern's case).
1582+
def : Pat<(add (and GR64:$dst, -256),
1583+
(i64 (zextloadi8 addr:$src))),
1584+
(INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
1585+
1586+
def : Pat<(add (and GR32:$dst, -256),
1587+
(i32 (zextloadi8 addr:$src))),
1588+
(INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
1589+
1590+
def : Pat<(add (and GR64:$dst, -65536),
1591+
(i64 (zextloadi16 addr:$src))),
1592+
(INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
1593+
1594+
def : Pat<(add (and GR32:$dst, -65536),
1595+
(i32 (zextloadi16 addr:$src))),
1596+
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
15781597

15791598
// To avoid needing to materialize an immediate in a register, use a 32-bit and
15801599
// with implicit zero-extension instead of a 64-bit and if the immediate has at

llvm/lib/Target/X86/X86InstrFragments.td

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -676,12 +676,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
676676

677677
// Treat an 'or' node as an 'add' if the operands are known to have no set bits in common.
678678
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
679-
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
680-
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
681-
682-
KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
683-
KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
684-
return (~Known0.Zero & ~Known1.Zero) == 0;
679+
return N->getOpcode() == ISD::OR && CurDAG->isADDLike(SDValue(N, 0));
685680
}]>;
686681

687682
def shiftMask8 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir

Lines changed: 32 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -426,48 +426,44 @@ body: |
426426
; SI: liveins: $sgpr0_sgpr1
427427
; SI-NEXT: {{ $}}
428428
; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
429+
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
430+
; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
429431
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
430-
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
431-
; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
432-
; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
433-
; SI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
434-
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
432+
; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
433+
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
435434
; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
436435
;
437436
; VI-LABEL: name: fabs_s64_ss
438437
; VI: liveins: $sgpr0_sgpr1
439438
; VI-NEXT: {{ $}}
440439
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
440+
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
441+
; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
441442
; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
442-
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
443-
; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
444-
; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
445-
; VI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
446-
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
443+
; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
444+
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
447445
; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
448446
;
449447
; GFX9-LABEL: name: fabs_s64_ss
450448
; GFX9: liveins: $sgpr0_sgpr1
451449
; GFX9-NEXT: {{ $}}
452450
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
451+
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
452+
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
453453
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
454-
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
455-
; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
456-
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
457-
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
458-
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
454+
; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
455+
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
459456
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
460457
;
461458
; GFX10-LABEL: name: fabs_s64_ss
462459
; GFX10: liveins: $sgpr0_sgpr1
463460
; GFX10-NEXT: {{ $}}
464461
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
462+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
463+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
465464
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
466-
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
467-
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
468-
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
469-
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
470-
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
465+
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
466+
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
471467
; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
472468
%0:sgpr(s64) = COPY $sgpr0_sgpr1
473469
%1:sgpr(s64) = G_FABS %0
@@ -643,48 +639,44 @@ body: |
643639
; SI: liveins: $sgpr0_sgpr1
644640
; SI-NEXT: {{ $}}
645641
; SI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
642+
; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
643+
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
646644
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
647-
; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
648-
; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
649-
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
650-
; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
651-
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
645+
; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
646+
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
652647
; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
653648
;
654649
; VI-LABEL: name: fabs_s64_ss_no_src_constraint
655650
; VI: liveins: $sgpr0_sgpr1
656651
; VI-NEXT: {{ $}}
657652
; VI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
653+
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
654+
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
658655
; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
659-
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
660-
; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
661-
; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
662-
; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
663-
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
656+
; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
657+
; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
664658
; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
665659
;
666660
; GFX9-LABEL: name: fabs_s64_ss_no_src_constraint
667661
; GFX9: liveins: $sgpr0_sgpr1
668662
; GFX9-NEXT: {{ $}}
669663
; GFX9-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
664+
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
665+
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
670666
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
671-
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
672-
; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
673-
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
674-
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
675-
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
667+
; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
668+
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
676669
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
677670
;
678671
; GFX10-LABEL: name: fabs_s64_ss_no_src_constraint
679672
; GFX10: liveins: $sgpr0_sgpr1
680673
; GFX10-NEXT: {{ $}}
681674
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
675+
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
676+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
682677
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
683-
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
684-
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
685-
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
686-
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
687-
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
678+
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
679+
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
688680
; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
689681
%0:sgpr(s64) = IMPLICIT_DEF
690682
%1:sgpr(s64) = G_FABS %0:sgpr(s64)

0 commit comments

Comments
 (0)