Skip to content

Commit 3618ed1

Browse files
rez5427 and Yui5427 authored
[DAGCombiner] Add sra-xor-sra pattern fold (llvm#166777)
Add `fold (sra (xor (sra x, c1), -1), c2) -> (sra (xor x, -1), c3)`

For IR like this:
```
%a = ashr i8 %x, 6
%n = xor i8 %a, -1
%s = sext i8 %n to i16
%r = and i16 %s, %y
ret i16 %r
```
LLVM produces:
```
slli a0, a0, 56
srai a0, a0, 56
not a0, a0
srai a0, a0, 6
and a0, a0, a1
ret
```
The two shift amounts, 56 and 6, can be added up into a single shift.

alive2: https://alive2.llvm.org/ce/z/yxRQf9

---------

Co-authored-by: rez5427 <[email protected]>
1 parent 19d472f commit 3618ed1

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10988,6 +10988,22 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
1098810988
}
1098910989
}
1099010990

10991+
// fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1)
10992+
// This allows merging two arithmetic shifts even when there's a NOT in
10993+
// between.
10994+
SDValue X;
10995+
APInt C1;
10996+
if (N1C && sd_match(N0, m_OneUse(m_Not(
10997+
m_OneUse(m_Sra(m_Value(X), m_ConstInt(C1))))))) {
10998+
APInt C2 = N1C->getAPIntValue();
10999+
zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */);
11000+
APInt Sum = C1 + C2;
11001+
unsigned ShiftSum = Sum.getLimitedValue(OpSizeInBits - 1);
11002+
SDValue NewShift = DAG.getNode(
11003+
ISD::SRA, DL, VT, X, DAG.getShiftAmountConstant(ShiftSum, VT, DL));
11004+
return DAG.getNOT(DL, NewShift, VT);
11005+
}
11006+
1099111007
// fold (sra (shl X, m), (sub result_size, n))
1099211008
// -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
1099311009
// result_size - n != m.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s
3+
4+
; Test folding of: (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1)
5+
; Original motivating example: should merge sra+sra across xor
6+
define i16 @not_invert_signbit_splat_mask(i8 %x, i16 %y) {
7+
; CHECK-LABEL: not_invert_signbit_splat_mask:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: slli a0, a0, 56
10+
; CHECK-NEXT: srai a0, a0, 62
11+
; CHECK-NEXT: not a0, a0
12+
; CHECK-NEXT: and a0, a0, a1
13+
; CHECK-NEXT: ret
14+
%a = ashr i8 %x, 6
15+
%n = xor i8 %a, -1
16+
%s = sext i8 %n to i16
17+
%r = and i16 %s, %y
18+
ret i16 %r
19+
}
20+
21+
; Edge case: the inner shift amount (10) is not less than the i8 bit width (8).
22+
define i16 @sra_xor_sra_overflow(i8 %x, i16 %y) {
23+
; CHECK-LABEL: sra_xor_sra_overflow:
24+
; CHECK: # %bb.0:
25+
; CHECK-NEXT: li a0, 0
26+
; CHECK-NEXT: ret
27+
%a = ashr i8 %x, 10
28+
%n = xor i8 %a, -1
29+
%s = sext i8 %n to i16
30+
%r = and i16 %s, %y
31+
ret i16 %r
32+
}

0 commit comments

Comments
 (0)