[DAGCombiner] Add sra-xor-sra pattern fold (llvm#166777)

rez5427 · Yui5427 · web-flow · commit 3618ed14e4b0 · 2025-11-10T19:03:34.000-08:00
Add the fold `(sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1)`, where c3 = c1 + c2 (clamped to OpSizeInBits - 1). For IR like this: ``` %a = ashr i8 %x, 6 %n = xor i8 %a, -1 %s = sext i8 %n to i16 %r = and i16 %s, %y ret i16 %r ``` LLVM currently produces: ``` slli a0, a0, 56 srai a0, a0, 56 not a0, a0 srai a0, a0, 6 and a0, a0, a1 ret ``` The two arithmetic right shifts (by 56 and by 6) can be merged into a single shift by 62. alive2: https://alive2.llvm.org/ce/z/yxRQf9 --------- Co-authored-by: rez5427 <785369607@qq.com>
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10988,6 +10988,22 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     }
   }
 
+  // fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1), where
+  // c3 = min(c1 + c2, OpSizeInBits - 1). This allows merging two arithmetic
+  // shifts even when there's a NOT in between.
+  SDValue X;
+  APInt C1;
+  if (N1C && sd_match(N0, m_OneUse(m_Not(
+                              m_OneUse(m_Sra(m_Value(X), m_ConstInt(C1))))))) {
+    APInt C2 = N1C->getAPIntValue();
+    zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */);
+    APInt Sum = C1 + C2;
+    unsigned ShiftSum = Sum.getLimitedValue(OpSizeInBits - 1);
+    SDValue NewShift = DAG.getNode(
+        ISD::SRA, DL, VT, X, DAG.getShiftAmountConstant(ShiftSum, VT, DL));
+    return DAG.getNOT(DL, NewShift, VT);
+  }
+
   // fold (sra (shl X, m), (sub result_size, n))
   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
   // result_size - n != m.
diff --git a/llvm/test/CodeGen/RISCV/sra-xor-sra.ll b/llvm/test/CodeGen/RISCV/sra-xor-sra.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s
+
+; Test folding of: (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1)
+; Motivating example: the two srai should merge into one (srai by 62) across the not.
+define i16 @not_invert_signbit_splat_mask(i8 %x, i16 %y) {
+; CHECK-LABEL: not_invert_signbit_splat_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 56
+; CHECK-NEXT:    srai a0, a0, 62
+; CHECK-NEXT:    not a0, a0
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    ret
+  %a = ashr i8 %x, 6
+  %n = xor i8 %a, -1
+  %s = sext i8 %n to i16
+  %r = and i16 %s, %y
+  ret i16 %r
+}
+
+; Edge case: inner ashr amount (10) exceeds the i8 width; expected output is constant 0.
+define i16 @sra_xor_sra_overflow(i8 %x, i16 %y) {
+; CHECK-LABEL: sra_xor_sra_overflow:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    ret
+  %a = ashr i8 %x, 10
+  %n = xor i8 %a, -1
+  %s = sext i8 %n to i16
+  %r = and i16 %s, %y
+  ret i16 %r
+}