- 
                Notifications
    
You must be signed in to change notification settings  - Fork 15.1k
 
[RISCV] Add ISel patterns for Xqciac QC.MULIADD instruction #147661
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Add basic isel patterns for the multiple accumulate QC.MULIADD instruction.
| 
          
 @llvm/pr-subscribers-backend-risc-v Author: Sudharsan Veeravalli (svs-quic) ChangesAdd basic isel patterns for the multiple accumulate QC.MULIADD instruction. Full diff: https://github.com/llvm/llvm-project/pull/147661.diff 3 Files Affected: 
 diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 98b613d9cc856..f586ff609ca16 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15895,6 +15895,13 @@ static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
 // X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
 static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
                                         uint64_t MulAmt) {
+  // Don't do this is if the Xqciac extension is enabled and the MulAmt is
+  // simm12.
+  const RISCVSubtarget &Subtarget =
+      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+  if (Subtarget.hasVendorXqciac() && isInt<12>(MulAmt))
+    return SDValue();
+
   uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
   ISD::NodeType Op;
   uint64_t ShiftAmt1;
@@ -23700,6 +23707,10 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
   auto *ConstNode = cast<ConstantSDNode>(C);
   const APInt &Imm = ConstNode->getAPIntValue();
 
+  // Don't do this if the Xqciac extension is enabled and the Imm in simm12.
+  if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
+    return false;
+
   // Break the MUL to a SLLI and an ADD/SUB.
   if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
       (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index b2bf09028bc40..3d0712baf6940 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1332,6 +1332,11 @@ class SelectQCbi<CondCode Cond, DAGOperand InTyImm, Pseudo OpNode >
           (OpNode GPRNoX0:$lhs, InTyImm:$Constant,
            (IntCCtoRISCVCC $cc), GPRNoX0:$truev, GPRNoX0:$falsev)>;
 
+let Predicates = [HasVendorXqciac, IsRV32] in {
+def : Pat<(XLenVT (add GPRNoX0:$rd, (mul GPRNoX0:$rs1, simm12:$imm12))),
+          (QC_MULIADD GPRNoX0:$rd, GPRNoX0:$rs1, simm12:$imm12)>;
+} // Predicates = [HasVendorXqciac, IsRV32]
+
 /// Simple arithmetic operations
 
 let Predicates = [HasVendorXqcilia, IsRV32] in {
diff --git a/llvm/test/CodeGen/RISCV/xqciac.ll b/llvm/test/CodeGen/RISCV/xqciac.ll
new file mode 100644
index 0000000000000..2822d1a98afea
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xqciac.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IM
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-xqciac -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IMXQCIAC
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-xqciac,+zba -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IZBAMXQCIAC
+
+define dso_local i32 @mul(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: mul:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a0, a1, 5
+; RV32IM-NEXT:    add a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: mul:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    li a0, 33
+; RV32IMXQCIAC-NEXT:    mul a0, a1, a0
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: mul:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    li a0, 33
+; RV32IZBAMXQCIAC-NEXT:    mul a0, a1, a0
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 33
+  ret i32 %mul
+}
+
+define dso_local i32 @muliadd(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: muliadd:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    li a2, 165
+; RV32IM-NEXT:    mul a1, a1, a2
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: muliadd:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    qc.muliadd a0, a1, 165
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: muliadd:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    qc.muliadd a0, a1, 165
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 165
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @muliadd_neg(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: muliadd_neg:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    li a2, -165
+; RV32IM-NEXT:    mul a1, a1, a2
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: muliadd_neg:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    qc.muliadd a0, a1, -165
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: muliadd_neg:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    qc.muliadd a0, a1, -165
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, -165
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @pow2immplus1(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: pow2immplus1:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a2, a1, 5
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    add a0, a2, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: pow2immplus1:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    qc.muliadd a0, a1, 33
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: pow2immplus1:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    qc.muliadd a0, a1, 33
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 33
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @gtsimm12(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: gtsimm12:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    lui a2, 1
+; RV32IM-NEXT:    addi a2, a2, 477
+; RV32IM-NEXT:    mul a1, a1, a2
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: gtsimm12:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    lui a2, 1
+; RV32IMXQCIAC-NEXT:    addi a2, a2, 477
+; RV32IMXQCIAC-NEXT:    mul a1, a1, a2
+; RV32IMXQCIAC-NEXT:    add a0, a0, a1
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: gtsimm12:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    lui a2, 1
+; RV32IZBAMXQCIAC-NEXT:    addi a2, a2, 477
+; RV32IZBAMXQCIAC-NEXT:    mul a1, a1, a2
+; RV32IZBAMXQCIAC-NEXT:    add a0, a0, a1
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 4573
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+; NOTE: This will become qc.shladd once support is added
+define dso_local i32 @pow2(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: pow2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a1, a1, 5
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: pow2:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    slli a1, a1, 5
+; RV32IMXQCIAC-NEXT:    add a0, a0, a1
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: pow2:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    slli a1, a1, 5
+; RV32IZBAMXQCIAC-NEXT:    add a0, a0, a1
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 32
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define dso_local i32 @shxadd(i32 %a, i32 %b) local_unnamed_addr #0 {
+; RV32IM-LABEL: shxadd:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    slli a1, a1, 1
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV32IMXQCIAC-LABEL: shxadd:
+; RV32IMXQCIAC:       # %bb.0: # %entry
+; RV32IMXQCIAC-NEXT:    slli a1, a1, 1
+; RV32IMXQCIAC-NEXT:    add a0, a0, a1
+; RV32IMXQCIAC-NEXT:    ret
+;
+; RV32IZBAMXQCIAC-LABEL: shxadd:
+; RV32IZBAMXQCIAC:       # %bb.0: # %entry
+; RV32IZBAMXQCIAC-NEXT:    sh1add a0, a1, a0
+; RV32IZBAMXQCIAC-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %b, 2
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
 | 
    
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, this is a pretty low-quality review.
The pattern is added, and works, so I'm happy, but I don't have an opinion on whether this is the right way to do this.
| 
           @topperc do you have any further comments on this?  | 
    
          
 I don't have a strong opinion on whether this is the right way to do this either.  | 
    
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't have a strong opinion on whether this is the right way to do this either.
Thanks for taking a look, Craig. My disposition is to merge this, given there are no objections and the codegen is working for this instruction. I realise I'm quite biased towards merging Xqci improvements.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| 
           Thanks Sam and Craig for your comments and approval. I think my initial patch and commit message did not explain things well. The changes in ISelLowering are only for some special cases that I have now mentioned in the commit message. I've pushed two commits to show the test cases where we need ISelLowering changes.  | 
    
In the `isInt` check that was added in #147661 we were passing the zero-extended `uint64_t` value instead of the sign-extended one.
…0211) In the `isInt` check that was added in llvm#147661 we were passing the zero-extended `uint64_t` value instead of the sign-extended one. (cherry picked from commit d3937e2)
…0211) In the `isInt` check that was added in llvm#147661 we were passing the zero-extended `uint64_t` value instead of the sign-extended one.
Add basic isel patterns for the multiple accumulate QC.MULIADD instruction.
While most case work with just the TD file pattern, there are few cases which need to be handled in ISelLowering depending on the immediate we are multiplying with:
The patch does not decompose mul if Xqciac is present, for the above conditions. There could be cases where this may not beneficial which I plan to address in follow up patches.