Address review comments

mshockwave · mshockwave · commit bd1b6f856ad8 · 2025-02-26T13:51:06.000-08:00
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
@@ -1,10 +1,59 @@
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
-# RUN:    --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN:    --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=LATENCY
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
-# RUN:    --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 --min-instructions=100 | FileCheck %s --check-prefix=RTHROUGHPUT
+# RUN:    --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 \
+# RUN:    --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=RTHROUGHPUT1
+
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN:    --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 \
+# RUN:    --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=RTHROUGHPUT2
+
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN:    --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 \
+# RUN:    --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=RTHROUGHPUT3
+
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN:    --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 \
+# RUN:    --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=RTHROUGHPUT4
+
+# These instructions are only eligible under the inverse throughput mode.
 
 # LATENCY-NOT: PseudoVCOMPRESS_VM_M2_E8
 # LATENCY-NOT: PseudoVCPOP_M_B32
+# LATENCY-NOT: PseudoVRGATHEREI16_VV_M2_E32_M1
+# LATENCY-NOT: PseudoVRGATHER_VI_M2
+# LATENCY-NOT: PseudoVRGATHER_VV_M8_E32
+# LATENCY-NOT: PseudoVRGATHER_VX_M4
+# LATENCY-NOT: PseudoVSLIDE1UP_VX_M1
+# LATENCY-NOT: PseudoVSLIDEUP_VI_M2
+# LATENCY-NOT: PseudoVSLIDEUP_VX_M2
+# LATENCY-NOT: PseudoVNCLIPU_WI_M2
+# LATENCY-NOT: PseudoVNSRA_WI_M2
+# LATENCY-NOT: PseudoVNSRL_WI_M2
 
-# RTHROUGHPUT: PseudoVCOMPRESS_VM_M2_E8
-# RTHROUGHPUT: PseudoVCPOP_M_B32
+# RTHROUGHPUT1: PseudoVCOMPRESS_VM_M2_E8
+# RTHROUGHPUT1: PseudoVCPOP_M_B32
+# RTHROUGHPUT2: PseudoVRGATHEREI16_VV_M2_E32_M1
+# RTHROUGHPUT2: PseudoVRGATHER_VI_M2
+# RTHROUGHPUT2: PseudoVRGATHER_VV_M8_E32
+# RTHROUGHPUT2: PseudoVRGATHER_VX_M4
+# RTHROUGHPUT3: PseudoVSLIDE1UP_VX_M1
+# RTHROUGHPUT3: PseudoVSLIDEUP_VI_M2
+# RTHROUGHPUT3: PseudoVSLIDEUP_VX_M2
+# RTHROUGHPUT4: PseudoVNCLIPU_WI_M2
+# RTHROUGHPUT4: PseudoVNSRA_WI_M2
+# RTHROUGHPUT4: PseudoVNSRL_WI_M2
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
@@ -81,8 +81,9 @@ Register RISCVExegesisPostprocessing::allocateGPRRegister(
     const MachineFunction &MF, const MachineRegisterInfo &MRI) {
   const auto &TRI = *MRI.getTargetRegisterInfo();
 
-  const TargetRegisterClass *GPRClass =
-      TRI.getRegClass(RISCV::GPRJALRRegClassID);
+  // Avoid allocating callee-saved registers: GPRTC happens to account for
+  // nearly all caller-saved registers.
+  const TargetRegisterClass *GPRClass = TRI.getRegClass(RISCV::GPRTCRegClassID);
   BitVector Candidates = TRI.getAllocatableSet(MF, GPRClass);
 
   for (unsigned SetIdx : Candidates.set_bits()) {
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
@@ -149,17 +149,15 @@ template <class BaseT> class RISCVSnippetGenerator : public BaseT {
         RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID};
 
     for (unsigned RegClassID : StandaloneRegClasses)
-      for (unsigned Reg : RegInfo.getRegClass(RegClassID)) {
+      for (unsigned Reg : RegInfo.getRegClass(RegClassID))
         AggregateRegisters.reset(Reg);
-      }
 
     // Initialize ELEN and VLEN.
-    // FIXME: We could have obtained these two from RISCVSubtarget
+    // FIXME: We could have obtained these two constants from RISCVSubtarget
     // but in order to get that from TargetMachine, we need a Function.
-    const Triple &TT = State.getTargetMachine().getTargetTriple();
-    ELEN = TT.isRISCV32() ? 32 : 64;
-
     const MCSubtargetInfo &STI = State.getSubtargetInfo();
+    ELEN = STI.checkFeatures("+zve64x") ? 64 : 32;
+
     std::string ZvlQuery;
     for (unsigned I = 5U, Size = (1 << I); I < 17U; ++I, Size <<= 1) {
       ZvlQuery = "+zvl";
@@ -175,15 +173,15 @@ template <class BaseT> class RISCVSnippetGenerator : public BaseT {
                         const BitVector &ForbiddenRegisters) const override;
 };
 
-static bool isMaskedSibiling(unsigned MaskedOp, unsigned UnmaskedOp) {
+static bool isMaskedSibling(unsigned MaskedOp, unsigned UnmaskedOp) {
   const auto *RVVMasked = RISCV::getMaskedPseudoInfo(MaskedOp);
   return RVVMasked && RVVMasked->UnmaskedPseudo == UnmaskedOp;
 }
 
 // There are primarily two kinds of opcodes that are not eligible
 // in a serial snippet:
-// (1) Only has a single use operand that can not be overlap with
-// the def operand.
+// (1) Has a use operand that cannot overlap with the def operand
+// (i.e. early clobber).
 // (2) The register file of the only use operand is different from
 // that of the def operand. For instance, use operand is vector and
 // the result is a scalar.
@@ -197,23 +195,15 @@ static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
   case RISCV::VCOMPRESS_VM:
   case RISCV::VCPOP_M:
   case RISCV::VCPOP_V:
+  // The permutation instructions listed below cannot have their destination
+  // overlap with the source.
   case RISCV::VRGATHEREI16_VV:
   case RISCV::VRGATHER_VI:
   case RISCV::VRGATHER_VV:
   case RISCV::VRGATHER_VX:
   case RISCV::VSLIDE1UP_VX:
   case RISCV::VSLIDEUP_VI:
   case RISCV::VSLIDEUP_VX:
-  // The truncate instructions that arraive here are those who cannot
-  // have any overlap between source and dest at all (i.e.
-  // those whoe don't satisfy condition 2 and 3 in RVV spec
-  // 5.2).
-  case RISCV::VNCLIPU_WI:
-  case RISCV::VNCLIPU_WV:
-  case RISCV::VNCLIPU_WX:
-  case RISCV::VNCLIP_WI:
-  case RISCV::VNCLIP_WV:
-  case RISCV::VNCLIP_WX:
     return true;
   default:
     return false;
@@ -372,8 +362,8 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
         const auto *RVVBase =
             RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW);
         if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode ||
-                        isMaskedSibiling(VPseudoOpcode, RVVBase->Pseudo) ||
-                        isMaskedSibiling(RVVBase->Pseudo, VPseudoOpcode))) {
+                        isMaskedSibling(VPseudoOpcode, RVVBase->Pseudo) ||
+                        isMaskedSibling(RVVBase->Pseudo, VPseudoOpcode))) {
           // There is an integrated SEW, remove all but the SEW pushed last.
           SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1);
           break;
@@ -395,7 +385,7 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
           }
         }
 
-        // The EEW for source operand in VSEXT and VZEXT is a fractional
+        // The EEW for the source operand in VSEXT and VZEXT is a fraction
         // of the SEW, hence only SEWs that will lead to valid EEW are allowed.
         if (auto Frac = isRVVSignZeroExtend(BaseOpcode))
           if (*SEW / *Frac < MinSEW) {
@@ -411,7 +401,7 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
                                              Feature_HasStdExtZvksedBit,
                                              Feature_HasStdExtZvkshBit})) {
           if (*SEW != 32)
-            // Zvknhb support SEW=64 as well.
+            // Zvknhb supports SEW=64 as well.
             if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) ||
                 !isOpcodeAvailableIn(BaseOpcode,
                                      {Feature_HasStdExtZvknhaOrZvknhbBit})) {