Skip to content

Conversation

@4vtomat
Copy link
Member

@4vtomat 4vtomat commented Jun 6, 2025

This patch supports the naive vset* insertion. If the state(tm, tn, tk,
sew, widen) changes, it emits all of the vset* instructions that are
needed, partial compatibility is not supported yet.

This is follow up patch for: #133031
Co-authored-by: Piyou Chen [email protected]

@llvmbot
Copy link
Member

llvmbot commented Jun 6, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Brandon Wu (4vtomat)

Changes

This patch supports the naive vset* insertion. If the state(tm, tn, tk,
sew, widen) changes, it emits all of the vset* instructions that are
needed, partial compatibility is not supported yet.

Co-authored-by: Piyou Chen <[email protected]>


Patch is 56.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143068.diff

11 Files Affected:

  • (modified) llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp (+4)
  • (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h (+65-1)
  • (modified) llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp (+201-9)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrFormats.td (+19)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.cpp (+3)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td (+13-9)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td (+6-3)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td (+138)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrPredicates.td (+32-1)
  • (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp (+4)
  • (added) llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir (+523)
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 1f434beca5388..8a18221832ecb 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1616,6 +1616,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                       "operand must be a valid system register "
                                       "name or an integer in the range");
   }
+  case Match_InvalidXSfmmVType: {
+    SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+    return generateXSfmmVTypeError(ErrorLoc);
+  }
   case Match_InvalidVTypeI: {
     SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
     return generateVTypeError(ErrorLoc);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 6ef94fb5e93da..e470d51c6c5fa 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -138,6 +138,25 @@ enum {
   // 3 -> SEW * 4
   DestEEWShift = ElementsDependOnMaskShift + 1,
   DestEEWMask = 3ULL << DestEEWShift,
+
+  // 0 -> Don't care about altfmt bit in VTYPE.
+  // 1 -> Is not altfmt.
+  // 2 -> Is altfmt(BF16).
+  AltFmtTypeShift = DestEEWShift + 2,
+  AltFmtTypeMask = 3ULL << AltFmtTypeShift,
+
+  IsWidenShift = AltFmtTypeShift + 2,
+  IsWidenMask = 1ULL << IsWidenShift,
+
+  // XSfmmbase
+  HasTWidenOpShift = IsWidenShift + 1,
+  HasTWidenOpMask = 1ULL << HasTWidenOpShift,
+
+  HasTMOpShift = HasTWidenOpShift + 1,
+  HasTMOpMask = 1ULL << HasTMOpShift,
+
+  HasTKOpShift = HasTMOpShift + 1,
+  HasTKOpMask = 1ULL << HasTKOpShift,
 };
 
 // Helper functions to read TSFlags.
@@ -179,6 +198,11 @@ static inline bool hasRoundModeOp(uint64_t TSFlags) {
   return TSFlags & HasRoundModeOpMask;
 }
 
+enum class AltFmtType { DontCare, NotAltFmt, AltFmt };
+static inline AltFmtType getAltFmtType(uint64_t TSFlags) {
+  return static_cast<AltFmtType>((TSFlags & AltFmtTypeMask) >> AltFmtTypeShift);
+}
+
 /// \returns true if this instruction uses vxrm
 static inline bool usesVXRM(uint64_t TSFlags) { return TSFlags & UsesVXRMMask; }
 
@@ -194,11 +218,47 @@ static inline bool elementsDependOnMask(uint64_t TSFlags) {
   return TSFlags & ElementsDependOnMaskMask;
 }
 
+// XSfmmbase
+static inline bool hasTWidenOp(uint64_t TSFlags) {
+  return TSFlags & HasTWidenOpMask;
+}
+
+static inline bool hasTMOp(uint64_t TSFlags) { return TSFlags & HasTMOpMask; }
+
+static inline bool hasTKOp(uint64_t TSFlags) { return TSFlags & HasTKOpMask; }
+
+static inline unsigned getTNOpNum(const MCInstrDesc &Desc) {
+  const uint64_t TSFlags = Desc.TSFlags;
+  assert(hasTWidenOp(TSFlags) && hasVLOp(TSFlags));
+  unsigned Offset = 3;
+  if (hasTKOp(TSFlags))
+    Offset = 4;
+  return Desc.getNumOperands() - Offset;
+}
+
+static inline unsigned getTMOpNum(const MCInstrDesc &Desc) {
+  const uint64_t TSFlags = Desc.TSFlags;
+  assert(hasTWidenOp(TSFlags) && hasTMOp(TSFlags));
+  if (hasTKOp(TSFlags))
+    return Desc.getNumOperands() - 5;
+  // vtzero.t
+  return Desc.getNumOperands() - 4;
+}
+
+static inline unsigned getTKOpNum(const MCInstrDesc &Desc) {
+  const uint64_t TSFlags = Desc.TSFlags;
+  assert(hasTWidenOp(TSFlags) && hasTKOp(TSFlags));
+  return Desc.getNumOperands() - 3;
+}
+
 static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
   const uint64_t TSFlags = Desc.TSFlags;
   // This method is only called if we expect to have a VL operand, and all
   // instructions with VL also have SEW.
   assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags));
+  // In Xsfmmbase, TN is alias for VL, so here we use the same TSFlags bit.
+  if (hasTWidenOp(TSFlags))
+    return getTNOpNum(Desc);
   unsigned Offset = 2;
   if (hasVecPolicyOp(TSFlags))
     Offset = 3;
@@ -216,7 +276,7 @@ static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) {
   const uint64_t TSFlags = Desc.TSFlags;
   assert(hasSEWOp(TSFlags));
   unsigned Offset = 1;
-  if (hasVecPolicyOp(TSFlags))
+  if (hasVecPolicyOp(TSFlags) || hasTWidenOp(TSFlags))
     Offset = 2;
   return Desc.getNumOperands() - Offset;
 }
@@ -233,6 +293,9 @@ static inline int getFRMOpNum(const MCInstrDesc &Desc) {
   if (!hasRoundModeOp(TSFlags) || usesVXRM(TSFlags))
     return -1;
 
+  if (hasTWidenOp(TSFlags) && hasTMOp(TSFlags))
+    return getTMOpNum(Desc) - 1;
+
   // The operand order
   // --------------------------------------
   // | n-1 (if any)   | n-2  | n-3 | n-4 |
@@ -375,6 +438,7 @@ enum OperandType : unsigned {
   // instructions to represent a value that be passed as AVL to either vsetvli
   // or vsetivli.
   OPERAND_AVL,
+  OPERAND_XSFMM_VTYPE,
 };
 } // namespace RISCVOp
 
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index e1c69c69a99ca..88d1178eba9b1 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -164,10 +164,13 @@ struct DemandedFields {
   // If this is true, we demand that VTYPE is set to some legal state, i.e. that
   // vill is unset.
   bool VILL = false;
+  bool UseTWiden = false;
+  bool UseAltFmt = false;
 
   // Return true if any part of VTYPE was used
   bool usedVTYPE() const {
-    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
+    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL ||
+           UseTWiden || UseAltFmt;
   }
 
   // Return true if any property of VL was used
@@ -183,6 +186,8 @@ struct DemandedFields {
     TailPolicy = true;
     MaskPolicy = true;
     VILL = true;
+    UseTWiden = true;
+    UseAltFmt = true;
   }
 
   // Mark all VL properties as demanded
@@ -208,6 +213,8 @@ struct DemandedFields {
     TailPolicy |= B.TailPolicy;
     MaskPolicy |= B.MaskPolicy;
     VILL |= B.VILL;
+    UseAltFmt |= B.UseAltFmt;
+    UseTWiden |= B.UseTWiden;
   }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -255,6 +262,8 @@ struct DemandedFields {
     OS << "TailPolicy=" << TailPolicy << ", ";
     OS << "MaskPolicy=" << MaskPolicy << ", ";
     OS << "VILL=" << VILL;
+    OS << "UseAltFmt=" << UseAltFmt << ", ";
+    OS << "UseTWiden=" << UseTWiden;
     OS << "}";
   }
 #endif
@@ -324,6 +333,15 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
   if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                              RISCVVType::isMaskAgnostic(NewVType))
     return false;
+  if (Used.UseTWiden && (RISCVVType::hasXSfmmWiden(CurVType) !=
+                             RISCVVType::hasXSfmmWiden(NewVType) ||
+                         (RISCVVType::hasXSfmmWiden(CurVType) &&
+                          RISCVVType::getXSfmmWiden(CurVType) !=
+                              RISCVVType::getXSfmmWiden(NewVType))))
+    return false;
+  if (Used.UseAltFmt &&
+      RISCVVType::isAltFmt(CurVType) != RISCVVType::isAltFmt(NewVType))
+    return false;
   return true;
 }
 
@@ -472,6 +490,11 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
     Res.TailPolicy = false;
   }
 
+  Res.UseAltFmt = RISCVII::getAltFmtType(MI.getDesc().TSFlags) !=
+                  RISCVII::AltFmtType::DontCare;
+  Res.UseTWiden = RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
+                  RISCVInstrInfo::isXSfmmVectorConfigInstr(MI);
+
   return Res;
 }
 
@@ -503,6 +526,8 @@ class VSETVLIInfo {
   uint8_t TailAgnostic : 1;
   uint8_t MaskAgnostic : 1;
   uint8_t SEWLMULRatioOnly : 1;
+  uint8_t AltFmt : 1;
+  uint8_t TWiden = 0;
 
 public:
   VSETVLIInfo()
@@ -579,6 +604,8 @@ class VSETVLIInfo {
   RISCVVType::VLMUL getVLMUL() const { return VLMul; }
   bool getTailAgnostic() const { return TailAgnostic; }
   bool getMaskAgnostic() const { return MaskAgnostic; }
+  bool getAltFmt() const { return AltFmt; }
+  unsigned getTWiden() const { return TWiden; }
 
   bool hasNonZeroAVL(const LiveIntervals *LIS) const {
     if (hasAVLImm())
@@ -640,21 +667,31 @@ class VSETVLIInfo {
     SEW = RISCVVType::getSEW(VType);
     TailAgnostic = RISCVVType::isTailAgnostic(VType);
     MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
+    AltFmt = RISCVVType::isAltFmt(VType);
+    TWiden =
+        RISCVVType::hasXSfmmWiden(VType) ? RISCVVType::getXSfmmWiden(VType) : 0;
   }
-  void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) {
+  void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA, bool Altfmt,
+                unsigned W) {
     assert(isValid() && !isUnknown() &&
            "Can't set VTYPE for uninitialized or unknown");
     VLMul = L;
     SEW = S;
     TailAgnostic = TA;
     MaskAgnostic = MA;
+    AltFmt = Altfmt;
+    TWiden = W;
   }
 
+  void setAltFmt(bool AF) { AltFmt = AF; }
+
   void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; }
 
   unsigned encodeVTYPE() const {
     assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
            "Can't encode VTYPE for uninitialized or unknown");
+    if (TWiden != 0)
+      return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt);
     return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
   }
 
@@ -667,9 +704,9 @@ class VSETVLIInfo {
            "Can't compare VTYPE in unknown state");
     assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
            "Can't compare when only LMUL/SEW ratio is valid.");
-    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
+    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden) ==
            std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
-                    Other.MaskAgnostic);
+                    Other.MaskAgnostic, Other.AltFmt, Other.TWiden);
   }
 
   unsigned getSEWLMULRatio() const {
@@ -819,6 +856,8 @@ class VSETVLIInfo {
        << "TailAgnostic=" << (bool)TailAgnostic << ", "
        << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
        << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
+    OS << "TWiden=" << (bool)TWiden << ", ";
+    OS << "AltFmt=" << (bool)AltFmt << "}";
   }
 #endif
 };
@@ -846,6 +885,11 @@ struct BlockData {
   BlockData() = default;
 };
 
+enum TKTMMode {
+  VSETTK = 0,
+  VSETTM = 1,
+};
+
 class RISCVInsertVSETVLI : public MachineFunctionPass {
   const RISCVSubtarget *ST;
   const TargetInstrInfo *TII;
@@ -901,6 +945,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
   VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
   VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
   void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
+  bool insertVSETMTK(MachineBasicBlock &MBB, TKTMMode Mode) const;
 };
 
 } // end anonymous namespace
@@ -938,6 +983,16 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
   VSETVLIInfo NewInfo;
   if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
     NewInfo.setAVLImm(MI.getOperand(1).getImm());
+  } else if (RISCVInstrInfo::isXSfmmVectorTNConfigInstr(MI)) {
+    Register ATReg = MI.getOperand(1).getReg();
+    switch (MI.getOpcode()) {
+    case RISCV::PseudoSF_VSETTNTX0:
+      NewInfo.setAVLVLMAX();
+      break;
+    case RISCV::PseudoSF_VSETTNT:
+      NewInfo.setAVLRegDef(getVNInfoFromReg(ATReg, MI, LIS), ATReg);
+      break;
+    }
   } else {
     assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
            MI.getOpcode() == RISCV::PseudoVSETVLIX0);
@@ -998,11 +1053,36 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
 
   RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
 
+  bool AltFmt = RISCVII::getAltFmtType(TSFlags) == RISCVII::AltFmtType::AltFmt;
+  InstrInfo.setAltFmt(AltFmt);
+
   unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
   // A Log2SEW of 0 is an operation on mask registers only.
   unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
 
+  if (RISCVII::hasTWidenOp(TSFlags)) {
+    assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+
+    const MachineOperand &TWidenOp =
+        MI.getOperand(MI.getNumExplicitOperands() - 1);
+    unsigned TWiden = TWidenOp.getImm();
+
+    InstrInfo.setAVLVLMAX();
+    if (RISCVII::hasVLOp(TSFlags)) {
+      const MachineOperand &TnOp =
+          MI.getOperand(RISCVII::getTNOpNum(MI.getDesc()));
+
+      if (TnOp.getReg().isVirtual())
+        InstrInfo.setAVLRegDef(getVNInfoFromReg(TnOp.getReg(), MI, LIS),
+                               TnOp.getReg());
+    }
+
+    InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden);
+
+    return InstrInfo;
+  }
+
   if (RISCVII::hasVLOp(TSFlags)) {
     const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
     if (VLOp.isImm()) {
@@ -1038,7 +1118,9 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
     assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
   }
 #endif
-  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+  // TODO: Propagate the twiden from previous vtype for potential reuse.
+  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt,
+                     /*TWiden*/ 0);
 
   forwardVSETVLIAVL(InstrInfo);
 
@@ -1048,8 +1130,30 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
-
   ++NumInsertedVSETVL;
+
+  if (Info.getTWiden()) {
+    if (Info.hasAVLVLMAX()) {
+      Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNTX0))
+                    .addReg(DestReg, RegState::Define | RegState::Dead)
+                    .addReg(RISCV::X0, RegState::Kill)
+                    .addImm(Info.encodeVTYPE());
+      if (LIS) {
+        LIS->InsertMachineInstrInMaps(*MI);
+        LIS->createAndComputeVirtRegInterval(DestReg);
+      }
+    } else {
+      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNT))
+                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+                    .addReg(Info.getAVLReg())
+                    .addImm(Info.encodeVTYPE());
+      if (LIS)
+        LIS->InsertMachineInstrInMaps(*MI);
+    }
+    return;
+  }
+
   if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
     // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
     // VLMAX.
@@ -1190,7 +1294,8 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
     // be coalesced into another vsetvli since we won't demand any fields.
     VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
     NewInfo.setAVLImm(1);
-    NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
+    NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true,
+                     /*AltFmt*/ false, /*W*/ 0);
     Info = NewInfo;
     return;
   }
@@ -1232,7 +1337,9 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
       (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
           IncomingInfo.getTailAgnostic(),
       (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
-          IncomingInfo.getMaskAgnostic());
+          IncomingInfo.getMaskAgnostic(),
+      (Demanded.UseAltFmt ? IncomingInfo : Info).getAltFmt(),
+      Demanded.UseTWiden ? IncomingInfo.getTWiden() : 0);
 
   // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
   // the AVL.
@@ -1285,7 +1392,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
 
     if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
         RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
-        isVectorCopy(ST->getRegisterInfo(), MI))
+        isVectorCopy(ST->getRegisterInfo(), MI) ||
+        RISCVInstrInfo::isXSfmmVectorConfigInstr(MI))
       HadVectorOp = true;
 
     transferAfter(Info, MI);
@@ -1667,6 +1775,10 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
   };
 
   for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
+    // TODO: Support XSfmm.
+    if (RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
+        RISCVInstrInfo::isXSfmmVectorConfigInstr(MI))
+      continue;
 
     if (!RISCVInstrInfo::isVectorConfigInstr(MI)) {
       Used.doUnion(getDemanded(MI, ST));
@@ -1774,6 +1886,80 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
   }
 }
 
+static void shrinkIntervalAndRemoveDeadMI(MachineOperand &MO,
+                                          LiveIntervals *LIS) {
+  Register Reg = MO.getReg();
+  MO.setReg(RISCV::NoRegister);
+  MO.setIsKill(false);
+
+  if (!LIS)
+    return;
+
+  LiveInterval &LI = LIS->getInterval(Reg);
+
+  // Erase the AVL operand from the instruction.
+  SmallVector<MachineInstr *> DeadMIs;
+  LIS->shrinkToUses(&LI, &DeadMIs);
+  // TODO: Enable this once needVSETVLIPHI is supported.
+  // SmallVector<LiveInterval *> SplitLIs;
+  // LIS->splitSeparateComponents(LI, SplitLIs);
+
+  for (MachineInstr *DeadMI : DeadMIs) {
+    LIS->RemoveMachineInstrFromMaps(*DeadMI);
+    DeadMI->eraseFromParent();
+  }
+}
+
+bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB,
+                                       TKTMMode Mode) const {
+
+  bool Changed = false;
+  for (auto &MI : MBB) {
+    uint64_t TSFlags = MI.getDesc().TSFlags;
+    if (RISCVInstrInfo::isXSfmmVectorConfigTMTKInstr(MI) ||
+        !RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasTWidenOp(TSFlags))
+      continue;
+
+    VSETVLIInfo CurrInfo = computeInfoForInstr(MI);
+
+    if (Mode == VSETTK && !RISCVII::hasTKOp(TSFlags))
+      continue;
+
+    if (Mode == VSETTM && !RISCVII::hasTMOp(TSFlags))
+      continue;
+
+    unsigned OpNum = 0;
+    unsigned Opcode = 0;
+    switch (Mode) {
+    case VSETTK:
+      OpNum = RISCVII::getTKOpNum(MI.getDesc());
+      Opcode = RISCV::PseudoSF_VSETTK;
+      break;
+    case VSETTM:
+      OpNum = RISCVII::getTMOpNum(MI.getDesc());
+      Opcode = RISCV::PseudoSF_VSETTM;
+      break;
+    }
+
+    assert(OpNum && Opcode && "Invalid OpNum or Opcode");
+
+    const MachineOperand &Op = MI.getOperand(OpNum);
+
+    auto TmpMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opcode))
+                     .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+                     .addReg(Op.getReg())
+                     .addImm(Log2_32(CurrInfo.getSEW()))
+                     .addImm((CurrInfo.getTWiden() >> 1) + 1);
+
+    Changed = true;
+    if (LIS)
+      LIS->InsertMachineInstrInMaps(*TmpMI);
+
+    shrinkIntervalAndRemoveDeadMI(MI.getOperand(OpNum), LIS);
+  }
+  return Changed;
+}
+
 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   // Skip if the vector extension is not enabled.
   ST = &MF.getSubtarget<RISCVSubtarget>();
@@ -1851,6 +2037,12 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   for (MachineBasicBlock &MBB : MF)
     insertReadVL(MBB);
 
+  for (MachineBasicBlock &MBB : MF)
+    insertVSETMTK(MBB, VSETTM);
+
+  for (MachineBasicBlock &MBB : MF)
+    insertVSETMTK(MBB, VSETTK);
+
   BlockInfo.clear();
   return HaveVectorOp;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 088a6923fadb1..81105b7815838 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -257,6 +257,25 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
   // Indicates the EEW of a vector instruction's destination operand.
   EEW DestEEW = EEWSEWx1;
   let TSFlags{25-24} = DestEEW.Value;
+
+  // 0 -> Don't care about altfmt bit in VTYPE.
+  // 1 -> Is not altfmt.
+  // 2 -> Is altfmt(BF16).
+  bits<2> AltFmtType = 0;
+  let TSFlags{27-26} = AltFmtType;
+
+  bit IsWiden = 0;
+  let TSFlags{28} = IsWiden;
+
+  // XSfmmbase
+  bit HasTWidenOp = 0;
+  let TSFlags{29} = HasTWidenOp;
+
+  bit HasTmOp = 0;
+  let TSFlags{30} = HasTmOp;
+
+  bit HasTkOp = 0;
+  let TSFlags{31} = HasTkOp;
 }
 
 class RVInst<dag outs, dag ins, string opcodestr, string argstr,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 86a4e8e370ee6..5c6270da592ec 100644
--- a/llvm/lib/Target/RISCV/RISCVInst...
[truncated]

@github-actions
Copy link

github-actions bot commented Jun 6, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@topperc
Copy link
Collaborator

topperc commented Jul 7, 2025

Please address the clang-format failure

.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(Op.getReg())
.addImm(Log2_32(CurrInfo.getSEW()))
.addImm((CurrInfo.getTWiden() >> 1) + 1);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't really the right math. It just happens to work because getTWiden can only return 1, 2, or 4. If it could return 8, this math would return 5 when the real encoding would be 4 if it existed. Please use Log2_32(CurrInfo.getTWiden()) + 1.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh that's a good catch!

for (MachineBasicBlock &MBB : MF)
insertVSETMTK(MBB, VSETTM);

for (MachineBasicBlock &MBB : MF)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can these loops be combined?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I think so

@4vtomat
Copy link
Member Author

4vtomat commented Sep 19, 2025

rebase

@4vtomat 4vtomat force-pushed the xsfmm_pseudo branch 2 times, most recently from 6b8d3a5 to 9feb458 Compare September 22, 2025 14:39
This patch supports the naive vset* insertion. If the state(tm, tn, tk,
sew, widen) changes, it emits all of the vset* instructions that are
needed, partial compatibility is not supported yet.

Co-authored-by: Piyou Chen <[email protected]>
@topperc
Copy link
Collaborator

topperc commented Sep 22, 2025

@4vtomat please do not force push to PRs. https://llvm.org/docs/GitHub.html#rebasing-pull-requests-and-force-pushes You can merge main instead of rebasing.

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

if (LIS)
LIS->InsertMachineInstrInMaps(*TmpMI);

shrinkIntervalAndRemoveDeadMI(MI.getOperand(OpNum), LIS, TII);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you mark Op as non const can you pass it in here?


LiveInterval &LI = LIS->getInterval(Reg);

// Erase the AVL operand from the instruction.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should say the TK or TM operand?

I'm also wondering, if TmpMI is inserted just before MI in `insertVSETMTK, and TmpMI always uses TK/TM, then will the definition of TK/TM ever be dead? If we just ignore the DeadMIs parameter do we get any regressions?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think so, I think we dont get any regressions if we ignore DeadMIs for now. Actually I'm not familiar with this part, why do we have DeadMI in normal vsetvli insertion: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp#L1455-L1473

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's because some instructions like vmv.s.x won't end up using the AVL in a vsetvli instruction so their AVL will end up dead (due to transferBefore etc), see https://github.com/llvm/llvm-project/pull/65934/files

But it doesn't look we do anything similar for VSETTK/VSETTM so we can probably just remove this bit of code

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it! thanks!

Copy link
Contributor

@lukel97 lukel97 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, just some nits

if (LIS)
LIS->InsertMachineInstrInMaps(*TmpMI);

shrinkInterval(Op, LIS, TII);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit, I think this function is small enough it can just be inlined now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great idea!

@4vtomat 4vtomat enabled auto-merge (squash) October 13, 2025 14:37
@4vtomat 4vtomat merged commit 50aac2c into llvm:main Oct 13, 2025
9 of 10 checks passed
@4vtomat 4vtomat deleted the xsfmm_pseudo branch October 13, 2025 15:06
4vtomat added a commit that referenced this pull request Oct 14, 2025
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Oct 14, 2025
akadutta pushed a commit to akadutta/llvm-project that referenced this pull request Oct 14, 2025
…m#143068)

This patch supports the naive vset* insertion. If the state(tm, tn, tk,
sew, widen) changes, it emits all of the vset* instructions that are
needed, partial compatibility is not supported yet.

This is follow up patch for:
llvm#133031
Co-authored-by: Piyou Chen <[email protected]>
Co-authored-by: Craig Topper <[email protected]>
akadutta pushed a commit to akadutta/llvm-project that referenced this pull request Oct 14, 2025
4vtomat added a commit that referenced this pull request Oct 24, 2025
In this version of intrinsics, users need to manage the life time of
tiles on their own, compiler doesn't have tile type for variables not
only for design simplicity but also preventing users to write bad
performance code that could potentially having tile spills which are
quite expensive in terms of cycles.

Intrinsics are specified at the end of this document
https://www.sifive.com/document-file/xsfmm-matrix-extensions-specification

stack on: #143068 and
#143069
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Oct 24, 2025
In this version of intrinsics, users need to manage the life time of
tiles on their own, compiler doesn't have tile type for variables not
only for design simplicity but also preventing users to write bad
performance code that could potentially having tile spills which are
quite expensive in terms of cycles.

Intrinsics are specified at the end of this document
https://www.sifive.com/document-file/xsfmm-matrix-extensions-specification

stack on: llvm/llvm-project#143068 and
llvm/llvm-project#143069
dvbuka pushed a commit to dvbuka/llvm-project that referenced this pull request Oct 27, 2025
In this version of intrinsics, users need to manage the life time of
tiles on their own, compiler doesn't have tile type for variables not
only for design simplicity but also preventing users to write bad
performance code that could potentially having tile spills which are
quite expensive in terms of cycles.

Intrinsics are specified at the end of this document
https://www.sifive.com/document-file/xsfmm-matrix-extensions-specification

stack on: llvm#143068 and
llvm#143069
Lukacma pushed a commit to Lukacma/llvm-project that referenced this pull request Oct 29, 2025
In this version of intrinsics, users need to manage the life time of
tiles on their own, compiler doesn't have tile type for variables not
only for design simplicity but also preventing users to write bad
performance code that could potentially having tile spills which are
quite expensive in terms of cycles.

Intrinsics are specified at the end of this document
https://www.sifive.com/document-file/xsfmm-matrix-extensions-specification

stack on: llvm#143068 and
llvm#143069
aokblast pushed a commit to aokblast/llvm-project that referenced this pull request Oct 30, 2025
In this version of intrinsics, users need to manage the life time of
tiles on their own, compiler doesn't have tile type for variables not
only for design simplicity but also preventing users to write bad
performance code that could potentially having tile spills which are
quite expensive in terms of cycles.

Intrinsics are specified at the end of this document
https://www.sifive.com/document-file/xsfmm-matrix-extensions-specification

stack on: llvm#143068 and
llvm#143069
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants