[AMDGPU] Fix opsel for scaled MFMA operations #140183

VigneshwarJ · 2025-05-16T03:44:53Z

Fix for opsel flags encoding and ASM parsing of the scaled MFMA

llvmbot · 2025-05-16T03:45:25Z

@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Vigneshwar Jayakumar (VigneshwarJ)

Changes

Fix for opsel flags encoding and ASM parsing of the scaled MFMA

Patch is 40.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140183.diff

7 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+78-7)
(modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+8-9)
(modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+10-10)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll (+16-16)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll (+23-23)
(modified) llvm/test/MC/AMDGPU/mai-gfx950.s (+6-6)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt (+6-6)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b50a2cf1becf7..450c4fffb8453 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1878,6 +1878,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
 
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);
+  void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
@@ -6796,17 +6797,25 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
   return ParseStatus::Success;
 }
 
-static void addOptionalImmOperand(
-  MCInst& Inst, const OperandVector& Operands,
-  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
-  AMDGPUOperand::ImmTy ImmT,
-  int64_t Default = 0) {
+static void
+addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
+                      AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
+                      AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
+                      std::optional<unsigned> InsertAt = std::nullopt) {
   auto i = OptionalIdx.find(ImmT);
   if (i != OptionalIdx.end()) {
     unsigned Idx = i->second;
-    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
+    const AMDGPUOperand &Op =
+        static_cast<const AMDGPUOperand &>(*Operands[Idx]);
+    if (InsertAt)
+      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
+    else
+      Op.addImmOperands(Inst, 1);
   } else {
-    Inst.addOperand(MCOperand::createImm(Default));
+    if (InsertAt.has_value())
+      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
+    else
+      Inst.addOperand(MCOperand::createImm(Default));
   }
 }
 
@@ -8823,6 +8832,68 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
     Inst.getOperand(ModIdx).setImm(ModVal);
   }
 }
+void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
+                                    const OperandVector &Operands) {
+  OptionalImmIndexMap OptionalIdx;
+  unsigned Opc = Inst.getOpcode();
+  unsigned I = 1;
+
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
+    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
+
+  for (unsigned E = Operands.size(); I != E; ++I) {
+    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
+
+    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+    } else if (Op.isImmModifier()) {
+      OptionalIdx[Op.getImmTy()] = I;
+    } else {
+      Op.addRegOrImmOperands(Inst, 1);
+    }
+  }
+
+  // Insert CBSZ and BLGP operands for F8F6F4 variants
+  int InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCBSZ,
+                        0, InsertPos);
+  InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyBLGP,
+                        0, InsertPos);
+
+  // Add dummy src_modifiers
+  Inst.addOperand(MCOperand::createImm(0));
+  Inst.addOperand(MCOperand::createImm(0));
+
+  // Handle op_sel fields
+
+  unsigned OpSel = 0;
+  auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
+  if (OpselIdx != OptionalIdx.end())
+    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
+                .getImm();
+
+  unsigned OpSelHi = 0;
+  auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
+  if (OpselHiIdx != OptionalIdx.end())
+    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
+                  .getImm();
+  static const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
+                                          AMDGPU::OpName::src1_modifiers};
+
+  for (unsigned J = 0; J < 2; ++J) {
+    unsigned ModVal = 0;
+    if (OpSel & (1 << J))
+      ModVal |= SISrcMods::OP_SEL_0;
+    if (OpSelHi & (1 << J))
+      ModVal |= SISrcMods::OP_SEL_1;
+
+    const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+    Inst.getOperand(ModIdx).setImm(ModVal);
+  }
+}
 
 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptionalIdx) {
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index a3be49bf02648..6315d14dbbc92 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -829,12 +829,12 @@ class MFMA_F8F6F4_WithSizeTable_Helper<VOP3_Pseudo  ps, string F8F8Op> :
 // Currently assumes scaled instructions never have abid
 class MAIFrag<SDPatternOperator Op, code pred, bit HasAbid = true, bit Scaled = false> : PatFrag <
   !if(Scaled, (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$blgp,
-                   node:$scale_src0_opsel, node:$scale_src0,
-                   node:$scale_src1_opsel, node:$scale_src1),
+                   node:$src0_modifiers, node:$scale_src0,
+                   node:$src1_modifiers, node:$scale_src1),
               !con((ops node:$src0, node:$src1, node:$src2, node:$cbsz),
                    !if(HasAbid, (ops node:$abid), (ops)),
                     (ops node:$blgp))),
-  !if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $scale_src0_opsel, $scale_src0, $scale_src1_opsel, $scale_src1),
+  !if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $src0_modifiers, $scale_src0, $src1_modifiers, $scale_src1),
               !if(HasAbid, (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
                            (Op $src0, $src1, $src2, $cbsz, $blgp))),
   pred
@@ -895,12 +895,12 @@ class ScaledMAIInst<string OpName, MAIInst BaseInst, SDPatternOperator node> :
   let InOperandList = !con(BaseInst.InOperandList,
     (ins VSrc_b32:$scale_src0,
          VSrc_b32:$scale_src1,
-         op_sel0:$scale_src0_opsel,
-         op_sel_hi0:$scale_src1_opsel));
+         op_sel0:$src0_modifiers,
+         op_sel_hi0:$src1_modifiers));
   let AsmOperands =
     "$vdst, $src0, $src1, $src2, $scale_src0, $scale_src1"
-    "$scale_src0_opsel$scale_src1_opsel$cbsz$blgp";
-
+    "$src0_modifiers$src1_modifiers$cbsz$blgp";
+  let AsmMatchConverter = "cvtScaledMFMA";
   let FixedSize = 1;
   let Size = 16;
 }
@@ -2041,7 +2041,6 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> {
   defvar PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64");
   defvar Name = PS_ACD.Mnemonic;
   defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
-
   let SubtargetPredicate = HasGFX950Insts,
       DecoderNamespace = "GFX940",
       AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
@@ -2057,7 +2056,7 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> {
 
 multiclass VOP3PX_Real_ScaledMFMA_F8F6F4_mc<bits<7> op> {
   defm _f8_f8 : VOP3PX_Real_ScaledMFMA<op>;
-
+ 
   let isAsmParserOnly = 1 in { // Disable ambiguous disassembly.
   defm _f8_f6 : VOP3PX_Real_ScaledMFMA<op>;
   defm _f6_f8 : VOP3PX_Real_ScaledMFMA<op>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index f9fa83c3f5ae7..1c97c40e26083 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -526,14 +526,14 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_
   bits<9> scale_src0;
   bits<9> scale_src1;
 
-  bits<2> scale_src0_opsel;
-  bits<2> scale_src1_opsel;
+  bits<4> src0_modifiers;
+  bits<4> src1_modifiers;
 
   // Inst{7-0} = unused
   // Inst{10-8} = neg_hi;
   // Inst{13-11} = op_sel
-  let Inst{11} = scale_src0_opsel{0};
-  let Inst{12} = scale_src1_opsel{0};
+  let Inst{11} = src0_modifiers{2};
+  let Inst{12} = src1_modifiers{2};
   // Inst{13} = unused op_sel
   // Inst{14} = unused op_sel_hi2
 
@@ -542,8 +542,8 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_
   let Inst{49-41} = scale_src1;
   // Inst{50-58} = unused
   // Inst{60-59} = op_sel_hi;
-  let Inst{59} = scale_src0_opsel{1};
-  let Inst{60} = scale_src1_opsel{1};
+  let Inst{59} = src0_modifiers{3};
+  let Inst{60} = src1_modifiers{3};
   // Inst{63-61} = neg;
 
   // The high half of the encoding is the unscaled mfma op.
@@ -1437,17 +1437,17 @@ class getVOP3MAIScaledPat<VOPProfile P, SDPatternOperator node> {
                       // mfma
                       [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
                                             timm:$cbsz, timm:$blgp,
-                                            MFMALdScaleModifierOp:$scale_src0_opsel,
+                                            MFMALdScaleModifierOp:$src0_modifiers,
                                             i32:$scale_src0,
-                                            MFMALdScaleModifierOp:$scale_src1_opsel,
+                                            MFMALdScaleModifierOp:$src1_modifiers,
                                             i32:$scale_src1
                                             ))],
                       // smfmac
                       [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i32:$idx,
                                             timm:$cbsz, timm:$abid,
-                                            MFMALdScaleModifierOp:$scale_src0_opsel,
+                                            MFMALdScaleModifierOp:$src0_modifiers,
                                             i32:$scale_src0,
-                                            MFMALdScaleModifierOp:$scale_src1_opsel,
+                                            MFMALdScaleModifierOp:$src1_modifiers,
                                             i32:$scale_src1))]);
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
index 7cc726a3bd79c..e027dda957a6d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
@@ -46,7 +46,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[0,0,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -70,7 +70,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[1,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -94,7 +94,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -118,7 +118,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[0,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -142,7 +142,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,0,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -166,7 +166,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[1,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -190,7 +190,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1797,7 +1797,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[1,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1819,7 +1819,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
 ; SDAG-NEXT:    v_accvgpr_write_b32 a2, v18
 ; SDAG-NEXT:    v_accvgpr_write_b32 a3, v19
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0]
+; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[1,1,0]
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 3
 ; SDAG-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1837,7 +1837,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
 ; GISEL-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GISEL-NEXT:    v_mov_b32_e32 v16, 0x41
 ; GISEL-NEXT:    s_nop 1
-; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
+; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[1,1,0]
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT:    s_nop 3
 ; GISEL-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1860,7 +1860,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
 ; SDAG-NEXT:    v_accvgpr_write_b32 a3, v19
 ; SDAG-NEXT:    v_mov_b32_e32 v16, 0x4d
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0]
+; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[1,1,0]
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 3
 ; SDAG-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1879,7 +1879,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
 ; GISEL-NEXT:    v_mov_b32_e32 v16, 0x41
 ; GISEL-NEXT:    v_mov_b32_e32 v17, 0x4d
 ; GISEL-NEXT:    s_nop 1
-; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0]
+; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[1,1,0]
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT:    s_nop 3
 ; GISEL-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1921,7 +1921,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
 ; SDAG-NEXT:    v_accvgpr_write_b32 a3, s11
 ; SDAG-NEXT:    v_mov_b32_e32 v17, s13
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel_hi:[0,0,0] blgp:2
+; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 3
 ; SDAG-NEXT:    global_store_dwordx4 v16, a[0:3], s[14:15]
@@ -1946,7 +1946,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
 ; GISEL-NEXT:    v_accvgpr_write_b32 a3, s27
 ; GISEL-NEXT:    v_mov_b32_e32 v16, s29
 ; GISEL-NEXT:    s_nop 1
-; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel_hi:[0,0,0] blgp:2
+; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT:    s_nop 2
@@ -1987,7 +1987,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
 ; SDAG-NEXT:    v_accvgpr_write_b32 a2, s2
 ; SDAG-NEXT:    v_accvgpr_write_b32 a3, s3
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel_hi:[0,0,0]
+; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 3
 ; SDAG-NEXT:    global_store_dwordx4 v16, a[0:3], s[4:5]
@@ -2013,7 +2013,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
 ; GISEL-NEXT:    v_accvgpr_write_b32 a2, s2
 ; GISEL-NEXT:    v_accvgpr_write_b32 a3, s3
 ; GISEL-NEXT:    s_nop 1
-; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
+; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT:    s_nop 2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
index dac54c9f85e96..5574313f22a47 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
@@ -134,7 +134,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x
 ; SDAG-NEXT:    v_accvgpr_write_b32 a14, v30
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 0
-; SDAG-NEXT:    v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0]
+; SDAG-NEXT:    v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel:[1,1,0] op_sel_hi:[0,0,0]
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 3
@@ -179,7 +179,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x
 ; GISEL-NEXT:    v_accvgpr_write_b32 a14, v30
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-NEXT:    s_nop 0
-; GISEL-NEXT:    v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0]
+; GISEL-NEXT:    v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel:[1,1,0] op_sel_hi:[0,0,0]
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT: ...
[truncated]

shiltian · 2025-05-16T03:51:51Z

llvm/test/MC/AMDGPU/mai-gfx950.s

 v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 cbsz:3 blgp:1

-// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x08,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
+// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]


A side question: SP3 says this should be:

v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel:[0,1] op_sel_hi:[0,0] cbsz:3 abid:1 blgp:1

Why do we not emit abid:1 here?

abid is set to 1 default for scaled mfma, we are not just printing it.

arsenm · 2025-05-16T06:48:29Z

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

 multiclass VOP3PX_Real_ScaledMFMA_F8F6F4_mc<bits<7> op> {
  defm _f8_f8 : VOP3PX_Real_ScaledMFMA<op>;
-
+ 


Introduced trailing whitespace

arsenm · 2025-05-16T06:52:35Z

llvm/lib/Target/AMDGPU/VOPInstructions.td

+  bits<4> src0_modifiers;
+  bits<4> src1_modifiers;


Needs comment explaining these. If you're writing custom operand handling maybe we shouldn't rename these? It is a weird case

I thought so too, but this matches the current src_modifiers field and uses an existing bit mask 4 bit value (OpselHI, OpselLo, Abs and Neg) in TableGen. There is also already a logic in place for asm printing using this bit mask. Also, in previous revision, it was a 2-bit field, which was reading the lower 2 bits hence not generating the right encoding for opsel bits. Though this MFMA instruction doesn't use neg and abs, I thought it would be cleaner to match the existing implementation.

I think it does support neg, it's just not wired up

Oh I see it, I see this in doc " NEG[1:0] and ABS[1:0] must be zero. NEG[2] and ABS[2] may be used to control matrix C" . So I will wire up a src2_modifier with neg/abs bits mapped and add some tests for it as well

arsenm · 2025-05-16T06:53:08Z

llvm/test/MC/AMDGPU/mai-gfx950.s

 v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:29], v[32:47], v48, v49 blgp:2

-// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel_hi:[0,0,0] cbsz:2 blgp:3 ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x18,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64]
+// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:2 blgp:3 ; encoding: [0x00,0x10,0xac,0xd3,0x30,0x63,0x02,0x10,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64]


This doesn't seem like enough lines changed. Do we have every permutation of values tested?

Fixed review comments

This reverts commit 67a22cc.

arsenm · 2025-05-21T16:21:44Z

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

+  if (OpselIdx != OptionalIdx.end())
+    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
+                .getImm();
+


Suggested change

if (OpselIdx != OptionalIdx.end())

OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])

.getImm();

if (OpselIdx != OptionalIdx.end()) {

OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])

.getImm();

}

arsenm · 2025-05-21T16:21:59Z

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

+  if (OpselHiIdx != OptionalIdx.end())
+    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
+                  .getImm();


Suggested change

if (OpselHiIdx != OptionalIdx.end())

OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])

.getImm();

if (OpselHiIdx != OptionalIdx.end()) {

OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])

.getImm();

}

arsenm · 2025-05-21T16:23:07Z

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

+  static const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
+                                          AMDGPU::OpName::src1_modifiers};


Suggested change

static const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,

AMDGPU::OpName::src1_modifiers};

const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,

AMDGPU::OpName::src1_modifiers};

llvm-ci · 2025-05-21T17:59:43Z

LLVM Buildbot has detected a new failure on builder sanitizer-aarch64-linux running on sanitizer-buildbot8 while building llvm at step 2 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/16552

Here is the relevant piece of the build log for the reference

Step 2 (annotate) failure: 'python ../sanitizer_buildbot/sanitizers/zorg/buildbot/builders/sanitizers/buildbot_selector.py' (failure)
...
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 5734 tests, 72 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80
FAIL: ThreadSanitizer-aarch64 :: cxa_guard_acquire.cpp (4711 of 5734)
******************** TEST 'ThreadSanitizer-aarch64 :: cxa_guard_acquire.cpp' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang  --driver-mode=g++ -fsanitize=thread -Wall   -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   -nobuiltininc -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include -idirafter /home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include -resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build -Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux  -gline-tables-only -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -std=c++11 -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -nostdinc++ -I/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/tsan/libcxx_tsan_aarch64/include/c++/v1 -O1 /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp -o /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp &&  /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp 2>&1 | FileCheck /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp # RUN: at line 1
+ /home/b/sanitizer-aarch64-linux/build/build_default/bin/clang --driver-mode=g++ -fsanitize=thread -Wall -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta -nobuiltininc -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include -idirafter /home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include -resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build -Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux -gline-tables-only -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -std=c++11 -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -nostdinc++ -I/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/tsan/libcxx_tsan_aarch64/include/c++/v1 -O1 /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp -o /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp
+ /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp
+ FileCheck /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp
/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp:71:17: error: CHECK-NEXT: is not on the line after the previous match
 // CHECK-NEXT: Enter constructor
                ^
<stdin>:3:1: note: 'next' match was here
Enter constructor
^
<stdin>:1:11: note: previous match ended here
Enter main
          ^
<stdin>:2:1: note: non-matching line after previous match is here
Enter potentially blocking region
^

Input file: <stdin>
Check file: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp

-dump-input=help explains the following input dump.

Input was:
<<<<<<
         1: Enter main 
         2: Enter potentially blocking region 
         3: Enter constructor 
next:71     !~~~~~~~~~~~~~~~~  error: match on wrong line
         4: Exit constructor 
         5: Exit potentially blocking region 
         6: Enter constructor 2 
         7: Exit main 
>>>>>>
Step 16 (test standalone compiler-rt) failure: test standalone compiler-rt (failure)
...
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 5734 tests, 72 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80
FAIL: ThreadSanitizer-aarch64 :: cxa_guard_acquire.cpp (4711 of 5734)
******************** TEST 'ThreadSanitizer-aarch64 :: cxa_guard_acquire.cpp' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang  --driver-mode=g++ -fsanitize=thread -Wall   -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   -nobuiltininc -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include -idirafter /home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include -resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build -Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux  -gline-tables-only -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -std=c++11 -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -nostdinc++ -I/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/tsan/libcxx_tsan_aarch64/include/c++/v1 -O1 /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp -o /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp &&  /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp 2>&1 | FileCheck /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp # RUN: at line 1
+ /home/b/sanitizer-aarch64-linux/build/build_default/bin/clang --driver-mode=g++ -fsanitize=thread -Wall -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta -nobuiltininc -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include -idirafter /home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include -resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build -Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux -gline-tables-only -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -std=c++11 -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -nostdinc++ -I/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/tsan/libcxx_tsan_aarch64/include/c++/v1 -O1 /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp -o /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp
+ /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp
+ FileCheck /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp
/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp:71:17: error: CHECK-NEXT: is not on the line after the previous match
 // CHECK-NEXT: Enter constructor
                ^
<stdin>:3:1: note: 'next' match was here
Enter constructor
^
<stdin>:1:11: note: previous match ended here
Enter main
          ^
<stdin>:2:1: note: non-matching line after previous match is here
Enter potentially blocking region
^

Input file: <stdin>
Check file: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp

-dump-input=help explains the following input dump.

Input was:
<<<<<<
         1: Enter main 
         2: Enter potentially blocking region 
         3: Enter constructor 
next:71     !~~~~~~~~~~~~~~~~  error: match on wrong line
         4: Exit constructor 
         5: Exit potentially blocking region 
         6: Enter constructor 2 
         7: Exit main 
>>>>>>

to pick up llvm/llvm-project#140183

Pulls in llvm/llvm-project#139956 to fix an regression on AMD backend. Also to pick up llvm/llvm-project#140183.

Fix for src scale opsel flags encoding and ASM parsing for gfx950 scaled MFMA.

… (llvm#2319) Fix for src scale opsel flags encoding and ASM parsing for gfx950 scaled MFMA.

to pick up llvm/llvm-project#140183

… (llvm#2466) Fix for src scale opsel flags encoding and ASM parsing for gfx950 scaled MFMA. (cherry picked from commit e12cbd8) Co-authored-by: Vigneshwar Jayakumar <[email protected]>

Pulls in llvm/llvm-project#139956 to fix an regression on AMD backend. Also to pick up llvm/llvm-project#140183.

[AMDGPU] Fix opsel for scaled MFMA operations

f33f712

Fix for opsel flags encoding and ASM parsing of the scaled MFMA

llvmbot added backend:AMDGPU llvm:mc Machine (object) code labels May 16, 2025

VigneshwarJ requested review from Sisyph and arsenm May 16, 2025 03:47

shiltian reviewed May 16, 2025

View reviewed changes

arsenm reviewed May 16, 2025

View reviewed changes

Added neg and abs modifier for src2

67a22cc

Fixed review comments

minor change

e73a9cb

VigneshwarJ requested a review from arsenm May 20, 2025 14:10

VigneshwarJ added 2 commits May 20, 2025 10:45

Merge branch 'main' into OpSelFix

08a5f93

Revert "Added neg and abs modifier for src2"

87f8800

This reverts commit 67a22cc.

arsenm approved these changes May 21, 2025

View reviewed changes

review changes

8e30e0a

VigneshwarJ merged commit e12cbd8 into llvm:main May 21, 2025
8 of 10 checks passed

makslevental added a commit to makslevental/triton that referenced this pull request May 21, 2025

[BACKEND] bump to e12cbd8

7439aab

to pick up llvm/llvm-project#140183

makslevental mentioned this pull request May 21, 2025

[BACKEND] bump to e12cbd8 triton-lang/triton#6902

Merged

antiagainst pushed a commit to triton-lang/triton that referenced this pull request May 21, 2025

[BACKEND] bump LLVM to e12cbd8 (#6902)

269b447

to pick up llvm/llvm-project#140183

makslevental mentioned this pull request May 21, 2025

[BACKEND] bump to llvm/llvm-project@e12cbd8 triton-lang/triton#6880

Merged

ThomasRaoux pushed a commit to triton-lang/triton that referenced this pull request May 23, 2025

[BACKEND] bump to llvm/llvm-project@e12cbd8 (#6880)

75fe113

Pulls in llvm/llvm-project#139956 to fix an regression on AMD backend. Also to pick up llvm/llvm-project#140183.

zwu-2025 pushed a commit to zwu-2025/triton that referenced this pull request May 27, 2025

[BACKEND] bump to llvm/llvm-project@e12cbd8 (triton-lang#6880)

2d76300

Pulls in llvm/llvm-project#139956 to fix an regression on AMD backend. Also to pick up llvm/llvm-project#140183.

jrbyrnes pushed a commit to jrbyrnes/llvm-project that referenced this pull request May 28, 2025

[AMDGPU] Fix scale opsel flags for scaled MFMA operations (llvm#140183)

dd36803

Fix for src scale opsel flags encoding and ASM parsing for gfx950 scaled MFMA.

searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request May 30, 2025

[AMDGPU] Fix scale opsel flags for scaled MFMA operations (llvm#140183)…

8be5034

… (llvm#2319) Fix for src scale opsel flags encoding and ASM parsing for gfx950 scaled MFMA.

loislo pushed a commit to openxla/triton that referenced this pull request Jun 3, 2025

[BACKEND] bump LLVM to e12cbd8 (triton-lang#6902)

acda939

to pick up llvm/llvm-project#140183

ptrojahn pushed a commit to ptrojahn/triton that referenced this pull request Jul 10, 2025

[BACKEND] bump to llvm/llvm-project@e12cbd8 (triton-lang#6880)

5cf96f2

Pulls in llvm/llvm-project#139956 to fix an regression on AMD backend. Also to pick up llvm/llvm-project#140183.

		multiclass VOP3PX_Real_ScaledMFMA_F8F6F4_mc<bits<7> op> {
		defm _f8_f8 : VOP3PX_Real_ScaledMFMA<op>;

		static const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
		AMDGPU::OpName::src1_modifiers};

[AMDGPU] Fix opsel for scaled MFMA operations #140183

[AMDGPU] Fix opsel for scaled MFMA operations #140183

Uh oh!

Conversation

VigneshwarJ commented May 16, 2025

Uh oh!

llvmbot commented May 16, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented May 21, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

llvmbot commented May 16, 2025 •

edited

Loading