Skip to content

Conversation

@VigneshwarJ
Copy link
Contributor

Fix for opsel flags encoding and ASM parsing of the scaled MFMA

Fix for opsel flags encoding and ASM parsing of the scaled MFMA
@llvmbot llvmbot added backend:AMDGPU llvm:mc Machine (object) code labels May 16, 2025
@llvmbot
Copy link
Member

llvmbot commented May 16, 2025

@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Vigneshwar Jayakumar (VigneshwarJ)

Changes

Fix for opsel flags encoding and ASM parsing of the scaled MFMA


Patch is 40.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140183.diff

7 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+78-7)
  • (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+8-9)
  • (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+10-10)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll (+16-16)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll (+23-23)
  • (modified) llvm/test/MC/AMDGPU/mai-gfx950.s (+6-6)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt (+6-6)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b50a2cf1becf7..450c4fffb8453 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1878,6 +1878,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
 
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);
+  void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
@@ -6796,17 +6797,25 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
   return ParseStatus::Success;
 }
 
-static void addOptionalImmOperand(
-  MCInst& Inst, const OperandVector& Operands,
-  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
-  AMDGPUOperand::ImmTy ImmT,
-  int64_t Default = 0) {
+static void
+addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
+                      AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
+                      AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
+                      std::optional<unsigned> InsertAt = std::nullopt) {
   auto i = OptionalIdx.find(ImmT);
   if (i != OptionalIdx.end()) {
     unsigned Idx = i->second;
-    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
+    const AMDGPUOperand &Op =
+        static_cast<const AMDGPUOperand &>(*Operands[Idx]);
+    if (InsertAt)
+      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
+    else
+      Op.addImmOperands(Inst, 1);
   } else {
-    Inst.addOperand(MCOperand::createImm(Default));
+    if (InsertAt.has_value())
+      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
+    else
+      Inst.addOperand(MCOperand::createImm(Default));
   }
 }
 
@@ -8823,6 +8832,68 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
     Inst.getOperand(ModIdx).setImm(ModVal);
   }
 }
+void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
+                                    const OperandVector &Operands) {
+  OptionalImmIndexMap OptionalIdx;
+  unsigned Opc = Inst.getOpcode();
+  unsigned I = 1;
+
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
+    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
+
+  for (unsigned E = Operands.size(); I != E; ++I) {
+    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
+
+    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+    } else if (Op.isImmModifier()) {
+      OptionalIdx[Op.getImmTy()] = I;
+    } else {
+      Op.addRegOrImmOperands(Inst, 1);
+    }
+  }
+
+  // Insert CBSZ and BLGP operands for F8F6F4 variants
+  int InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCBSZ,
+                        0, InsertPos);
+  InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyBLGP,
+                        0, InsertPos);
+
+  // Add dummy src_modifiers
+  Inst.addOperand(MCOperand::createImm(0));
+  Inst.addOperand(MCOperand::createImm(0));
+
+  // Handle op_sel fields
+
+  unsigned OpSel = 0;
+  auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
+  if (OpselIdx != OptionalIdx.end())
+    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
+                .getImm();
+
+  unsigned OpSelHi = 0;
+  auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
+  if (OpselHiIdx != OptionalIdx.end())
+    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
+                  .getImm();
+  static const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
+                                          AMDGPU::OpName::src1_modifiers};
+
+  for (unsigned J = 0; J < 2; ++J) {
+    unsigned ModVal = 0;
+    if (OpSel & (1 << J))
+      ModVal |= SISrcMods::OP_SEL_0;
+    if (OpSelHi & (1 << J))
+      ModVal |= SISrcMods::OP_SEL_1;
+
+    const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+    Inst.getOperand(ModIdx).setImm(ModVal);
+  }
+}
 
 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptionalIdx) {
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index a3be49bf02648..6315d14dbbc92 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -829,12 +829,12 @@ class MFMA_F8F6F4_WithSizeTable_Helper<VOP3_Pseudo  ps, string F8F8Op> :
 // Currently assumes scaled instructions never have abid
 class MAIFrag<SDPatternOperator Op, code pred, bit HasAbid = true, bit Scaled = false> : PatFrag <
   !if(Scaled, (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$blgp,
-                   node:$scale_src0_opsel, node:$scale_src0,
-                   node:$scale_src1_opsel, node:$scale_src1),
+                   node:$src0_modifiers, node:$scale_src0,
+                   node:$src1_modifiers, node:$scale_src1),
               !con((ops node:$src0, node:$src1, node:$src2, node:$cbsz),
                    !if(HasAbid, (ops node:$abid), (ops)),
                     (ops node:$blgp))),
-  !if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $scale_src0_opsel, $scale_src0, $scale_src1_opsel, $scale_src1),
+  !if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $src0_modifiers, $scale_src0, $src1_modifiers, $scale_src1),
               !if(HasAbid, (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
                            (Op $src0, $src1, $src2, $cbsz, $blgp))),
   pred
@@ -895,12 +895,12 @@ class ScaledMAIInst<string OpName, MAIInst BaseInst, SDPatternOperator node> :
   let InOperandList = !con(BaseInst.InOperandList,
     (ins VSrc_b32:$scale_src0,
          VSrc_b32:$scale_src1,
-         op_sel0:$scale_src0_opsel,
-         op_sel_hi0:$scale_src1_opsel));
+         op_sel0:$src0_modifiers,
+         op_sel_hi0:$src1_modifiers));
   let AsmOperands =
     "$vdst, $src0, $src1, $src2, $scale_src0, $scale_src1"
-    "$scale_src0_opsel$scale_src1_opsel$cbsz$blgp";
-
+    "$src0_modifiers$src1_modifiers$cbsz$blgp";
+  let AsmMatchConverter = "cvtScaledMFMA";
   let FixedSize = 1;
   let Size = 16;
 }
@@ -2041,7 +2041,6 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> {
   defvar PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64");
   defvar Name = PS_ACD.Mnemonic;
   defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
-
   let SubtargetPredicate = HasGFX950Insts,
       DecoderNamespace = "GFX940",
       AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
@@ -2057,7 +2056,7 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> {
 
 multiclass VOP3PX_Real_ScaledMFMA_F8F6F4_mc<bits<7> op> {
   defm _f8_f8 : VOP3PX_Real_ScaledMFMA<op>;
-
+ 
   let isAsmParserOnly = 1 in { // Disable ambiguous disassembly.
   defm _f8_f6 : VOP3PX_Real_ScaledMFMA<op>;
   defm _f6_f8 : VOP3PX_Real_ScaledMFMA<op>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index f9fa83c3f5ae7..1c97c40e26083 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -526,14 +526,14 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_
   bits<9> scale_src0;
   bits<9> scale_src1;
 
-  bits<2> scale_src0_opsel;
-  bits<2> scale_src1_opsel;
+  bits<4> src0_modifiers;
+  bits<4> src1_modifiers;
 
   // Inst{7-0} = unused
   // Inst{10-8} = neg_hi;
   // Inst{13-11} = op_sel
-  let Inst{11} = scale_src0_opsel{0};
-  let Inst{12} = scale_src1_opsel{0};
+  let Inst{11} = src0_modifiers{2};
+  let Inst{12} = src1_modifiers{2};
   // Inst{13} = unused op_sel
   // Inst{14} = unused op_sel_hi2
 
@@ -542,8 +542,8 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_
   let Inst{49-41} = scale_src1;
   // Inst{50-58} = unused
   // Inst{60-59} = op_sel_hi;
-  let Inst{59} = scale_src0_opsel{1};
-  let Inst{60} = scale_src1_opsel{1};
+  let Inst{59} = src0_modifiers{3};
+  let Inst{60} = src1_modifiers{3};
   // Inst{63-61} = neg;
 
   // The high half of the encoding is the unscaled mfma op.
@@ -1437,17 +1437,17 @@ class getVOP3MAIScaledPat<VOPProfile P, SDPatternOperator node> {
                       // mfma
                       [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
                                             timm:$cbsz, timm:$blgp,
-                                            MFMALdScaleModifierOp:$scale_src0_opsel,
+                                            MFMALdScaleModifierOp:$src0_modifiers,
                                             i32:$scale_src0,
-                                            MFMALdScaleModifierOp:$scale_src1_opsel,
+                                            MFMALdScaleModifierOp:$src1_modifiers,
                                             i32:$scale_src1
                                             ))],
                       // smfmac
                       [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i32:$idx,
                                             timm:$cbsz, timm:$abid,
-                                            MFMALdScaleModifierOp:$scale_src0_opsel,
+                                            MFMALdScaleModifierOp:$src0_modifiers,
                                             i32:$scale_src0,
-                                            MFMALdScaleModifierOp:$scale_src1_opsel,
+                                            MFMALdScaleModifierOp:$src1_modifiers,
                                             i32:$scale_src1))]);
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
index 7cc726a3bd79c..e027dda957a6d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
@@ -46,7 +46,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[0,0,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -70,7 +70,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[1,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -94,7 +94,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -118,7 +118,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[0,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -142,7 +142,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,0,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -166,7 +166,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[1,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -190,7 +190,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1797,7 +1797,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, v18
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0]
+; GCN-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[1,1,0]
 ; GCN-NEXT:    s_nop 7
 ; GCN-NEXT:    s_nop 3
 ; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1819,7 +1819,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
 ; SDAG-NEXT:    v_accvgpr_write_b32 a2, v18
 ; SDAG-NEXT:    v_accvgpr_write_b32 a3, v19
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0]
+; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[1,1,0]
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 3
 ; SDAG-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1837,7 +1837,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
 ; GISEL-NEXT:    v_accvgpr_write_b32 a3, v19
 ; GISEL-NEXT:    v_mov_b32_e32 v16, 0x41
 ; GISEL-NEXT:    s_nop 1
-; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
+; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[1,1,0]
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT:    s_nop 3
 ; GISEL-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1860,7 +1860,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
 ; SDAG-NEXT:    v_accvgpr_write_b32 a3, v19
 ; SDAG-NEXT:    v_mov_b32_e32 v16, 0x4d
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0]
+; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[1,1,0]
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 3
 ; SDAG-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1879,7 +1879,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
 ; GISEL-NEXT:    v_mov_b32_e32 v16, 0x41
 ; GISEL-NEXT:    v_mov_b32_e32 v17, 0x4d
 ; GISEL-NEXT:    s_nop 1
-; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0]
+; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[1,1,0]
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT:    s_nop 3
 ; GISEL-NEXT:    v_accvgpr_read_b32 v0, a0
@@ -1921,7 +1921,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
 ; SDAG-NEXT:    v_accvgpr_write_b32 a3, s11
 ; SDAG-NEXT:    v_mov_b32_e32 v17, s13
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel_hi:[0,0,0] blgp:2
+; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 3
 ; SDAG-NEXT:    global_store_dwordx4 v16, a[0:3], s[14:15]
@@ -1946,7 +1946,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
 ; GISEL-NEXT:    v_accvgpr_write_b32 a3, s27
 ; GISEL-NEXT:    v_mov_b32_e32 v16, s29
 ; GISEL-NEXT:    s_nop 1
-; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel_hi:[0,0,0] blgp:2
+; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT:    s_nop 2
@@ -1987,7 +1987,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
 ; SDAG-NEXT:    v_accvgpr_write_b32 a2, s2
 ; SDAG-NEXT:    v_accvgpr_write_b32 a3, s3
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel_hi:[0,0,0]
+; SDAG-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 3
 ; SDAG-NEXT:    global_store_dwordx4 v16, a[0:3], s[4:5]
@@ -2013,7 +2013,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
 ; GISEL-NEXT:    v_accvgpr_write_b32 a2, s2
 ; GISEL-NEXT:    v_accvgpr_write_b32 a3, s3
 ; GISEL-NEXT:    s_nop 1
-; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
+; GISEL-NEXT:    v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT:    s_nop 2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
index dac54c9f85e96..5574313f22a47 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
@@ -134,7 +134,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x
 ; SDAG-NEXT:    v_accvgpr_write_b32 a14, v30
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 0
-; SDAG-NEXT:    v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0]
+; SDAG-NEXT:    v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel:[1,1,0] op_sel_hi:[0,0,0]
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 7
 ; SDAG-NEXT:    s_nop 3
@@ -179,7 +179,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x
 ; GISEL-NEXT:    v_accvgpr_write_b32 a14, v30
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-NEXT:    s_nop 0
-; GISEL-NEXT:    v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0]
+; GISEL-NEXT:    v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel:[1,1,0] op_sel_hi:[0,0,0]
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT:    s_nop 7
 ; GISEL-NEXT: ...
[truncated]

@VigneshwarJ VigneshwarJ requested review from Sisyph and arsenm May 16, 2025 03:47
v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 cbsz:3 blgp:1

// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x00,0xac,0xd3,0x18,0x33,0x02,0x08,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
// GFX950: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[12:19], v[20:23], v24, v25 op_sel:[0,1,0] op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; encoding: [0x00,0x10,0xac,0xd3,0x18,0x33,0x02,0x00,0x00,0x0b,0xad,0xd3,0x04,0x19,0x52,0x24]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A side question: SP3 says this should be:

v_mfma_scale_f32_16x16x128_f8f6f4  v[0:3], v[4:11], v[12:19], v[20:23], v24, v25 op_sel:[0,1] op_sel_hi:[0,0] cbsz:3 abid:1 blgp:1

Why do we not emit abid:1 here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

abid is set to 1 default for scaled mfma, we are not just printing it.

multiclass VOP3PX_Real_ScaledMFMA_F8F6F4_mc<bits<7> op> {
defm _f8_f8 : VOP3PX_Real_ScaledMFMA<op>;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Introduced trailing whitespace

Comment on lines +529 to +530
bits<4> src0_modifiers;
bits<4> src1_modifiers;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs comment explaining these. If you're writing custom operand handling maybe we shouldn't rename these? It is a weird case

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought so too, but this matches the current src_modifiers field and uses an existing bit mask 4 bit value (OpselHI, OpselLo, Abs and Neg) in TableGen. There is also already a logic in place for asm printing using this bit mask. Also, in previous revision, it was a 2-bit field, which was reading the lower 2 bits hence not generating the right encoding for opsel bits. Though this MFMA instruction doesn't use neg and abs, I thought it would be cleaner to match the existing implementation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it does support neg, it's just not wired up

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see it, I see this in doc " NEG[1:0] and ABS[1:0] must be zero. NEG[2] and ABS[2] may be used to control matrix C" . So I will wire up a src2_modifier with neg/abs bits mapped and add some tests for it as well

v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:29], v[32:47], v48, v49 blgp:2

// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel_hi:[0,0,0] cbsz:2 blgp:3 ; encoding: [0x00,0x00,0xac,0xd3,0x30,0x63,0x02,0x18,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64]
// GFX950: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:21], v[24:29], v[32:47], v48, v49 op_sel:[0,1,0] op_sel_hi:[0,1,0] cbsz:2 blgp:3 ; encoding: [0x00,0x10,0xac,0xd3,0x30,0x63,0x02,0x10,0x00,0x0a,0xae,0xd3,0x10,0x31,0x82,0x64]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem like enough lines changed. Do we have every permutation of values tested?

@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:ir llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels May 20, 2025
@VigneshwarJ VigneshwarJ requested a review from arsenm May 20, 2025 14:10
Comment on lines 8862 to 8865
if (OpselIdx != OptionalIdx.end())
OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
.getImm();

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (OpselIdx != OptionalIdx.end())
OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
.getImm();
if (OpselIdx != OptionalIdx.end()) {
OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
.getImm();
}

Comment on lines 8868 to 8870
if (OpselHiIdx != OptionalIdx.end())
OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
.getImm();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (OpselHiIdx != OptionalIdx.end())
OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
.getImm();
if (OpselHiIdx != OptionalIdx.end()) {
OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
.getImm();
}

Comment on lines 8871 to 8872
static const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
static const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers};
const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers};

@VigneshwarJ VigneshwarJ merged commit e12cbd8 into llvm:main May 21, 2025
8 of 10 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented May 21, 2025

LLVM Buildbot has detected a new failure on builder sanitizer-aarch64-linux running on sanitizer-buildbot8 while building llvm at step 2 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/16552

Here is the relevant piece of the build log for the reference
Step 2 (annotate) failure: 'python ../sanitizer_buildbot/sanitizers/zorg/buildbot/builders/sanitizers/buildbot_selector.py' (failure)
...
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 5734 tests, 72 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80
FAIL: ThreadSanitizer-aarch64 :: cxa_guard_acquire.cpp (4711 of 5734)
******************** TEST 'ThreadSanitizer-aarch64 :: cxa_guard_acquire.cpp' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang  --driver-mode=g++ -fsanitize=thread -Wall   -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   -nobuiltininc -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include -idirafter /home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include -resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build -Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux  -gline-tables-only -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -std=c++11 -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -nostdinc++ -I/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/tsan/libcxx_tsan_aarch64/include/c++/v1 -O1 /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp -o /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp &&  /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp 2>&1 | FileCheck /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp # RUN: at line 1
+ /home/b/sanitizer-aarch64-linux/build/build_default/bin/clang --driver-mode=g++ -fsanitize=thread -Wall -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta -nobuiltininc -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include -idirafter /home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include -resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build -Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux -gline-tables-only -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -std=c++11 -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -nostdinc++ -I/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/tsan/libcxx_tsan_aarch64/include/c++/v1 -O1 /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp -o /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp
+ /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp
+ FileCheck /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp
/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp:71:17: error: CHECK-NEXT: is not on the line after the previous match
 // CHECK-NEXT: Enter constructor
                ^
<stdin>:3:1: note: 'next' match was here
Enter constructor
^
<stdin>:1:11: note: previous match ended here
Enter main
          ^
<stdin>:2:1: note: non-matching line after previous match is here
Enter potentially blocking region
^

Input file: <stdin>
Check file: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp

-dump-input=help explains the following input dump.

Input was:
<<<<<<
         1: Enter main 
         2: Enter potentially blocking region 
         3: Enter constructor 
next:71     !~~~~~~~~~~~~~~~~  error: match on wrong line
         4: Exit constructor 
         5: Exit potentially blocking region 
         6: Enter constructor 2 
         7: Exit main 
>>>>>>
Step 16 (test standalone compiler-rt) failure: test standalone compiler-rt (failure)
...
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/lit.common.cfg.py:60: warning: Path reported by clang does not exist: "/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/aarch64-unknown-linux-gnu". This path was found by running ['/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang', '--target=aarch64-unknown-linux-gnu', '-Wthread-safety', '-Wthread-safety-reference', '-Wthread-safety-beta', '-nobuiltininc', '-I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include', '-idirafter', '/home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include', '-resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build', '-Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux', '-print-runtime-dir'].
llvm-lit: /home/b/sanitizer-aarch64-linux/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 5734 tests, 72 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80
FAIL: ThreadSanitizer-aarch64 :: cxa_guard_acquire.cpp (4711 of 5734)
******************** TEST 'ThreadSanitizer-aarch64 :: cxa_guard_acquire.cpp' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/home/b/sanitizer-aarch64-linux/build/build_default/bin/clang  --driver-mode=g++ -fsanitize=thread -Wall   -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   -nobuiltininc -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include -idirafter /home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include -resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build -Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux  -gline-tables-only -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -std=c++11 -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -nostdinc++ -I/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/tsan/libcxx_tsan_aarch64/include/c++/v1 -O1 /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp -o /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp &&  /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp 2>&1 | FileCheck /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp # RUN: at line 1
+ /home/b/sanitizer-aarch64-linux/build/build_default/bin/clang --driver-mode=g++ -fsanitize=thread -Wall -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta -nobuiltininc -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/include -idirafter /home/b/sanitizer-aarch64-linux/build/build_default/lib/clang/21/include -resource-dir=/home/b/sanitizer-aarch64-linux/build/compiler_rt_build -Wl,-rpath,/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/linux -gline-tables-only -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -std=c++11 -I/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/../ -nostdinc++ -I/home/b/sanitizer-aarch64-linux/build/compiler_rt_build/lib/tsan/libcxx_tsan_aarch64/include/c++/v1 -O1 /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp -o /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp
+ /home/b/sanitizer-aarch64-linux/build/compiler_rt_build/test/tsan/AARCH64Config/Output/cxa_guard_acquire.cpp.tmp
+ FileCheck /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp
/home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp:71:17: error: CHECK-NEXT: is not on the line after the previous match
 // CHECK-NEXT: Enter constructor
                ^
<stdin>:3:1: note: 'next' match was here
Enter constructor
^
<stdin>:1:11: note: previous match ended here
Enter main
          ^
<stdin>:2:1: note: non-matching line after previous match is here
Enter potentially blocking region
^

Input file: <stdin>
Check file: /home/b/sanitizer-aarch64-linux/build/llvm-project/compiler-rt/test/tsan/cxa_guard_acquire.cpp

-dump-input=help explains the following input dump.

Input was:
<<<<<<
         1: Enter main 
         2: Enter potentially blocking region 
         3: Enter constructor 
next:71     !~~~~~~~~~~~~~~~~  error: match on wrong line
         4: Exit constructor 
         5: Exit potentially blocking region 
         6: Enter constructor 2 
         7: Exit main 
>>>>>>

makslevental added a commit to makslevental/triton that referenced this pull request May 21, 2025
antiagainst pushed a commit to triton-lang/triton that referenced this pull request May 21, 2025
ThomasRaoux pushed a commit to triton-lang/triton that referenced this pull request May 23, 2025
Pulls in llvm/llvm-project#139956 to fix an
regression on AMD backend. Also to pick up
llvm/llvm-project#140183.
zwu-2025 pushed a commit to zwu-2025/triton that referenced this pull request May 27, 2025
jrbyrnes pushed a commit to jrbyrnes/llvm-project that referenced this pull request May 28, 2025
Fix for src scale opsel flags encoding and ASM parsing for gfx950 scaled MFMA.
searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request May 30, 2025
… (llvm#2319)

Fix for src scale opsel flags encoding and ASM parsing for gfx950 scaled
MFMA.
loislo pushed a commit to openxla/triton that referenced this pull request Jun 3, 2025
searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request Jun 16, 2025
… (llvm#2466)

Fix for src scale opsel flags encoding and ASM parsing for gfx950 scaled
MFMA.

(cherry picked from commit e12cbd8)

Co-authored-by: Vigneshwar Jayakumar <[email protected]>
ptrojahn pushed a commit to ptrojahn/triton that referenced this pull request Jul 10, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category llvm:analysis Includes value tracking, cost tables and constant folding llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:ir llvm:mc Machine (object) code llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants