Skip to content

Commit 01b8140

Browse files
committed
[AMDGPU] Fix delay alu for VOPD with src2acc
V_FMAC_F32 and V_DOT2C_F32_F16 have a dummy src2 operand tied to vdst to inform passes that the instructions read the dst operand. The VOPD versions of these instructions lacked the dummy operand, which was a problem for inserting s_delay_alu. Introduce the dummy src2 operand on the VOPD versions, and fix the VOPD operand tracking logic to account for it.

Reviewed By: dp

Differential Revision: https://reviews.llvm.org/D136629
1 parent 3e067d4 commit 01b8140

File tree

9 files changed

+109
-54
lines changed

9 files changed

+109
-54
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8550,7 +8550,7 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
85508550
llvm_unreachable("Unhandled operand type in cvtVOPD");
85518551
};
85528552

8553-
auto InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8553+
const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
85548554

85558555
// MCInst operands are ordered as follows:
85568556
// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
@@ -8560,9 +8560,11 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
85608560
}
85618561

85628562
for (auto CompIdx : VOPD::COMPONENTS) {
8563+
const auto &CInfo = InstInfo[CompIdx];
8564+
bool CompHasSrc2Acc = CInfo.hasSrc2Acc();
85638565
auto SrcOperandsNum = InstInfo[CompIdx].getSrcOperandsNum();
85648566
for (unsigned SrcIdx = 0; SrcIdx < SrcOperandsNum; ++SrcIdx) {
8565-
addOp(InstInfo[CompIdx].getParsedSrcIndex(SrcIdx));
8567+
addOp(CInfo.getParsedSrcIndex(SrcIdx, CompHasSrc2Acc));
85668568
}
85678569
}
85688570
}

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -480,10 +480,10 @@ ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
480480
assert(TiedIdx == -1 || TiedIdx == Component::DST);
481481
HasSrc2Acc = TiedIdx != -1;
482482

483-
SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs() - HasSrc2Acc;
483+
SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
484484
assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
485485

486-
auto OperandsNum = OpDesc.getNumOperands() - HasSrc2Acc;
486+
auto OperandsNum = OpDesc.getNumOperands();
487487
for (unsigned OprIdx = Component::SRC1; OprIdx < OperandsNum; ++OprIdx) {
488488
if (OpDesc.OpInfo[OprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
489489
MandatoryLiteralIdx = OprIdx;
@@ -500,7 +500,7 @@ unsigned ComponentInfo::getParsedOperandIndex(unsigned OprIdx) const {
500500

501501
auto SrcIdx = OprIdx - Component::DST_NUM;
502502
if (SrcIdx < getSrcOperandsNum())
503-
return getParsedSrcIndex(SrcIdx);
503+
return getParsedSrcIndex(SrcIdx, hasSrc2Acc());
504504

505505
// The specified operand does not exist.
506506
return 0;
@@ -539,8 +539,6 @@ InstInfo::RegIndices InstInfo::getRegIndices(
539539
unsigned Src2Reg = 0;
540540
if (Comp.hasRegularSrcOperand(2))
541541
Src2Reg = GetRegIdx(ComponentIdx, Comp.getSrcIndex(2));
542-
else if (Comp.hasSrc2Acc())
543-
Src2Reg = DstReg;
544542

545543
return {DstReg, Src0Reg, Src1Reg, Src2Reg};
546544
}
@@ -557,8 +555,9 @@ VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
557555
const auto &OpXDesc = InstrInfo->get(OpX);
558556
const auto &OpYDesc = InstrInfo->get(OpY);
559557
VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
560-
VOPD::ComponentInfo OpYInfo(OpYDesc, VOPD::ComponentKind::COMPONENT_Y,
561-
OpXInfo.getSrcOperandsNum());
558+
VOPD::ComponentInfo OpYInfo(
559+
OpYDesc, VOPD::ComponentKind::COMPONENT_Y, OpXInfo.getSrcOperandsNum(),
560+
OpXInfo.getSrcOperandsNum() - OpXInfo.hasSrc2Acc());
562561
return VOPD::InstInfo(OpXInfo, OpYInfo);
563562
}
564563

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -549,18 +549,20 @@ class ComponentLayout {
549549
// OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
550550
// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
551551
// Each ComponentKind has operand indices defined below.
552-
static constexpr unsigned PARSED_DST_IDX[] = {1, 1, 4 /* + OpXSrcNum */};
553-
static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {2, 2,
554-
5 /* + OpXSrcNum */};
552+
static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
553+
4 /* + ParsedOpXSrcNum */};
554+
static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
555+
2, 2, 5 /* + ParsedOpXSrcNum */};
555556

556557
private:
557558
ComponentKind Kind;
558559
unsigned OpXSrcNum;
560+
unsigned ParsedOpXSrcNum;
559561

560562
public:
561-
ComponentLayout(ComponentKind Kind_ = ComponentKind::SINGLE,
562-
unsigned OpXSrcNum_ = 0)
563-
: Kind(Kind_), OpXSrcNum(OpXSrcNum_) {
563+
ComponentLayout(ComponentKind Kind = ComponentKind::SINGLE,
564+
unsigned OpXSrcNum = 0, unsigned ParsedOpXSrcNum = 0)
565+
: Kind(Kind), OpXSrcNum(OpXSrcNum), ParsedOpXSrcNum(ParsedOpXSrcNum) {
564566
assert(Kind <= ComponentKind::MAX);
565567
assert((Kind == ComponentKind::COMPONENT_Y) == (OpXSrcNum > 0));
566568
}
@@ -573,11 +575,15 @@ class ComponentLayout {
573575
}
574576

575577
unsigned getParsedDstIndex() const {
576-
return PARSED_DST_IDX[Kind] + OpXSrcNum;
578+
return PARSED_DST_IDX[Kind] + ParsedOpXSrcNum;
577579
}
578-
unsigned getParsedSrcIndex(unsigned SrcIdx) const {
580+
unsigned getParsedSrcIndex(unsigned SrcIdx, bool ComponentHasSrc2Acc) const {
579581
assert(SrcIdx < Component::MAX_SRC_NUM);
580-
return FIRST_PARSED_SRC_IDX[Kind] + OpXSrcNum + SrcIdx;
582+
// FMAC and DOT2C have a src2 operand on the MCInst but
583+
// not on the asm representation. src2 is tied to dst.
584+
if (ComponentHasSrc2Acc && SrcIdx == (MAX_SRC_NUM - 1))
585+
return getParsedDstIndex();
586+
return FIRST_PARSED_SRC_IDX[Kind] + ParsedOpXSrcNum + SrcIdx;
581587
}
582588
};
583589

@@ -616,8 +622,9 @@ class ComponentInfo : public ComponentLayout, public ComponentProps {
616622
public:
617623
ComponentInfo(const MCInstrDesc &OpDesc,
618624
ComponentKind Kind = ComponentKind::SINGLE,
619-
unsigned OpXSrcNum = 0)
620-
: ComponentLayout(Kind, OpXSrcNum), ComponentProps(OpDesc) {}
625+
unsigned OpXSrcNum = 0, unsigned ParsedOpXSrcNum = 0)
626+
: ComponentLayout(Kind, OpXSrcNum, ParsedOpXSrcNum),
627+
ComponentProps(OpDesc) {}
621628

622629
// Map MC operand index to parsed operand index.
623630
// Return 0 if the specified operand does not exist.

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -427,6 +427,15 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
427427
let InsVOP3Base = getIns64<Src0VOP3DPP, Src1RC64, RegisterOperand<VGPR_32>, 3,
428428
0, HasModifiers, HasModifiers, HasOMod,
429429
Src0Mod, Src1Mod, Src2Mod>.ret;
430+
// We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
431+
let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
432+
let InsVOPDXDeferred =
433+
(ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
434+
VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
435+
let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
436+
let InsVOPDYDeferred =
437+
(ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
438+
VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);
430439

431440
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
432441
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,

llvm/lib/Target/AMDGPU/VOPDInstructions.td

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,21 @@ class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
7676
let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
7777
let mayRaiseFPException = ReadsModeReg;
7878

79+
// V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 need a dummy src2 tied to dst for
80+
// passes to track its uses. Its presence does not affect VOPD formation rules
81+
// because the rules for src2 and dst are the same. src2X and src2Y should not
82+
// be encoded.
83+
bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"));
84+
bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"));
85+
string ConstraintsX = !if(hasSrc2AccX, "$src2X = $vdstX", "");
86+
string ConstraintsY = !if(hasSrc2AccY, "$src2Y = $vdstY", "");
87+
let Constraints =
88+
ConstraintsX # !if(!and(hasSrc2AccX, hasSrc2AccY), ", ", "") # ConstraintsY;
89+
string DisableEncodingX = !if(hasSrc2AccX, "$src2X", "");
90+
string DisableEncodingY = !if(hasSrc2AccY, "$src2Y", "");
91+
let DisableEncoding =
92+
DisableEncodingX # !if(!and(hasSrc2AccX, hasSrc2AccY), ", ", "") # DisableEncodingY;
93+
7994
let Uses = RegListUnion<VDX.Uses, VDY.Uses>.ret;
8095
let Defs = RegListUnion<VDX.Defs, VDY.Defs>.ret;
8196
let SchedRW = !listconcat(VDX.SchedRW, VDY.SchedRW);

llvm/test/CodeGen/AMDGPU/vopd-combine.mir

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ body: |
6868
; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
6969
; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
7070
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
71-
; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
71+
; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
7272
$vgpr0 = IMPLICIT_DEF
7373
$vgpr1 = IMPLICIT_DEF
7474
$vgpr2 = IMPLICIT_DEF
@@ -133,7 +133,7 @@ body: |
133133
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
134134
; PAIR-NEXT: $sgpr20 = IMPLICIT_DEF
135135
; PAIR-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec
136-
; PAIR-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32 $sgpr20, killed $vgpr1, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
136+
; PAIR-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
137137
; PAIR-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc
138138
; PAIR-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec
139139
; PAIR-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc
@@ -246,7 +246,7 @@ body: |
246246
; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
247247
; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
248248
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
249-
; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
249+
; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
250250
$vgpr0 = IMPLICIT_DEF
251251
$vgpr1 = IMPLICIT_DEF
252252
$vgpr2 = IMPLICIT_DEF
@@ -276,7 +276,7 @@ body: |
276276
; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
277277
; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
278278
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
279-
; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
279+
; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
280280
$vgpr0 = IMPLICIT_DEF
281281
$vgpr1 = IMPLICIT_DEF
282282
$vgpr2 = IMPLICIT_DEF
@@ -447,7 +447,7 @@ body: |
447447
; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
448448
; PAIR-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
449449
; PAIR-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
450-
; PAIR-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32 $vgpr0, $vgpr3, 10, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
450+
; PAIR-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
451451
; PAIR-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc
452452
; PAIR-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
453453
; PAIR-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec

llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir

Lines changed: 0 additions & 28 deletions
This file was deleted.
Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=gcn-create-vopd,amdgpu-insert-delay-alu %s -o - | FileCheck %s
3+
4+
---
5+
name: vopd_fmac_fmac
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
; CHECK-LABEL: name: vopd_fmac_fmac
10+
; CHECK: $vgpr0 = IMPLICIT_DEF
11+
; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
12+
; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
13+
; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
14+
; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
15+
; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
16+
; CHECK-NEXT: S_DELAY_ALU 1
17+
; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
18+
$vgpr0 = IMPLICIT_DEF
19+
$vgpr1 = IMPLICIT_DEF
20+
$vgpr2 = IMPLICIT_DEF
21+
$vgpr3 = IMPLICIT_DEF
22+
$vgpr4 = IMPLICIT_DEF
23+
$vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
24+
$vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
25+
$vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
26+
$vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
27+
...
28+
---
29+
name: vopd_dot2c_dot2c
30+
tracksRegLiveness: true
31+
body: |
32+
bb.0:
33+
; CHECK-LABEL: name: vopd_dot2c_dot2c
34+
; CHECK: $vgpr0 = IMPLICIT_DEF
35+
; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
36+
; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
37+
; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
38+
; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
39+
; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
40+
; CHECK-NEXT: S_DELAY_ALU 1
41+
; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
42+
$vgpr0 = IMPLICIT_DEF
43+
$vgpr1 = IMPLICIT_DEF
44+
$vgpr2 = IMPLICIT_DEF
45+
$vgpr3 = IMPLICIT_DEF
46+
$vgpr4 = IMPLICIT_DEF
47+
$vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
48+
$vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
49+
$vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
50+
$vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
51+
...

llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -266,4 +266,4 @@ v_dual_fmac_f32 v7, v1, v2 :: v_dual_fmamk_f32 v6, v2
266266
v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3
267267
// GFX11: error: src2 operands must use different VGPR banks
268268
// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3
269-
// GFX11-NEXT:{{^}} ^
269+
// GFX11-NEXT:{{^}} ^

0 commit comments

Comments
 (0)