Skip to content

Commit 64c89cf

Browse files
committed
Reduced duplicated code and added a new MIR test to the existing test file.
1 parent 06a6543 commit 64c89cf

File tree

2 files changed

+89
-60
lines changed

2 files changed

+89
-60
lines changed

llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp

Lines changed: 46 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,54 +1468,44 @@ static bool checkForRightSrcRootAccess(MachineInstr *Def0MI,
14681468
Def1DstUnused->getImm() != AMDGPU::SDWA::DstUnused::UNUSED_PAD)
14691469
return false;
14701470

1471-
MachineOperand *Def1Src0 =
1472-
TII->getNamedOperand(*Def1MI, AMDGPU::OpName::src0);
1473-
MachineOperand *Def1Src1 =
1474-
TII->getNamedOperand(*Def1MI, AMDGPU::OpName::src1);
1475-
MachineOperand *Def0Src0 =
1476-
TII->getNamedOperand(*Def0MI, AMDGPU::OpName::src0);
1477-
MachineOperand *Def0Src1 =
1478-
TII->getNamedOperand(*Def0MI, AMDGPU::OpName::src1);
1479-
1480-
auto checkForDef0MIAccess = [&]() -> bool {
1481-
if (Def0Src0 && Def0Src0->isReg() && (Def0Src0->getReg() == SrcRootReg)) {
1482-
MachineOperand *Def0Src0Sel =
1483-
TII->getNamedOperand(*Def0MI, AMDGPU::OpName::src0_sel);
1484-
if (!Def0Src0Sel ||
1485-
Def0Src0Sel->getImm() == AMDGPU::SDWA::SdwaSel::WORD_0)
1486-
return true;
1487-
}
1488-
1489-
if (Def0Src1 && Def0Src1->isReg() && (Def0Src1->getReg() == SrcRootReg)) {
1490-
MachineOperand *Def0Src1Sel =
1491-
TII->getNamedOperand(*Def0MI, AMDGPU::OpName::src1_sel);
1492-
if (!Def0Src1Sel ||
1493-
Def0Src1Sel->getImm() == AMDGPU::SDWA::SdwaSel::WORD_0)
1494-
return true;
1471+
const auto checkSrcSel = [&](MachineInstr *DefMI, AMDGPU::OpName SrcName,
1472+
AMDGPU::OpName SrcSelName,
1473+
AMDGPU::SDWA::SdwaSel SdwaSel) -> bool {
1474+
MachineOperand *DefSrc = TII->getNamedOperand(*DefMI, SrcName);
1475+
if (DefSrc && DefSrc->isReg() && (DefSrc->getReg() == SrcRootReg)) {
1476+
MachineOperand *DefSrcSel = TII->getNamedOperand(*DefMI, SrcSelName);
1477+
if (SdwaSel == AMDGPU::SDWA::SdwaSel::WORD_0) {
1478+
if (!DefSrcSel || DefSrcSel->getImm() == SdwaSel)
1479+
return true;
1480+
} else {
1481+
assert(SdwaSel == AMDGPU::SDWA::SdwaSel::WORD_1 &&
1482+
"Not valid SDWA SrcSel operand");
1483+
if (DefSrcSel && DefSrcSel->getImm() == SdwaSel)
1484+
return true;
1485+
}
14951486
}
1496-
14971487
return false;
14981488
};
14991489

1500-
if (Def1Src0 && Def1Src0->isReg() && (Def1Src0->getReg() == SrcRootReg)) {
1501-
MachineOperand *Def1Src0Sel =
1502-
TII->getNamedOperand(*Def1MI, AMDGPU::OpName::src0_sel);
1503-
if (!Def1Src0Sel || Def1Src0Sel->getImm() != AMDGPU::SDWA::SdwaSel::WORD_1)
1504-
return false;
1490+
const auto checkForDef0MIAccess = [&]() -> bool {
1491+
if (checkSrcSel(Def0MI, AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel,
1492+
AMDGPU::SDWA::SdwaSel::WORD_0))
1493+
return true;
1494+
if (checkSrcSel(Def0MI, AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel,
1495+
AMDGPU::SDWA::SdwaSel::WORD_0))
1496+
return true;
1497+
return false;
1498+
};
15051499

1500+
if (checkSrcSel(Def1MI, AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel,
1501+
AMDGPU::SDWA::SdwaSel::WORD_1))
15061502
if (checkForDef0MIAccess())
15071503
return true;
1508-
}
1509-
1510-
if (Def1Src1 && Def1Src1->isReg() && (Def1Src1->getReg() == SrcRootReg)) {
1511-
MachineOperand *Def1Src1Sel =
1512-
TII->getNamedOperand(*Def1MI, AMDGPU::OpName::src1_sel);
1513-
if (!Def1Src1Sel || Def1Src1Sel->getImm() != AMDGPU::SDWA::SdwaSel::WORD_1)
1514-
return false;
15151504

1505+
if (checkSrcSel(Def1MI, AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel,
1506+
AMDGPU::SDWA::SdwaSel::WORD_1))
15161507
if (checkForDef0MIAccess())
15171508
return true;
1518-
}
15191509

15201510
return false;
15211511
}
@@ -1568,7 +1558,7 @@ void SIPeepholeSDWA::convertMIToSDWAWithOpsel(MachineInstr *MI,
15681558
TII->getNamedOperand(*MI, AMDGPU::OpName::dst_unused);
15691559
assert(DstUnused &&
15701560
AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::dst_unused));
1571-
assert(!(DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) &&
1561+
assert(DstUnused->getImm() != AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE &&
15721562
"Dst_unused should not be UNUSED_PRESERVE already");
15731563
DstUnused->setImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE);
15741564

@@ -1581,31 +1571,27 @@ void SIPeepholeSDWA::convertMIToSDWAWithOpsel(MachineInstr *MI,
15811571
MI->addOperand(NewSrcImplitMO);
15821572
MI->tieOperands(PreserveDstIdx, MI->getNumOperands() - 1);
15831573

1584-
MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
1585-
assert(Src0 && AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0));
1586-
if (Src0->isReg() && (Src0->getReg() == SrcMO.getReg())) {
1587-
MachineOperand *Src0Sel =
1588-
TII->getNamedOperand(*MI, AMDGPU::OpName::src0_sel);
1589-
assert(Src0Sel &&
1590-
AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0_sel));
1591-
Src0Sel->setImm(OpSel);
1574+
auto modifySrcSelIntoOpSel = [&](AMDGPU::OpName SrcName,
1575+
AMDGPU::OpName SrcSelName) -> bool {
1576+
MachineOperand *Src = TII->getNamedOperand(*MI, SrcName);
1577+
assert(Src && AMDGPU::hasNamedOperand(SDWAOpcode, SrcName));
1578+
if (Src->isReg() && (Src->getReg() == SrcMO.getReg())) {
1579+
MachineOperand *SrcSel = TII->getNamedOperand(*MI, SrcSelName);
1580+
assert(SrcSel && AMDGPU::hasNamedOperand(SDWAOpcode, SrcSelName));
1581+
SrcSel->setImm(OpSel);
15921582

1593-
LLVM_DEBUG(dbgs() << "\nInto:" << *MI << '\n');
1594-
return;
1595-
}
1583+
LLVM_DEBUG(dbgs() << "\nInto:" << *MI << '\n');
1584+
return true;
1585+
}
15961586

1597-
MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
1598-
assert(Src1 && AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1));
1599-
if (Src1->isReg() && (Src1->getReg() == SrcMO.getReg())) {
1600-
MachineOperand *Src1Sel =
1601-
TII->getNamedOperand(*MI, AMDGPU::OpName::src1_sel);
1602-
assert(Src1Sel &&
1603-
AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1_sel));
1604-
Src1Sel->setImm(OpSel);
1587+
return false;
1588+
};
16051589

1606-
LLVM_DEBUG(dbgs() << "\nInto:" << *MI << '\n');
1590+
if (modifySrcSelIntoOpSel(AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel))
1591+
return;
1592+
1593+
if (modifySrcSelIntoOpSel(AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel))
16071594
return;
1608-
}
16091595
}
16101596

16111597
// BackTracks the given Parent MI to look for any of its use operand that has

llvm/test/CodeGen/AMDGPU/packed-vec-fp16.mir

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737
ret <4 x half> %res
3838
}
3939

40+
define void @unbalanced_operations_packed(<4 x half> %a) #0 {
41+
ret void
42+
}
43+
4044
declare <4 x half> @llvm.cos.v4f16(<4 x half>) #1
4145

4246
declare <4 x half> @llvm.exp.v4f16(<4 x half>) #1
@@ -335,3 +339,42 @@ body: |
335339
$vgpr1 = COPY %31
336340
SI_RETURN implicit $vgpr0, implicit $vgpr1
337341
...
342+
343+
---
344+
name: unbalanced_operations_packed
345+
tracksRegLiveness: true
346+
body: |
347+
bb.0 (%ir-block.0):
348+
liveins: $vgpr0, $vgpr1
349+
; GFX9-LABEL: name: unbalanced_operations_packed
350+
; GFX9: liveins: $vgpr0, $vgpr1
351+
; GFX9-NEXT: {{ $}}
352+
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
353+
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
354+
; GFX9-NEXT: [[V_LOG_F16_sdwa:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F16_sdwa 0, [[COPY]], 0, 0, 4, 2, 4, implicit $mode, implicit $exec, implicit [[COPY]](tied-def 0)
355+
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 14732
356+
; GFX9-NEXT: [[V_MUL_F16_sdwa:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_sdwa 0, killed [[V_LOG_F16_sdwa]], 0, [[S_MOV_B32_]], 0, 0, 4, 2, 4, 6, implicit $mode, implicit $exec, implicit killed [[V_LOG_F16_sdwa]](tied-def 0)
357+
; GFX9-NEXT: [[V_LOG_F16_sdwa1:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F16_sdwa 0, [[V_MUL_F16_sdwa]], 0, 0, 5, 2, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F16_sdwa]](tied-def 0)
358+
; GFX9-NEXT: [[V_LOG_F16_sdwa2:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F16_sdwa 0, [[COPY1]], 0, 0, 4, 2, 4, implicit $mode, implicit $exec, implicit [[COPY1]](tied-def 0)
359+
; GFX9-NEXT: [[V_LOG_F16_sdwa3:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F16_sdwa 0, [[V_LOG_F16_sdwa2]], 0, 0, 5, 2, 5, implicit $mode, implicit $exec, implicit [[V_LOG_F16_sdwa2]](tied-def 0)
360+
; GFX9-NEXT: [[V_MUL_F16_sdwa1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_sdwa 0, killed [[V_LOG_F16_sdwa3]], 0, [[S_MOV_B32_]], 0, 0, 5, 2, 5, 6, implicit $mode, implicit $exec, implicit killed [[V_LOG_F16_sdwa3]](tied-def 0)
361+
; GFX9-NEXT: $vgpr0 = COPY [[V_MUL_F16_sdwa1]]
362+
; GFX9-NEXT: $vgpr1 = COPY [[V_LOG_F16_sdwa1]]
363+
; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
364+
%9:vgpr_32 = COPY $vgpr1
365+
%8:vgpr_32 = COPY $vgpr0
366+
%11:vgpr_32 = nofpexcept V_LOG_F16_e64 0, %9, 0, 0, implicit $mode, implicit $exec
367+
%12:sreg_32 = S_MOV_B32 14732
368+
%13:vgpr_32 = nofpexcept V_MUL_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $mode, implicit $exec
369+
%15:vgpr_32 = V_LSHRREV_B32_e64 16, %9, implicit $exec
370+
%17:vgpr_32 = nofpexcept V_LOG_F16_e64 0, %15, 0, 0, implicit $mode, implicit $exec
371+
%20:vgpr_32 = nofpexcept V_LOG_F16_e64 0, %8, 0, 0, implicit $mode, implicit $exec
372+
%22:vgpr_32 = V_LSHRREV_B32_e64 16, %8, implicit $exec
373+
%24:vgpr_32 = nofpexcept V_LOG_F16_e64 0, %22, 0, 0, implicit $mode, implicit $exec
374+
%25:vgpr_32 = nofpexcept V_MUL_F16_e64 0, killed %24, 0, %12, 0, 0, implicit $mode, implicit $exec
375+
%26:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, killed %20, 0, killed %25, 0, 0, implicit $mode, implicit $exec
376+
%27:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, killed %13, 0, killed %17, 0, 0, implicit $mode, implicit $exec
377+
$vgpr0 = COPY %26
378+
$vgpr1 = COPY %27
379+
SI_RETURN implicit $vgpr0, implicit $vgpr1
380+
...

0 commit comments

Comments
 (0)