Skip to content

Commit 5d4388f

Browse files
committed
[EXTERNAL] Cherry pick fix for const folding of immediate args (#1939)
* [EXTERNAL] Cherry-pick llvm/llvm-project#148205
1 parent 2e743e5 commit 5d4388f

File tree

3 files changed

+38
-3
lines changed

3 files changed

+38
-3
lines changed

external/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,6 +1657,7 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
16571657
for (MachineInstr *Copy : CopiesToReplace)
16581658
Copy->addImplicitDefUseOperands(*MF);
16591659

1660+
SetVector<MachineInstr *> ConstantFoldCandidates;
16601661
for (FoldCandidate &Fold : FoldList) {
16611662
assert(!Fold.isReg() || Fold.OpToFold);
16621663
if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {
@@ -1679,16 +1680,21 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
16791680
<< static_cast<int>(Fold.UseOpNo) << " of "
16801681
<< *Fold.UseMI);
16811682

1682-
if (Fold.isImm() && tryConstantFoldOp(Fold.UseMI)) {
1683-
LLVM_DEBUG(dbgs() << "Constant folded " << *Fold.UseMI);
1684-
Changed = true;
1683+
if (Fold.isImm()) {
1684+
ConstantFoldCandidates.insert(Fold.UseMI);
16851685
}
16861686

16871687
} else if (Fold.Commuted) {
16881688
// Restore the instruction's original operand order if the fold has failed.
16891689
TII->commuteInstruction(*Fold.UseMI, false);
16901690
}
16911691
}
1692+
for (MachineInstr *MI : ConstantFoldCandidates) {
1693+
if (tryConstantFoldOp(MI)) {
1694+
LLVM_DEBUG(dbgs() << "Constant folded " << *MI);
1695+
Changed = true;
1696+
}
1697+
}
16921698
return true;
16931699
}
16941700

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx1031 -run-pass=si-fold-operands -o - %s | FileCheck %s
3+
---
4+
name: snork
5+
body: |
6+
bb.0:
7+
; CHECK-LABEL: name: snork
8+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
9+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
10+
; CHECK-NEXT: SI_RETURN
11+
%0:sreg_32 = S_MOV_B32 0
12+
%1:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3
13+
%2:sreg_32 = S_OR_B32 %1.sub0, %1.sub3, implicit-def dead $scc
14+
SI_RETURN
15+
...
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// This test just ensures that the backend LLVM compiler does not crash while compiling this kernel. It used to crash; the crash was fixed by https://github.com/llvm/llvm-project/pull/148205.
2+
// RUN: sed -e 's/##TOKEN_ARCH##/%arch/g' %s | rocmlir-driver --kernel-pipeline=migraphx,highlevel,gpu,binary --arch %arch | FileCheck %s
3+
// CHECK: gpu.binary
4+
module {
5+
func.func @test(%arg0: !migraphx.shaped<1x40x9419x128xf16, 48225280x1205632x128x1>, %arg1: !migraphx.shaped<1x9419x40x128xf16, 48225280x5120x128x1>) -> !migraphx.shaped<1x40x9419x9419xf16, 3548702440x88717561x9419x1> attributes {kernel = "mixr", arch = "##TOKEN_ARCH##"} {
6+
%0 = migraphx.literal(dense<8.831780e-02> : tensor<1xf16>) : <1xf16, 1>
7+
%1 = migraphx.transpose %arg1 {permutation = [0, 2, 3, 1]} : <1x9419x40x128xf16, 48225280x5120x128x1> -> <1x40x128x9419xf16, 48225280x128x1x5120>
8+
%2 = migraphx.dot %arg0, %1 {perf_config = "v3:128,256,4,64,16,8,1,1,2,1,1"} : <1x40x9419x128xf16, 48225280x1205632x128x1>, <1x40x128x9419xf16, 48225280x128x1x5120> -> <1x40x9419x9419xf16, 3548702440x88717561x9419x1>
9+
%3 = migraphx.multibroadcast %0 {out_dyn_dims = [], out_lens = [1, 40, 9419, 9419]} : <1xf16, 1> -> <1x40x9419x9419xf16, 0x0x0x0>
10+
%4 = migraphx.mul %2, %3 : <1x40x9419x9419xf16, 3548702440x88717561x9419x1>, <1x40x9419x9419xf16, 0x0x0x0> -> <1x40x9419x9419xf16, 3548702440x88717561x9419x1>
11+
return %4 : !migraphx.shaped<1x40x9419x9419xf16, 3548702440x88717561x9419x1>
12+
}
13+
}
14+

0 commit comments

Comments
 (0)