Skip to content

Commit a156362

Browse files
authored
[AMDGPU] Fix machine verification failure after SIFoldOperandsImpl::tryFoldOMod (llvm#113544)
Fixes llvm#54201
1 parent 75e7ba8 commit a156362

File tree

2 files changed

+53
-0
lines changed

2 files changed

+53
-0
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1793,6 +1793,9 @@ bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
17931793

17941794
DefOMod->setImm(OMod);
17951795
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1796+
// Kill flags can be wrong if we replaced a def inside a loop with a def
1797+
// outside the loop.
1798+
MRI->clearKillFlags(Def->getOperand(0).getReg());
17961799
MI.eraseFromParent();
17971800

17981801
// Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands %s -verify-machineinstrs -o - | FileCheck %s -check-prefix=GFX9
3+
4+
# When V_ADD_F32 is folded into an output modifier on V_RSQ_F32, its result %4
5+
# is replaced by the V_RSQ_F32 result; check that the kill flag is cleared on that use in V_MUL_F32.
6+
---
7+
name: main
8+
tracksRegLiveness: true
9+
machineFunctionInfo:
10+
mode:
11+
ieee: false
12+
fp32-input-denormals: false
13+
fp32-output-denormals: false
14+
body: |
15+
; GFX9-LABEL: name: main
16+
; GFX9: bb.0:
17+
; GFX9-NEXT: successors: %bb.1(0x80000000)
18+
; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
19+
; GFX9-NEXT: {{ $}}
20+
; GFX9-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
21+
; GFX9-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
22+
; GFX9-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef [[DEF]], 0, 1, implicit $mode, implicit $exec
23+
; GFX9-NEXT: {{ $}}
24+
; GFX9-NEXT: bb.1:
25+
; GFX9-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
26+
; GFX9-NEXT: {{ $}}
27+
; GFX9-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
28+
; GFX9-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, killed undef [[DEF2]], 0, [[V_RSQ_F32_e64_]], 0, 0, implicit $mode, implicit $exec
29+
; GFX9-NEXT: SI_LOOP undef [[DEF1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
30+
; GFX9-NEXT: S_BRANCH %bb.2
31+
; GFX9-NEXT: {{ $}}
32+
; GFX9-NEXT: bb.2:
33+
; GFX9-NEXT: S_ENDPGM 0
34+
bb.0:
35+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
36+
37+
%0:vgpr_32 = IMPLICIT_DEF
38+
%1:sreg_64 = IMPLICIT_DEF
39+
%2:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef %0, 0, 0, implicit $mode, implicit $exec
40+
41+
bb.1:
42+
%3:vgpr_32 = IMPLICIT_DEF
43+
%4:vgpr_32 = nsz reassoc nofpexcept V_ADD_F32_e64 0, undef %2, 0, undef %2, 0, 0, implicit $mode, implicit $exec
44+
%5:vgpr_32 = V_MUL_F32_e64 0, killed undef %3, 0, killed %4, 0, 0, implicit $mode, implicit $exec
45+
SI_LOOP undef %1, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
46+
S_BRANCH %bb.2
47+
48+
bb.2:
49+
S_ENDPGM 0
50+
...

0 commit comments

Comments
 (0)