Skip to content

Commit 50498a6

Browse files
committed
address comment
1 parent 506a798 commit 50498a6

File tree

2 files changed: 12 additions and 14 deletions.

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1900,8 +1900,9 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
     return false;
 
   // Look through COPY. COPY only observed with True16.
-  MachineOperand *DefSrc = TRI->lookThruCopyLike(ClampSrc->getReg(), MRI);
-  MachineInstr *Def = MRI->getVRegDef(DefSrc && DefSrc->isReg() ? DefSrc->getReg() : ClampSrc->getReg());
+  MachineOperand *DefSrc = lookUpCopyChain(*TII, *MRI, ClampSrc->getReg());
+  MachineInstr *Def = MRI->getVRegDef(
+      DefSrc && DefSrc->isReg() ? DefSrc->getReg() : ClampSrc->getReg());
 
   // The type of clamp must be compatible.
   if (TII->getClampMask(*Def) != TII->getClampMask(MI))

llvm/test/CodeGen/AMDGPU/true16-fold.mir

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,9 @@ body: |
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
-    ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
-    ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
-    ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
     ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
     %0:vgpr_32 = COPY $vgpr2
     %1:vgpr_32 = COPY $vgpr1
@@ -91,13 +90,13 @@ body: |
 ...
 
 ---
-name: fold_16bit_subreg_folded_clamp
+name: fold_16bit_subreg_1_clamp
 tracksRegLiveness: true
 registers:
 body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2
-    ; CHECK-LABEL: name: fold_16bit_madmix_clamp
+    ; CHECK-LABEL: name: fold_16bit_subreg_1_clamp
     ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@@ -106,8 +105,7 @@ body: |
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
     ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
-    ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
     ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
     ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
     %0:vgpr_32 = COPY $vgpr2
@@ -122,13 +120,13 @@ body: |
 ...
 
 ---
-name: fold_16bit_subreg_clamp
+name: fold_16bit_subreg_2_clamp
 tracksRegLiveness: true
 registers:
 body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2
-    ; CHECK-LABEL: name: fold_16bit_subreg_clamp
+    ; CHECK-LABEL: name: fold_16bit_subreg_2_clamp
     ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@@ -167,10 +165,9 @@ body: |
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
-    ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
     ; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
-    ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
-    ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
     ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
     %0:vgpr_32 = COPY $vgpr2
     %1:vgpr_32 = COPY $vgpr1

0 commit comments

Comments
 (0)