Skip to content

Commit 2982268

Browse files
committed
address comment
1 parent c8affb2 commit 2982268

File tree

2 files changed

+10
-13
lines changed

2 files changed

+10
-13
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1820,7 +1820,7 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
18201820
return false;
18211821

18221822
// Look through COPY. COPY only observed with True16.
1823-
MachineOperand *DefSrc = TRI->lookThruCopyLike(ClampSrc->getReg(), MRI);
1823+
MachineOperand *DefSrc = lookUpCopyChain(*TII, *MRI, ClampSrc->getReg());
18241824
MachineInstr *Def = MRI->getVRegDef(DefSrc && DefSrc->isReg() ? DefSrc->getReg() : ClampSrc->getReg());
18251825

18261826
// The type of clamp must be compatible.

llvm/test/CodeGen/AMDGPU/true16-fold.mir

Lines changed: 9 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,9 @@ body: |
1616
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1717
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1818
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
19-
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
19+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
2020
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
21-
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
22-
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
21+
; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
2322
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
2423
%0:vgpr_32 = COPY $vgpr2
2524
%1:vgpr_32 = COPY $vgpr1
@@ -34,13 +33,13 @@ body: |
3433
...
3534

3635
---
37-
name: fold_16bit_subreg_folded_clamp
36+
name: fold_16bit_subreg_1_clamp
3837
tracksRegLiveness: true
3938
registers:
4039
body: |
4140
bb.0:
4241
liveins: $vgpr0, $vgpr1, $vgpr2
43-
; CHECK-LABEL: name: fold_16bit_madmix_clamp
42+
; CHECK-LABEL: name: fold_16bit_subreg_1_clamp
4443
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
4544
; CHECK-NEXT: {{ $}}
4645
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@@ -49,8 +48,7 @@ body: |
4948
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
5049
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
5150
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
52-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
53-
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
51+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
5452
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
5553
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
5654
%0:vgpr_32 = COPY $vgpr2
@@ -65,13 +63,13 @@ body: |
6563
...
6664

6765
---
68-
name: fold_16bit_subreg_clamp
66+
name: fold_16bit_subreg_2_clamp
6967
tracksRegLiveness: true
7068
registers:
7169
body: |
7270
bb.0:
7371
liveins: $vgpr0, $vgpr1, $vgpr2
74-
; CHECK-LABEL: name: fold_16bit_subreg_clamp
72+
; CHECK-LABEL: name: fold_16bit_subreg_2_clamp
7573
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
7674
; CHECK-NEXT: {{ $}}
7775
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@@ -110,10 +108,9 @@ body: |
110108
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
111109
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
112110
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
113-
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
111+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
114112
; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
115-
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
116-
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
113+
; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
117114
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
118115
%0:vgpr_32 = COPY $vgpr2
119116
%1:vgpr_32 = COPY $vgpr1

0 commit comments

Comments
 (0)