11# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2- # RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-fold-operands -mattr="+wavefrontsize32",+real-true16 -verify-machineinstrs -o - %s | FileCheck %s
2+ # RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-fold-operands -mattr=+real-true16 -o - %s | FileCheck %s
33
44---
# Test: a vgpr_32 V_FMA_MIXLO_F16 result is COPYed into a vgpr_16 register that
# feeds both sources of V_MAX_F16_t16_e64 (operand after the sources is -1,
# presumably the clamp bit -- confirm against the instruction definition).
# The added (`+`) assertions expect the COPY and the V_MAX_F16_t16_e64 to stay
# and the V_FMA_MIXLO_F16 to keep 0 in the operand that the removed (`-`)
# assertion had as 1.
55name : fold_16bit_madmix_clamp
66tracksRegLiveness : true
77registers :
88body : |
9- bb.0.entry :
9+ bb.0:
1010 liveins: $vgpr0, $vgpr1, $vgpr2
1111 ; CHECK-LABEL: name: fold_16bit_madmix_clamp
1212 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -16,18 +16,139 @@ body: |
1616 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1717 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1818 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
19- ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1 , [[COPY3]], 0, 0, implicit $mode, implicit $exec
19+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0 , [[COPY3]], 0, 0, implicit $mode, implicit $exec
2020 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
21- ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]]
21+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
22+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
2223 ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
23- %10:vgpr_32 = COPY $vgpr2
24- %9:vgpr_32 = COPY $vgpr1
25- %8:vgpr_32 = COPY $vgpr0
26- %12:sreg_32 = IMPLICIT_DEF
27- %13:vgpr_32 = COPY %12:sreg_32
28- %11:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %8:vgpr_32, 8, %9:vgpr_32, 0, %10:vgpr_32, 0, %13:vgpr_32, 0, 0, implicit $mode, implicit $exec
29- %15:vgpr_16 = COPY %11:vgpr_32
30- %14:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %15:vgpr_16, 0, %15:vgpr_16, -1, 0, 0, implicit $mode, implicit $exec
31- $vgpr0 = COPY %14:vgpr_16
24+ %0:vgpr_32 = COPY $vgpr2
25+ %1:vgpr_32 = COPY $vgpr1
26+ %2:vgpr_32 = COPY $vgpr0
27+ %3:sreg_32 = IMPLICIT_DEF
28+ %4:vgpr_32 = COPY %3
29+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
30+ %6:vgpr_16 = COPY %5
31+ %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
32+ $vgpr0 = COPY %7
33+ S_ENDPGM 0, implicit $vgpr0
34+ ...
35+
---
# Test: variant of fold_16bit_subreg_clamp with the subregister use already in
# place -- V_MAX_F16_t16_e64 reads %5.lo16 directly for both sources, with no
# intermediate COPY. The assertions expect the function to come out unchanged:
# V_FMA_MIXLO_F16 keeps its operands and V_MAX_F16_t16_e64 is kept.
# NOTE(review): the previous assertions were a copy of fold_16bit_madmix_clamp
# (wrong CHECK-LABEL, plus a vgpr_16 COPY this input never contains). The lines
# below mirror what fold_16bit_subreg_clamp expects once its COPY has been
# folded; re-run utils/update_mir_test_checks.py to confirm.
name:            fold_16bit_subreg_folded_clamp
tracksRegLiveness: true
registers:
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2
    ; CHECK-LABEL: name: fold_16bit_subreg_folded_clamp
    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
    ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
    ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
    ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
    ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = COPY $vgpr2
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr0
    %3:sreg_32 = IMPLICIT_DEF
    %4:vgpr_32 = COPY %3
    %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
    %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %5.lo16, 0, %5.lo16, -1, 0, 0, implicit $mode, implicit $exec
    $vgpr0 = COPY %6
    S_ENDPGM 0, implicit $vgpr0
...
66+
67+ ---
# Test: the V_FMA_MIXLO_F16 result reaches V_MAX_F16_t16_e64 through
# `COPY %5.lo16`. The assertions expect that COPY to disappear and
# V_MAX_F16_t16_e64 to read [[V_FMA_MIXLO_F16_]].lo16 directly for both
# sources; the V_MAX_F16_t16_e64 itself is kept and the V_FMA_MIXLO_F16
# operands are the same as in the input.
68+ name : fold_16bit_subreg_clamp
69+ tracksRegLiveness : true
70+ registers :
71+ body : |
72+ bb.0:
73+ liveins: $vgpr0, $vgpr1, $vgpr2
74+ ; CHECK-LABEL: name: fold_16bit_subreg_clamp
75+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
76+ ; CHECK-NEXT: {{ $}}
77+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
78+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
79+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
80+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
81+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
82+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
83+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
84+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
85+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
86+ %0:vgpr_32 = COPY $vgpr2
87+ %1:vgpr_32 = COPY $vgpr1
88+ %2:vgpr_32 = COPY $vgpr0
89+ %3:sreg_32 = IMPLICIT_DEF
90+ %4:vgpr_32 = COPY %3
91+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
92+ %6:vgpr_16 = COPY %5.lo16
93+ %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
94+ $vgpr0 = COPY %7
95+ S_ENDPGM 0, implicit $vgpr0
96+ ...
97+
98+ ---
# Test: the V_FMA_MIXLO_F16 result is copied into the physical register
# $vgpr10_lo16, which V_MAX_F16_t16_e64 then reads for both sources. The
# assertions match the input instruction-for-instruction: the COPY into the
# physical register and the V_MAX_F16_t16_e64 both remain, and the
# V_FMA_MIXLO_F16 operands are unchanged.
99+ name : fold_16bit_phyreg_clamp
100+ tracksRegLiveness : true
101+ registers :
102+ body : |
103+ bb.0:
104+ liveins: $vgpr0, $vgpr1, $vgpr2
105+ ; CHECK-LABEL: name: fold_16bit_phyreg_clamp
106+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
107+ ; CHECK-NEXT: {{ $}}
108+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
109+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
110+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
111+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
112+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
113+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
114+ ; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
115+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
116+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
117+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
118+ %0:vgpr_32 = COPY $vgpr2
119+ %1:vgpr_32 = COPY $vgpr1
120+ %2:vgpr_32 = COPY $vgpr0
121+ %3:sreg_32 = IMPLICIT_DEF
122+ %4:vgpr_32 = COPY %3
123+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
124+ $vgpr10_lo16 = COPY %5
125+ %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
126+ $vgpr0 = COPY %6
127+ S_ENDPGM 0, implicit $vgpr0
128+ ...
129+
130+ ---
# Test: both sources of V_MAX_F16_t16_e64 are a vgpr_16 IMPLICIT_DEF; there is
# no defining arithmetic instruction in the function. The assertions expect the
# IMPLICIT_DEF and the V_MAX_F16_t16_e64 to come out exactly as in the input.
131+ name : fold_16bit_undef_clamp
132+ tracksRegLiveness : true
133+ registers :
134+ body : |
135+ bb.0:
136+ liveins: $vgpr0, $vgpr1, $vgpr2
137+ ; CHECK-LABEL: name: fold_16bit_undef_clamp
138+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
139+ ; CHECK-NEXT: {{ $}}
140+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
141+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
142+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
143+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
144+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[DEF]], 0, [[DEF]], -1, 0, 0, implicit $mode, implicit $exec
145+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
146+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
147+ %0:vgpr_32 = COPY $vgpr2
148+ %1:vgpr_32 = COPY $vgpr1
149+ %2:vgpr_32 = COPY $vgpr0
150+ %3:vgpr_16 = IMPLICIT_DEF
151+ %4:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %3, 0, %3, -1, 0, 0, implicit $mode, implicit $exec
152+ $vgpr0 = COPY %4
32153 S_ENDPGM 0, implicit $vgpr0
33154 ...
0 commit comments