@@ -57,4 +57,153 @@ body: |
5757 %4:vgpr_16 = COPY %3:sgpr_lo16
5858 %5:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, %0:sreg_32, 0, killed %1:sreg_32, 0, killed %4:vgpr_16, 0, 0, implicit $exec
5959 S_ENDPGM 0, implicit %5
60+
# fold_16bit_madmix_clamp: the vgpr_32 result of V_FMA_MIXLO_F16 reaches
# V_MAX_F16_t16_e64 through a plain COPY into a vgpr_16 register, and the
# V_MAX uses that register for both sources.
# Expected output per the CHECK lines: the V_MAX is gone, the immediate
# preceding the last register operand of V_FMA_MIXLO_F16 changes from 0 to 1
# (presumably the clamp bit, going by the test name -- confirm against the
# instruction definition), and $vgpr0 is copied directly from the
# V_FMA_MIXLO_F16 result. The vgpr_16 COPY remains in the expected output.
61+ ---
62+ name : fold_16bit_madmix_clamp
63+ tracksRegLiveness : true
64+ registers :
65+ body : |
66+ bb.0:
67+ liveins: $vgpr0, $vgpr1, $vgpr2
68+ ; CHECK-LABEL: name: fold_16bit_madmix_clamp
69+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
70+ ; CHECK-NEXT: {{ $}}
71+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
72+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
73+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
74+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
75+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
76+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
77+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
78+ ; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
79+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
80+ %0:vgpr_32 = COPY $vgpr2
81+ %1:vgpr_32 = COPY $vgpr1
82+ %2:vgpr_32 = COPY $vgpr0
83+ %3:sreg_32 = IMPLICIT_DEF
84+ %4:vgpr_32 = COPY %3
85+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
86+ %6:vgpr_16 = COPY %5
87+ %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
88+ $vgpr0 = COPY %7
89+ S_ENDPGM 0, implicit $vgpr0
90+ ...
91+
# fold_16bit_subreg_1_clamp: V_MAX_F16_t16_e64 reads the V_FMA_MIXLO_F16
# result directly through the .lo16 subregister index, with no intermediate
# COPY.
# Expected output per the CHECK lines: nothing changes. The V_MAX is kept and
# the immediate preceding the last register operand of V_FMA_MIXLO_F16 stays 0,
# i.e. the fold seen in fold_16bit_madmix_clamp is not applied here.
92+ ---
93+ name : fold_16bit_subreg_1_clamp
94+ tracksRegLiveness : true
95+ registers :
96+ body : |
97+ bb.0:
98+ liveins: $vgpr0, $vgpr1, $vgpr2
99+ ; CHECK-LABEL: name: fold_16bit_subreg_1_clamp
100+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
101+ ; CHECK-NEXT: {{ $}}
102+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
103+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
104+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
105+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
106+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
107+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
108+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
109+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
110+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
111+ %0:vgpr_32 = COPY $vgpr2
112+ %1:vgpr_32 = COPY $vgpr1
113+ %2:vgpr_32 = COPY $vgpr0
114+ %3:sreg_32 = IMPLICIT_DEF
115+ %4:vgpr_32 = COPY %3
116+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
117+ %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %5.lo16, 0, %5.lo16, -1, 0, 0, implicit $mode, implicit $exec
118+ $vgpr0 = COPY %6
119+ S_ENDPGM 0, implicit $vgpr0
120+ ...
121+
# fold_16bit_subreg_2_clamp: the V_FMA_MIXLO_F16 result is first copied via
# its .lo16 subregister into a vgpr_16 register, and V_MAX_F16_t16_e64 reads
# that copy for both sources.
# Expected output per the CHECK lines: the subregister COPY is removed and the
# V_MAX reads [[V_FMA_MIXLO_F16_]].lo16 directly, but the V_MAX itself is kept
# and the immediate preceding the last register operand of V_FMA_MIXLO_F16
# stays 0.
122+ ---
123+ name : fold_16bit_subreg_2_clamp
124+ tracksRegLiveness : true
125+ registers :
126+ body : |
127+ bb.0:
128+ liveins: $vgpr0, $vgpr1, $vgpr2
129+ ; CHECK-LABEL: name: fold_16bit_subreg_2_clamp
130+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
131+ ; CHECK-NEXT: {{ $}}
132+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
133+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
134+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
135+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
136+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
137+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
138+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
139+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
140+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
141+ %0:vgpr_32 = COPY $vgpr2
142+ %1:vgpr_32 = COPY $vgpr1
143+ %2:vgpr_32 = COPY $vgpr0
144+ %3:sreg_32 = IMPLICIT_DEF
145+ %4:vgpr_32 = COPY %3
146+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
147+ %6:vgpr_16 = COPY %5.lo16
148+ %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
149+ $vgpr0 = COPY %7
150+ S_ENDPGM 0, implicit $vgpr0
151+ ...
152+
# fold_16bit_phyreg_clamp: the V_FMA_MIXLO_F16 result is copied into the
# physical register $vgpr10_lo16, and V_MAX_F16_t16_e64 reads that physical
# register for both sources.
# Expected output per the CHECK lines: nothing changes. The COPY into
# $vgpr10_lo16 and the V_MAX are both kept, and the immediate preceding the
# last register operand of V_FMA_MIXLO_F16 stays 0.
153+ ---
154+ name : fold_16bit_phyreg_clamp
155+ tracksRegLiveness : true
156+ registers :
157+ body : |
158+ bb.0:
159+ liveins: $vgpr0, $vgpr1, $vgpr2
160+ ; CHECK-LABEL: name: fold_16bit_phyreg_clamp
161+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
162+ ; CHECK-NEXT: {{ $}}
163+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
164+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
165+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
166+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
167+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
168+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
169+ ; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
170+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
171+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
172+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
173+ %0:vgpr_32 = COPY $vgpr2
174+ %1:vgpr_32 = COPY $vgpr1
175+ %2:vgpr_32 = COPY $vgpr0
176+ %3:sreg_32 = IMPLICIT_DEF
177+ %4:vgpr_32 = COPY %3
178+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
179+ $vgpr10_lo16 = COPY %5
180+ %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
181+ $vgpr0 = COPY %6
182+ S_ENDPGM 0, implicit $vgpr0
183+ ...
184+
# fold_16bit_undef_clamp: both sources of V_MAX_F16_t16_e64 are a vgpr_16
# register defined by IMPLICIT_DEF; there is no V_FMA_MIXLO_F16 in this
# function.
# Expected output per the CHECK lines: nothing changes. The three input COPYs,
# the IMPLICIT_DEF and the V_MAX are all kept as written.
185+ ---
186+ name : fold_16bit_undef_clamp
187+ tracksRegLiveness : true
188+ registers :
189+ body : |
190+ bb.0:
191+ liveins: $vgpr0, $vgpr1, $vgpr2
192+ ; CHECK-LABEL: name: fold_16bit_undef_clamp
193+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
194+ ; CHECK-NEXT: {{ $}}
195+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
196+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
197+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
198+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
199+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[DEF]], 0, [[DEF]], -1, 0, 0, implicit $mode, implicit $exec
200+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
201+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
202+ %0:vgpr_32 = COPY $vgpr2
203+ %1:vgpr_32 = COPY $vgpr1
204+ %2:vgpr_32 = COPY $vgpr0
205+ %3:vgpr_16 = IMPLICIT_DEF
206+ %4:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %3, 0, %3, -1, 0, 0, implicit $mode, implicit $exec
207+ $vgpr0 = COPY %4
208+ S_ENDPGM 0, implicit $vgpr0
60209 ...
0 commit comments