@@ -73,18 +73,139 @@ body: |
7373 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
7474 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
7575 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
76- ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
76+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
7777 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
78- ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]]
78+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
79+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
7980 ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
80- %10:vgpr_32 = COPY $vgpr2
81- %9:vgpr_32 = COPY $vgpr1
82- %8:vgpr_32 = COPY $vgpr0
83- %12:sreg_32 = IMPLICIT_DEF
84- %13:vgpr_32 = COPY %12:sreg_32
85- %11:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %8:vgpr_32, 8, %9:vgpr_32, 0, %10:vgpr_32, 0, %13:vgpr_32, 0, 0, implicit $mode, implicit $exec
86- %15:vgpr_16 = COPY %11:vgpr_32
87- %14:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %15:vgpr_16, 0, %15:vgpr_16, -1, 0, 0, implicit $mode, implicit $exec
88- $vgpr0 = COPY %14:vgpr_16
81+ %0:vgpr_32 = COPY $vgpr2
82+ %1:vgpr_32 = COPY $vgpr1
83+ %2:vgpr_32 = COPY $vgpr0
84+ %3:sreg_32 = IMPLICIT_DEF
85+ %4:vgpr_32 = COPY %3
86+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
87+ %6:vgpr_16 = COPY %5
88+ %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
89+ $vgpr0 = COPY %7
90+ S_ENDPGM 0, implicit $vgpr0
91+ ...
92+
93+ ---
94+ name : fold_16bit_subreg_folded_clamp
95+ tracksRegLiveness : true
96+ registers :
97+ body : |
98+ bb.0:
99+ liveins: $vgpr0, $vgpr1, $vgpr2
100+ ; CHECK-LABEL: name: fold_16bit_subreg_folded_clamp
101+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
102+ ; CHECK-NEXT: {{ $}}
103+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
104+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
105+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
106+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
107+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
108+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
109+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
110+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
111+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
112+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
113+ %0:vgpr_32 = COPY $vgpr2
114+ %1:vgpr_32 = COPY $vgpr1
115+ %2:vgpr_32 = COPY $vgpr0
116+ %3:sreg_32 = IMPLICIT_DEF
117+ %4:vgpr_32 = COPY %3
118+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
119+ %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %5.lo16, 0, %5.lo16, -1, 0, 0, implicit $mode, implicit $exec
120+ $vgpr0 = COPY %6
121+ S_ENDPGM 0, implicit $vgpr0
122+ ...
123+
124+ ---
125+ name : fold_16bit_subreg_clamp
126+ tracksRegLiveness : true
127+ registers :
128+ body : |
129+ bb.0:
130+ liveins: $vgpr0, $vgpr1, $vgpr2
131+ ; CHECK-LABEL: name: fold_16bit_subreg_clamp
132+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
133+ ; CHECK-NEXT: {{ $}}
134+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
135+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
136+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
137+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
138+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
139+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
140+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
141+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
142+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
143+ %0:vgpr_32 = COPY $vgpr2
144+ %1:vgpr_32 = COPY $vgpr1
145+ %2:vgpr_32 = COPY $vgpr0
146+ %3:sreg_32 = IMPLICIT_DEF
147+ %4:vgpr_32 = COPY %3
148+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
149+ %6:vgpr_16 = COPY %5.lo16
150+ %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
151+ $vgpr0 = COPY %7
152+ S_ENDPGM 0, implicit $vgpr0
153+ ...
154+
155+ ---
156+ name : fold_16bit_phyreg_clamp
157+ tracksRegLiveness : true
158+ registers :
159+ body : |
160+ bb.0:
161+ liveins: $vgpr0, $vgpr1, $vgpr2
162+ ; CHECK-LABEL: name: fold_16bit_phyreg_clamp
163+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
164+ ; CHECK-NEXT: {{ $}}
165+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
166+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
167+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
168+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
169+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
170+ ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
171+ ; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
172+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
173+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
174+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
175+ %0:vgpr_32 = COPY $vgpr2
176+ %1:vgpr_32 = COPY $vgpr1
177+ %2:vgpr_32 = COPY $vgpr0
178+ %3:sreg_32 = IMPLICIT_DEF
179+ %4:vgpr_32 = COPY %3
180+ %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
181+ $vgpr10_lo16 = COPY %5
182+ %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
183+ $vgpr0 = COPY %6
184+ S_ENDPGM 0, implicit $vgpr0
185+ ...
186+
187+ ---
188+ name : fold_16bit_undef_clamp
189+ tracksRegLiveness : true
190+ registers :
191+ body : |
192+ bb.0:
193+ liveins: $vgpr0, $vgpr1, $vgpr2
194+ ; CHECK-LABEL: name: fold_16bit_undef_clamp
195+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
196+ ; CHECK-NEXT: {{ $}}
197+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
198+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
199+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
200+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
201+ ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[DEF]], 0, [[DEF]], -1, 0, 0, implicit $mode, implicit $exec
202+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
203+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
204+ %0:vgpr_32 = COPY $vgpr2
205+ %1:vgpr_32 = COPY $vgpr1
206+ %2:vgpr_32 = COPY $vgpr0
207+ %3:vgpr_16 = IMPLICIT_DEF
208+ %4:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %3, 0, %3, -1, 0, 0, implicit $mode, implicit $exec
209+ $vgpr0 = COPY %4
89210 S_ENDPGM 0, implicit $vgpr0
90211 ...
0 commit comments