@@ -196,9 +196,83 @@ define i32 @v_add_i32(i32 %a, i32 %b) {
196196 ret i32 %c
197197}
198198
199- ; TODO: Add test for s_add_v2i16. The current test does not work
200- ; due to a bug associated with
201- ; AMDGPURegBankLegalizeCombiner::tryEliminateReadAnyLane(..)
199+ define <2 x i16 > @s_add_v2i16 (<2 x i16 > inreg %a , <2 x i16 > inreg %b ) { ; scalar variant of the v2i16 add test: both operands are inreg, and every check block below expects s_* ALU instructions followed by v_mov_b32 of the result before s_setpc_b64
200+ ; GFX7-LABEL: s_add_v2i16:
201+ ; GFX7: ; %bb.0:
202+ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203+ ; GFX7-NEXT: s_add_i32 s16, s16, s18
204+ ; GFX7-NEXT: s_add_i32 s17, s17, s19
205+ ; GFX7-NEXT: v_mov_b32_e32 v0, s16
206+ ; GFX7-NEXT: v_mov_b32_e32 v1, s17
207+ ; GFX7-NEXT: s_setpc_b64 s[30:31]
208+ ;
209+ ; GFX9-LABEL: s_add_v2i16:
210+ ; GFX9: ; %bb.0:
211+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212+ ; GFX9-NEXT: s_lshr_b32 s4, s16, 16
213+ ; GFX9-NEXT: s_lshr_b32 s5, s17, 16
214+ ; GFX9-NEXT: s_add_i32 s16, s16, s17
215+ ; GFX9-NEXT: s_add_i32 s4, s4, s5
216+ ; GFX9-NEXT: s_pack_ll_b32_b16 s4, s16, s4
217+ ; GFX9-NEXT: v_mov_b32_e32 v0, s4
218+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
219+ ;
220+ ; GFX8-LABEL: s_add_v2i16:
221+ ; GFX8: ; %bb.0:
222+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223+ ; GFX8-NEXT: s_lshr_b32 s4, s16, 16
224+ ; GFX8-NEXT: s_lshr_b32 s5, s17, 16
225+ ; GFX8-NEXT: s_add_i32 s4, s4, s5
226+ ; GFX8-NEXT: s_add_i32 s16, s16, s17
227+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
228+ ; GFX8-NEXT: s_and_b32 s5, 0xffff, s16
229+ ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
230+ ; GFX8-NEXT: s_or_b32 s4, s5, s4
231+ ; GFX8-NEXT: v_mov_b32_e32 v0, s4
232+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
233+ ;
234+ ; GFX10-LABEL: s_add_v2i16:
235+ ; GFX10: ; %bb.0:
236+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237+ ; GFX10-NEXT: s_lshr_b32 s4, s16, 16
238+ ; GFX10-NEXT: s_lshr_b32 s5, s17, 16
239+ ; GFX10-NEXT: s_add_i32 s16, s16, s17
240+ ; GFX10-NEXT: s_add_i32 s4, s4, s5
241+ ; GFX10-NEXT: s_pack_ll_b32_b16 s4, s16, s4
242+ ; GFX10-NEXT: v_mov_b32_e32 v0, s4
243+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
244+ ;
245+ ; GFX11-LABEL: s_add_v2i16:
246+ ; GFX11: ; %bb.0:
247+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248+ ; GFX11-NEXT: s_lshr_b32 s2, s0, 16
249+ ; GFX11-NEXT: s_lshr_b32 s3, s1, 16
250+ ; GFX11-NEXT: s_add_i32 s0, s0, s1
251+ ; GFX11-NEXT: s_add_i32 s2, s2, s3
252+ ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2
253+ ; GFX11-NEXT: v_mov_b32_e32 v0, s0
254+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
255+ ;
256+ ; GFX12-LABEL: s_add_v2i16:
257+ ; GFX12: ; %bb.0:
258+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
259+ ; GFX12-NEXT: s_wait_expcnt 0x0
260+ ; GFX12-NEXT: s_wait_samplecnt 0x0
261+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
262+ ; GFX12-NEXT: s_wait_kmcnt 0x0
263+ ; GFX12-NEXT: s_lshr_b32 s2, s0, 16
264+ ; GFX12-NEXT: s_lshr_b32 s3, s1, 16
265+ ; GFX12-NEXT: s_add_co_i32 s0, s0, s1
266+ ; GFX12-NEXT: s_wait_alu 0xfffe
267+ ; GFX12-NEXT: s_add_co_i32 s2, s2, s3
268+ ; GFX12-NEXT: s_wait_alu 0xfffe
269+ ; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s2
270+ ; GFX12-NEXT: s_wait_alu 0xfffe
271+ ; GFX12-NEXT: v_mov_b32_e32 v0, s0
272+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
273+ %c = add <2 x i16 > %a , %b ; per the checks above, the GFX7 block adds two scalar registers per operand (result in v0 and v1), while the other blocks shift out each operand's high half with s_lshr_b32, add low and high halves separately, and repack into one register (s_pack_ll_b32_b16, or and/lshl/or in the GFX8 block)
274+ ret <2 x i16 > %c
275+ }
202276
203277define <2 x i16 > @v_add_v2i16 (<2 x i16 > %a , <2 x i16 > %b ) {
204278; GFX7-LABEL: v_add_v2i16:
0 commit comments