@@ -235,22 +235,19 @@ define i32 @v_udot2_fnegf32_c(<2 x i16> %a, <2 x i16> %b, float %c) {
235235; GFX906-LABEL: v_udot2_fnegf32_c:
236236; GFX906: ; %bb.0:
237237; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238- ; GFX906-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
239- ; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
238+ ; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
240239; GFX906-NEXT: s_setpc_b64 s[30:31]
241240;
242241; GFX908-LABEL: v_udot2_fnegf32_c:
243242; GFX908: ; %bb.0:
244243; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245- ; GFX908-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
246- ; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
244+ ; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
247245; GFX908-NEXT: s_setpc_b64 s[30:31]
248246;
249247; GFX10-LABEL: v_udot2_fnegf32_c:
250248; GFX10: ; %bb.0:
251249; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
252- ; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
253- ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
250+ ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
254251; GFX10-NEXT: s_setpc_b64 s[30:31]
255252 %neg.c = fneg float %c
256253 %cast.neg.c = bitcast float %neg.c to i32
@@ -262,22 +259,19 @@ define i32 @v_udot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) {
262259; GFX906-LABEL: v_udot2_fnegv2f16_c:
263260; GFX906: ; %bb.0:
264261; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
265- ; GFX906-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
266- ; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
262+ ; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
267263; GFX906-NEXT: s_setpc_b64 s[30:31]
268264;
269265; GFX908-LABEL: v_udot2_fnegv2f16_c:
270266; GFX908: ; %bb.0:
271267; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272- ; GFX908-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
273- ; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
268+ ; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
274269; GFX908-NEXT: s_setpc_b64 s[30:31]
275270;
276271; GFX10-LABEL: v_udot2_fnegv2f16_c:
277272; GFX10: ; %bb.0:
278273; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279- ; GFX10-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
280- ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
274+ ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
281275; GFX10-NEXT: s_setpc_b64 s[30:31]
282276 %neg.c = fneg <2 x half > %c
283277 %cast.neg.c = bitcast <2 x half > %neg.c to i32
@@ -289,22 +283,19 @@ define i32 @v_udot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
289283; GFX906-LABEL: v_udot2_shuffle10_a:
290284; GFX906: ; %bb.0:
291285; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292- ; GFX906-NEXT: v_alignbit_b32 v0, v0, v0, 16
293- ; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
286+ ; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[0,1,1]
294287; GFX906-NEXT: s_setpc_b64 s[30:31]
295288;
296289; GFX908-LABEL: v_udot2_shuffle10_a:
297290; GFX908: ; %bb.0:
298291; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299- ; GFX908-NEXT: v_alignbit_b32 v0, v0, v0, 16
300- ; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
292+ ; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[0,1,1]
301293; GFX908-NEXT: s_setpc_b64 s[30:31]
302294;
303295; GFX10-LABEL: v_udot2_shuffle10_a:
304296; GFX10: ; %bb.0:
305297; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306- ; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
307- ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
298+ ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[0,1,1]
308299; GFX10-NEXT: s_setpc_b64 s[30:31]
309300 %shuf.a = shufflevector <2 x i16 > %a , <2 x i16 > undef , <2 x i32 > <i32 1 , i32 0 >
310301 %r = call i32 @llvm.amdgcn.udot2 (<2 x i16 > %shuf.a , <2 x i16 > %b , i32 %c , i1 false )
@@ -315,22 +306,19 @@ define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
315306; GFX906-LABEL: v_udot2_shuffle10_b:
316307; GFX906: ; %bb.0:
317308; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318- ; GFX906-NEXT: v_alignbit_b32 v1, v1, v1, 16
319- ; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
309+ ; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 op_sel:[0,1,0] op_sel_hi:[1,0,1]
320310; GFX906-NEXT: s_setpc_b64 s[30:31]
321311;
322312; GFX908-LABEL: v_udot2_shuffle10_b:
323313; GFX908: ; %bb.0:
324314; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
325- ; GFX908-NEXT: v_alignbit_b32 v1, v1, v1, 16
326- ; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
315+ ; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 op_sel:[0,1,0] op_sel_hi:[1,0,1]
327316; GFX908-NEXT: s_setpc_b64 s[30:31]
328317;
329318; GFX10-LABEL: v_udot2_shuffle10_b:
330319; GFX10: ; %bb.0:
331320; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332- ; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
333- ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
321+ ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 op_sel:[0,1,0] op_sel_hi:[1,0,1]
334322; GFX10-NEXT: s_setpc_b64 s[30:31]
335323 %shuf.b = shufflevector <2 x i16 > %b , <2 x i16 > undef , <2 x i32 > <i32 1 , i32 0 >
336324 %r = call i32 @llvm.amdgcn.udot2 (<2 x i16 > %a , <2 x i16 > %shuf.b , i32 %c , i1 false )
0 commit comments