@@ -40,11 +40,8 @@ define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
4040; GFX7-LABEL: v_add_v2i16_fneg_lhs:
4141; GFX7: ; %bb.0:
4242; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43- ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
44- ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
45- ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
46- ; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
47- ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0
43+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
44+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1
4845; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
4946; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
5047; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -79,11 +76,8 @@ define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
7976; GFX7-LABEL: v_add_v2i16_fneg_rhs:
8077; GFX7: ; %bb.0:
8178; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82- ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
83- ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
84- ; GFX7-NEXT: v_or_b32_e32 v2, v3, v2
85- ; GFX7-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
86- ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
79+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -v2
80+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v3, -v3
8781; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
8882; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
8983; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -118,18 +112,12 @@ define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
118112; GFX7-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
119113; GFX7: ; %bb.0:
120114; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121- ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
122- ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
123- ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
124- ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v3
125- ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
126- ; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
127- ; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
128- ; GFX7-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
129- ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
130- ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v1
131- ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
132- ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v3
115+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
116+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -v2
117+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1
118+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v3, -v3
119+ ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
120+ ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
133121; GFX7-NEXT: s_setpc_b64 s[30:31]
134122;
135123; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
@@ -434,17 +422,15 @@ define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
434422define amdgpu_ps i32 @s_add_v2i16_fneg_lhs (<2 x half > inreg %a , <2 x i16 > inreg %b ) {
435423; GFX7-LABEL: s_add_v2i16_fneg_lhs:
436424; GFX7: ; %bb.0:
437- ; GFX7-NEXT: s_lshl_b32 s1, s1, 16
438- ; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
439- ; GFX7-NEXT: s_or_b32 s0, s1, s0
440- ; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000
441- ; GFX7-NEXT: s_lshr_b32 s1, s0, 16
442- ; GFX7-NEXT: s_add_i32 s1, s1, s3
443- ; GFX7-NEXT: s_add_i32 s0, s0, s2
444- ; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
445- ; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
446- ; GFX7-NEXT: s_lshl_b32 s1, s1, 16
447- ; GFX7-NEXT: s_or_b32 s0, s0, s1
425+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -s1
426+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -s0
427+ ; GFX7-NEXT: v_add_i32_e32 v1, vcc, s3, v1
428+ ; GFX7-NEXT: v_add_i32_e32 v0, vcc, s2, v0
429+ ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
430+ ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
431+ ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
432+ ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
433+ ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
448434; GFX7-NEXT: ; return to shader part epilog
449435;
450436; GFX9-LABEL: s_add_v2i16_fneg_lhs:
@@ -490,17 +476,15 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
490476define amdgpu_ps i32 @s_add_v2i16_fneg_rhs (<2 x i16 > inreg %a , <2 x half > inreg %b ) {
491477; GFX7-LABEL: s_add_v2i16_fneg_rhs:
492478; GFX7: ; %bb.0:
493- ; GFX7-NEXT: s_lshl_b32 s3, s3, 16
494- ; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
495- ; GFX7-NEXT: s_or_b32 s2, s3, s2
496- ; GFX7-NEXT: s_xor_b32 s2, s2, 0x80008000
497- ; GFX7-NEXT: s_lshr_b32 s3, s2, 16
498- ; GFX7-NEXT: s_add_i32 s1, s1, s3
499- ; GFX7-NEXT: s_add_i32 s0, s0, s2
500- ; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
501- ; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
502- ; GFX7-NEXT: s_lshl_b32 s1, s1, 16
503- ; GFX7-NEXT: s_or_b32 s0, s0, s1
479+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -s3
480+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -s2
481+ ; GFX7-NEXT: v_add_i32_e32 v1, vcc, s1, v1
482+ ; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
483+ ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
484+ ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
485+ ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
486+ ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
487+ ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
504488; GFX7-NEXT: ; return to shader part epilog
505489;
506490; GFX9-LABEL: s_add_v2i16_fneg_rhs:
@@ -546,22 +530,17 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
546530define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs (<2 x half > inreg %a , <2 x half > inreg %b ) {
547531; GFX7-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
548532; GFX7: ; %bb.0:
549- ; GFX7-NEXT: s_lshl_b32 s1, s1, 16
550- ; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
551- ; GFX7-NEXT: s_or_b32 s0, s1, s0
552- ; GFX7-NEXT: s_lshl_b32 s1, s3, 16
553- ; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
554- ; GFX7-NEXT: s_or_b32 s1, s1, s2
555- ; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000
556- ; GFX7-NEXT: s_xor_b32 s1, s1, 0x80008000
557- ; GFX7-NEXT: s_lshr_b32 s2, s0, 16
558- ; GFX7-NEXT: s_lshr_b32 s3, s1, 16
559- ; GFX7-NEXT: s_add_i32 s2, s2, s3
560- ; GFX7-NEXT: s_add_i32 s0, s0, s1
561- ; GFX7-NEXT: s_and_b32 s1, s2, 0xffff
562- ; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
563- ; GFX7-NEXT: s_lshl_b32 s1, s1, 16
564- ; GFX7-NEXT: s_or_b32 s0, s0, s1
533+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -s0
534+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -s2
535+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -s1
536+ ; GFX7-NEXT: v_cvt_f16_f32_e64 v3, -s3
537+ ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
538+ ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v3
539+ ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
540+ ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
541+ ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
542+ ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
543+ ; GFX7-NEXT: v_readfirstlane_b32 s0, v0
565544; GFX7-NEXT: ; return to shader part epilog
566545;
567546; GFX9-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
0 commit comments