@@ -26,23 +26,20 @@ define amdgpu_cs i8 @abs_sgpr_i8(i8 inreg %arg) {
2626;
2727; SDAG8-LABEL: abs_sgpr_i8:
2828; SDAG8: ; %bb.0:
29- ; SDAG8-NEXT: s_bfe_i32 s0, s0, 0x80000
30- ; SDAG8-NEXT: s_sext_i32_i16 s0, s0
29+ ; SDAG8-NEXT: s_sext_i32_i8 s0, s0
3130; SDAG8-NEXT: s_abs_i32 s0, s0
3231; SDAG8-NEXT: ; return to shader part epilog
3332;
3433; SDAG10-LABEL: abs_sgpr_i8:
3534; SDAG10: ; %bb.0:
36- ; SDAG10-NEXT: s_bfe_i32 s0, s0, 0x80000
37- ; SDAG10-NEXT: s_sext_i32_i16 s0, s0
35+ ; SDAG10-NEXT: s_sext_i32_i8 s0, s0
3836; SDAG10-NEXT: s_abs_i32 s0, s0
3937; SDAG10-NEXT: ; return to shader part epilog
4038;
4139; SDAG1250-LABEL: abs_sgpr_i8:
4240; SDAG1250: ; %bb.0:
43- ; SDAG1250-NEXT: s_bfe_i32 s0, s0, 0x80000
44- ; SDAG1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
45- ; SDAG1250-NEXT: s_sext_i32_i16 s0, s0
41+ ; SDAG1250-NEXT: s_sext_i32_i8 s0, s0
42+ ; SDAG1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4643; SDAG1250-NEXT: s_abs_i32 s0, s0
4744; SDAG1250-NEXT: ; return to shader part epilog
4845;
@@ -77,31 +74,39 @@ define amdgpu_cs i8 @abs_sgpr_i8(i8 inreg %arg) {
7774define amdgpu_cs i8 @abs_sgpr_i8_neg(i8 inreg %arg) {
7875; SDAG6-LABEL: abs_sgpr_i8_neg:
7976; SDAG6: ; %bb.0:
80- ; SDAG6-NEXT: s_sext_i32_i8 s0, s0
81- ; SDAG6-NEXT: s_abs_i32 s0, s0
82- ; SDAG6-NEXT: s_sub_i32 s0, 0, s0
77+ ; SDAG6-NEXT: s_sext_i32_i8 s1, s0
78+ ; SDAG6-NEXT: s_ashr_i32 s1, s1, 7
79+ ; SDAG6-NEXT: s_xor_b32 s0, s0, s1
80+ ; SDAG6-NEXT: s_sub_i32 s0, s1, s0
8381; SDAG6-NEXT: ; return to shader part epilog
8482;
8583; SDAG8-LABEL: abs_sgpr_i8_neg:
8684; SDAG8: ; %bb.0:
87- ; SDAG8-NEXT: s_sext_i32_i8 s0, s0
88- ; SDAG8-NEXT: s_abs_i32 s0, s0
89- ; SDAG8-NEXT: s_sub_i32 s0, 0, s0
85+ ; SDAG8-NEXT: s_bfe_i32 s1, s0, 0x80000
86+ ; SDAG8-NEXT: s_sext_i32_i16 s1, s1
87+ ; SDAG8-NEXT: s_ashr_i32 s1, s1, 7
88+ ; SDAG8-NEXT: s_xor_b32 s0, s0, s1
89+ ; SDAG8-NEXT: s_sub_i32 s0, s1, s0
9090; SDAG8-NEXT: ; return to shader part epilog
9191;
9292; SDAG10-LABEL: abs_sgpr_i8_neg:
9393; SDAG10: ; %bb.0:
94- ; SDAG10-NEXT: s_sext_i32_i8 s0, s0
95- ; SDAG10-NEXT: s_abs_i32 s0, s0
96- ; SDAG10-NEXT: s_sub_i32 s0, 0, s0
94+ ; SDAG10-NEXT: s_bfe_i32 s1, s0, 0x80000
95+ ; SDAG10-NEXT: s_sext_i32_i16 s1, s1
96+ ; SDAG10-NEXT: s_ashr_i32 s1, s1, 7
97+ ; SDAG10-NEXT: s_xor_b32 s0, s0, s1
98+ ; SDAG10-NEXT: s_sub_i32 s0, s1, s0
9799; SDAG10-NEXT: ; return to shader part epilog
98100;
99101; SDAG1250-LABEL: abs_sgpr_i8_neg:
100102; SDAG1250: ; %bb.0:
101- ; SDAG1250-NEXT: s_sext_i32_i8 s0, s0
103+ ; SDAG1250-NEXT: s_bfe_i32 s1, s0, 0x80000
102104; SDAG1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
103- ; SDAG1250-NEXT: s_abs_i32 s0, s0
104- ; SDAG1250-NEXT: s_sub_co_i32 s0, 0, s0
105+ ; SDAG1250-NEXT: s_sext_i32_i16 s1, s1
106+ ; SDAG1250-NEXT: s_ashr_i32 s1, s1, 7
107+ ; SDAG1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
108+ ; SDAG1250-NEXT: s_xor_b32 s0, s0, s1
109+ ; SDAG1250-NEXT: s_sub_co_i32 s0, s1, s0
105110; SDAG1250-NEXT: ; return to shader part epilog
106111;
107112; GFX6-LABEL: abs_sgpr_i8_neg:
@@ -195,9 +200,10 @@ define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
195200define amdgpu_ps i16 @abs_sgpr_i16_neg(i16 inreg %arg) {
196201; SDAG6-LABEL: abs_sgpr_i16_neg:
197202; SDAG6: ; %bb.0:
198- ; SDAG6-NEXT: s_sext_i32_i16 s0, s0
199- ; SDAG6-NEXT: s_abs_i32 s0, s0
200- ; SDAG6-NEXT: s_sub_i32 s0, 0, s0
203+ ; SDAG6-NEXT: s_sext_i32_i16 s1, s0
204+ ; SDAG6-NEXT: s_ashr_i32 s1, s1, 15
205+ ; SDAG6-NEXT: s_xor_b32 s0, s0, s1
206+ ; SDAG6-NEXT: s_sub_i32 s0, s1, s0
201207; SDAG6-NEXT: ; return to shader part epilog
202208;
203209; SDAG8-LABEL: abs_sgpr_i16_neg:
@@ -745,10 +751,8 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
745751;
746752; SDAG8-LABEL: abs_sgpr_v2i8:
747753; SDAG8: ; %bb.0:
748- ; SDAG8-NEXT: s_bfe_i32 s1, s1, 0x80000
749- ; SDAG8-NEXT: s_bfe_i32 s0, s0, 0x80000
750- ; SDAG8-NEXT: s_sext_i32_i16 s1, s1
751- ; SDAG8-NEXT: s_sext_i32_i16 s0, s0
754+ ; SDAG8-NEXT: s_sext_i32_i8 s1, s1
755+ ; SDAG8-NEXT: s_sext_i32_i8 s0, s0
752756; SDAG8-NEXT: s_abs_i32 s1, s1
753757; SDAG8-NEXT: s_abs_i32 s0, s0
754758; SDAG8-NEXT: s_lshl_b32 s2, s1, 8
@@ -757,10 +761,8 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
757761;
758762; SDAG10-LABEL: abs_sgpr_v2i8:
759763; SDAG10: ; %bb.0:
760- ; SDAG10-NEXT: s_bfe_i32 s1, s1, 0x80000
761- ; SDAG10-NEXT: s_bfe_i32 s0, s0, 0x80000
762- ; SDAG10-NEXT: s_sext_i32_i16 s1, s1
763- ; SDAG10-NEXT: s_sext_i32_i16 s0, s0
764+ ; SDAG10-NEXT: s_sext_i32_i8 s1, s1
765+ ; SDAG10-NEXT: s_sext_i32_i8 s0, s0
764766; SDAG10-NEXT: s_abs_i32 s1, s1
765767; SDAG10-NEXT: s_abs_i32 s0, s0
766768; SDAG10-NEXT: s_lshl_b32 s2, s1, 8
@@ -769,10 +771,8 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
769771;
770772; SDAG1250-LABEL: abs_sgpr_v2i8:
771773; SDAG1250: ; %bb.0:
772- ; SDAG1250-NEXT: s_bfe_i32 s1, s1, 0x80000
773- ; SDAG1250-NEXT: s_bfe_i32 s0, s0, 0x80000
774- ; SDAG1250-NEXT: s_sext_i32_i16 s1, s1
775- ; SDAG1250-NEXT: s_sext_i32_i16 s0, s0
774+ ; SDAG1250-NEXT: s_sext_i32_i8 s1, s1
775+ ; SDAG1250-NEXT: s_sext_i32_i8 s0, s0
776776; SDAG1250-NEXT: s_abs_i32 s1, s1
777777; SDAG1250-NEXT: s_abs_i32 s0, s0
778778; SDAG1250-NEXT: s_lshl_b32 s2, s1, 8
@@ -917,14 +917,11 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
917917;
918918; SDAG8-LABEL: abs_sgpr_v3i8:
919919; SDAG8: ; %bb.0:
920- ; SDAG8-NEXT: s_bfe_i32 s1, s1, 0x80000
921- ; SDAG8-NEXT: s_bfe_i32 s0, s0, 0x80000
922- ; SDAG8-NEXT: s_sext_i32_i16 s1, s1
923- ; SDAG8-NEXT: s_sext_i32_i16 s0, s0
920+ ; SDAG8-NEXT: s_sext_i32_i8 s1, s1
921+ ; SDAG8-NEXT: s_sext_i32_i8 s0, s0
924922; SDAG8-NEXT: s_abs_i32 s1, s1
925- ; SDAG8-NEXT: s_bfe_i32 s2, s2, 0x80000
926923; SDAG8-NEXT: s_abs_i32 s0, s0
927- ; SDAG8-NEXT: s_sext_i32_i16 s2, s2
924+ ; SDAG8-NEXT: s_sext_i32_i8 s2, s2
928925; SDAG8-NEXT: s_lshl_b32 s1, s1, 8
929926; SDAG8-NEXT: s_abs_i32 s2, s2
930927; SDAG8-NEXT: s_or_b32 s0, s0, s1
@@ -936,40 +933,34 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
936933;
937934; SDAG10-LABEL: abs_sgpr_v3i8:
938935; SDAG10: ; %bb.0:
939- ; SDAG10-NEXT: s_bfe_i32 s1, s1, 0x80000
940- ; SDAG10-NEXT: s_bfe_i32 s0, s0, 0x80000
941- ; SDAG10-NEXT: s_sext_i32_i16 s1, s1
942- ; SDAG10-NEXT: s_bfe_i32 s2, s2, 0x80000
943- ; SDAG10-NEXT: s_sext_i32_i16 s0, s0
936+ ; SDAG10-NEXT: s_sext_i32_i8 s1, s1
937+ ; SDAG10-NEXT: s_sext_i32_i8 s0, s0
944938; SDAG10-NEXT: s_abs_i32 s1, s1
945939; SDAG10-NEXT: s_abs_i32 s0, s0
940+ ; SDAG10-NEXT: s_sext_i32_i8 s2, s2
946941; SDAG10-NEXT: s_lshl_b32 s1, s1, 8
947- ; SDAG10-NEXT: s_sext_i32_i16 s2, s2
948- ; SDAG10-NEXT: s_or_b32 s0, s0, s1
949942; SDAG10-NEXT: s_abs_i32 s2, s2
950- ; SDAG10-NEXT: s_and_b32 s1, s0, 0xffff
951- ; SDAG10-NEXT: s_lshl_b32 s3, s2, 16
952- ; SDAG10-NEXT: s_or_b32 s1, s1, s3
943+ ; SDAG10-NEXT: s_or_b32 s0, s0, s1
944+ ; SDAG10-NEXT: s_lshl_b32 s1, s2, 16
945+ ; SDAG10-NEXT: s_and_b32 s3, s0, 0xffff
946+ ; SDAG10-NEXT: s_or_b32 s1, s3, s1
953947; SDAG10-NEXT: s_lshr_b32 s1, s1, 8
954948; SDAG10-NEXT: ; return to shader part epilog
955949;
956950; SDAG1250-LABEL: abs_sgpr_v3i8:
957951; SDAG1250: ; %bb.0:
958- ; SDAG1250-NEXT: s_bfe_i32 s1, s1, 0x80000
959- ; SDAG1250-NEXT: s_bfe_i32 s0, s0, 0x80000
960- ; SDAG1250-NEXT: s_sext_i32_i16 s1, s1
961- ; SDAG1250-NEXT: s_bfe_i32 s2, s2, 0x80000
962- ; SDAG1250-NEXT: s_sext_i32_i16 s0, s0
952+ ; SDAG1250-NEXT: s_sext_i32_i8 s1, s1
953+ ; SDAG1250-NEXT: s_sext_i32_i8 s0, s0
963954; SDAG1250-NEXT: s_abs_i32 s1, s1
964955; SDAG1250-NEXT: s_abs_i32 s0, s0
956+ ; SDAG1250-NEXT: s_sext_i32_i8 s2, s2
965957; SDAG1250-NEXT: s_lshl_b32 s1, s1, 8
966- ; SDAG1250-NEXT: s_sext_i32_i16 s2, s2
967- ; SDAG1250-NEXT: s_or_b32 s0, s0, s1
968958; SDAG1250-NEXT: s_abs_i32 s2, s2
969- ; SDAG1250-NEXT: s_and_b32 s1, s0, 0xffff
970- ; SDAG1250-NEXT: s_lshl_b32 s3, s2, 16
959+ ; SDAG1250-NEXT: s_or_b32 s0, s0, s1
960+ ; SDAG1250-NEXT: s_lshl_b32 s1, s2, 16
961+ ; SDAG1250-NEXT: s_and_b32 s3, s0, 0xffff
971962; SDAG1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
972- ; SDAG1250-NEXT: s_or_b32 s1, s1, s3
963+ ; SDAG1250-NEXT: s_or_b32 s1, s3, s1
973964; SDAG1250-NEXT: s_lshr_b32 s1, s1, 8
974965; SDAG1250-NEXT: ; return to shader part epilog
975966;
0 commit comments