Skip to content

Commit 0d9be16

Browse files
committed
Update testcases to test for the inferior code generation resulting from this approach.
1 parent bf479e5 commit 0d9be16

File tree

1 file changed

+51
-60
lines changed

1 file changed

+51
-60
lines changed

llvm/test/CodeGen/AMDGPU/llvm.abs.ll

Lines changed: 51 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -26,23 +26,20 @@ define amdgpu_cs i8 @abs_sgpr_i8(i8 inreg %arg) {
2626
;
2727
; SDAG8-LABEL: abs_sgpr_i8:
2828
; SDAG8: ; %bb.0:
29-
; SDAG8-NEXT: s_bfe_i32 s0, s0, 0x80000
30-
; SDAG8-NEXT: s_sext_i32_i16 s0, s0
29+
; SDAG8-NEXT: s_sext_i32_i8 s0, s0
3130
; SDAG8-NEXT: s_abs_i32 s0, s0
3231
; SDAG8-NEXT: ; return to shader part epilog
3332
;
3433
; SDAG10-LABEL: abs_sgpr_i8:
3534
; SDAG10: ; %bb.0:
36-
; SDAG10-NEXT: s_bfe_i32 s0, s0, 0x80000
37-
; SDAG10-NEXT: s_sext_i32_i16 s0, s0
35+
; SDAG10-NEXT: s_sext_i32_i8 s0, s0
3836
; SDAG10-NEXT: s_abs_i32 s0, s0
3937
; SDAG10-NEXT: ; return to shader part epilog
4038
;
4139
; SDAG1250-LABEL: abs_sgpr_i8:
4240
; SDAG1250: ; %bb.0:
43-
; SDAG1250-NEXT: s_bfe_i32 s0, s0, 0x80000
44-
; SDAG1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
45-
; SDAG1250-NEXT: s_sext_i32_i16 s0, s0
41+
; SDAG1250-NEXT: s_sext_i32_i8 s0, s0
42+
; SDAG1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4643
; SDAG1250-NEXT: s_abs_i32 s0, s0
4744
; SDAG1250-NEXT: ; return to shader part epilog
4845
;
@@ -77,31 +74,39 @@ define amdgpu_cs i8 @abs_sgpr_i8(i8 inreg %arg) {
7774
define amdgpu_cs i8 @abs_sgpr_i8_neg(i8 inreg %arg) {
7875
; SDAG6-LABEL: abs_sgpr_i8_neg:
7976
; SDAG6: ; %bb.0:
80-
; SDAG6-NEXT: s_sext_i32_i8 s0, s0
81-
; SDAG6-NEXT: s_abs_i32 s0, s0
82-
; SDAG6-NEXT: s_sub_i32 s0, 0, s0
77+
; SDAG6-NEXT: s_sext_i32_i8 s1, s0
78+
; SDAG6-NEXT: s_ashr_i32 s1, s1, 7
79+
; SDAG6-NEXT: s_xor_b32 s0, s0, s1
80+
; SDAG6-NEXT: s_sub_i32 s0, s1, s0
8381
; SDAG6-NEXT: ; return to shader part epilog
8482
;
8583
; SDAG8-LABEL: abs_sgpr_i8_neg:
8684
; SDAG8: ; %bb.0:
87-
; SDAG8-NEXT: s_sext_i32_i8 s0, s0
88-
; SDAG8-NEXT: s_abs_i32 s0, s0
89-
; SDAG8-NEXT: s_sub_i32 s0, 0, s0
85+
; SDAG8-NEXT: s_bfe_i32 s1, s0, 0x80000
86+
; SDAG8-NEXT: s_sext_i32_i16 s1, s1
87+
; SDAG8-NEXT: s_ashr_i32 s1, s1, 7
88+
; SDAG8-NEXT: s_xor_b32 s0, s0, s1
89+
; SDAG8-NEXT: s_sub_i32 s0, s1, s0
9090
; SDAG8-NEXT: ; return to shader part epilog
9191
;
9292
; SDAG10-LABEL: abs_sgpr_i8_neg:
9393
; SDAG10: ; %bb.0:
94-
; SDAG10-NEXT: s_sext_i32_i8 s0, s0
95-
; SDAG10-NEXT: s_abs_i32 s0, s0
96-
; SDAG10-NEXT: s_sub_i32 s0, 0, s0
94+
; SDAG10-NEXT: s_bfe_i32 s1, s0, 0x80000
95+
; SDAG10-NEXT: s_sext_i32_i16 s1, s1
96+
; SDAG10-NEXT: s_ashr_i32 s1, s1, 7
97+
; SDAG10-NEXT: s_xor_b32 s0, s0, s1
98+
; SDAG10-NEXT: s_sub_i32 s0, s1, s0
9799
; SDAG10-NEXT: ; return to shader part epilog
98100
;
99101
; SDAG1250-LABEL: abs_sgpr_i8_neg:
100102
; SDAG1250: ; %bb.0:
101-
; SDAG1250-NEXT: s_sext_i32_i8 s0, s0
103+
; SDAG1250-NEXT: s_bfe_i32 s1, s0, 0x80000
102104
; SDAG1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
103-
; SDAG1250-NEXT: s_abs_i32 s0, s0
104-
; SDAG1250-NEXT: s_sub_co_i32 s0, 0, s0
105+
; SDAG1250-NEXT: s_sext_i32_i16 s1, s1
106+
; SDAG1250-NEXT: s_ashr_i32 s1, s1, 7
107+
; SDAG1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
108+
; SDAG1250-NEXT: s_xor_b32 s0, s0, s1
109+
; SDAG1250-NEXT: s_sub_co_i32 s0, s1, s0
105110
; SDAG1250-NEXT: ; return to shader part epilog
106111
;
107112
; GFX6-LABEL: abs_sgpr_i8_neg:
@@ -195,9 +200,10 @@ define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
195200
define amdgpu_ps i16 @abs_sgpr_i16_neg(i16 inreg %arg) {
196201
; SDAG6-LABEL: abs_sgpr_i16_neg:
197202
; SDAG6: ; %bb.0:
198-
; SDAG6-NEXT: s_sext_i32_i16 s0, s0
199-
; SDAG6-NEXT: s_abs_i32 s0, s0
200-
; SDAG6-NEXT: s_sub_i32 s0, 0, s0
203+
; SDAG6-NEXT: s_sext_i32_i16 s1, s0
204+
; SDAG6-NEXT: s_ashr_i32 s1, s1, 15
205+
; SDAG6-NEXT: s_xor_b32 s0, s0, s1
206+
; SDAG6-NEXT: s_sub_i32 s0, s1, s0
201207
; SDAG6-NEXT: ; return to shader part epilog
202208
;
203209
; SDAG8-LABEL: abs_sgpr_i16_neg:
@@ -745,10 +751,8 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
745751
;
746752
; SDAG8-LABEL: abs_sgpr_v2i8:
747753
; SDAG8: ; %bb.0:
748-
; SDAG8-NEXT: s_bfe_i32 s1, s1, 0x80000
749-
; SDAG8-NEXT: s_bfe_i32 s0, s0, 0x80000
750-
; SDAG8-NEXT: s_sext_i32_i16 s1, s1
751-
; SDAG8-NEXT: s_sext_i32_i16 s0, s0
754+
; SDAG8-NEXT: s_sext_i32_i8 s1, s1
755+
; SDAG8-NEXT: s_sext_i32_i8 s0, s0
752756
; SDAG8-NEXT: s_abs_i32 s1, s1
753757
; SDAG8-NEXT: s_abs_i32 s0, s0
754758
; SDAG8-NEXT: s_lshl_b32 s2, s1, 8
@@ -757,10 +761,8 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
757761
;
758762
; SDAG10-LABEL: abs_sgpr_v2i8:
759763
; SDAG10: ; %bb.0:
760-
; SDAG10-NEXT: s_bfe_i32 s1, s1, 0x80000
761-
; SDAG10-NEXT: s_bfe_i32 s0, s0, 0x80000
762-
; SDAG10-NEXT: s_sext_i32_i16 s1, s1
763-
; SDAG10-NEXT: s_sext_i32_i16 s0, s0
764+
; SDAG10-NEXT: s_sext_i32_i8 s1, s1
765+
; SDAG10-NEXT: s_sext_i32_i8 s0, s0
764766
; SDAG10-NEXT: s_abs_i32 s1, s1
765767
; SDAG10-NEXT: s_abs_i32 s0, s0
766768
; SDAG10-NEXT: s_lshl_b32 s2, s1, 8
@@ -769,10 +771,8 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
769771
;
770772
; SDAG1250-LABEL: abs_sgpr_v2i8:
771773
; SDAG1250: ; %bb.0:
772-
; SDAG1250-NEXT: s_bfe_i32 s1, s1, 0x80000
773-
; SDAG1250-NEXT: s_bfe_i32 s0, s0, 0x80000
774-
; SDAG1250-NEXT: s_sext_i32_i16 s1, s1
775-
; SDAG1250-NEXT: s_sext_i32_i16 s0, s0
774+
; SDAG1250-NEXT: s_sext_i32_i8 s1, s1
775+
; SDAG1250-NEXT: s_sext_i32_i8 s0, s0
776776
; SDAG1250-NEXT: s_abs_i32 s1, s1
777777
; SDAG1250-NEXT: s_abs_i32 s0, s0
778778
; SDAG1250-NEXT: s_lshl_b32 s2, s1, 8
@@ -917,14 +917,11 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
917917
;
918918
; SDAG8-LABEL: abs_sgpr_v3i8:
919919
; SDAG8: ; %bb.0:
920-
; SDAG8-NEXT: s_bfe_i32 s1, s1, 0x80000
921-
; SDAG8-NEXT: s_bfe_i32 s0, s0, 0x80000
922-
; SDAG8-NEXT: s_sext_i32_i16 s1, s1
923-
; SDAG8-NEXT: s_sext_i32_i16 s0, s0
920+
; SDAG8-NEXT: s_sext_i32_i8 s1, s1
921+
; SDAG8-NEXT: s_sext_i32_i8 s0, s0
924922
; SDAG8-NEXT: s_abs_i32 s1, s1
925-
; SDAG8-NEXT: s_bfe_i32 s2, s2, 0x80000
926923
; SDAG8-NEXT: s_abs_i32 s0, s0
927-
; SDAG8-NEXT: s_sext_i32_i16 s2, s2
924+
; SDAG8-NEXT: s_sext_i32_i8 s2, s2
928925
; SDAG8-NEXT: s_lshl_b32 s1, s1, 8
929926
; SDAG8-NEXT: s_abs_i32 s2, s2
930927
; SDAG8-NEXT: s_or_b32 s0, s0, s1
@@ -936,40 +933,34 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
936933
;
937934
; SDAG10-LABEL: abs_sgpr_v3i8:
938935
; SDAG10: ; %bb.0:
939-
; SDAG10-NEXT: s_bfe_i32 s1, s1, 0x80000
940-
; SDAG10-NEXT: s_bfe_i32 s0, s0, 0x80000
941-
; SDAG10-NEXT: s_sext_i32_i16 s1, s1
942-
; SDAG10-NEXT: s_bfe_i32 s2, s2, 0x80000
943-
; SDAG10-NEXT: s_sext_i32_i16 s0, s0
936+
; SDAG10-NEXT: s_sext_i32_i8 s1, s1
937+
; SDAG10-NEXT: s_sext_i32_i8 s0, s0
944938
; SDAG10-NEXT: s_abs_i32 s1, s1
945939
; SDAG10-NEXT: s_abs_i32 s0, s0
940+
; SDAG10-NEXT: s_sext_i32_i8 s2, s2
946941
; SDAG10-NEXT: s_lshl_b32 s1, s1, 8
947-
; SDAG10-NEXT: s_sext_i32_i16 s2, s2
948-
; SDAG10-NEXT: s_or_b32 s0, s0, s1
949942
; SDAG10-NEXT: s_abs_i32 s2, s2
950-
; SDAG10-NEXT: s_and_b32 s1, s0, 0xffff
951-
; SDAG10-NEXT: s_lshl_b32 s3, s2, 16
952-
; SDAG10-NEXT: s_or_b32 s1, s1, s3
943+
; SDAG10-NEXT: s_or_b32 s0, s0, s1
944+
; SDAG10-NEXT: s_lshl_b32 s1, s2, 16
945+
; SDAG10-NEXT: s_and_b32 s3, s0, 0xffff
946+
; SDAG10-NEXT: s_or_b32 s1, s3, s1
953947
; SDAG10-NEXT: s_lshr_b32 s1, s1, 8
954948
; SDAG10-NEXT: ; return to shader part epilog
955949
;
956950
; SDAG1250-LABEL: abs_sgpr_v3i8:
957951
; SDAG1250: ; %bb.0:
958-
; SDAG1250-NEXT: s_bfe_i32 s1, s1, 0x80000
959-
; SDAG1250-NEXT: s_bfe_i32 s0, s0, 0x80000
960-
; SDAG1250-NEXT: s_sext_i32_i16 s1, s1
961-
; SDAG1250-NEXT: s_bfe_i32 s2, s2, 0x80000
962-
; SDAG1250-NEXT: s_sext_i32_i16 s0, s0
952+
; SDAG1250-NEXT: s_sext_i32_i8 s1, s1
953+
; SDAG1250-NEXT: s_sext_i32_i8 s0, s0
963954
; SDAG1250-NEXT: s_abs_i32 s1, s1
964955
; SDAG1250-NEXT: s_abs_i32 s0, s0
956+
; SDAG1250-NEXT: s_sext_i32_i8 s2, s2
965957
; SDAG1250-NEXT: s_lshl_b32 s1, s1, 8
966-
; SDAG1250-NEXT: s_sext_i32_i16 s2, s2
967-
; SDAG1250-NEXT: s_or_b32 s0, s0, s1
968958
; SDAG1250-NEXT: s_abs_i32 s2, s2
969-
; SDAG1250-NEXT: s_and_b32 s1, s0, 0xffff
970-
; SDAG1250-NEXT: s_lshl_b32 s3, s2, 16
959+
; SDAG1250-NEXT: s_or_b32 s0, s0, s1
960+
; SDAG1250-NEXT: s_lshl_b32 s1, s2, 16
961+
; SDAG1250-NEXT: s_and_b32 s3, s0, 0xffff
971962
; SDAG1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
972-
; SDAG1250-NEXT: s_or_b32 s1, s1, s3
963+
; SDAG1250-NEXT: s_or_b32 s1, s3, s1
973964
; SDAG1250-NEXT: s_lshr_b32 s1, s1, 8
974965
; SDAG1250-NEXT: ; return to shader part epilog
975966
;

0 commit comments

Comments
 (0)