@@ -70,15 +70,30 @@ define i8 @v_ashr_i8_7(i8 %value) {
7070}
7171
7272define amdgpu_ps i8 @s_ashr_i8 (i8 inreg %value , i8 inreg %amount ) {
73- ; GCN-LABEL: s_ashr_i8:
74- ; GCN: ; %bb.0:
75- ; GCN-NEXT: s_sext_i32_i8 s0, s0
76- ; GCN-NEXT: s_ashr_i32 s0, s0, s1
77- ; GCN-NEXT: ; return to shader part epilog
73+ ; GFX6-LABEL: s_ashr_i8:
74+ ; GFX6: ; %bb.0:
75+ ; GFX6-NEXT: s_sext_i32_i8 s0, s0
76+ ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
77+ ; GFX6-NEXT: ; return to shader part epilog
78+ ;
79+ ; GFX8-LABEL: s_ashr_i8:
80+ ; GFX8: ; %bb.0:
81+ ; GFX8-NEXT: s_sext_i32_i8 s0, s0
82+ ; GFX8-NEXT: s_sext_i32_i8 s1, s1
83+ ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
84+ ; GFX8-NEXT: ; return to shader part epilog
85+ ;
86+ ; GFX9-LABEL: s_ashr_i8:
87+ ; GFX9: ; %bb.0:
88+ ; GFX9-NEXT: s_sext_i32_i8 s0, s0
89+ ; GFX9-NEXT: s_sext_i32_i8 s1, s1
90+ ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
91+ ; GFX9-NEXT: ; return to shader part epilog
7892;
7993; GFX10PLUS-LABEL: s_ashr_i8:
8094; GFX10PLUS: ; %bb.0:
8195; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
96+ ; GFX10PLUS-NEXT: s_sext_i32_i8 s1, s1
8297; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
8398; GFX10PLUS-NEXT: ; return to shader part epilog
8499 %result = ashr i8 %value , %amount
@@ -627,15 +642,30 @@ define i16 @v_ashr_i16_15(i16 %value) {
627642}
628643
629644define amdgpu_ps i16 @s_ashr_i16 (i16 inreg %value , i16 inreg %amount ) {
630- ; GCN-LABEL: s_ashr_i16:
631- ; GCN: ; %bb.0:
632- ; GCN-NEXT: s_sext_i32_i16 s0, s0
633- ; GCN-NEXT: s_ashr_i32 s0, s0, s1
634- ; GCN-NEXT: ; return to shader part epilog
645+ ; GFX6-LABEL: s_ashr_i16:
646+ ; GFX6: ; %bb.0:
647+ ; GFX6-NEXT: s_sext_i32_i16 s0, s0
648+ ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
649+ ; GFX6-NEXT: ; return to shader part epilog
650+ ;
651+ ; GFX8-LABEL: s_ashr_i16:
652+ ; GFX8: ; %bb.0:
653+ ; GFX8-NEXT: s_sext_i32_i16 s0, s0
654+ ; GFX8-NEXT: s_sext_i32_i16 s1, s1
655+ ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
656+ ; GFX8-NEXT: ; return to shader part epilog
657+ ;
658+ ; GFX9-LABEL: s_ashr_i16:
659+ ; GFX9: ; %bb.0:
660+ ; GFX9-NEXT: s_sext_i32_i16 s0, s0
661+ ; GFX9-NEXT: s_sext_i32_i16 s1, s1
662+ ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
663+ ; GFX9-NEXT: ; return to shader part epilog
635664;
636665; GFX10PLUS-LABEL: s_ashr_i16:
637666; GFX10PLUS: ; %bb.0:
638667; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
668+ ; GFX10PLUS-NEXT: s_sext_i32_i16 s1, s1
639669; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
640670; GFX10PLUS-NEXT: ; return to shader part epilog
641671 %result = ashr i16 %value , %amount
@@ -796,15 +826,14 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
796826;
797827; GFX8-LABEL: s_ashr_v2i16:
798828; GFX8: ; %bb.0:
799- ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
800- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
801- ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
829+ ; GFX8-NEXT: s_sext_i32_i16 s2, s0
830+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
831+ ; GFX8-NEXT: s_sext_i32_i16 s3, s1
832+ ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
833+ ; GFX8-NEXT: s_ashr_i32 s2, s2, s3
802834; GFX8-NEXT: s_ashr_i32 s0, s0, s1
803- ; GFX8-NEXT: s_sext_i32_i16 s1, s2
804- ; GFX8-NEXT: s_ashr_i32 s1, s1, s3
805- ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
806- ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
807- ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
835+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
836+ ; GFX8-NEXT: s_and_b32 s1, s2, 0xffff
808837; GFX8-NEXT: s_or_b32 s0, s0, s1
809838; GFX8-NEXT: ; return to shader part epilog
810839;
@@ -999,25 +1028,23 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
9991028;
10001029; GFX8-LABEL: s_ashr_v4i16:
10011030; GFX8: ; %bb.0:
1002- ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1003- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1004- ; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1031+ ; GFX8-NEXT: s_sext_i32_i16 s4, s0
1032+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1033+ ; GFX8-NEXT: s_sext_i32_i16 s5, s1
1034+ ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1035+ ; GFX8-NEXT: s_sext_i32_i16 s6, s2
1036+ ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1037+ ; GFX8-NEXT: s_sext_i32_i16 s7, s3
1038+ ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1039+ ; GFX8-NEXT: s_ashr_i32 s4, s4, s6
10051040; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1006- ; GFX8-NEXT: s_sext_i32_i16 s2, s4
1007- ; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1008- ; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1009- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1010- ; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1041+ ; GFX8-NEXT: s_ashr_i32 s2, s5, s7
10111042; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1012- ; GFX8-NEXT: s_sext_i32_i16 s3, s5
1013- ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1014- ; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1015- ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1016- ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1017- ; GFX8-NEXT: s_or_b32 s0, s0, s2
1018- ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1019- ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1020- ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1043+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1044+ ; GFX8-NEXT: s_and_b32 s3, s4, 0xffff
1045+ ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1046+ ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1047+ ; GFX8-NEXT: s_or_b32 s0, s0, s3
10211048; GFX8-NEXT: s_or_b32 s1, s1, s2
10221049; GFX8-NEXT: ; return to shader part epilog
10231050;
@@ -1208,45 +1235,41 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
12081235;
12091236; GFX8-LABEL: s_ashr_v8i16:
12101237; GFX8: ; %bb.0:
1211- ; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1212- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1213- ; GFX8-NEXT: s_lshr_b32 s12, s4, 16
1238+ ; GFX8-NEXT: s_sext_i32_i16 s8, s0
1239+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1240+ ; GFX8-NEXT: s_sext_i32_i16 s9, s1
1241+ ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1242+ ; GFX8-NEXT: s_sext_i32_i16 s12, s4
1243+ ; GFX8-NEXT: s_bfe_i32 s4, s4, 0x100010
1244+ ; GFX8-NEXT: s_sext_i32_i16 s13, s5
1245+ ; GFX8-NEXT: s_bfe_i32 s5, s5, 0x100010
1246+ ; GFX8-NEXT: s_sext_i32_i16 s10, s2
1247+ ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1248+ ; GFX8-NEXT: s_sext_i32_i16 s14, s6
1249+ ; GFX8-NEXT: s_bfe_i32 s6, s6, 0x100010
12141250; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1215- ; GFX8-NEXT: s_sext_i32_i16 s4, s8
1216- ; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1217- ; GFX8-NEXT: s_ashr_i32 s4, s4, s12
1218- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1219- ; GFX8-NEXT: s_lshr_b32 s13, s5, 16
1251+ ; GFX8-NEXT: s_ashr_i32 s4, s9, s13
12201252; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1221- ; GFX8-NEXT: s_sext_i32_i16 s5, s9
1222- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1223- ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1224- ; GFX8-NEXT: s_ashr_i32 s5, s5, s13
1225- ; GFX8-NEXT: s_sext_i32_i16 s2, s2
1226- ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1227- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1228- ; GFX8-NEXT: s_lshr_b32 s14, s6, 16
1253+ ; GFX8-NEXT: s_sext_i32_i16 s11, s3
1254+ ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1255+ ; GFX8-NEXT: s_sext_i32_i16 s15, s7
1256+ ; GFX8-NEXT: s_bfe_i32 s7, s7, 0x100010
1257+ ; GFX8-NEXT: s_ashr_i32 s5, s10, s14
12291258; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1230- ; GFX8-NEXT: s_sext_i32_i16 s6, s10
1231- ; GFX8-NEXT: s_or_b32 s0, s0, s4
1232- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1233- ; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1234- ; GFX8-NEXT: s_ashr_i32 s6, s6, s14
1235- ; GFX8-NEXT: s_sext_i32_i16 s3, s3
1236- ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1237- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1238- ; GFX8-NEXT: s_lshr_b32 s15, s7, 16
1259+ ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1260+ ; GFX8-NEXT: s_and_b32 s4, s4, 0xffff
1261+ ; GFX8-NEXT: s_ashr_i32 s8, s8, s12
1262+ ; GFX8-NEXT: s_ashr_i32 s6, s11, s15
12391263; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1240- ; GFX8-NEXT: s_sext_i32_i16 s7, s11
12411264; GFX8-NEXT: s_or_b32 s1, s1, s4
1242- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1243- ; GFX8-NEXT: s_ashr_i32 s7, s7, s15
1244- ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1245- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1265+ ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1266+ ; GFX8-NEXT: s_and_b32 s4, s5, 0xffff
1267+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1268+ ; GFX8-NEXT: s_and_b32 s7, s8, 0xffff
12461269; GFX8-NEXT: s_or_b32 s2, s2, s4
1247- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1248- ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1249- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1270+ ; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1271+ ; GFX8-NEXT: s_and_b32 s4, s6, 0xffff
1272+ ; GFX8-NEXT: s_or_b32 s0, s0, s7
12501273; GFX8-NEXT: s_or_b32 s3, s3, s4
12511274; GFX8-NEXT: ; return to shader part epilog
12521275;
0 commit comments