@@ -70,30 +70,15 @@ define i8 @v_ashr_i8_7(i8 %value) {
7070}
7171
7272define amdgpu_ps i8 @s_ashr_i8 (i8 inreg %value , i8 inreg %amount ) {
73- ; GFX6-LABEL: s_ashr_i8:
74- ; GFX6: ; %bb.0:
75- ; GFX6-NEXT: s_sext_i32_i8 s0, s0
76- ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
77- ; GFX6-NEXT: ; return to shader part epilog
78- ;
79- ; GFX8-LABEL: s_ashr_i8:
80- ; GFX8: ; %bb.0:
81- ; GFX8-NEXT: s_sext_i32_i8 s0, s0
82- ; GFX8-NEXT: s_sext_i32_i8 s1, s1
83- ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
84- ; GFX8-NEXT: ; return to shader part epilog
85- ;
86- ; GFX9-LABEL: s_ashr_i8:
87- ; GFX9: ; %bb.0:
88- ; GFX9-NEXT: s_sext_i32_i8 s0, s0
89- ; GFX9-NEXT: s_sext_i32_i8 s1, s1
90- ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
91- ; GFX9-NEXT: ; return to shader part epilog
73+ ; GCN-LABEL: s_ashr_i8:
74+ ; GCN: ; %bb.0:
75+ ; GCN-NEXT: s_sext_i32_i8 s0, s0
76+ ; GCN-NEXT: s_ashr_i32 s0, s0, s1
77+ ; GCN-NEXT: ; return to shader part epilog
9278;
9379; GFX10PLUS-LABEL: s_ashr_i8:
9480; GFX10PLUS: ; %bb.0:
9581; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
96- ; GFX10PLUS-NEXT: s_sext_i32_i8 s1, s1
9782; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
9883; GFX10PLUS-NEXT: ; return to shader part epilog
9984 %result = ashr i8 %value , %amount
@@ -642,30 +627,15 @@ define i16 @v_ashr_i16_15(i16 %value) {
642627}
643628
644629define amdgpu_ps i16 @s_ashr_i16 (i16 inreg %value , i16 inreg %amount ) {
645- ; GFX6-LABEL: s_ashr_i16:
646- ; GFX6: ; %bb.0:
647- ; GFX6-NEXT: s_sext_i32_i16 s0, s0
648- ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
649- ; GFX6-NEXT: ; return to shader part epilog
650- ;
651- ; GFX8-LABEL: s_ashr_i16:
652- ; GFX8: ; %bb.0:
653- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
654- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
655- ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
656- ; GFX8-NEXT: ; return to shader part epilog
657- ;
658- ; GFX9-LABEL: s_ashr_i16:
659- ; GFX9: ; %bb.0:
660- ; GFX9-NEXT: s_sext_i32_i16 s0, s0
661- ; GFX9-NEXT: s_sext_i32_i16 s1, s1
662- ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
663- ; GFX9-NEXT: ; return to shader part epilog
630+ ; GCN-LABEL: s_ashr_i16:
631+ ; GCN: ; %bb.0:
632+ ; GCN-NEXT: s_sext_i32_i16 s0, s0
633+ ; GCN-NEXT: s_ashr_i32 s0, s0, s1
634+ ; GCN-NEXT: ; return to shader part epilog
664635;
665636; GFX10PLUS-LABEL: s_ashr_i16:
666637; GFX10PLUS: ; %bb.0:
667638; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
668- ; GFX10PLUS-NEXT: s_sext_i32_i16 s1, s1
669639; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
670640; GFX10PLUS-NEXT: ; return to shader part epilog
671641 %result = ashr i16 %value , %amount
@@ -826,14 +796,15 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
826796;
827797; GFX8-LABEL: s_ashr_v2i16:
828798; GFX8: ; %bb.0:
829- ; GFX8-NEXT: s_sext_i32_i16 s2, s0
830- ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
831- ; GFX8-NEXT: s_sext_i32_i16 s3, s1
832- ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
833- ; GFX8-NEXT: s_ashr_i32 s2, s2, s3
799+ ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
800+ ; GFX8-NEXT: s_sext_i32_i16 s0, s0
801+ ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
834802; GFX8-NEXT: s_ashr_i32 s0, s0, s1
835- ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
836- ; GFX8-NEXT: s_and_b32 s1, s2, 0xffff
803+ ; GFX8-NEXT: s_sext_i32_i16 s1, s2
804+ ; GFX8-NEXT: s_ashr_i32 s1, s1, s3
805+ ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
806+ ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
807+ ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
837808; GFX8-NEXT: s_or_b32 s0, s0, s1
838809; GFX8-NEXT: ; return to shader part epilog
839810;
@@ -1028,23 +999,25 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
1028999;
10291000; GFX8-LABEL: s_ashr_v4i16:
10301001; GFX8: ; %bb.0:
1031- ; GFX8-NEXT: s_sext_i32_i16 s4, s0
1032- ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1033- ; GFX8-NEXT: s_sext_i32_i16 s5, s1
1034- ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1035- ; GFX8-NEXT: s_sext_i32_i16 s6, s2
1036- ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1037- ; GFX8-NEXT: s_sext_i32_i16 s7, s3
1038- ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1039- ; GFX8-NEXT: s_ashr_i32 s4, s4, s6
1002+ ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1003+ ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1004+ ; GFX8-NEXT: s_lshr_b32 s6, s2, 16
10401005; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1041- ; GFX8-NEXT: s_ashr_i32 s2, s5, s7
1006+ ; GFX8-NEXT: s_sext_i32_i16 s2, s4
1007+ ; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1008+ ; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1009+ ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1010+ ; GFX8-NEXT: s_lshr_b32 s7, s3, 16
10421011; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1043- ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1044- ; GFX8-NEXT: s_and_b32 s3, s4, 0xffff
1045- ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1046- ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1047- ; GFX8-NEXT: s_or_b32 s0, s0, s3
1012+ ; GFX8-NEXT: s_sext_i32_i16 s3, s5
1013+ ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1014+ ; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1015+ ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1016+ ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1017+ ; GFX8-NEXT: s_or_b32 s0, s0, s2
1018+ ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1019+ ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1020+ ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
10481021; GFX8-NEXT: s_or_b32 s1, s1, s2
10491022; GFX8-NEXT: ; return to shader part epilog
10501023;
@@ -1235,41 +1208,45 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
12351208;
12361209; GFX8-LABEL: s_ashr_v8i16:
12371210; GFX8: ; %bb.0:
1238- ; GFX8-NEXT: s_sext_i32_i16 s8, s0
1239- ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1240- ; GFX8-NEXT: s_sext_i32_i16 s9, s1
1241- ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1242- ; GFX8-NEXT: s_sext_i32_i16 s12, s4
1243- ; GFX8-NEXT: s_bfe_i32 s4, s4, 0x100010
1244- ; GFX8-NEXT: s_sext_i32_i16 s13, s5
1245- ; GFX8-NEXT: s_bfe_i32 s5, s5, 0x100010
1246- ; GFX8-NEXT: s_sext_i32_i16 s10, s2
1247- ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1248- ; GFX8-NEXT: s_sext_i32_i16 s14, s6
1249- ; GFX8-NEXT: s_bfe_i32 s6, s6, 0x100010
1211+ ; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1212+ ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1213+ ; GFX8-NEXT: s_lshr_b32 s12, s4, 16
12501214; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1251- ; GFX8-NEXT: s_ashr_i32 s4, s9, s13
1215+ ; GFX8-NEXT: s_sext_i32_i16 s4, s8
1216+ ; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1217+ ; GFX8-NEXT: s_ashr_i32 s4, s4, s12
1218+ ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1219+ ; GFX8-NEXT: s_lshr_b32 s13, s5, 16
12521220; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1253- ; GFX8-NEXT: s_sext_i32_i16 s11, s3
1254- ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1255- ; GFX8-NEXT: s_sext_i32_i16 s15, s7
1256- ; GFX8-NEXT: s_bfe_i32 s7, s7, 0x100010
1257- ; GFX8-NEXT: s_ashr_i32 s5, s10, s14
1221+ ; GFX8-NEXT: s_sext_i32_i16 s5, s9
1222+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1223+ ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1224+ ; GFX8-NEXT: s_ashr_i32 s5, s5, s13
1225+ ; GFX8-NEXT: s_sext_i32_i16 s2, s2
1226+ ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1227+ ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1228+ ; GFX8-NEXT: s_lshr_b32 s14, s6, 16
12581229; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1259- ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1260- ; GFX8-NEXT: s_and_b32 s4, s4, 0xffff
1261- ; GFX8-NEXT: s_ashr_i32 s8, s8, s12
1262- ; GFX8-NEXT: s_ashr_i32 s6, s11, s15
1230+ ; GFX8-NEXT: s_sext_i32_i16 s6, s10
1231+ ; GFX8-NEXT: s_or_b32 s0, s0, s4
1232+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1233+ ; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1234+ ; GFX8-NEXT: s_ashr_i32 s6, s6, s14
1235+ ; GFX8-NEXT: s_sext_i32_i16 s3, s3
1236+ ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1237+ ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1238+ ; GFX8-NEXT: s_lshr_b32 s15, s7, 16
12631239; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1240+ ; GFX8-NEXT: s_sext_i32_i16 s7, s11
12641241; GFX8-NEXT: s_or_b32 s1, s1, s4
1265- ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1266- ; GFX8-NEXT: s_and_b32 s4, s5, 0xffff
1267- ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1268- ; GFX8-NEXT: s_and_b32 s7, s8, 0xffff
1242+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1243+ ; GFX8-NEXT: s_ashr_i32 s7, s7, s15
1244+ ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1245+ ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
12691246; GFX8-NEXT: s_or_b32 s2, s2, s4
1270- ; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1271- ; GFX8-NEXT: s_and_b32 s4, s6, 0xffff
1272- ; GFX8-NEXT: s_or_b32 s0, s0, s7
1247+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1248+ ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1249+ ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
12731250; GFX8-NEXT: s_or_b32 s3, s3, s4
12741251; GFX8-NEXT: ; return to shader part epilog
12751252;
0 commit comments