@@ -811,16 +811,15 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
811811;
812812; GFX8-LABEL: s_ashr_v2i16:
813813; GFX8: ; %bb.0:
814- ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
815- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
816- ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
817- ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
818- ; GFX8-NEXT: s_sext_i32_i16 s1, s2
819- ; GFX8-NEXT: s_ashr_i32 s1, s1, s3
820- ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
814+ ; GFX8-NEXT: s_lshr_b32 s2, s1, 16
815+ ; GFX8-NEXT: s_sext_i32_i16 s3, s0
816+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
817+ ; GFX8-NEXT: s_ashr_i32 s0, s0, s2
818+ ; GFX8-NEXT: s_ashr_i32 s1, s3, s1
821819; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
822- ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
823- ; GFX8-NEXT: s_or_b32 s0, s0, s1
820+ ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
821+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
822+ ; GFX8-NEXT: s_or_b32 s0, s1, s0
824823; GFX8-NEXT: ; return to shader part epilog
825824;
826825; GFX9-LABEL: s_ashr_v2i16:
@@ -1014,26 +1013,24 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
10141013;
10151014; GFX8-LABEL: s_ashr_v4i16:
10161015; GFX8: ; %bb.0:
1017- ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1018- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1019- ; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1020- ; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1021- ; GFX8-NEXT: s_sext_i32_i16 s2, s4
1022- ; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1023- ; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1024- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1025- ; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1026- ; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1027- ; GFX8-NEXT: s_sext_i32_i16 s3, s5
1028- ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1029- ; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1016+ ; GFX8-NEXT: s_lshr_b32 s4, s2, 16
1017+ ; GFX8-NEXT: s_sext_i32_i16 s6, s0
1018+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1019+ ; GFX8-NEXT: s_lshr_b32 s5, s3, 16
1020+ ; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1021+ ; GFX8-NEXT: s_sext_i32_i16 s4, s1
1022+ ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1023+ ; GFX8-NEXT: s_ashr_i32 s2, s6, s2
1024+ ; GFX8-NEXT: s_ashr_i32 s1, s1, s5
10301025; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1031- ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1032- ; GFX8-NEXT: s_or_b32 s0, s0, s2
1033- ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1026+ ; GFX8-NEXT: s_ashr_i32 s3, s4, s3
1027+ ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1028+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
10341029; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1035- ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1036- ; GFX8-NEXT: s_or_b32 s1, s1, s2
1030+ ; GFX8-NEXT: s_or_b32 s0, s2, s0
1031+ ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1032+ ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1033+ ; GFX8-NEXT: s_or_b32 s1, s2, s1
10371034; GFX8-NEXT: ; return to shader part epilog
10381035;
10391036; GFX9-LABEL: s_ashr_v4i16:
@@ -1223,46 +1220,42 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
12231220;
12241221; GFX8-LABEL: s_ashr_v8i16:
12251222; GFX8: ; %bb.0:
1226- ; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1227- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1228- ; GFX8-NEXT: s_lshr_b32 s12, s4, 16
1229- ; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1230- ; GFX8-NEXT: s_sext_i32_i16 s4, s8
1231- ; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1232- ; GFX8-NEXT: s_ashr_i32 s4, s4, s12
1233- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1234- ; GFX8-NEXT: s_lshr_b32 s13, s5, 16
1235- ; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1236- ; GFX8-NEXT: s_sext_i32_i16 s5, s9
1237- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1238- ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1239- ; GFX8-NEXT: s_ashr_i32 s5, s5, s13
1240- ; GFX8-NEXT: s_sext_i32_i16 s2, s2
1223+ ; GFX8-NEXT: s_lshr_b32 s8, s4, 16
1224+ ; GFX8-NEXT: s_sext_i32_i16 s12, s0
1225+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1226+ ; GFX8-NEXT: s_lshr_b32 s9, s5, 16
1227+ ; GFX8-NEXT: s_ashr_i32 s0, s0, s8
1228+ ; GFX8-NEXT: s_sext_i32_i16 s8, s1
1229+ ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1230+ ; GFX8-NEXT: s_lshr_b32 s10, s6, 16
1231+ ; GFX8-NEXT: s_ashr_i32 s4, s12, s4
1232+ ; GFX8-NEXT: s_ashr_i32 s5, s8, s5
1233+ ; GFX8-NEXT: s_ashr_i32 s1, s1, s9
1234+ ; GFX8-NEXT: s_sext_i32_i16 s8, s2
1235+ ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
12411236; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1242- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1243- ; GFX8-NEXT: s_lshr_b32 s14, s6, 16
1244- ; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1245- ; GFX8-NEXT: s_sext_i32_i16 s6, s10
1246- ; GFX8-NEXT: s_or_b32 s0, s0, s4
1247- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1248- ; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1249- ; GFX8-NEXT: s_ashr_i32 s6, s6, s14
1250- ; GFX8-NEXT: s_sext_i32_i16 s3, s3
1237+ ; GFX8-NEXT: s_lshr_b32 s11, s7, 16
1238+ ; GFX8-NEXT: s_ashr_i32 s6, s8, s6
1239+ ; GFX8-NEXT: s_ashr_i32 s2, s2, s10
1240+ ; GFX8-NEXT: s_sext_i32_i16 s8, s3
1241+ ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1242+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1243+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
12511244; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1252- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1253- ; GFX8-NEXT: s_lshr_b32 s15, s7, 16
1254- ; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1255- ; GFX8-NEXT: s_sext_i32_i16 s7, s11
1256- ; GFX8-NEXT: s_or_b32 s1, s1, s4
1257- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1258- ; GFX8-NEXT: s_ashr_i32 s7, s7, s15
1245+ ; GFX8-NEXT: s_ashr_i32 s3, s3, s11
1246+ ; GFX8-NEXT: s_or_b32 s0, s4, s0
1247+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1248+ ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
12591249; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1260- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1261- ; GFX8-NEXT: s_or_b32 s2, s2, s4
1262- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1250+ ; GFX8-NEXT: s_ashr_i32 s7, s8, s7
1251+ ; GFX8-NEXT: s_or_b32 s1, s4, s1
1252+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1253+ ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
12631254; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1264- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1265- ; GFX8-NEXT: s_or_b32 s3, s3, s4
1255+ ; GFX8-NEXT: s_or_b32 s2, s4, s2
1256+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1257+ ; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1258+ ; GFX8-NEXT: s_or_b32 s3, s4, s3
12661259; GFX8-NEXT: ; return to shader part epilog
12671260;
12681261; GFX9-LABEL: s_ashr_v8i16:
0 commit comments