@@ -667,30 +667,30 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
667667; CHECK-NEXT: add x10, x3, #12
668668; CHECK-NEXT: bic v1.8h, #255, lsl #8
669669; CHECK-NEXT: ld1 { v0.s }[3], [x3], #4
670- ; CHECK-NEXT: ldr s3 , [x0, #12]
671- ; CHECK-NEXT: ldp s2, s7 , [x0 , #4]
670+ ; CHECK-NEXT: ldr s4 , [x0, #12]
671+ ; CHECK-NEXT: ldp s5, s2 , [x2 , #4]
672672; CHECK-NEXT: ldr s6, [x2, #12]
673- ; CHECK-NEXT: ldp s5, s4 , [x2 , #4]
674- ; CHECK-NEXT: ld1 { v3 .s }[1], [x11]
673+ ; CHECK-NEXT: ldp s3, s7 , [x0 , #4]
674+ ; CHECK-NEXT: ld1 { v4 .s }[1], [x11]
675675; CHECK-NEXT: ld1 { v6.s }[1], [x10]
676- ; CHECK-NEXT: ld1 { v2.s }[1], [x9]
677- ; CHECK-NEXT: ld1 { v4.s }[1], [x8]
676+ ; CHECK-NEXT: ld1 { v2.s }[1], [x8]
678677; CHECK-NEXT: ld1 { v5.s }[1], [x3]
679678; CHECK-NEXT: add x8, x1, #8
679+ ; CHECK-NEXT: ld1 { v3.s }[1], [x9]
680680; CHECK-NEXT: ld1 { v7.s }[1], [x8]
681- ; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
682- ; CHECK-NEXT: ushll v4 .8h, v4 .8b, #0
683- ; CHECK-NEXT: uaddl v3 .8h, v5.8b, v6.8b
681+ ; CHECK-NEXT: ushll v2.8h, v2.8b, #0
682+ ; CHECK-NEXT: uaddl v3 .8h, v3 .8b, v4.8b
683+ ; CHECK-NEXT: uaddl v4 .8h, v5.8b, v6.8b
684684; CHECK-NEXT: uaddw v1.8h, v1.8h, v7.8b
685- ; CHECK-NEXT: uaddw2 v4.8h, v4.8h, v0.16b
686- ; CHECK-NEXT: ushll v0.4s, v2.4h, #3
687- ; CHECK-NEXT: ushll v5.4s, v3.4h, #3
685+ ; CHECK-NEXT: uaddw2 v2.8h, v2.8h, v0.16b
686+ ; CHECK-NEXT: ushll v0.4s, v3.4h, #3
687+ ; CHECK-NEXT: ushll v5.4s, v4.4h, #3
688+ ; CHECK-NEXT: ushll2 v4.4s, v4.8h, #3
688689; CHECK-NEXT: ushll2 v3.4s, v3.8h, #3
689- ; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3
690690; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
691- ; CHECK-NEXT: uaddw2 v1.4s, v2 .4s, v1.8h
692- ; CHECK-NEXT: uaddw2 v3.4s, v3 .4s, v4 .8h
693- ; CHECK-NEXT: uaddw v2.4s, v5.4s, v4 .4h
691+ ; CHECK-NEXT: uaddw2 v1.4s, v3 .4s, v1.8h
692+ ; CHECK-NEXT: uaddw2 v3.4s, v4 .4s, v2 .8h
693+ ; CHECK-NEXT: uaddw v2.4s, v5.4s, v2 .4h
694694; CHECK-NEXT: ret
695695 %lp1 = load <4 x i8 >, ptr %p
696696 store <4 x i8 > %lp1 , ptr %z
@@ -1073,24 +1073,24 @@ define <16 x i32> @extrause_ext2(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
10731073; CHECK-NEXT: ld1 { v6.s }[1], [x10]
10741074; CHECK-NEXT: ld1 { v5.s }[1], [x9]
10751075; CHECK-NEXT: ld1 { v7.s }[1], [x8]
1076- ; CHECK-NEXT: uaddl v16 .8h, v2.8b, v3.8b
1077- ; CHECK-NEXT: uaddl v3 .8h, v1.8b, v6.8b
1078- ; CHECK-NEXT: uaddl v2 .8h, v4.8b, v5.8b
1076+ ; CHECK-NEXT: uaddl v2 .8h, v2.8b, v3.8b
1077+ ; CHECK-NEXT: uaddl v1 .8h, v1.8b, v6.8b
1078+ ; CHECK-NEXT: uaddl v3 .8h, v4.8b, v5.8b
10791079; CHECK-NEXT: uaddl v4.8h, v0.8b, v7.8b
1080- ; CHECK-NEXT: ushll v0.4s, v16.4h, #3
1081- ; CHECK-NEXT: ushll2 v1.4s, v16.8h, #3
1082- ; CHECK-NEXT: ushll2 v18.4s, v16.8h, #0
1083- ; CHECK-NEXT: ushll v6.4s, v2.4h, #3
1084- ; CHECK-NEXT: ushll2 v7.4s, v2.8h, #3
1085- ; CHECK-NEXT: ushll2 v5.4s, v2.8h, #0
1080+ ; CHECK-NEXT: ushll2 v0.4s, v2.8h, #0
1081+ ; CHECK-NEXT: ushll v5.4s, v2.4h, #3
1082+ ; CHECK-NEXT: ushll2 v16.4s, v2.8h, #3
1083+ ; CHECK-NEXT: ushll v6.4s, v3.4h, #3
1084+ ; CHECK-NEXT: ushll2 v7.4s, v3.8h, #3
10861085; CHECK-NEXT: ushll v17.4s, v2.4h, #0
1087- ; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v3.8h
1088- ; CHECK-NEXT: uaddw v0.4s, v0.4s, v3.4h
1086+ ; CHECK-NEXT: ushll2 v18.4s, v3.8h, #0
1087+ ; CHECK-NEXT: ushll v19.4s, v3.4h, #0
1088+ ; CHECK-NEXT: stp q17, q0, [x4]
1089+ ; CHECK-NEXT: uaddw v0.4s, v5.4s, v1.4h
1090+ ; CHECK-NEXT: uaddw2 v1.4s, v16.4s, v1.8h
10891091; CHECK-NEXT: uaddw2 v3.4s, v7.4s, v4.8h
10901092; CHECK-NEXT: uaddw v2.4s, v6.4s, v4.4h
1091- ; CHECK-NEXT: ushll v4.4s, v16.4h, #0
1092- ; CHECK-NEXT: stp q17, q5, [x4, #32]
1093- ; CHECK-NEXT: stp q4, q18, [x4]
1093+ ; CHECK-NEXT: stp q19, q18, [x4, #32]
10941094; CHECK-NEXT: ret
10951095 %lp1 = load <4 x i8 >, ptr %p
10961096 %p2 = getelementptr i8 , ptr %p , i32 4
@@ -1176,19 +1176,20 @@ define <16 x i32> @extrause_shl(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
11761176; CHECK-NEXT: ld1 { v5.s }[1], [x9]
11771177; CHECK-NEXT: ld1 { v7.s }[1], [x8]
11781178; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
1179+ ; CHECK-NEXT: uaddl v1.8h, v1.8b, v6.8b
11791180; CHECK-NEXT: uaddl v3.8h, v4.8b, v5.8b
1180- ; CHECK-NEXT: uaddl v4 .8h, v1 .8b, v6 .8b
1181- ; CHECK-NEXT: ushll v5 .4s, v2.4h, #3
1182- ; CHECK-NEXT: ushll2 v6 .4s, v2.8h, #3
1183- ; CHECK-NEXT: uaddl v2.8h, v0.8b, v7.8b
1184- ; CHECK-NEXT: ushll v7.4s, v3.4h , #3
1185- ; CHECK-NEXT: ushll2 v16 .4s, v3.8h, #3
1186- ; CHECK-NEXT: uaddw2 v1.4s, v6 .4s, v4 .8h
1187- ; CHECK-NEXT: uaddw v0.4s, v5.4s, v4.4h
1188- ; CHECK-NEXT: stp q5 , q6, [x4]
1189- ; CHECK-NEXT: uaddw2 v3.4s, v16 .4s, v2 .8h
1190- ; CHECK-NEXT: uaddw v2.4s, v7 .4s, v2 .4h
1191- ; CHECK-NEXT: stp q7, q16, [x4, #32 ]
1181+ ; CHECK-NEXT: uaddl v5 .8h, v0 .8b, v7 .8b
1182+ ; CHECK-NEXT: ushll v4 .4s, v2.4h, #3
1183+ ; CHECK-NEXT: ushll2 v2 .4s, v2.8h, #3
1184+ ; CHECK-NEXT: ushll v6.4s, v3.4h, #3
1185+ ; CHECK-NEXT: ushll2 v7.4s, v3.8h , #3
1186+ ; CHECK-NEXT: uaddw v0 .4s, v4.4s, v1.4h
1187+ ; CHECK-NEXT: uaddw2 v1.4s, v2 .4s, v1 .8h
1188+ ; CHECK-NEXT: str q4, [x4]
1189+ ; CHECK-NEXT: stp q2 , q6, [x4, #16 ]
1190+ ; CHECK-NEXT: uaddw2 v3.4s, v7 .4s, v5 .8h
1191+ ; CHECK-NEXT: uaddw v2.4s, v6 .4s, v5 .4h
1192+ ; CHECK-NEXT: str q7, [x4, #48 ]
11921193; CHECK-NEXT: ret
11931194 %lp1 = load <4 x i8 >, ptr %p
11941195 %p2 = getelementptr i8 , ptr %p , i32 4
0 commit comments