@@ -66,57 +66,57 @@ define i32 @large(ptr nocapture noundef readonly %p1, i32 noundef %st1, ptr noca
6666; CHECK-NEXT: ldr d5, [x11, x9]
6767; CHECK-NEXT: shll2 v6.4s, v0.8h, #16
6868; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b
69+ ; CHECK-NEXT: shll2 v7.4s, v1.8h, #16
6970; CHECK-NEXT: usubl v3.8h, v4.8b, v5.8b
70- ; CHECK-NEXT: shll2 v4.4s, v1.8h, #16
7171; CHECK-NEXT: saddw v0.4s, v6.4s, v0.4h
72- ; CHECK-NEXT: shll2 v6.4s, v2.8h, #16
72+ ; CHECK-NEXT: shll2 v4.4s, v2.8h, #16
73+ ; CHECK-NEXT: saddw v1.4s, v7.4s, v1.4h
7374; CHECK-NEXT: shll2 v5.4s, v3.8h, #16
74- ; CHECK-NEXT: saddw v1 .4s, v4 .4s, v1.4h
75- ; CHECK-NEXT: rev64 v4 .4s, v0 .4s
76- ; CHECK-NEXT: saddw v2 .4s, v6 .4s, v2.4h
75+ ; CHECK-NEXT: rev64 v6 .4s, v0 .4s
76+ ; CHECK-NEXT: saddw v2 .4s, v4 .4s, v2.4h
77+ ; CHECK-NEXT: rev64 v7 .4s, v1 .4s
7778; CHECK-NEXT: saddw v3.4s, v5.4s, v3.4h
78- ; CHECK-NEXT: rev64 v5.4s, v1.4s
79- ; CHECK-NEXT: rev64 v6.4s, v2.4s
80- ; CHECK-NEXT: sub v4.4s, v0.4s, v4.4s
79+ ; CHECK-NEXT: rev64 v4.4s, v2.4s
80+ ; CHECK-NEXT: sub v6.4s, v0.4s, v6.4s
8181; CHECK-NEXT: addp v0.4s, v1.4s, v0.4s
82- ; CHECK-NEXT: rev64 v7 .4s, v3.4s
83- ; CHECK-NEXT: sub v5 .4s, v1.4s, v5 .4s
84- ; CHECK-NEXT: sub v6 .4s, v2.4s, v6 .4s
82+ ; CHECK-NEXT: rev64 v5 .4s, v3.4s
83+ ; CHECK-NEXT: sub v7 .4s, v1.4s, v7 .4s
84+ ; CHECK-NEXT: sub v4 .4s, v2.4s, v4 .4s
8585; CHECK-NEXT: addp v2.4s, v3.4s, v2.4s
86- ; CHECK-NEXT: zip1 v16.4s, v5.4s, v4.4s
87- ; CHECK-NEXT: sub v7.4s, v3.4s, v7.4s
88- ; CHECK-NEXT: trn1 v4.4s, v5.4s, v4.4s
89- ; CHECK-NEXT: zip2 v3.4s, v6.4s, v7.4s
90- ; CHECK-NEXT: mov v6.s[1], v7.s[0]
86+ ; CHECK-NEXT: zip1 v16.4s, v7.4s, v6.4s
87+ ; CHECK-NEXT: sub v5.4s, v3.4s, v5.4s
88+ ; CHECK-NEXT: trn1 v3.4s, v7.4s, v6.4s
89+ ; CHECK-NEXT: zip1 v6.4s, v4.4s, v5.4s
90+ ; CHECK-NEXT: zip2 v4.4s, v4.4s, v5.4s
91+ ; CHECK-NEXT: ext v5.16b, v7.16b, v16.16b, #8
9192; CHECK-NEXT: ext v7.16b, v2.16b, v2.16b, #8
92- ; CHECK-NEXT: ext v5.16b, v5.16b, v16.16b, #8
93- ; CHECK-NEXT: mov v3.d[1], v4.d[1]
94- ; CHECK-NEXT: uzp1 v1.4s, v7.4s, v0.4s
95- ; CHECK-NEXT: uzp2 v4.4s, v7.4s, v0.4s
93+ ; CHECK-NEXT: mov v4.d[1], v3.d[1]
9694; CHECK-NEXT: mov v6.d[1], v5.d[1]
95+ ; CHECK-NEXT: uzp1 v1.4s, v7.4s, v0.4s
96+ ; CHECK-NEXT: uzp2 v3.4s, v7.4s, v0.4s
9797; CHECK-NEXT: addp v0.4s, v2.4s, v0.4s
98- ; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s
98+ ; CHECK-NEXT: add v5.4s, v4.4s, v6.4s
99+ ; CHECK-NEXT: sub v4.4s, v6.4s, v4.4s
100+ ; CHECK-NEXT: sub v1.4s, v1.4s, v3.4s
99101; CHECK-NEXT: rev64 v7.4s, v0.4s
100- ; CHECK-NEXT: add v5.4s, v3.4s, v6 .4s
101- ; CHECK-NEXT: sub v3.4s, v6.4s, v3 .4s
102+ ; CHECK-NEXT: rev64 v3.4s, v5 .4s
103+ ; CHECK-NEXT: rev64 v6.4s, v4 .4s
102104; CHECK-NEXT: rev64 v2.4s, v1.4s
103- ; CHECK-NEXT: rev64 v4.4s, v5.4s
104- ; CHECK-NEXT: rev64 v6.4s, v3.4s
105105; CHECK-NEXT: addp v16.4s, v0.4s, v5.4s
106106; CHECK-NEXT: sub v0.4s, v0.4s, v7.4s
107- ; CHECK-NEXT: zip1 v21.4s, v16.4s, v16.4s
108- ; CHECK-NEXT: sub v4.4s, v5.4s, v4.4s
109- ; CHECK-NEXT: addp v5.4s, v1.4s, v3.4s
110- ; CHECK-NEXT: sub v3.4s, v3.4s, v6.4s
107+ ; CHECK-NEXT: sub v3.4s, v5.4s, v3.4s
108+ ; CHECK-NEXT: addp v5.4s, v1.4s, v4.4s
109+ ; CHECK-NEXT: sub v4.4s, v4.4s, v6.4s
111110; CHECK-NEXT: sub v1.4s, v1.4s, v2.4s
112111; CHECK-NEXT: ext v7.16b, v0.16b, v16.16b, #4
113- ; CHECK-NEXT: ext v2.16b, v16.16b, v4.16b, #4
114- ; CHECK-NEXT: ext v6.16b, v5.16b, v3.16b, #4
115- ; CHECK-NEXT: mov v19.16b, v4.16b
112+ ; CHECK-NEXT: zip1 v21.4s, v16.4s, v16.4s
113+ ; CHECK-NEXT: ext v2.16b, v16.16b, v3.16b, #4
114+ ; CHECK-NEXT: ext v6.16b, v5.16b, v4.16b, #4
115+ ; CHECK-NEXT: mov v19.16b, v3.16b
116116; CHECK-NEXT: ext v17.16b, v1.16b, v5.16b, #8
117- ; CHECK-NEXT: mov v20.16b, v3.16b
118- ; CHECK-NEXT: trn2 v0.4s, v21.4s, v0.4s
117+ ; CHECK-NEXT: mov v20.16b, v4.16b
119118; CHECK-NEXT: ext v7.16b, v7.16b, v7.16b, #4
119+ ; CHECK-NEXT: trn2 v0.4s, v21.4s, v0.4s
120120; CHECK-NEXT: mov v19.s[2], v16.s[3]
121121; CHECK-NEXT: zip2 v2.4s, v2.4s, v16.4s
122122; CHECK-NEXT: zip2 v6.4s, v6.4s, v5.4s
@@ -125,8 +125,8 @@ define i32 @large(ptr nocapture noundef readonly %p1, i32 noundef %st1, ptr noca
125125; CHECK-NEXT: mov v1.s[2], v5.s[1]
126126; CHECK-NEXT: mov v21.16b, v7.16b
127127; CHECK-NEXT: sub v7.4s, v0.4s, v7.4s
128- ; CHECK-NEXT: ext v2.16b, v4 .16b, v2.16b, #12
129- ; CHECK-NEXT: ext v3.16b, v3 .16b, v6.16b, #12
128+ ; CHECK-NEXT: ext v2.16b, v3 .16b, v2.16b, #12
129+ ; CHECK-NEXT: ext v3.16b, v4 .16b, v6.16b, #12
130130; CHECK-NEXT: uzp2 v4.4s, v17.4s, v18.4s
131131; CHECK-NEXT: mov v6.16b, v1.16b
132132; CHECK-NEXT: mov v17.16b, v19.16b
0 commit comments