@@ -66,86 +66,86 @@ define i32 @large(ptr nocapture noundef readonly %p1, i32 noundef %st1, ptr noca
6666; CHECK-NEXT: ldr d5, [x11, x9]
6767; CHECK-NEXT: shll2 v6.4s, v0.8h, #16
6868; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b
69+ ; CHECK-NEXT: shll2 v7.4s, v1.8h, #16
6970; CHECK-NEXT: usubl v3.8h, v4.8b, v5.8b
70- ; CHECK-NEXT: shll2 v4.4s, v1.8h, #16
7171; CHECK-NEXT: saddw v0.4s, v6.4s, v0.4h
72- ; CHECK-NEXT: shll2 v6 .4s, v2.8h, #16
73- ; CHECK-NEXT: shll2 v5 .4s, v3.8h, #16
74- ; CHECK-NEXT: saddw v1 .4s, v4.4s, v1.4h
75- ; CHECK-NEXT: rev64 v4 .4s, v0.4s
76- ; CHECK-NEXT: saddw v2.4s, v6 .4s, v2.4h
77- ; CHECK-NEXT: saddw v3 .4s, v5 .4s, v3.4h
78- ; CHECK-NEXT: rev64 v5 .4s, v1 .4s
79- ; CHECK-NEXT: rev64 v6 .4s, v2.4s
80- ; CHECK-NEXT: sub v4 .4s, v0.4s, v4 .4s
72+ ; CHECK-NEXT: shll2 v5 .4s, v2.8h, #16
73+ ; CHECK-NEXT: saddw v1 .4s, v7.4s, v1.4h
74+ ; CHECK-NEXT: shll2 v4 .4s, v3.8h, #16
75+ ; CHECK-NEXT: rev64 v6 .4s, v0.4s
76+ ; CHECK-NEXT: saddw v2.4s, v5 .4s, v2.4h
77+ ; CHECK-NEXT: rev64 v7 .4s, v1 .4s
78+ ; CHECK-NEXT: saddw v3 .4s, v4 .4s, v3.4h
79+ ; CHECK-NEXT: rev64 v4 .4s, v2.4s
80+ ; CHECK-NEXT: sub v6 .4s, v0.4s, v6 .4s
8181; CHECK-NEXT: addp v0.4s, v1.4s, v0.4s
82- ; CHECK-NEXT: rev64 v7 .4s, v3.4s
83- ; CHECK-NEXT: sub v5 .4s, v1.4s, v5 .4s
84- ; CHECK-NEXT: sub v6 .4s, v2.4s, v6 .4s
82+ ; CHECK-NEXT: rev64 v5 .4s, v3.4s
83+ ; CHECK-NEXT: sub v7 .4s, v1.4s, v7 .4s
84+ ; CHECK-NEXT: sub v4 .4s, v2.4s, v4 .4s
8585; CHECK-NEXT: addp v2.4s, v3.4s, v2.4s
86- ; CHECK-NEXT: zip1 v16.4s, v5.4s, v4.4s
87- ; CHECK-NEXT: sub v7.4s, v3.4s, v7.4s
88- ; CHECK-NEXT: zip2 v3.4s, v6.4s, v7.4s
89- ; CHECK-NEXT: mov v6.s[1], v7.s[0]
90- ; CHECK-NEXT: ext v7.16b, v5.16b, v16.16b, #8
91- ; CHECK-NEXT: mov v5.s[3], v4.s[2]
92- ; CHECK-NEXT: ext v4.16b, v2.16b, v2.16b, #8
93- ; CHECK-NEXT: mov v6.d[1], v7.d[1]
86+ ; CHECK-NEXT: zip1 v16.4s, v7.4s, v6.4s
87+ ; CHECK-NEXT: sub v5.4s, v3.4s, v5.4s
88+ ; CHECK-NEXT: zip1 v3.4s, v4.4s, v5.4s
89+ ; CHECK-NEXT: zip2 v4.4s, v4.4s, v5.4s
90+ ; CHECK-NEXT: ext v5.16b, v7.16b, v16.16b, #8
91+ ; CHECK-NEXT: mov v7.s[3], v6.s[2]
92+ ; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8
9493; CHECK-NEXT: mov v3.d[1], v5.d[1]
95- ; CHECK-NEXT: uzp1 v1.4s, v4.4s, v0.4s
96- ; CHECK-NEXT: uzp2 v4.4s, v4.4s, v0.4s
94+ ; CHECK-NEXT: mov v4.d[1], v7.d[1]
95+ ; CHECK-NEXT: uzp1 v1.4s, v6.4s, v0.4s
96+ ; CHECK-NEXT: uzp2 v5.4s, v6.4s, v0.4s
9797; CHECK-NEXT: addp v0.4s, v2.4s, v0.4s
98- ; CHECK-NEXT: add v5 .4s, v3 .4s, v6 .4s
99- ; CHECK-NEXT: sub v3.4s, v6 .4s, v3 .4s
98+ ; CHECK-NEXT: add v6 .4s, v4 .4s, v3 .4s
99+ ; CHECK-NEXT: sub v3.4s, v3 .4s, v4 .4s
100100; CHECK-NEXT: rev64 v7.4s, v0.4s
101- ; CHECK-NEXT: sub v1.4s, v1.4s, v4 .4s
102- ; CHECK-NEXT: rev64 v4.4s, v5 .4s
103- ; CHECK-NEXT: rev64 v6 .4s, v3.4s
104- ; CHECK-NEXT: addp v16.4s, v0.4s, v5 .4s
101+ ; CHECK-NEXT: sub v1.4s, v1.4s, v5 .4s
102+ ; CHECK-NEXT: rev64 v4.4s, v6 .4s
103+ ; CHECK-NEXT: rev64 v5 .4s, v3.4s
104+ ; CHECK-NEXT: addp v16.4s, v0.4s, v6 .4s
105105; CHECK-NEXT: rev64 v2.4s, v1.4s
106106; CHECK-NEXT: sub v0.4s, v0.4s, v7.4s
107107; CHECK-NEXT: zip1 v21.4s, v16.4s, v16.4s
108- ; CHECK-NEXT: sub v4.4s, v5 .4s, v4.4s
109- ; CHECK-NEXT: addp v5 .4s, v1.4s, v3.4s
110- ; CHECK-NEXT: sub v3.4s, v3.4s, v6 .4s
108+ ; CHECK-NEXT: sub v4.4s, v6 .4s, v4.4s
109+ ; CHECK-NEXT: addp v6 .4s, v1.4s, v3.4s
110+ ; CHECK-NEXT: sub v3.4s, v3.4s, v5 .4s
111111; CHECK-NEXT: sub v1.4s, v1.4s, v2.4s
112112; CHECK-NEXT: ext v7.16b, v0.16b, v16.16b, #4
113113; CHECK-NEXT: ext v2.16b, v16.16b, v4.16b, #4
114- ; CHECK-NEXT: ext v6 .16b, v5 .16b, v3.16b, #4
114+ ; CHECK-NEXT: ext v5 .16b, v6 .16b, v3.16b, #4
115115; CHECK-NEXT: mov v19.16b, v4.16b
116- ; CHECK-NEXT: ext v17.16b, v1.16b, v5 .16b, #8
116+ ; CHECK-NEXT: ext v17.16b, v1.16b, v6 .16b, #8
117117; CHECK-NEXT: mov v20.16b, v3.16b
118118; CHECK-NEXT: trn2 v0.4s, v21.4s, v0.4s
119119; CHECK-NEXT: ext v7.16b, v7.16b, v7.16b, #4
120120; CHECK-NEXT: mov v19.s[2], v16.s[3]
121121; CHECK-NEXT: zip2 v2.4s, v2.4s, v16.4s
122- ; CHECK-NEXT: zip2 v6 .4s, v6 .4s, v5 .4s
123- ; CHECK-NEXT: mov v20.s[2], v5 .s[3]
122+ ; CHECK-NEXT: zip2 v5 .4s, v5 .4s, v6 .4s
123+ ; CHECK-NEXT: mov v20.s[2], v6 .s[3]
124124; CHECK-NEXT: ext v18.16b, v17.16b, v1.16b, #4
125- ; CHECK-NEXT: mov v1.s[2], v5 .s[1]
125+ ; CHECK-NEXT: mov v1.s[2], v6 .s[1]
126126; CHECK-NEXT: mov v21.16b, v7.16b
127127; CHECK-NEXT: sub v7.4s, v0.4s, v7.4s
128128; CHECK-NEXT: ext v2.16b, v4.16b, v2.16b, #12
129- ; CHECK-NEXT: ext v3.16b, v3.16b, v6 .16b, #12
129+ ; CHECK-NEXT: ext v3.16b, v3.16b, v5 .16b, #12
130130; CHECK-NEXT: uzp2 v4.4s, v17.4s, v18.4s
131- ; CHECK-NEXT: mov v6 .16b, v1.16b
131+ ; CHECK-NEXT: mov v5 .16b, v1.16b
132132; CHECK-NEXT: mov v17.16b, v19.16b
133133; CHECK-NEXT: mov v18.16b, v20.16b
134134; CHECK-NEXT: mov v21.s[0], v16.s[1]
135- ; CHECK-NEXT: mov v6 .s[1], v5 .s[0]
135+ ; CHECK-NEXT: mov v5 .s[1], v6 .s[0]
136136; CHECK-NEXT: mov v17.s[1], v16.s[2]
137137; CHECK-NEXT: sub v16.4s, v19.4s, v2.4s
138- ; CHECK-NEXT: mov v18.s[1], v5 .s[2]
138+ ; CHECK-NEXT: mov v18.s[1], v6 .s[2]
139139; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s
140- ; CHECK-NEXT: sub v5 .4s, v20.4s, v3.4s
140+ ; CHECK-NEXT: sub v6 .4s, v20.4s, v3.4s
141141; CHECK-NEXT: add v0.4s, v0.4s, v21.4s
142- ; CHECK-NEXT: add v4.4s, v6 .4s, v4.4s
142+ ; CHECK-NEXT: add v4.4s, v5 .4s, v4.4s
143143; CHECK-NEXT: add v2.4s, v17.4s, v2.4s
144144; CHECK-NEXT: add v3.4s, v18.4s, v3.4s
145145; CHECK-NEXT: mov v0.d[1], v7.d[1]
146146; CHECK-NEXT: mov v4.d[1], v1.d[1]
147147; CHECK-NEXT: mov v2.d[1], v16.d[1]
148- ; CHECK-NEXT: mov v3.d[1], v5 .d[1]
148+ ; CHECK-NEXT: mov v3.d[1], v6 .d[1]
149149; CHECK-NEXT: cmlt v7.8h, v0.8h, #0
150150; CHECK-NEXT: cmlt v1.8h, v4.8h, #0
151151; CHECK-NEXT: cmlt v6.8h, v2.8h, #0
0 commit comments