Skip to content

Commit ebcb492

Browse files
committed
[AArch64][ISel] Subvector extracts can use undef for second EXT input
This will later allow us to use the SVE2 constructive variant of EXT without requiring a MOV. The MOV would otherwise be needed because that variant of EXT requires two consecutive Z registers as input. As a consequence, extracting a subvector from e.g. z2 into z0 would require:
    z3 = MOV z2
    z0 = EXT_ZZI_B { z2, z3 }, idx
With this change, the z3 part of the { z2, z3 } tuple will be marked as undef, allowing the MOV to be simplified. We just need to add patterns for EXT_ZZI_B now; currently only the destructive EXT_ZZI variant is selected.
1 parent 73245b0 commit ebcb492

14 files changed

+700
-357
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15544,7 +15544,9 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
1554415544

1554515545
assert(InVT.isScalableVector() && "Unexpected vector type!");
1554615546
// Move requested subvector to the start of the vector and try again.
15547-
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, InVT, Vec, Vec, Idx);
15547+
// There's no need for a second input to vector_splice, so use undef there.
15548+
SDValue Splice =
15549+
DAG.getNode(ISD::VECTOR_SPLICE, DL, InVT, Vec, DAG.getUNDEF(InVT), Idx);
1554815550
return convertFromScalableVector(DAG, VT, Splice);
1554915551
}
1555015552

llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
180180
; CHECK-SVE-NEXT: mov z1.s, p0/z, #1 // =0x1
181181
; CHECK-SVE-NEXT: fmov s0, w8
182182
; CHECK-SVE-NEXT: mov v0.s[1], v1.s[1]
183-
; CHECK-SVE-NEXT: ext z1.b, z1.b, z1.b, #8
183+
; CHECK-SVE-NEXT: ext z1.b, z1.b, z0.b, #8
184184
; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 killed $q0
185185
; CHECK-SVE-NEXT: // kill: def $d1 killed $d1 killed $z1
186186
; CHECK-SVE-NEXT: b use
@@ -192,7 +192,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
192192
; CHECK-SVE2p1-NEXT: mov z1.s, p0/z, #1 // =0x1
193193
; CHECK-SVE2p1-NEXT: fmov s0, w8
194194
; CHECK-SVE2p1-NEXT: mov v0.s[1], v1.s[1]
195-
; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z1.b, #8
195+
; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z0.b, #8
196196
; CHECK-SVE2p1-NEXT: // kill: def $d0 killed $d0 killed $q0
197197
; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $z1
198198
; CHECK-SVE2p1-NEXT: b use
@@ -204,10 +204,10 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
204204
; CHECK-SME2-NEXT: mov z1.s, p0/z, #1 // =0x1
205205
; CHECK-SME2-NEXT: fmov s2, w8
206206
; CHECK-SME2-NEXT: mov z0.s, z1.s[1]
207-
; CHECK-SME2-NEXT: ext z1.b, z1.b, z1.b, #8
208-
; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
209207
; CHECK-SME2-NEXT: zip1 z0.s, z2.s, z0.s
208+
; CHECK-SME2-NEXT: ext z1.b, z1.b, z0.b, #8
210209
; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
210+
; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
211211
; CHECK-SME2-NEXT: b use
212212
%r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
213213
%v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)

llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
125125
; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
126126
; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h
127127
; VBITS_GE_256-NEXT: sunpklo z3.s, z0.h
128-
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
128+
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
129129
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
130130
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
131131
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
@@ -210,7 +210,7 @@ define void @sdiv_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
210210
; CHECK-NEXT: sunpklo z0.h, z0.b
211211
; CHECK-NEXT: sunpklo z2.s, z1.h
212212
; CHECK-NEXT: sunpklo z3.s, z0.h
213-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
213+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
214214
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
215215
; CHECK-NEXT: sunpklo z1.s, z1.h
216216
; CHECK-NEXT: sunpklo z0.s, z0.h
@@ -239,24 +239,24 @@ define void @sdiv_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
239239
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
240240
; CHECK-NEXT: sunpklo z2.h, z1.b
241241
; CHECK-NEXT: sunpklo z3.h, z0.b
242-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
243-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
242+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
244243
; CHECK-NEXT: sunpklo z1.h, z1.b
245244
; CHECK-NEXT: sunpklo z4.s, z2.h
246245
; CHECK-NEXT: sunpklo z5.s, z3.h
247-
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
248-
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
249-
; CHECK-NEXT: sunpklo z0.h, z0.b
246+
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
247+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
248+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
250249
; CHECK-NEXT: sunpklo z2.s, z2.h
251250
; CHECK-NEXT: sunpklo z3.s, z3.h
251+
; CHECK-NEXT: sunpklo z0.h, z0.b
252252
; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s
253253
; CHECK-NEXT: sunpklo z5.s, z0.h
254-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
255-
; CHECK-NEXT: sunpklo z0.s, z0.h
256254
; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
257255
; CHECK-NEXT: sunpklo z3.s, z1.h
258-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
256+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
257+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
259258
; CHECK-NEXT: sunpklo z1.s, z1.h
259+
; CHECK-NEXT: sunpklo z0.s, z0.h
260260
; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z5.s
261261
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
262262
; CHECK-NEXT: sdiv z0.s, p1/m, z0.s, z1.s
@@ -398,7 +398,7 @@ define void @sdiv_v16i16(ptr %a, ptr %b) #0 {
398398
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1]
399399
; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h
400400
; VBITS_GE_256-NEXT: sunpklo z3.s, z0.h
401-
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
401+
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
402402
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
403403
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
404404
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
@@ -476,7 +476,7 @@ define void @sdiv_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
476476
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
477477
; CHECK-NEXT: sunpklo z2.s, z1.h
478478
; CHECK-NEXT: sunpklo z3.s, z0.h
479-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
479+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
480480
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
481481
; CHECK-NEXT: sunpklo z1.s, z1.h
482482
; CHECK-NEXT: sunpklo z0.s, z0.h
@@ -858,7 +858,7 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
858858
; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
859859
; VBITS_GE_256-NEXT: uunpklo z2.s, z1.h
860860
; VBITS_GE_256-NEXT: uunpklo z3.s, z0.h
861-
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
861+
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
862862
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
863863
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
864864
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
@@ -930,12 +930,12 @@ define void @udiv_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
930930
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x1]
931931
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
932932
; CHECK-NEXT: uunpklo z2.s, z0.h
933-
; CHECK-NEXT: uunpklo z3.s, z1.h
934933
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
935-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
934+
; CHECK-NEXT: uunpklo z3.s, z1.h
935+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
936936
; CHECK-NEXT: uunpklo z0.s, z0.h
937-
; CHECK-NEXT: uunpklo z1.s, z1.h
938937
; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s
938+
; CHECK-NEXT: uunpklo z1.s, z1.h
939939
; CHECK-NEXT: udivr z0.s, p1/m, z0.s, z1.s
940940
; CHECK-NEXT: ptrue p1.h, vl64
941941
; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
@@ -959,24 +959,24 @@ define void @udiv_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
959959
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
960960
; CHECK-NEXT: uunpklo z2.h, z1.b
961961
; CHECK-NEXT: uunpklo z3.h, z0.b
962-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
963-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
962+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
964963
; CHECK-NEXT: uunpklo z1.h, z1.b
965964
; CHECK-NEXT: uunpklo z4.s, z2.h
966965
; CHECK-NEXT: uunpklo z5.s, z3.h
967-
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
968-
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
969-
; CHECK-NEXT: uunpklo z0.h, z0.b
966+
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
967+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
968+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
970969
; CHECK-NEXT: uunpklo z2.s, z2.h
971970
; CHECK-NEXT: uunpklo z3.s, z3.h
971+
; CHECK-NEXT: uunpklo z0.h, z0.b
972972
; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s
973973
; CHECK-NEXT: uunpklo z5.s, z0.h
974-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
975-
; CHECK-NEXT: uunpklo z0.s, z0.h
976974
; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s
977975
; CHECK-NEXT: uunpklo z3.s, z1.h
978-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
976+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
977+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
979978
; CHECK-NEXT: uunpklo z1.s, z1.h
979+
; CHECK-NEXT: uunpklo z0.s, z0.h
980980
; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z5.s
981981
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
982982
; CHECK-NEXT: udiv z0.s, p1/m, z0.s, z1.s
@@ -1118,7 +1118,7 @@ define void @udiv_v16i16(ptr %a, ptr %b) #0 {
11181118
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1]
11191119
; VBITS_GE_256-NEXT: uunpklo z2.s, z1.h
11201120
; VBITS_GE_256-NEXT: uunpklo z3.s, z0.h
1121-
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
1121+
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
11221122
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
11231123
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
11241124
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
@@ -1187,7 +1187,7 @@ define void @udiv_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
11871187
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
11881188
; CHECK-NEXT: uunpklo z2.s, z1.h
11891189
; CHECK-NEXT: uunpklo z3.s, z0.h
1190-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
1190+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
11911191
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
11921192
; CHECK-NEXT: uunpklo z1.s, z1.h
11931193
; CHECK-NEXT: uunpklo z0.s, z0.h

llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,8 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
129129
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
130130
; VBITS_GE_256-NEXT: sunpklo z4.s, z2.h
131131
; VBITS_GE_256-NEXT: sunpklo z5.s, z3.h
132-
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16
133-
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16
132+
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z0.b, #16
133+
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z0.b, #16
134134
; VBITS_GE_256-NEXT: sunpklo z2.s, z2.h
135135
; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h
136136
; VBITS_GE_256-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
@@ -222,8 +222,8 @@ define void @srem_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
222222
; CHECK-NEXT: sunpklo z3.h, z0.b
223223
; CHECK-NEXT: sunpklo z4.s, z2.h
224224
; CHECK-NEXT: sunpklo z5.s, z3.h
225-
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
226-
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
225+
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
226+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
227227
; CHECK-NEXT: sunpklo z2.s, z2.h
228228
; CHECK-NEXT: sunpklo z3.s, z3.h
229229
; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s
@@ -254,24 +254,24 @@ define void @srem_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
254254
; CHECK-NEXT: sunpklo z3.h, z0.b
255255
; CHECK-NEXT: sunpklo z4.s, z2.h
256256
; CHECK-NEXT: sunpklo z5.s, z3.h
257-
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
258-
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
257+
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
258+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
259259
; CHECK-NEXT: sunpklo z2.s, z2.h
260260
; CHECK-NEXT: sunpklo z3.s, z3.h
261261
; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s
262262
; CHECK-NEXT: mov z5.d, z0.d
263263
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #128
264264
; CHECK-NEXT: sunpklo z5.h, z5.b
265265
; CHECK-NEXT: sunpklo z7.s, z5.h
266-
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #128
266+
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #128
267267
; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
268268
; CHECK-NEXT: mov z3.d, z1.d
269269
; CHECK-NEXT: sunpklo z5.s, z5.h
270-
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128
270+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
271271
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
272272
; CHECK-NEXT: sunpklo z3.h, z3.b
273273
; CHECK-NEXT: sunpklo z6.s, z3.h
274-
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
274+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
275275
; CHECK-NEXT: sunpklo z3.s, z3.h
276276
; CHECK-NEXT: sdivr z6.s, p1/m, z6.s, z7.s
277277
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
@@ -425,7 +425,7 @@ define void @srem_v16i16(ptr %a, ptr %b) #0 {
425425
; VBITS_GE_256-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
426426
; VBITS_GE_256-NEXT: mov z3.d, z1.d
427427
; VBITS_GE_256-NEXT: sunpklo z4.s, z4.h
428-
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z1.b, #16
428+
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z0.b, #16
429429
; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h
430430
; VBITS_GE_256-NEXT: sdivr z3.s, p1/m, z3.s, z4.s
431431
; VBITS_GE_256-NEXT: ptrue p1.h, vl8
@@ -512,7 +512,7 @@ define void @srem_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
512512
; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
513513
; CHECK-NEXT: mov z3.d, z1.d
514514
; CHECK-NEXT: sunpklo z4.s, z4.h
515-
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128
515+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
516516
; CHECK-NEXT: sunpklo z3.s, z3.h
517517
; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z4.s
518518
; CHECK-NEXT: ptrue p1.h, vl64
@@ -947,8 +947,8 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
947947
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
948948
; VBITS_GE_256-NEXT: uunpklo z4.s, z2.h
949949
; VBITS_GE_256-NEXT: uunpklo z5.s, z3.h
950-
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16
951-
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16
950+
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z0.b, #16
951+
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z0.b, #16
952952
; VBITS_GE_256-NEXT: uunpklo z2.s, z2.h
953953
; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h
954954
; VBITS_GE_256-NEXT: udivr z4.s, p0/m, z4.s, z5.s
@@ -1040,8 +1040,8 @@ define void @urem_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
10401040
; CHECK-NEXT: uunpklo z3.h, z0.b
10411041
; CHECK-NEXT: uunpklo z4.s, z2.h
10421042
; CHECK-NEXT: uunpklo z5.s, z3.h
1043-
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
1044-
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
1043+
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
1044+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
10451045
; CHECK-NEXT: uunpklo z2.s, z2.h
10461046
; CHECK-NEXT: uunpklo z3.s, z3.h
10471047
; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s
@@ -1072,24 +1072,24 @@ define void @urem_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
10721072
; CHECK-NEXT: uunpklo z3.h, z0.b
10731073
; CHECK-NEXT: uunpklo z4.s, z2.h
10741074
; CHECK-NEXT: uunpklo z5.s, z3.h
1075-
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
1076-
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
1075+
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
1076+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
10771077
; CHECK-NEXT: uunpklo z2.s, z2.h
10781078
; CHECK-NEXT: uunpklo z3.s, z3.h
10791079
; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s
10801080
; CHECK-NEXT: mov z5.d, z0.d
10811081
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #128
10821082
; CHECK-NEXT: uunpklo z5.h, z5.b
10831083
; CHECK-NEXT: uunpklo z7.s, z5.h
1084-
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #128
1084+
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #128
10851085
; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s
10861086
; CHECK-NEXT: mov z3.d, z1.d
10871087
; CHECK-NEXT: uunpklo z5.s, z5.h
1088-
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128
1088+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
10891089
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
10901090
; CHECK-NEXT: uunpklo z3.h, z3.b
10911091
; CHECK-NEXT: uunpklo z6.s, z3.h
1092-
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
1092+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
10931093
; CHECK-NEXT: uunpklo z3.s, z3.h
10941094
; CHECK-NEXT: udivr z6.s, p1/m, z6.s, z7.s
10951095
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
@@ -1243,7 +1243,7 @@ define void @urem_v16i16(ptr %a, ptr %b) #0 {
12431243
; VBITS_GE_256-NEXT: udivr z2.s, p1/m, z2.s, z3.s
12441244
; VBITS_GE_256-NEXT: mov z3.d, z1.d
12451245
; VBITS_GE_256-NEXT: uunpklo z4.s, z4.h
1246-
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z1.b, #16
1246+
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z0.b, #16
12471247
; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h
12481248
; VBITS_GE_256-NEXT: udivr z3.s, p1/m, z3.s, z4.s
12491249
; VBITS_GE_256-NEXT: ptrue p1.h, vl8
@@ -1330,7 +1330,7 @@ define void @urem_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
13301330
; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s
13311331
; CHECK-NEXT: mov z3.d, z1.d
13321332
; CHECK-NEXT: uunpklo z4.s, z4.h
1333-
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128
1333+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
13341334
; CHECK-NEXT: uunpklo z3.s, z3.h
13351335
; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z4.s
13361336
; CHECK-NEXT: ptrue p1.h, vl64

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -338,14 +338,14 @@ define void @masked_scatter_v8i32(ptr %a, ptr %b) #0 {
338338
; VBITS_GE_256-NEXT: ld1d { z4.d }, p1/z, [x1, x8, lsl #3]
339339
; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z0.s, #0
340340
; VBITS_GE_256-NEXT: uunpklo z2.d, z0.s
341-
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
342-
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
343341
; VBITS_GE_256-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
344342
; VBITS_GE_256-NEXT: punpklo p0.h, p0.b
345343
; VBITS_GE_256-NEXT: and p0.b, p0/z, p0.b, p1.b
346-
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
344+
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
345+
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
347346
; VBITS_GE_256-NEXT: st1w { z2.d }, p0, [z3.d]
348347
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
348+
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
349349
; VBITS_GE_256-NEXT: cmpne p0.d, p1/z, z1.d, #0
350350
; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [z4.d]
351351
; VBITS_GE_256-NEXT: ret
@@ -715,14 +715,14 @@ define void @masked_scatter_v8f32(ptr %a, ptr %b) #0 {
715715
; VBITS_GE_256-NEXT: ld1d { z4.d }, p1/z, [x1, x8, lsl #3]
716716
; VBITS_GE_256-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
717717
; VBITS_GE_256-NEXT: uunpklo z2.d, z0.s
718-
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
719-
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
720718
; VBITS_GE_256-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
721719
; VBITS_GE_256-NEXT: punpklo p0.h, p0.b
722720
; VBITS_GE_256-NEXT: and p0.b, p0/z, p0.b, p1.b
723-
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
721+
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
722+
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
724723
; VBITS_GE_256-NEXT: st1w { z2.d }, p0, [z3.d]
725724
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
725+
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
726726
; VBITS_GE_256-NEXT: cmpne p0.d, p1/z, z1.d, #0
727727
; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [z4.d]
728728
; VBITS_GE_256-NEXT: ret

llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) v
1313
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1414
; CHECK-NEXT: mov z0.s, s0
1515
; CHECK-NEXT: mov z1.d, z0.d
16-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #16
16+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
1717
; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x0]
1818
; CHECK-NEXT: ret
1919
%splat = shufflevector <2 x i32> %b, <2 x i32> poison, <8 x i32> zeroinitializer

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
127127
; CHECK-NEXT: uunpklo z2.s, z1.h
128128
; CHECK-NEXT: uunpklo z3.s, z0.h
129129
; CHECK-NEXT: ptrue p0.s, vl4
130-
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
130+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
131131
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
132132
; CHECK-NEXT: uunpklo z1.s, z1.h
133133
; CHECK-NEXT: uunpklo z0.s, z0.h

0 commit comments

Comments
 (0)