Skip to content

Commit 7ea47e5

Browse files
committed
Add unpacked/overpacked tests and move up VScale
1 parent 85921fc commit 7ea47e5

File tree

2 files changed

+178
-2
lines changed

2 files changed

+178
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7379,10 +7379,11 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7379 7379
if (N.getOpcode() != ISD::ADD)
7380 7380
return false;
7381 7381

7382+
SDValue VScale = N.getOperand(1);
7382 7383
int64_t MulImm = std::numeric_limits<int64_t>::max();
7383-
if (SDValue VScale = N.getOperand(1); VScale.getOpcode() == ISD::VSCALE)
7384+
if (VScale.getOpcode() == ISD::VSCALE)
7384 7385
MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7385-
else if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7386+
else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7386 7387
int64_t ByteOffset = C->getSExtValue();
7387 7388
constexpr auto SVEBitsPerBlock = AArch64::SVEBitsPerBlock;
7388 7389
auto MinVScale = Subtarget->getMinSVEVectorSizeInBits() / SVEBitsPerBlock;

llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll

Lines changed: 175 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,3 +185,178 @@ define void @nxv2i64(ptr %ldptr, ptr %stptr) {
185 185
store <vscale x 2 x i64> %x, ptr %stoff, align 8
186 186
ret void
187 187
}
188+
189+
define void @nxv4i8(ptr %ldptr, ptr %stptr) {
190+
; CHECK-LABEL: nxv4i8:
191+
; CHECK: // %bb.0:
192+
; CHECK-NEXT: ptrue p0.s
193+
; CHECK-NEXT: mov w8, #32 // =0x20
194+
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, x8]
195+
; CHECK-NEXT: st1b { z0.s }, p0, [x1, x8]
196+
; CHECK-NEXT: ret
197+
;
198+
; CHECK-128-LABEL: nxv4i8:
199+
; CHECK-128: // %bb.0:
200+
; CHECK-128-NEXT: ptrue p0.s
201+
; CHECK-128-NEXT: mov w8, #32 // =0x20
202+
; CHECK-128-NEXT: ld1b { z0.s }, p0/z, [x0, x8]
203+
; CHECK-128-NEXT: st1b { z0.s }, p0, [x1, x8]
204+
; CHECK-128-NEXT: ret
205+
;
206+
; CHECK-256-LABEL: nxv4i8:
207+
; CHECK-256: // %bb.0:
208+
; CHECK-256-NEXT: ptrue p0.s
209+
; CHECK-256-NEXT: ld1b { z0.s }, p0/z, [x0, #4, mul vl]
210+
; CHECK-256-NEXT: st1b { z0.s }, p0, [x1, #4, mul vl]
211+
; CHECK-256-NEXT: ret
212+
;
213+
; CHECK-512-LABEL: nxv4i8:
214+
; CHECK-512: // %bb.0:
215+
; CHECK-512-NEXT: ptrue p0.s
216+
; CHECK-512-NEXT: ld1b { z0.s }, p0/z, [x0, #2, mul vl]
217+
; CHECK-512-NEXT: st1b { z0.s }, p0, [x1, #2, mul vl]
218+
; CHECK-512-NEXT: ret
219+
;
220+
; CHECK-1024-LABEL: nxv4i8:
221+
; CHECK-1024: // %bb.0:
222+
; CHECK-1024-NEXT: ptrue p0.s
223+
; CHECK-1024-NEXT: ld1b { z0.s }, p0/z, [x0, #1, mul vl]
224+
; CHECK-1024-NEXT: st1b { z0.s }, p0, [x1, #1, mul vl]
225+
; CHECK-1024-NEXT: ret
226+
;
227+
; CHECK-2048-LABEL: nxv4i8:
228+
; CHECK-2048: // %bb.0:
229+
; CHECK-2048-NEXT: ptrue p0.s
230+
; CHECK-2048-NEXT: mov w8, #32 // =0x20
231+
; CHECK-2048-NEXT: ld1b { z0.s }, p0/z, [x0, x8]
232+
; CHECK-2048-NEXT: st1b { z0.s }, p0, [x1, x8]
233+
; CHECK-2048-NEXT: ret
234+
%ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 32
235+
%stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 32
236+
%x = load <vscale x 4 x i8>, ptr %ldoff, align 1
237+
store <vscale x 4 x i8> %x, ptr %stoff, align 1
238+
ret void
239+
}
240+
241+
define void @nxv2f32(ptr %ldptr, ptr %stptr) {
242+
; CHECK-LABEL: nxv2f32:
243+
; CHECK: // %bb.0:
244+
; CHECK-NEXT: ptrue p0.d
245+
; CHECK-NEXT: mov x8, #16 // =0x10
246+
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
247+
; CHECK-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
248+
; CHECK-NEXT: ret
249+
;
250+
; CHECK-128-LABEL: nxv2f32:
251+
; CHECK-128: // %bb.0:
252+
; CHECK-128-NEXT: ptrue p0.d
253+
; CHECK-128-NEXT: mov x8, #16 // =0x10
254+
; CHECK-128-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
255+
; CHECK-128-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
256+
; CHECK-128-NEXT: ret
257+
;
258+
; CHECK-256-LABEL: nxv2f32:
259+
; CHECK-256: // %bb.0:
260+
; CHECK-256-NEXT: ptrue p0.d
261+
; CHECK-256-NEXT: ld1w { z0.d }, p0/z, [x0, #4, mul vl]
262+
; CHECK-256-NEXT: st1w { z0.d }, p0, [x1, #4, mul vl]
263+
; CHECK-256-NEXT: ret
264+
;
265+
; CHECK-512-LABEL: nxv2f32:
266+
; CHECK-512: // %bb.0:
267+
; CHECK-512-NEXT: ptrue p0.d
268+
; CHECK-512-NEXT: ld1w { z0.d }, p0/z, [x0, #2, mul vl]
269+
; CHECK-512-NEXT: st1w { z0.d }, p0, [x1, #2, mul vl]
270+
; CHECK-512-NEXT: ret
271+
;
272+
; CHECK-1024-LABEL: nxv2f32:
273+
; CHECK-1024: // %bb.0:
274+
; CHECK-1024-NEXT: ptrue p0.d
275+
; CHECK-1024-NEXT: ld1w { z0.d }, p0/z, [x0, #1, mul vl]
276+
; CHECK-1024-NEXT: st1w { z0.d }, p0, [x1, #1, mul vl]
277+
; CHECK-1024-NEXT: ret
278+
;
279+
; CHECK-2048-LABEL: nxv2f32:
280+
; CHECK-2048: // %bb.0:
281+
; CHECK-2048-NEXT: ptrue p0.d
282+
; CHECK-2048-NEXT: mov x8, #16 // =0x10
283+
; CHECK-2048-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
284+
; CHECK-2048-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
285+
; CHECK-2048-NEXT: ret
286+
%ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 64
287+
%stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 64
288+
%x = load <vscale x 2 x float>, ptr %ldoff, align 1
289+
store <vscale x 2 x float> %x, ptr %stoff, align 1
290+
ret void
291+
}
292+
293+
define void @nxv4f64(ptr %ldptr, ptr %stptr) {
294+
; CHECK-LABEL: nxv4f64:
295+
; CHECK: // %bb.0:
296+
; CHECK-NEXT: ptrue p0.d
297+
; CHECK-NEXT: mov x8, #16 // =0x10
298+
; CHECK-NEXT: add x9, x0, #128
299+
; CHECK-NEXT: ldr z1, [x9, #1, mul vl]
300+
; CHECK-NEXT: add x9, x1, #128
301+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
302+
; CHECK-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
303+
; CHECK-NEXT: str z1, [x9, #1, mul vl]
304+
; CHECK-NEXT: ret
305+
;
306+
; CHECK-128-LABEL: nxv4f64:
307+
; CHECK-128: // %bb.0:
308+
; CHECK-128-NEXT: add x8, x0, #128
309+
; CHECK-128-NEXT: ldr z1, [x0, #8, mul vl]
310+
; CHECK-128-NEXT: ldr z0, [x8, #1, mul vl]
311+
; CHECK-128-NEXT: add x8, x1, #128
312+
; CHECK-128-NEXT: str z0, [x8, #1, mul vl]
313+
; CHECK-128-NEXT: str z1, [x1, #8, mul vl]
314+
; CHECK-128-NEXT: ret
315+
;
316+
; CHECK-256-LABEL: nxv4f64:
317+
; CHECK-256: // %bb.0:
318+
; CHECK-256-NEXT: add x8, x0, #128
319+
; CHECK-256-NEXT: ldr z1, [x0, #4, mul vl]
320+
; CHECK-256-NEXT: ldr z0, [x8, #1, mul vl]
321+
; CHECK-256-NEXT: add x8, x1, #128
322+
; CHECK-256-NEXT: str z0, [x8, #1, mul vl]
323+
; CHECK-256-NEXT: str z1, [x1, #4, mul vl]
324+
; CHECK-256-NEXT: ret
325+
;
326+
; CHECK-512-LABEL: nxv4f64:
327+
; CHECK-512: // %bb.0:
328+
; CHECK-512-NEXT: add x8, x0, #128
329+
; CHECK-512-NEXT: ldr z1, [x0, #2, mul vl]
330+
; CHECK-512-NEXT: ldr z0, [x8, #1, mul vl]
331+
; CHECK-512-NEXT: add x8, x1, #128
332+
; CHECK-512-NEXT: str z0, [x8, #1, mul vl]
333+
; CHECK-512-NEXT: str z1, [x1, #2, mul vl]
334+
; CHECK-512-NEXT: ret
335+
;
336+
; CHECK-1024-LABEL: nxv4f64:
337+
; CHECK-1024: // %bb.0:
338+
; CHECK-1024-NEXT: add x8, x0, #128
339+
; CHECK-1024-NEXT: ldr z1, [x0, #1, mul vl]
340+
; CHECK-1024-NEXT: ldr z0, [x8, #1, mul vl]
341+
; CHECK-1024-NEXT: add x8, x1, #128
342+
; CHECK-1024-NEXT: str z0, [x8, #1, mul vl]
343+
; CHECK-1024-NEXT: str z1, [x1, #1, mul vl]
344+
; CHECK-1024-NEXT: ret
345+
;
346+
; CHECK-2048-LABEL: nxv4f64:
347+
; CHECK-2048: // %bb.0:
348+
; CHECK-2048-NEXT: ptrue p0.d
349+
; CHECK-2048-NEXT: mov x8, #16 // =0x10
350+
; CHECK-2048-NEXT: add x9, x0, #128
351+
; CHECK-2048-NEXT: ldr z1, [x9, #1, mul vl]
352+
; CHECK-2048-NEXT: add x9, x1, #128
353+
; CHECK-2048-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
354+
; CHECK-2048-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
355+
; CHECK-2048-NEXT: str z1, [x9, #1, mul vl]
356+
; CHECK-2048-NEXT: ret
357+
%ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 128
358+
%stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 128
359+
%x = load <vscale x 4 x double>, ptr %ldoff, align 1
360+
store <vscale x 4 x double> %x, ptr %stoff, align 1
361+
ret void
362+
}

0 commit comments

Comments
 (0)