Skip to content

Commit ac52098

Browse files
committed
Addressed comments
1 parent aae8b5b commit ac52098

File tree

6 files changed

+202
-51
lines changed

6 files changed

+202
-51
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2941,8 +2941,8 @@ AArch64TargetLowering::EmitDynamicProbedAlloc(MachineInstr &MI,
29412941
}
29422942

29432943
MachineBasicBlock *
2944-
AArch64TargetLowering::EmitCheckVL(MachineInstr &MI,
2945-
MachineBasicBlock *MBB) const {
2944+
AArch64TargetLowering::EmitCheckMatchingVL(MachineInstr &MI,
2945+
MachineBasicBlock *MBB) const {
29462946
MachineFunction *MF = MBB->getParent();
29472947
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
29482948
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
@@ -3389,8 +3389,8 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
33893389
case AArch64::PROBED_STACKALLOC_DYN:
33903390
return EmitDynamicProbedAlloc(MI, BB);
33913391

3392-
case AArch64::CHECK_MATCHING_VL:
3393-
return EmitCheckVL(MI, BB);
3392+
case AArch64::CHECK_MATCHING_VL_PSEUDO:
3393+
return EmitCheckMatchingVL(MI, BB);
33943394

33953395
case AArch64::LD1_MXIPXX_H_PSEUDO_B:
33963396
return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
@@ -9162,11 +9162,9 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
91629162
}
91639163
}
91649164

9165-
SDValue AArch64TargetLowering::changeStreamingMode(SelectionDAG &DAG, SDLoc DL,
9166-
bool Enable, SDValue Chain,
9167-
SDValue InGlue,
9168-
unsigned Condition,
9169-
bool HasSVECC) const {
9165+
SDValue AArch64TargetLowering::changeStreamingMode(
9166+
SelectionDAG &DAG, SDLoc DL, bool Enable, SDValue Chain, SDValue InGlue,
9167+
unsigned Condition, bool InsertVectorLengthCheck) const {
91709168
MachineFunction &MF = DAG.getMachineFunction();
91719169
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
91729170
FuncInfo->setHasStreamingModeChanges(true);
@@ -9197,20 +9195,18 @@ SDValue AArch64TargetLowering::changeStreamingMode(SelectionDAG &DAG, SDLoc DL,
91979195
if (InGlue)
91989196
Ops.push_back(InGlue);
91999197

9200-
if (!HasSVECC)
9198+
if (!InsertVectorLengthCheck)
92019199
return DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
92029200

92039201
auto GetCheckVL = [&](SDValue Chain, SDValue InGlue = SDValue()) -> SDValue {
92049202
SmallVector<SDValue, 2> Ops = {Chain};
92059203
if (InGlue)
92069204
Ops.push_back(InGlue);
9207-
return SDValue(DAG.getMachineNode(AArch64::CHECK_MATCHING_VL, DL,
9208-
DAG.getVTList(MVT::Other, MVT::Glue),
9209-
Ops),
9210-
0);
9205+
return DAG.getNode(AArch64ISD::CHECK_MATCHING_VL, DL,
9206+
DAG.getVTList(MVT::Other, MVT::Glue), Ops);
92119207
};
92129208

9213-
// NS -> S
9209+
// Non-streaming -> Streaming
92149210
if (Enable) {
92159211
SDValue CheckVL = GetCheckVL(Chain, InGlue);
92169212

@@ -9226,7 +9222,7 @@ SDValue AArch64TargetLowering::changeStreamingMode(SelectionDAG &DAG, SDLoc DL,
92269222
return DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
92279223
}
92289224

9229-
// S -> NS
9225+
// Streaming -> Non-streaming
92309226
SDValue StreamingModeInstr =
92319227
DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
92329228
return GetCheckVL(StreamingModeInstr.getValue(0),
@@ -9813,10 +9809,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
98139809

98149810
SDValue InGlue;
98159811
if (RequiresSMChange) {
9816-
Chain =
9817-
changeStreamingMode(DAG, DL, CallAttrs.callee().hasStreamingInterface(),
9818-
Chain, InGlue, getSMToggleCondition(CallAttrs),
9819-
CallConv == CallingConv::AArch64_SVE_VectorCall);
9812+
bool InsertVectorLengthCheck =
9813+
(CallConv == CallingConv::AArch64_SVE_VectorCall);
9814+
Chain = changeStreamingMode(
9815+
DAG, DL, CallAttrs.callee().hasStreamingInterface(), Chain, InGlue,
9816+
getSMToggleCondition(CallAttrs), InsertVectorLengthCheck);
98209817
InGlue = Chain.getValue(1);
98219818
}
98229819

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,8 @@ class AArch64TargetLowering : public TargetLowering {
168168
MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
169169
MachineBasicBlock *MBB) const;
170170

171-
MachineBasicBlock *EmitCheckVL(MachineInstr &MI,
172-
MachineBasicBlock *MBB) const;
171+
MachineBasicBlock *EmitCheckMatchingVL(MachineInstr &MI,
172+
MachineBasicBlock *MBB) const;
173173

174174
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
175175
MachineInstr &MI,
@@ -536,7 +536,7 @@ class AArch64TargetLowering : public TargetLowering {
536536
/// AArch64SME::ToggleCondition.
537537
SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
538538
SDValue Chain, SDValue InGlue, unsigned Condition,
539-
bool HasSVECC = false) const;
539+
bool InsertVectorLengthCheck = false) const;
540540

541541
bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
542542

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,12 +1367,6 @@ def PROBED_STACKALLOC_DYN : Pseudo<(outs),
13671367
} // Defs = [SP, NZCV], Uses = [SP] in
13681368
} // hasSideEffects = 1, isCodeGenOnly = 1
13691369

1370-
// Pseudo-instruction that compares the current SVE vector length (VL) with the
1371-
// streaming vector length (SVL). If the two lengths do not match, the check
1372-
// lowers to a `brk`, causing a trap.
1373-
let hasSideEffects = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in
1374-
def CHECK_MATCHING_VL : Pseudo<(outs), (ins), []>, Sched<[]>;
1375-
13761370
let isReMaterializable = 1, isCodeGenOnly = 1 in {
13771371
// FIXME: The following pseudo instructions are only needed because remat
13781372
// cannot handle multiple instructions. When that changes, they can be

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,17 @@ let usesCustomInserter = 1 in {
4848
}
4949
def : Pat<(i64 (AArch64EntryPStateSM)), (EntryPStateSM)>;
5050

51+
// Pseudo-instruction that compares the current SVE vector length (VL) with the
52+
// streaming vector length (SVL). It is expanded by a custom inserter into a
53+
// runtime check that executes a `brk` (trap) if the two lengths do not match.
54+
let hasSideEffects = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in
55+
def CHECK_MATCHING_VL_PSEUDO : Pseudo<(outs), (ins), []>, Sched<[]>;
56+
57+
def AArch64_check_matching_vl
58+
: SDNode<"AArch64ISD::CHECK_MATCHING_VL", SDTypeProfile<0, 0,[]>,
59+
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
60+
def : Pat<(AArch64_check_matching_vl), (CHECK_MATCHING_VL_PSEUDO)>;
61+
5162
//===----------------------------------------------------------------------===//
5263
// Old SME ABI lowering ISD nodes/pseudos (deprecated)
5364
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AArch64/sme-streaming-checkvl.ll

Lines changed: 170 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme,+sme2p1 < %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme,+sme2p1 < %s | FileCheck %s
33

44
target triple = "aarch64-unknown-linux-gnu"
55

66
declare void @bar_enabled(<vscale x 4 x i32>) #0
7+
declare void @bar(<vscale x 4 x i32>)
8+
declare <vscale x 4 x i32> @bar_retv_enabled() #0
9+
declare <vscale x 4 x i32> @bar_retv()
710

811
; Non-streaming -> calls streaming callee
912
define void @foo_non_streaming_pass_arg(ptr %arg) {
@@ -107,24 +110,22 @@ define void @foo_streaming_compatible_pass_arg(ptr %arg) #1 {
107110
; CHECK-NEXT: .cfi_offset b15, -1136
108111
; CHECK-NEXT: sub sp, sp, #1024
109112
; CHECK-NEXT: addvl sp, sp, #-1
110-
; CHECK-NEXT: mov x8, x0
111-
; CHECK-NEXT: bl __arm_sme_state
112-
; CHECK-NEXT: rdvl x9, #1
113-
; CHECK-NEXT: rdsvl x10, #1
114-
; CHECK-NEXT: cmp x9, x10
113+
; CHECK-NEXT: mrs x19, SVCR
114+
; CHECK-NEXT: rdvl x8, #1
115+
; CHECK-NEXT: rdsvl x9, #1
116+
; CHECK-NEXT: cmp x8, x9
115117
; CHECK-NEXT: b.eq .LBB1_2
116118
; CHECK-NEXT: // %bb.1: // %entry
117119
; CHECK-NEXT: brk #0x1
118120
; CHECK-NEXT: .LBB1_2: // %entry
119-
; CHECK-NEXT: ldr z0, [x8]
121+
; CHECK-NEXT: ldr z0, [x0]
120122
; CHECK-NEXT: sub x8, x29, #1088
121123
; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
122-
; CHECK-NEXT: tbnz w0, #0, .LBB1_4
124+
; CHECK-NEXT: tbnz w19, #0, .LBB1_4
123125
; CHECK-NEXT: // %bb.3: // %entry
124126
; CHECK-NEXT: smstart sm
125127
; CHECK-NEXT: .LBB1_4: // %entry
126128
; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload
127-
; CHECK-NEXT: mov x19, x0
128129
; CHECK-NEXT: bl bar_enabled
129130
; CHECK-NEXT: tbnz w19, #0, .LBB1_6
130131
; CHECK-NEXT: // %bb.5: // %entry
@@ -163,7 +164,75 @@ entry:
163164
ret void
164165
}
165166

166-
declare <vscale x 4 x i32> @bar_retv_enabled() #0
167+
; Streaming -> calls non-streaming callee
168+
define void @foo_streaming_pass_arg(ptr %arg) #0 {
169+
; CHECK-LABEL: foo_streaming_pass_arg:
170+
; CHECK: // %bb.0: // %entry
171+
; CHECK-NEXT: sub sp, sp, #1120
172+
; CHECK-NEXT: .cfi_def_cfa_offset 1120
173+
; CHECK-NEXT: cntd x9
174+
; CHECK-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill
175+
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
176+
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
177+
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
178+
; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill
179+
; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill
180+
; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill
181+
; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
182+
; CHECK-NEXT: .cfi_offset w28, -8
183+
; CHECK-NEXT: .cfi_offset vg, -16
184+
; CHECK-NEXT: .cfi_offset w30, -24
185+
; CHECK-NEXT: .cfi_offset w29, -32
186+
; CHECK-NEXT: .cfi_offset b8, -1064
187+
; CHECK-NEXT: .cfi_offset b9, -1072
188+
; CHECK-NEXT: .cfi_offset b10, -1080
189+
; CHECK-NEXT: .cfi_offset b11, -1088
190+
; CHECK-NEXT: .cfi_offset b12, -1096
191+
; CHECK-NEXT: .cfi_offset b13, -1104
192+
; CHECK-NEXT: .cfi_offset b14, -1112
193+
; CHECK-NEXT: .cfi_offset b15, -1120
194+
; CHECK-NEXT: sub sp, sp, #1024
195+
; CHECK-NEXT: .cfi_def_cfa_offset 2144
196+
; CHECK-NEXT: smstop sm
197+
; CHECK-NEXT: rdvl x8, #1
198+
; CHECK-NEXT: rdsvl x9, #1
199+
; CHECK-NEXT: cmp x8, x9
200+
; CHECK-NEXT: b.eq .LBB2_2
201+
; CHECK-NEXT: // %bb.1: // %entry
202+
; CHECK-NEXT: brk #0x1
203+
; CHECK-NEXT: .LBB2_2: // %entry
204+
; CHECK-NEXT: ldr z0, [x0]
205+
; CHECK-NEXT: bl bar
206+
; CHECK-NEXT: smstart sm
207+
; CHECK-NEXT: add sp, sp, #1024
208+
; CHECK-NEXT: .cfi_def_cfa_offset 1120
209+
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
210+
; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload
211+
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
212+
; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload
213+
; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload
214+
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
215+
; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload
216+
; CHECK-NEXT: add sp, sp, #1120
217+
; CHECK-NEXT: .cfi_def_cfa_offset 0
218+
; CHECK-NEXT: .cfi_restore w28
219+
; CHECK-NEXT: .cfi_restore vg
220+
; CHECK-NEXT: .cfi_restore w30
221+
; CHECK-NEXT: .cfi_restore w29
222+
; CHECK-NEXT: .cfi_restore b8
223+
; CHECK-NEXT: .cfi_restore b9
224+
; CHECK-NEXT: .cfi_restore b10
225+
; CHECK-NEXT: .cfi_restore b11
226+
; CHECK-NEXT: .cfi_restore b12
227+
; CHECK-NEXT: .cfi_restore b13
228+
; CHECK-NEXT: .cfi_restore b14
229+
; CHECK-NEXT: .cfi_restore b15
230+
; CHECK-NEXT: ret
231+
entry:
232+
%v = load <vscale x 4 x i32>, ptr %arg, align 16
233+
tail call void @bar(<vscale x 4 x i32> %v)
234+
ret void
235+
}
167236

168237
; Non-streaming -> returns SVE value from streaming callee
169238
define void @foo_non_streaming_retval(ptr %ptr) {
@@ -197,10 +266,10 @@ define void @foo_non_streaming_retval(ptr %ptr) {
197266
; CHECK-NEXT: rdvl x8, #1
198267
; CHECK-NEXT: rdsvl x9, #1
199268
; CHECK-NEXT: cmp x8, x9
200-
; CHECK-NEXT: b.eq .LBB2_2
269+
; CHECK-NEXT: b.eq .LBB3_2
201270
; CHECK-NEXT: // %bb.1: // %entry
202271
; CHECK-NEXT: brk #0x1
203-
; CHECK-NEXT: .LBB2_2: // %entry
272+
; CHECK-NEXT: .LBB3_2: // %entry
204273
; CHECK-NEXT: mov x19, x0
205274
; CHECK-NEXT: smstart sm
206275
; CHECK-NEXT: bl bar_retv_enabled
@@ -273,27 +342,26 @@ define void @foo_streaming_compatible_retval(ptr %ptr) #1 {
273342
; CHECK-NEXT: .cfi_offset b15, -1136
274343
; CHECK-NEXT: sub sp, sp, #1024
275344
; CHECK-NEXT: addvl sp, sp, #-1
276-
; CHECK-NEXT: mov x19, x0
277-
; CHECK-NEXT: bl __arm_sme_state
345+
; CHECK-NEXT: mrs x20, SVCR
278346
; CHECK-NEXT: rdvl x8, #1
279347
; CHECK-NEXT: rdsvl x9, #1
280348
; CHECK-NEXT: cmp x8, x9
281-
; CHECK-NEXT: b.eq .LBB3_2
349+
; CHECK-NEXT: b.eq .LBB4_2
282350
; CHECK-NEXT: // %bb.1: // %entry
283351
; CHECK-NEXT: brk #0x1
284-
; CHECK-NEXT: .LBB3_2: // %entry
285-
; CHECK-NEXT: tbnz w0, #0, .LBB3_4
352+
; CHECK-NEXT: .LBB4_2: // %entry
353+
; CHECK-NEXT: mov x19, x0
354+
; CHECK-NEXT: tbnz w20, #0, .LBB4_4
286355
; CHECK-NEXT: // %bb.3: // %entry
287356
; CHECK-NEXT: smstart sm
288-
; CHECK-NEXT: .LBB3_4: // %entry
289-
; CHECK-NEXT: mov x20, x0
357+
; CHECK-NEXT: .LBB4_4: // %entry
290358
; CHECK-NEXT: bl bar_retv_enabled
291359
; CHECK-NEXT: sub x8, x29, #1088
292360
; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
293-
; CHECK-NEXT: tbnz w20, #0, .LBB3_6
361+
; CHECK-NEXT: tbnz w20, #0, .LBB4_6
294362
; CHECK-NEXT: // %bb.5: // %entry
295363
; CHECK-NEXT: smstop sm
296-
; CHECK-NEXT: .LBB3_6: // %entry
364+
; CHECK-NEXT: .LBB4_6: // %entry
297365
; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload
298366
; CHECK-NEXT: str z0, [x19]
299367
; CHECK-NEXT: addvl sp, sp, #1
@@ -331,5 +399,86 @@ entry:
331399
ret void
332400
}
333401

402+
; Streaming -> returns SVE value from non-streaming callee
403+
define void @foo_streaming_retval(ptr %ptr) #0 {
404+
; CHECK-LABEL: foo_streaming_retval:
405+
; CHECK: // %bb.0: // %entry
406+
; CHECK-NEXT: sub sp, sp, #1136
407+
; CHECK-NEXT: .cfi_def_cfa_offset 1136
408+
; CHECK-NEXT: cntd x9
409+
; CHECK-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill
410+
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
411+
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
412+
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
413+
; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill
414+
; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill
415+
; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill
416+
; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
417+
; CHECK-NEXT: str x19, [sp, #1120] // 8-byte Folded Spill
418+
; CHECK-NEXT: add x29, sp, #1088
419+
; CHECK-NEXT: .cfi_def_cfa w29, 48
420+
; CHECK-NEXT: .cfi_offset w19, -16
421+
; CHECK-NEXT: .cfi_offset w28, -24
422+
; CHECK-NEXT: .cfi_offset vg, -32
423+
; CHECK-NEXT: .cfi_offset w30, -40
424+
; CHECK-NEXT: .cfi_offset w29, -48
425+
; CHECK-NEXT: .cfi_offset b8, -1080
426+
; CHECK-NEXT: .cfi_offset b9, -1088
427+
; CHECK-NEXT: .cfi_offset b10, -1096
428+
; CHECK-NEXT: .cfi_offset b11, -1104
429+
; CHECK-NEXT: .cfi_offset b12, -1112
430+
; CHECK-NEXT: .cfi_offset b13, -1120
431+
; CHECK-NEXT: .cfi_offset b14, -1128
432+
; CHECK-NEXT: .cfi_offset b15, -1136
433+
; CHECK-NEXT: sub sp, sp, #1024
434+
; CHECK-NEXT: addvl sp, sp, #-1
435+
; CHECK-NEXT: smstop sm
436+
; CHECK-NEXT: rdvl x8, #1
437+
; CHECK-NEXT: rdsvl x9, #1
438+
; CHECK-NEXT: cmp x8, x9
439+
; CHECK-NEXT: b.eq .LBB5_2
440+
; CHECK-NEXT: // %bb.1: // %entry
441+
; CHECK-NEXT: brk #0x1
442+
; CHECK-NEXT: .LBB5_2: // %entry
443+
; CHECK-NEXT: mov x19, x0
444+
; CHECK-NEXT: bl bar_retv
445+
; CHECK-NEXT: sub x8, x29, #1088
446+
; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
447+
; CHECK-NEXT: smstart sm
448+
; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload
449+
; CHECK-NEXT: str z0, [x19]
450+
; CHECK-NEXT: addvl sp, sp, #1
451+
; CHECK-NEXT: add sp, sp, #1024
452+
; CHECK-NEXT: .cfi_def_cfa wsp, 1136
453+
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
454+
; CHECK-NEXT: ldr x19, [sp, #1120] // 8-byte Folded Reload
455+
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
456+
; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload
457+
; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload
458+
; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload
459+
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
460+
; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload
461+
; CHECK-NEXT: add sp, sp, #1136
462+
; CHECK-NEXT: .cfi_def_cfa_offset 0
463+
; CHECK-NEXT: .cfi_restore w19
464+
; CHECK-NEXT: .cfi_restore w28
465+
; CHECK-NEXT: .cfi_restore vg
466+
; CHECK-NEXT: .cfi_restore w30
467+
; CHECK-NEXT: .cfi_restore w29
468+
; CHECK-NEXT: .cfi_restore b8
469+
; CHECK-NEXT: .cfi_restore b9
470+
; CHECK-NEXT: .cfi_restore b10
471+
; CHECK-NEXT: .cfi_restore b11
472+
; CHECK-NEXT: .cfi_restore b12
473+
; CHECK-NEXT: .cfi_restore b13
474+
; CHECK-NEXT: .cfi_restore b14
475+
; CHECK-NEXT: .cfi_restore b15
476+
; CHECK-NEXT: ret
477+
entry:
478+
%v = tail call <vscale x 4 x i32> @bar_retv()
479+
store <vscale x 4 x i32> %v, ptr %ptr, align 16
480+
ret void
481+
}
482+
334483
attributes #0 = { "aarch64_pstate_sm_enabled" }
335484
attributes #1 = { "aarch64_pstate_sm_compatible" }

0 commit comments

Comments
 (0)