Skip to content

Commit 3ade12c

Browse files
committed
[AArch64][SME] Propagate desired ZA states in the MachineSMEABIPass
This patch adds a propagation step to the MachineSMEABIPass that propagates desired ZA states forwards/backwards (from predecessors to successors, or vice versa). The aim of this is to pick better ZA states for edge bundles, as when many (or all) blocks in a bundle do not have a preferred ZA state, the ZA state assigned to a bundle can be less than ideal. An important case is nested loops, where only the inner loop has a preferred ZA state. Here we'd like to propagate the ZA state up from the inner loop to the outer loops (to avoid saves/restores in any loop). Change-Id: I39f9c7d7608e2fa070be2fb88351b4d1d0079041
1 parent 6b6e62a commit 3ade12c

File tree

6 files changed

+495
-175
lines changed

6 files changed

+495
-175
lines changed

llvm/lib/Target/AArch64/MachineSMEABIPass.cpp

Lines changed: 115 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,10 @@ struct InstInfo {
121121
/// Contains the needed ZA state for each instruction in a block. Instructions
122122
/// that do not require a ZA state are not recorded.
123123
struct BlockInfo {
124-
ZAState FixedEntryState{ZAState::ANY};
125124
SmallVector<InstInfo> Insts;
125+
ZAState FixedEntryState{ZAState::ANY};
126+
ZAState DesiredIncomingState{ZAState::ANY};
127+
ZAState DesiredOutgoingState{ZAState::ANY};
126128
LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
127129
LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
128130
};
@@ -268,6 +270,11 @@ struct MachineSMEABI : public MachineFunctionPass {
268270
const EdgeBundles &Bundles,
269271
ArrayRef<ZAState> BundleStates);
270272

273+
/// Propagates desired states forwards (from predecessors -> successors) if
274+
/// \p Forwards, otherwise, propagates backwards (from successors ->
275+
/// predecessors).
276+
void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true);
277+
271278
// Emission routines for private and shared ZA functions (using lazy saves).
272279
void emitNewZAPrologue(MachineBasicBlock &MBB,
273280
MachineBasicBlock::iterator MBBI);
@@ -426,12 +433,70 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
426433

427434
// Reverse vector (as we had to iterate backwards for liveness).
428435
std::reverse(Block.Insts.begin(), Block.Insts.end());
436+
437+
// Record the desired states on entry/exit of this block. These are the
438+
// states that would not incur a state transition.
439+
if (!Block.Insts.empty()) {
440+
Block.DesiredIncomingState = Block.Insts.front().NeededState;
441+
Block.DesiredOutgoingState = Block.Insts.back().NeededState;
442+
}
429443
}
430444

431445
return FunctionInfo{std::move(Blocks), AfterSMEProloguePt,
432446
PhysLiveRegsAfterSMEPrologue};
433447
}
434448

449+
/// Worklist-based propagation of desired ZA states between neighbouring
/// blocks.
///
/// \param FnInfo   Per-block information; the DesiredIncomingState and
///                 DesiredOutgoingState fields of its blocks are updated in
///                 place.
/// \param Forwards If true, a block's desired incoming state is derived from
///                 the desired outgoing states of its predecessors. If false,
///                 a block's desired outgoing state is derived from the
///                 desired incoming states of its successors.
///
/// Only blocks whose state (in the direction being propagated) is not a legal
/// edge bundle state are ever placed on the worklist.
void MachineSMEABI::propagateDesiredStates(FunctionInfo &FnInfo,
450+
bool Forwards) {
451+
// If `Forwards`, this propagates desired states from predecessors to
452+
// successors, otherwise, this propagates states from successors to
453+
// predecessors.
454+
// Returns a mutable reference to the block's incoming or outgoing desired
// state, so the same code path can serve both propagation directions.
auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
455+
return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
456+
};
457+
458+
// Seed the worklist with every block whose state in the propagation
// direction (incoming when `Forwards`, outgoing otherwise) is not a legal
// edge bundle state.
SmallVector<MachineBasicBlock *> Worklist;
459+
// NOTE(review): the binding named `BlockInfo` shadows the `BlockInfo` type
// for the duration of this loop.
for (auto [BlockID, BlockInfo] : enumerate(FnInfo.Blocks)) {
460+
if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
461+
Worklist.push_back(MF->getBlockNumbered(BlockID));
462+
}
463+
464+
while (!Worklist.empty()) {
465+
MachineBasicBlock *MBB = Worklist.pop_back_val();
466+
auto &BlockInfo = FnInfo.Blocks[MBB->getNumber()];
467+
468+
// Pick a legal edge bundle state that matches the majority of
469+
// predecessors/successors.
470+
// One counter per ZAState value; only neighbours whose state is a legal
// edge bundle state contribute a vote.
int StateCounts[ZAState::NUM_ZA_STATE] = {0};
471+
for (MachineBasicBlock *PredOrSucc :
472+
Forwards ? predecessors(MBB) : successors(MBB)) {
473+
auto &PredOrSuccBlockInfo = FnInfo.Blocks[PredOrSucc->getNumber()];
474+
// Read the neighbour's state on the side facing this block: the outgoing
// state of a predecessor, or the incoming state of a successor.
// NOTE(review): this local is named `ZAState` and shadows the enum type
// until the end of the loop body.
auto ZAState = GetBlockState(PredOrSuccBlockInfo, !Forwards);
475+
if (isLegalEdgeBundleZAState(ZAState))
476+
StateCounts[ZAState]++;
477+
}
478+
479+
// The index of the largest counter is converted back to a ZAState.
// NOTE(review): assuming max_element behaves like std::max_element, ties
// resolve to the lowest index, and when no neighbour voted the result is
// index 0 (presumably ZAState::ANY - confirm against the enum), which is
// not necessarily a legal edge bundle state.
ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts);
480+
auto &CurrentState = GetBlockState(BlockInfo, Forwards);
481+
// Only a change in state triggers further propagation.
if (PropagatedState != CurrentState) {
482+
CurrentState = PropagatedState;
483+
auto &OtherState = GetBlockState(BlockInfo, !Forwards);
484+
// Propagate to the incoming/outgoing state if that is also "ANY".
485+
if (OtherState == ZAState::ANY)
486+
OtherState = PropagatedState;
487+
// Push any successors/predecessors that may need updating to the
488+
// worklist.
489+
for (MachineBasicBlock *SuccOrPred :
490+
Forwards ? successors(MBB) : predecessors(MBB)) {
491+
auto &SuccOrPredBlockInfo = FnInfo.Blocks[SuccOrPred->getNumber()];
492+
// Neighbours that already hold a legal edge bundle state are not
// re-queued.
if (!isLegalEdgeBundleZAState(
493+
GetBlockState(SuccOrPredBlockInfo, Forwards)))
494+
Worklist.push_back(SuccOrPred);
495+
}
496+
}
497+
}
498+
}
499+
435500
/// Assigns each edge bundle a ZA state based on the needed states of blocks
436501
/// that have incoming or outgoing edges in that bundle.
437502
SmallVector<ZAState>
@@ -444,40 +509,36 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
444509
// Attempt to assign a ZA state for this bundle that minimizes state
445510
// transitions. Edges within loops are given a higher weight as we assume
446511
// they will be executed more than once.
447-
// TODO: We should propagate desired incoming/outgoing states through blocks
448-
// that have the "ANY" state first to make better global decisions.
449512
int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
450513
for (unsigned BlockID : Bundles.getBlocks(I)) {
451514
LLVM_DEBUG(dbgs() << "- bb." << BlockID);
452515

453516
const BlockInfo &Block = FnInfo.Blocks[BlockID];
454-
if (Block.Insts.empty()) {
455-
LLVM_DEBUG(dbgs() << " (no state preference)\n");
456-
continue;
457-
}
458517
bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I;
459518
bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I;
460519

461-
ZAState DesiredIncomingState = Block.Insts.front().NeededState;
462-
if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
463-
EdgeStateCounts[DesiredIncomingState]++;
520+
bool LegalInEdge =
521+
InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState);
522+
bool LegalOutEgde =
523+
OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState);
524+
if (LegalInEdge) {
464525
LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
465-
<< getZAStateString(DesiredIncomingState));
526+
<< getZAStateString(Block.DesiredIncomingState));
527+
EdgeStateCounts[Block.DesiredIncomingState]++;
466528
}
467-
ZAState DesiredOutgoingState = Block.Insts.back().NeededState;
468-
if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) {
469-
EdgeStateCounts[DesiredOutgoingState]++;
529+
if (LegalOutEgde) {
470530
LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
471-
<< getZAStateString(DesiredOutgoingState));
531+
<< getZAStateString(Block.DesiredOutgoingState));
532+
EdgeStateCounts[Block.DesiredOutgoingState]++;
472533
}
534+
if (!LegalInEdge && !LegalOutEgde)
535+
LLVM_DEBUG(dbgs() << " (no state preference)");
473536
LLVM_DEBUG(dbgs() << '\n');
474537
}
475538

476539
ZAState BundleState =
477540
ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);
478541

479-
// Force ZA to be active in bundles that don't have a preferred state.
480-
// TODO: Something better here (to avoid extra mode switches).
481542
if (BundleState == ZAState::ANY)
482543
BundleState = ZAState::ACTIVE;
483544

@@ -922,6 +983,43 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
922983
getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
923984

924985
FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);
986+
987+
if (OptLevel != CodeGenOptLevel::None) {
988+
// Propagate desired states forwards then backwards. We propagate forwards
989+
// first as this propagates desired states from inner to outer loops.
990+
// Backwards propagation is then used to fill in any gaps. Note: Doing both
991+
// in one step can give poor results. For example:
992+
//
993+
// ┌─────┐
994+
// ┌─┤ BB0 ◄───┐
995+
// │ └─┬───┘ │
996+
// │ ┌─▼───◄──┐│
997+
// │ │ BB1 │ ││
998+
// │ └─┬┬──┘ ││
999+
// │ │└─────┘│
1000+
// │ ┌─▼───┐ │
1001+
// │ │ BB2 ├───┘
1002+
// │ └─┬───┘
1003+
// │ ┌─▼───┐
1004+
// └─► BB3 │
1005+
// └─────┘
1006+
//
1007+
// If:
1008+
// - "BB0" and "BB2" (outer loop) have no state preference
1009+
// - "BB1" (inner loop) desires the ACTIVE state on entry/exit
1010+
// - "BB3" desires the LOCAL_SAVED state on entry
1011+
//
1012+
// If we propagate forwards first, ACTIVE is propagated from BB1 to BB2,
1013+
// then from BB2 to BB0, which results in the inner and outer loops having
1014+
// the "ACTIVE" state. This avoids any state changes in the loops.
1015+
//
1016+
// If we propagate backwards first, we _could_ propagate LOCAL_SAVED from
1017+
// BB3 to BB0, which would result in a transition from ACTIVE -> LOCAL_SAVED
1018+
// in the outer loop.
1019+
for (bool Forwards : {true, false})
1020+
propagateDesiredStates(FnInfo, Forwards);
1021+
}
1022+
9251023
SmallVector<ZAState> BundleStates = assignBundleZAStates(Bundles, FnInfo);
9261024

9271025
EmitContext Context;

llvm/test/CodeGen/AArch64/sme-agnostic-za.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,6 @@ define i64 @test_many_callee_arguments(
351351
ret i64 %ret
352352
}
353353

354-
; FIXME: The new lowering should avoid saves/restores in the probing loop.
355354
define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{
356355
; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
357356
; CHECK: // %bb.0:
@@ -389,16 +388,14 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s
389388
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
390389
; CHECK-NEWLOWERING-NEXT: mov x8, sp
391390
; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0
392-
; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
393-
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
394391
; CHECK-NEWLOWERING-NEXT: mov x0, x19
395392
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
393+
; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
394+
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
396395
; CHECK-NEWLOWERING-NEXT: cmp sp, x19
397396
; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3
398397
; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
399-
; CHECK-NEWLOWERING-NEXT: mov x0, x19
400398
; CHECK-NEWLOWERING-NEXT: str xzr, [sp]
401-
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
402399
; CHECK-NEWLOWERING-NEXT: b .LBB7_1
403400
; CHECK-NEWLOWERING-NEXT: .LBB7_3:
404401
; CHECK-NEWLOWERING-NEXT: mov sp, x19

llvm/test/CodeGen/AArch64/sme-za-control-flow.ll

Lines changed: 40 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -228,65 +228,34 @@ exit:
228228
ret void
229229
}
230230

231-
; FIXME: The codegen for this case could be improved (by tuning weights).
232-
; Here the ZA save has been hoisted out of the conditional, but would be better
233-
; to sink it.
234231
define void @cond_private_za_call(i1 %cond) "aarch64_inout_za" nounwind {
235-
; CHECK-LABEL: cond_private_za_call:
236-
; CHECK: // %bb.0:
237-
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
238-
; CHECK-NEXT: mov x29, sp
239-
; CHECK-NEXT: sub sp, sp, #16
240-
; CHECK-NEXT: rdsvl x8, #1
241-
; CHECK-NEXT: mov x9, sp
242-
; CHECK-NEXT: msub x9, x8, x8, x9
243-
; CHECK-NEXT: mov sp, x9
244-
; CHECK-NEXT: stp x9, x8, [x29, #-16]
245-
; CHECK-NEXT: tbz w0, #0, .LBB3_4
246-
; CHECK-NEXT: // %bb.1: // %private_za_call
247-
; CHECK-NEXT: sub x8, x29, #16
248-
; CHECK-NEXT: msr TPIDR2_EL0, x8
249-
; CHECK-NEXT: bl private_za_call
250-
; CHECK-NEXT: smstart za
251-
; CHECK-NEXT: mrs x8, TPIDR2_EL0
252-
; CHECK-NEXT: sub x0, x29, #16
253-
; CHECK-NEXT: cbnz x8, .LBB3_3
254-
; CHECK-NEXT: // %bb.2: // %private_za_call
255-
; CHECK-NEXT: bl __arm_tpidr2_restore
256-
; CHECK-NEXT: .LBB3_3: // %private_za_call
257-
; CHECK-NEXT: msr TPIDR2_EL0, xzr
258-
; CHECK-NEXT: .LBB3_4: // %exit
259-
; CHECK-NEXT: mov sp, x29
260-
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
261-
; CHECK-NEXT: b shared_za_call
262-
;
263-
; CHECK-NEWLOWERING-LABEL: cond_private_za_call:
264-
; CHECK-NEWLOWERING: // %bb.0:
265-
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
266-
; CHECK-NEWLOWERING-NEXT: mov x29, sp
267-
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
268-
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
269-
; CHECK-NEWLOWERING-NEXT: mov x9, sp
270-
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
271-
; CHECK-NEWLOWERING-NEXT: mov sp, x9
272-
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
273-
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
274-
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
275-
; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB3_2
276-
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %private_za_call
277-
; CHECK-NEWLOWERING-NEXT: bl private_za_call
278-
; CHECK-NEWLOWERING-NEXT: .LBB3_2: // %exit
279-
; CHECK-NEWLOWERING-NEXT: smstart za
280-
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
281-
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
282-
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB3_4
283-
; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit
284-
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
285-
; CHECK-NEWLOWERING-NEXT: .LBB3_4: // %exit
286-
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
287-
; CHECK-NEWLOWERING-NEXT: mov sp, x29
288-
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
289-
; CHECK-NEWLOWERING-NEXT: b shared_za_call
232+
; CHECK-COMMON-LABEL: cond_private_za_call:
233+
; CHECK-COMMON: // %bb.0:
234+
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
235+
; CHECK-COMMON-NEXT: mov x29, sp
236+
; CHECK-COMMON-NEXT: sub sp, sp, #16
237+
; CHECK-COMMON-NEXT: rdsvl x8, #1
238+
; CHECK-COMMON-NEXT: mov x9, sp
239+
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
240+
; CHECK-COMMON-NEXT: mov sp, x9
241+
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
242+
; CHECK-COMMON-NEXT: tbz w0, #0, .LBB3_4
243+
; CHECK-COMMON-NEXT: // %bb.1: // %private_za_call
244+
; CHECK-COMMON-NEXT: sub x8, x29, #16
245+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
246+
; CHECK-COMMON-NEXT: bl private_za_call
247+
; CHECK-COMMON-NEXT: smstart za
248+
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
249+
; CHECK-COMMON-NEXT: sub x0, x29, #16
250+
; CHECK-COMMON-NEXT: cbnz x8, .LBB3_3
251+
; CHECK-COMMON-NEXT: // %bb.2: // %private_za_call
252+
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
253+
; CHECK-COMMON-NEXT: .LBB3_3: // %private_za_call
254+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
255+
; CHECK-COMMON-NEXT: .LBB3_4: // %exit
256+
; CHECK-COMMON-NEXT: mov sp, x29
257+
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
258+
; CHECK-COMMON-NEXT: b shared_za_call
290259
br i1 %cond, label %private_za_call, label %exit
291260

292261
private_za_call:
@@ -910,7 +879,7 @@ define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwin
910879
; CHECK-NEWLOWERING-LABEL: loop_with_external_entry:
911880
; CHECK-NEWLOWERING: // %bb.0: // %entry
912881
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
913-
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
882+
; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
914883
; CHECK-NEWLOWERING-NEXT: mov x29, sp
915884
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
916885
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
@@ -923,23 +892,27 @@ define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwin
923892
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %init
924893
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
925894
; CHECK-NEWLOWERING-NEXT: .LBB11_2: // %loop.preheader
926-
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
927-
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
895+
; CHECK-NEWLOWERING-NEXT: sub x20, x29, #16
896+
; CHECK-NEWLOWERING-NEXT: b .LBB11_4
928897
; CHECK-NEWLOWERING-NEXT: .LBB11_3: // %loop
898+
; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB11_4 Depth=1
899+
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
900+
; CHECK-NEWLOWERING-NEXT: tbz w19, #0, .LBB11_6
901+
; CHECK-NEWLOWERING-NEXT: .LBB11_4: // %loop
929902
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
903+
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x20
930904
; CHECK-NEWLOWERING-NEXT: bl private_za_call
931-
; CHECK-NEWLOWERING-NEXT: tbnz w19, #0, .LBB11_3
932-
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
933905
; CHECK-NEWLOWERING-NEXT: smstart za
934906
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
935907
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
936-
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_6
937-
; CHECK-NEWLOWERING-NEXT: // %bb.5: // %exit
908+
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_3
909+
; CHECK-NEWLOWERING-NEXT: // %bb.5: // %loop
910+
; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB11_4 Depth=1
938911
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
912+
; CHECK-NEWLOWERING-NEXT: b .LBB11_3
939913
; CHECK-NEWLOWERING-NEXT: .LBB11_6: // %exit
940-
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
941914
; CHECK-NEWLOWERING-NEXT: mov sp, x29
942-
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
915+
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
943916
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
944917
; CHECK-NEWLOWERING-NEXT: ret
945918
entry:

llvm/test/CodeGen/AArch64/sme-za-exceptions.ll

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -63,25 +63,17 @@ define void @za_with_raii(i1 %fail) "aarch64_inout_za" personality ptr @__gxx_pe
6363
; CHECK-NEXT: ldr x1, [x1, :got_lo12:typeinfo_for_char_const_ptr]
6464
; CHECK-NEXT: bl __cxa_throw
6565
; CHECK-NEXT: .Ltmp1: // EH_LABEL
66-
; CHECK-NEXT: smstart za
67-
; CHECK-NEXT: mrs x8, TPIDR2_EL0
68-
; CHECK-NEXT: sub x0, x29, #16
69-
; CHECK-NEXT: cbnz x8, .LBB0_4
70-
; CHECK-NEXT: // %bb.3: // %throw_exception
71-
; CHECK-NEXT: bl __arm_tpidr2_restore
72-
; CHECK-NEXT: .LBB0_4: // %throw_exception
73-
; CHECK-NEXT: msr TPIDR2_EL0, xzr
74-
; CHECK-NEXT: // %bb.5: // %throw_fail
75-
; CHECK-NEXT: .LBB0_6: // %unwind_dtors
66+
; CHECK-NEXT: // %bb.3: // %throw_fail
67+
; CHECK-NEXT: .LBB0_4: // %unwind_dtors
7668
; CHECK-NEXT: .Ltmp2: // EH_LABEL
7769
; CHECK-NEXT: mov x19, x0
7870
; CHECK-NEXT: smstart za
7971
; CHECK-NEXT: mrs x8, TPIDR2_EL0
8072
; CHECK-NEXT: sub x0, x29, #16
81-
; CHECK-NEXT: cbnz x8, .LBB0_8
82-
; CHECK-NEXT: // %bb.7: // %unwind_dtors
73+
; CHECK-NEXT: cbnz x8, .LBB0_6
74+
; CHECK-NEXT: // %bb.5: // %unwind_dtors
8375
; CHECK-NEXT: bl __arm_tpidr2_restore
84-
; CHECK-NEXT: .LBB0_8: // %unwind_dtors
76+
; CHECK-NEXT: .LBB0_6: // %unwind_dtors
8577
; CHECK-NEXT: msr TPIDR2_EL0, xzr
8678
; CHECK-NEXT: bl shared_za_call
8779
; CHECK-NEXT: sub x8, x29, #16

0 commit comments

Comments
 (0)