Skip to content

Commit d0c9599

Browse files
authored
[AArch64][SME] Use entry pstate.sm for conditional streaming-mode changes (#152169)
We only do conditional streaming-mode changes in two cases:

- Around calls in streaming-compatible functions that don't have a streaming body
- At the entry/exit of streaming-compatible functions with a streaming body

In both cases, the condition depends on the entry pstate.sm value. Given this, we don't need to emit calls to __arm_sme_state at every mode change.

This patch handles this by placing an "AArch64ISD::ENTRY_PSTATE_SM" node in the entry block and copying the result to a register. The register is then used whenever we need to emit a conditional streaming-mode change. The "ENTRY_PSTATE_SM" node expands to a call to "__arm_sme_state" only if (after SelectionDAG) the function is determined to need conditional streaming-mode changes.

This has two main advantages:

1. It allows back-to-back conditional smstart/smstop pairs to be folded
2. It has the correct behaviour for EH landing pads
   - These are entered with pstate.sm = 0, and should switch mode based on the entry pstate.sm
   - Note: This is not fully implemented yet
1 parent e9ece17 commit d0c9599

19 files changed

+199
-176
lines changed

llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1178,7 +1178,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
11781178
if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
11791179
for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
11801180
if (F->getOpcode() == ISD::CopyFromReg) {
1181-
UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
1181+
Register Reg = cast<RegisterSDNode>(F->getOperand(1))->getReg();
1182+
if (Reg.isPhysical())
1183+
UsedRegs.push_back(Reg);
11821184
continue;
11831185
} else if (F->getOpcode() == ISD::CopyToReg) {
11841186
// Skip CopyToReg nodes that are internal to the glue chain.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 60 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3101,6 +3101,31 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr &MI,
31013101
return BB;
31023102
}
31033103

3104+
MachineBasicBlock *
3105+
AArch64TargetLowering::EmitEntryPStateSM(MachineInstr &MI,
3106+
MachineBasicBlock *BB) const {
3107+
MachineFunction *MF = BB->getParent();
3108+
AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
3109+
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3110+
Register ResultReg = MI.getOperand(0).getReg();
3111+
if (FuncInfo->isPStateSMRegUsed()) {
3112+
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3113+
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
3114+
.addExternalSymbol("__arm_sme_state")
3115+
.addReg(AArch64::X0, RegState::ImplicitDefine)
3116+
.addRegMask(TRI->getCallPreservedMask(
3117+
*MF, CallingConv::
3118+
AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2));
3119+
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), ResultReg)
3120+
.addReg(AArch64::X0);
3121+
} else {
3122+
assert(MI.getMF()->getRegInfo().use_empty(ResultReg) &&
3123+
"Expected no users of the entry pstate.sm!");
3124+
}
3125+
MI.eraseFromParent();
3126+
return BB;
3127+
}
3128+
31043129
// Helper function to find the instruction that defined a virtual register.
31053130
// If unable to find such instruction, returns nullptr.
31063131
static const MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI,
@@ -3216,6 +3241,8 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
32163241
return EmitAllocateSMESaveBuffer(MI, BB);
32173242
case AArch64::GetSMESaveSize:
32183243
return EmitGetSMESaveSize(MI, BB);
3244+
case AArch64::EntryPStateSM:
3245+
return EmitEntryPStateSM(MI, BB);
32193246
case AArch64::F128CSEL:
32203247
return EmitF128CSEL(MI, BB);
32213248
case TargetOpcode::STATEPOINT:
@@ -8133,19 +8160,26 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
81338160
}
81348161
assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
81358162

8163+
if (Attrs.hasStreamingCompatibleInterface()) {
8164+
SDValue EntryPStateSM =
8165+
DAG.getNode(AArch64ISD::ENTRY_PSTATE_SM, DL,
8166+
DAG.getVTList(MVT::i64, MVT::Other), {Chain});
8167+
8168+
// Copy the value to a virtual register, and save that in FuncInfo.
8169+
Register EntryPStateSMReg =
8170+
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
8171+
Chain = DAG.getCopyToReg(EntryPStateSM.getValue(1), DL, EntryPStateSMReg,
8172+
EntryPStateSM);
8173+
FuncInfo->setPStateSMReg(EntryPStateSMReg);
8174+
}
8175+
81368176
// Insert the SMSTART if this is a locally streaming function and
81378177
// make sure it is Glued to the last CopyFromReg value.
81388178
if (IsLocallyStreaming) {
8139-
SDValue PStateSM;
8140-
if (Attrs.hasStreamingCompatibleInterface()) {
8141-
PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64);
8142-
Register Reg = MF.getRegInfo().createVirtualRegister(
8143-
getRegClassFor(PStateSM.getValueType().getSimpleVT()));
8144-
FuncInfo->setPStateSMReg(Reg);
8145-
Chain = DAG.getCopyToReg(Chain, DL, Reg, PStateSM);
8179+
if (Attrs.hasStreamingCompatibleInterface())
81468180
Chain = changeStreamingMode(DAG, DL, /*Enable*/ true, Chain, Glue,
8147-
AArch64SME::IfCallerIsNonStreaming, PStateSM);
8148-
} else
8181+
AArch64SME::IfCallerIsNonStreaming);
8182+
else
81498183
Chain = changeStreamingMode(DAG, DL, /*Enable*/ true, Chain, Glue,
81508184
AArch64SME::Always);
81518185

@@ -8836,8 +8870,7 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
88368870
SDValue AArch64TargetLowering::changeStreamingMode(SelectionDAG &DAG, SDLoc DL,
88378871
bool Enable, SDValue Chain,
88388872
SDValue InGlue,
8839-
unsigned Condition,
8840-
SDValue PStateSM) const {
8873+
unsigned Condition) const {
88418874
MachineFunction &MF = DAG.getMachineFunction();
88428875
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
88438876
FuncInfo->setHasStreamingModeChanges(true);
@@ -8849,9 +8882,16 @@ SDValue AArch64TargetLowering::changeStreamingMode(SelectionDAG &DAG, SDLoc DL,
88498882
SmallVector<SDValue> Ops = {Chain, MSROp};
88508883
unsigned Opcode;
88518884
if (Condition != AArch64SME::Always) {
8885+
FuncInfo->setPStateSMRegUsed(true);
8886+
Register PStateReg = FuncInfo->getPStateSMReg();
8887+
assert(PStateReg.isValid() && "PStateSM Register is invalid");
8888+
SDValue PStateSM =
8889+
DAG.getCopyFromReg(Chain, DL, PStateReg, MVT::i64, InGlue);
8890+
// Use chain and glue from the CopyFromReg.
8891+
Ops[0] = PStateSM.getValue(1);
8892+
InGlue = PStateSM.getValue(2);
88528893
SDValue ConditionOp = DAG.getTargetConstant(Condition, DL, MVT::i64);
88538894
Opcode = Enable ? AArch64ISD::COND_SMSTART : AArch64ISD::COND_SMSTOP;
8854-
assert(PStateSM && "PStateSM should be defined");
88558895
Ops.push_back(ConditionOp);
88568896
Ops.push_back(PStateSM);
88578897
} else {
@@ -9126,15 +9166,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
91269166
/*IsSave=*/true);
91279167
}
91289168

9129-
SDValue PStateSM;
91309169
bool RequiresSMChange = CallAttrs.requiresSMChange();
91319170
if (RequiresSMChange) {
9132-
if (CallAttrs.caller().hasStreamingInterfaceOrBody())
9133-
PStateSM = DAG.getConstant(1, DL, MVT::i64);
9134-
else if (CallAttrs.caller().hasNonStreamingInterface())
9135-
PStateSM = DAG.getConstant(0, DL, MVT::i64);
9136-
else
9137-
PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64);
91389171
OptimizationRemarkEmitter ORE(&MF.getFunction());
91399172
ORE.emit([&]() {
91409173
auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMETransition",
@@ -9449,9 +9482,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
94499482
InGlue = Chain.getValue(1);
94509483
}
94519484

9452-
SDValue NewChain = changeStreamingMode(
9453-
DAG, DL, CallAttrs.callee().hasStreamingInterface(), Chain, InGlue,
9454-
getSMToggleCondition(CallAttrs), PStateSM);
9485+
SDValue NewChain =
9486+
changeStreamingMode(DAG, DL, CallAttrs.callee().hasStreamingInterface(),
9487+
Chain, InGlue, getSMToggleCondition(CallAttrs));
94559488
Chain = NewChain.getValue(0);
94569489
InGlue = NewChain.getValue(1);
94579490
}
@@ -9635,10 +9668,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
96359668
InGlue = Result.getValue(Result->getNumValues() - 1);
96369669

96379670
if (RequiresSMChange) {
9638-
assert(PStateSM && "Expected a PStateSM to be set");
96399671
Result = changeStreamingMode(
96409672
DAG, DL, !CallAttrs.callee().hasStreamingInterface(), Result, InGlue,
9641-
getSMToggleCondition(CallAttrs), PStateSM);
9673+
getSMToggleCondition(CallAttrs));
96429674

96439675
if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
96449676
InGlue = Result.getValue(1);
@@ -9804,14 +9836,11 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
98049836
// Emit SMSTOP before returning from a locally streaming function
98059837
SMEAttrs FuncAttrs = FuncInfo->getSMEFnAttrs();
98069838
if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) {
9807-
if (FuncAttrs.hasStreamingCompatibleInterface()) {
9808-
Register Reg = FuncInfo->getPStateSMReg();
9809-
assert(Reg.isValid() && "PStateSM Register is invalid");
9810-
SDValue PStateSM = DAG.getCopyFromReg(Chain, DL, Reg, MVT::i64);
9839+
if (FuncAttrs.hasStreamingCompatibleInterface())
98119840
Chain = changeStreamingMode(DAG, DL, /*Enable*/ false, Chain,
98129841
/*Glue*/ SDValue(),
9813-
AArch64SME::IfCallerIsNonStreaming, PStateSM);
9814-
} else
9842+
AArch64SME::IfCallerIsNonStreaming);
9843+
else
98159844
Chain = changeStreamingMode(DAG, DL, /*Enable*/ false, Chain,
98169845
/*Glue*/ SDValue(), AArch64SME::Always);
98179846
Glue = Chain.getValue(1);
@@ -28196,6 +28225,7 @@ void AArch64TargetLowering::ReplaceNodeResults(
2819628225
case Intrinsic::aarch64_sme_in_streaming_mode: {
2819728226
SDLoc DL(N);
2819828227
SDValue Chain = DAG.getEntryNode();
28228+
2819928229
SDValue RuntimePStateSM =
2820028230
getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
2820128231
Results.push_back(

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ class AArch64TargetLowering : public TargetLowering {
181181
MachineBasicBlock *BB) const;
182182
MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI,
183183
MachineBasicBlock *BB) const;
184+
MachineBasicBlock *EmitEntryPStateSM(MachineInstr &MI,
185+
MachineBasicBlock *BB) const;
184186

185187
/// Replace (0, vreg) discriminator components with the operands of blend
186188
/// or with (immediate, NoRegister) when possible.
@@ -523,8 +525,8 @@ class AArch64TargetLowering : public TargetLowering {
523525
/// node. \p Condition should be one of the enum values from
524526
/// AArch64SME::ToggleCondition.
525527
SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
526-
SDValue Chain, SDValue InGlue, unsigned Condition,
527-
SDValue PStateSM = SDValue()) const;
528+
SDValue Chain, SDValue InGlue,
529+
unsigned Condition) const;
528530

529531
bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
530532

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
231231
// on function entry to record the initial pstate of a function.
232232
Register PStateSMReg = MCRegister::NoRegister;
233233

234+
// true if PStateSMReg is used.
235+
bool PStateSMRegUsed = false;
236+
234237
// Holds a pointer to a buffer that is large enough to represent
235238
// all SME ZA state and any additional state required by the
236239
// __arm_sme_save/restore support routines.
@@ -274,6 +277,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
274277
Register getPStateSMReg() const { return PStateSMReg; };
275278
void setPStateSMReg(Register Reg) { PStateSMReg = Reg; };
276279

280+
unsigned isPStateSMRegUsed() const { return PStateSMRegUsed; };
281+
void setPStateSMRegUsed(bool Used = true) { PStateSMRegUsed = Used; };
282+
277283
int64_t getVGIdx() const { return VGIdx; };
278284
void setVGIdx(unsigned Idx) { VGIdx = Idx; };
279285

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@ def AArch64_save_zt : SDNode<"AArch64ISD::SAVE_ZT", SDTypeProfile<0, 2,
3939
def AArch64CoalescerBarrier
4040
: SDNode<"AArch64ISD::COALESCER_BARRIER", SDTypeProfile<1, 1, []>, [SDNPOptInGlue, SDNPOutGlue]>;
4141

42+
def AArch64EntryPStateSM
43+
: SDNode<"AArch64ISD::ENTRY_PSTATE_SM", SDTypeProfile<1, 0,
44+
[SDTCisInt<0>]>, [SDNPHasChain, SDNPSideEffect]>;
45+
46+
let usesCustomInserter = 1 in {
47+
def EntryPStateSM : Pseudo<(outs GPR64:$is_streaming), (ins), []>, Sched<[]> {}
48+
}
49+
def : Pat<(i64 (AArch64EntryPStateSM)), (EntryPStateSM)>;
50+
4251
def AArch64VGSave : SDNode<"AArch64ISD::VG_SAVE", SDTypeProfile<0, 0, []>,
4352
[SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
4453

llvm/lib/Target/AArch64/SMEPeepholeOpt.cpp

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,16 +80,10 @@ static bool isMatchingStartStopPair(const MachineInstr *MI1,
8080
if (MI1->getOperand(4).getRegMask() != MI2->getOperand(4).getRegMask())
8181
return false;
8282

83-
// This optimisation is unlikely to happen in practice for conditional
84-
// smstart/smstop pairs as the virtual registers for pstate.sm will always
85-
// be different.
86-
// TODO: For this optimisation to apply to conditional smstart/smstop,
87-
// this pass will need to do more work to remove redundant calls to
88-
// __arm_sme_state.
89-
9083
// Only consider conditional start/stop pairs which read the same register
91-
// holding the original value of pstate.sm, as some conditional start/stops
92-
// require the state on entry to the function.
84+
// holding the original value of pstate.sm. This is somewhat overly conservative
85+
// as all conditional streaming mode changes only look at the state on entry
86+
// to the function.
9387
if (MI1->getOperand(3).isReg() && MI2->getOperand(3).isReg()) {
9488
Register Reg1 = MI1->getOperand(3).getReg();
9589
Register Reg2 = MI2->getOperand(3).getReg();

llvm/test/CodeGen/AArch64/sme-agnostic-za.ll

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -150,42 +150,40 @@ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(
150150
; CHECK-NEXT: add x29, sp, #64
151151
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
152152
; CHECK-NEXT: mov x8, x0
153+
; CHECK-NEXT: bl __arm_sme_state
154+
; CHECK-NEXT: mov x19, x0
153155
; CHECK-NEXT: bl __arm_sme_state_size
154156
; CHECK-NEXT: sub sp, sp, x0
155-
; CHECK-NEXT: mov x19, sp
156-
; CHECK-NEXT: mov x0, x19
157+
; CHECK-NEXT: mov x20, sp
158+
; CHECK-NEXT: mov x0, x20
157159
; CHECK-NEXT: bl __arm_sme_save
158-
; CHECK-NEXT: bl __arm_sme_state
159-
; CHECK-NEXT: and x20, x0, #0x1
160-
; CHECK-NEXT: tbz w20, #0, .LBB5_2
160+
; CHECK-NEXT: tbz w19, #0, .LBB5_2
161161
; CHECK-NEXT: // %bb.1:
162162
; CHECK-NEXT: smstop sm
163163
; CHECK-NEXT: .LBB5_2:
164164
; CHECK-NEXT: mov x0, x8
165165
; CHECK-NEXT: bl private_za_decl
166-
; CHECK-NEXT: mov x2, x0
167-
; CHECK-NEXT: tbz w20, #0, .LBB5_4
166+
; CHECK-NEXT: mov x1, x0
167+
; CHECK-NEXT: tbz w19, #0, .LBB5_4
168168
; CHECK-NEXT: // %bb.3:
169169
; CHECK-NEXT: smstart sm
170170
; CHECK-NEXT: .LBB5_4:
171-
; CHECK-NEXT: mov x0, x19
171+
; CHECK-NEXT: mov x0, x20
172172
; CHECK-NEXT: bl __arm_sme_restore
173-
; CHECK-NEXT: mov x0, x19
173+
; CHECK-NEXT: mov x0, x20
174174
; CHECK-NEXT: bl __arm_sme_save
175-
; CHECK-NEXT: bl __arm_sme_state
176-
; CHECK-NEXT: and x20, x0, #0x1
177-
; CHECK-NEXT: tbz w20, #0, .LBB5_6
175+
; CHECK-NEXT: tbz w19, #0, .LBB5_6
178176
; CHECK-NEXT: // %bb.5:
179177
; CHECK-NEXT: smstop sm
180178
; CHECK-NEXT: .LBB5_6:
181-
; CHECK-NEXT: mov x0, x2
179+
; CHECK-NEXT: mov x0, x1
182180
; CHECK-NEXT: bl private_za_decl
183181
; CHECK-NEXT: mov x1, x0
184-
; CHECK-NEXT: tbz w20, #0, .LBB5_8
182+
; CHECK-NEXT: tbz w19, #0, .LBB5_8
185183
; CHECK-NEXT: // %bb.7:
186184
; CHECK-NEXT: smstart sm
187185
; CHECK-NEXT: .LBB5_8:
188-
; CHECK-NEXT: mov x0, x19
186+
; CHECK-NEXT: mov x0, x20
189187
; CHECK-NEXT: bl __arm_sme_restore
190188
; CHECK-NEXT: mov x0, x1
191189
; CHECK-NEXT: sub sp, x29, #64

llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define void @streaming_compatible() #0 {
1818
; CHECK-NEXT: bl __arm_get_current_vg
1919
; CHECK-NEXT: stp x0, x19, [sp, #72] // 16-byte Folded Spill
2020
; CHECK-NEXT: bl __arm_sme_state
21-
; CHECK-NEXT: and x19, x0, #0x1
21+
; CHECK-NEXT: mov x19, x0
2222
; CHECK-NEXT: tbz w19, #0, .LBB0_2
2323
; CHECK-NEXT: // %bb.1:
2424
; CHECK-NEXT: smstop sm
@@ -57,7 +57,7 @@ define void @streaming_compatible_arg(float %f) #0 {
5757
; CHECK-NEXT: stp x0, x19, [sp, #88] // 16-byte Folded Spill
5858
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
5959
; CHECK-NEXT: bl __arm_sme_state
60-
; CHECK-NEXT: and x19, x0, #0x1
60+
; CHECK-NEXT: mov x19, x0
6161
; CHECK-NEXT: tbz w19, #0, .LBB1_2
6262
; CHECK-NEXT: // %bb.1:
6363
; CHECK-NEXT: smstop sm

llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ define void @fbyte(<vscale x 16 x i8> %v) #0{
4444
; NOPAIR-NEXT: addvl sp, sp, #-1
4545
; NOPAIR-NEXT: str z0, [sp] // 16-byte Folded Spill
4646
; NOPAIR-NEXT: bl __arm_sme_state
47-
; NOPAIR-NEXT: and x19, x0, #0x1
47+
; NOPAIR-NEXT: mov x19, x0
4848
; NOPAIR-NEXT: tbz w19, #0, .LBB0_2
4949
; NOPAIR-NEXT: // %bb.1:
5050
; NOPAIR-NEXT: smstop sm
@@ -126,7 +126,7 @@ define void @fbyte(<vscale x 16 x i8> %v) #0{
126126
; PAIR-NEXT: addvl sp, sp, #-1
127127
; PAIR-NEXT: str z0, [sp] // 16-byte Folded Spill
128128
; PAIR-NEXT: bl __arm_sme_state
129-
; PAIR-NEXT: and x19, x0, #0x1
129+
; PAIR-NEXT: mov x19, x0
130130
; PAIR-NEXT: tbz w19, #0, .LBB0_2
131131
; PAIR-NEXT: // %bb.1:
132132
; PAIR-NEXT: smstop sm

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
441441
; CHECK-COMMON-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
442442
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
443443
; CHECK-COMMON-NEXT: bl __arm_sme_state
444-
; CHECK-COMMON-NEXT: and x19, x0, #0x1
444+
; CHECK-COMMON-NEXT: mov x19, x0
445445
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
446446
; CHECK-COMMON-NEXT: // %bb.1:
447447
; CHECK-COMMON-NEXT: smstop sm

0 commit comments

Comments
 (0)