Skip to content

Commit 3a2c02a

Browse files
committed
Replaced RDSVL+SUB with ADDSVL, simplified changeStreamingMode
1 parent f3f31c9 commit 3a2c02a

File tree

7 files changed

+99
-108
lines changed

7 files changed

+99
-108
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 47 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2944,35 +2944,42 @@ MachineBasicBlock *
29442944
AArch64TargetLowering::EmitCheckMatchingVL(MachineInstr &MI,
29452945
MachineBasicBlock *MBB) const {
29462946
MachineFunction *MF = MBB->getParent();
2947-
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2948-
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2949-
DebugLoc DL = MI.getDebugLoc();
2950-
MachineFunction::iterator It = ++MBB->getIterator();
2951-
2952-
const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
29532947
MachineRegisterInfo &MRI = MF->getRegInfo();
29542948

2955-
Register RegVL = MRI.createVirtualRegister(RC);
2956-
Register RegSVL = MRI.createVirtualRegister(RC);
2957-
Register RegCheck = MRI.createVirtualRegister(RC);
2949+
const TargetRegisterClass *RC_GPR = &AArch64::GPR64RegClass;
2950+
const TargetRegisterClass *RC_GPRsp = &AArch64::GPR64spRegClass;
29582951

2959-
// Read VL and Streaming VL
2960-
BuildMI(*MBB, MI, DL, TII->get(AArch64::RDVLI_XI), RegVL).addImm(1);
2961-
BuildMI(*MBB, MI, DL, TII->get(AArch64::RDSVLI_XI), RegSVL).addImm(1);
2952+
Register RegVL_GPR = MRI.createVirtualRegister(RC_GPR);
2953+
Register RegVL_GPRsp = MRI.createVirtualRegister(RC_GPRsp); // for ADDSVL src
2954+
Register RegSVL_GPR = MRI.createVirtualRegister(RC_GPR);
2955+
Register RegSVL_GPRsp = MRI.createVirtualRegister(RC_GPRsp); // for ADDSVL dst
29622956

2963-
// Compare vector lengths
2964-
BuildMI(*MBB, MI, DL, TII->get(AArch64::SUBXrr), RegCheck)
2965-
.addReg(RegVL)
2966-
.addReg(RegSVL);
2957+
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2958+
DebugLoc DL = MI.getDebugLoc();
2959+
2960+
// RDVL requires GPR64, ADDSVL requires GPR64sp
2961+
// We need to insert COPY instructions; these will later be removed by the
2962+
// RegisterCoalescer
2963+
BuildMI(*MBB, MI, DL, TII->get(AArch64::RDVLI_XI), RegVL_GPR).addImm(1);
2964+
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), RegVL_GPRsp)
2965+
.addReg(RegVL_GPR);
29672966

2967+
BuildMI(*MBB, MI, DL, TII->get(AArch64::ADDSVL_XXI), RegSVL_GPRsp)
2968+
.addReg(RegVL_GPRsp)
2969+
.addImm(-1);
2970+
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), RegSVL_GPR)
2971+
.addReg(RegSVL_GPRsp);
2972+
2973+
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2974+
MachineFunction::iterator It = ++MBB->getIterator();
29682975
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(LLVM_BB);
29692976
MachineBasicBlock *PassBB = MF->CreateMachineBasicBlock(LLVM_BB);
29702977
MF->insert(It, TrapBB);
29712978
MF->insert(It, PassBB);
29722979

29732980
// Continue if vector lengths match
29742981
BuildMI(*MBB, MI, DL, TII->get(AArch64::CBZX))
2975-
.addReg(RegCheck)
2982+
.addReg(RegSVL_GPR)
29762983
.addMBB(PassBB);
29772984

29782985
// Transfer rest of current BB to PassBB
@@ -9173,6 +9180,22 @@ SDValue AArch64TargetLowering::changeStreamingMode(
91739180
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
91749181
FuncInfo->setHasStreamingModeChanges(true);
91759182

9183+
auto GetCheckVL = [&](SDValue Chain, SDValue InGlue = SDValue()) -> SDValue {
9184+
SmallVector<SDValue, 2> Ops = {Chain};
9185+
if (InGlue)
9186+
Ops.push_back(InGlue);
9187+
return DAG.getNode(AArch64ISD::CHECK_MATCHING_VL, DL,
9188+
DAG.getVTList(MVT::Other, MVT::Glue), Ops);
9189+
};
9190+
9191+
if (InsertVectorLengthCheck && Enable) {
9192+
// Non-streaming -> Streaming
9193+
// Insert vector length check before smstart
9194+
SDValue CheckVL = GetCheckVL(Chain, InGlue);
9195+
Chain = CheckVL.getValue(0);
9196+
InGlue = CheckVL.getValue(1);
9197+
}
9198+
91769199
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
91779200
SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask());
91789201
SDValue MSROp =
@@ -9199,38 +9222,16 @@ SDValue AArch64TargetLowering::changeStreamingMode(
91999222
if (InGlue)
92009223
Ops.push_back(InGlue);
92019224

9202-
if (!InsertVectorLengthCheck)
9203-
return DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
9204-
9205-
auto GetCheckVL = [&](SDValue Chain, SDValue InGlue = SDValue()) -> SDValue {
9206-
SmallVector<SDValue, 2> Ops = {Chain};
9207-
if (InGlue)
9208-
Ops.push_back(InGlue);
9209-
return DAG.getNode(AArch64ISD::CHECK_MATCHING_VL, DL,
9210-
DAG.getVTList(MVT::Other, MVT::Glue), Ops);
9211-
};
9212-
9213-
// Non-streaming -> Streaming
9214-
if (Enable) {
9215-
SDValue CheckVL = GetCheckVL(Chain, InGlue);
9216-
9217-
// Replace chain
9218-
Ops[0] = CheckVL.getValue(0);
9219-
9220-
// Replace/append glue
9221-
if (InGlue)
9222-
Ops.back() = CheckVL.getValue(1);
9223-
else
9224-
Ops.push_back(CheckVL.getValue(1));
9225+
SDValue SMChange =
9226+
DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
92259227

9226-
return DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
9227-
}
9228+
if (!InsertVectorLengthCheck || Enable)
9229+
return SMChange;
92289230

92299231
// Streaming -> Non-streaming
9230-
SDValue StreamingModeInstr =
9231-
DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
9232-
return GetCheckVL(StreamingModeInstr.getValue(0),
9233-
StreamingModeInstr.getValue(1));
9232+
// Insert vector length check after smstop since we cannot read the non-streaming VL
9233+
// in streaming mode
9234+
return GetCheckVL(SMChange.getValue(0), SMChange.getValue(1));
92349235
}
92359236

92369237
// Emit a call to __arm_sme_save or __arm_sme_restore.

llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,8 @@ define void @fbyte(<vscale x 16 x i8> %v) #0{
4848
; NOPAIR-NEXT: smstop sm
4949
; NOPAIR-NEXT: .LBB0_2:
5050
; NOPAIR-NEXT: rdvl x8, #1
51-
; NOPAIR-NEXT: rdsvl x9, #1
52-
; NOPAIR-NEXT: cmp x8, x9
53-
; NOPAIR-NEXT: b.eq .LBB0_4
51+
; NOPAIR-NEXT: addsvl x8, x8, #-1
52+
; NOPAIR-NEXT: cbz x8, .LBB0_4
5453
; NOPAIR-NEXT: // %bb.3:
5554
; NOPAIR-NEXT: brk #0x1
5655
; NOPAIR-NEXT: .LBB0_4:
@@ -135,9 +134,8 @@ define void @fbyte(<vscale x 16 x i8> %v) #0{
135134
; PAIR-NEXT: smstop sm
136135
; PAIR-NEXT: .LBB0_2:
137136
; PAIR-NEXT: rdvl x8, #1
138-
; PAIR-NEXT: rdsvl x9, #1
139-
; PAIR-NEXT: cmp x8, x9
140-
; PAIR-NEXT: b.eq .LBB0_4
137+
; PAIR-NEXT: addsvl x8, x8, #-1
138+
; PAIR-NEXT: cbz x8, .LBB0_4
141139
; PAIR-NEXT: // %bb.3:
142140
; PAIR-NEXT: brk #0x1
143141
; PAIR-NEXT: .LBB0_4:

llvm/test/CodeGen/AArch64/sme-peephole-opts.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -530,9 +530,8 @@ define void @test13(ptr %ptr) nounwind "aarch64_pstate_sm_enabled" {
530530
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
531531
; CHECK-NEXT: smstop sm
532532
; CHECK-NEXT: rdvl x8, #1
533-
; CHECK-NEXT: rdsvl x9, #1
534-
; CHECK-NEXT: cmp x8, x9
535-
; CHECK-NEXT: b.ne .LBB14_2
533+
; CHECK-NEXT: addsvl x8, x8, #-1
534+
; CHECK-NEXT: cbnz x8, .LBB14_2
536535
; CHECK-NEXT: // %bb.1:
537536
; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
538537
; CHECK-NEXT: mov x19, x0
@@ -541,9 +540,8 @@ define void @test13(ptr %ptr) nounwind "aarch64_pstate_sm_enabled" {
541540
; CHECK-NEXT: smstart sm
542541
; CHECK-NEXT: smstop sm
543542
; CHECK-NEXT: rdvl x8, #1
544-
; CHECK-NEXT: rdsvl x9, #1
545-
; CHECK-NEXT: cmp x8, x9
546-
; CHECK-NEXT: b.eq .LBB14_3
543+
; CHECK-NEXT: addsvl x8, x8, #-1
544+
; CHECK-NEXT: cbz x8, .LBB14_3
547545
; CHECK-NEXT: .LBB14_2:
548546
; CHECK-NEXT: brk #0x1
549547
; CHECK-NEXT: .LBB14_3:

llvm/test/CodeGen/AArch64/sme-streaming-checkvl-mir.ll

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,10 @@ define void @foo_non_streaming_pass_arg(ptr %arg) {
3535
; CHECK-AFTER-ISEL-NEXT: [[LDR_ZXI:%[0-9]+]]:zpr = LDR_ZXI [[COPY]], 0 :: (load (<vscale x 1 x s128>) from %ir.arg)
3636
; CHECK-AFTER-ISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
3737
; CHECK-AFTER-ISEL-NEXT: [[RDVLI_XI:%[0-9]+]]:gpr64 = RDVLI_XI 1, implicit $vg
38-
; CHECK-AFTER-ISEL-NEXT: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg
39-
; CHECK-AFTER-ISEL-NEXT: [[SUBXrr:%[0-9]+]]:gpr64 = SUBXrr [[RDVLI_XI]], [[RDSVLI_XI]]
40-
; CHECK-AFTER-ISEL-NEXT: CBZX [[SUBXrr]], %bb.2
38+
; CHECK-AFTER-ISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[RDVLI_XI]]
39+
; CHECK-AFTER-ISEL-NEXT: [[ADDSVL_XXI:%[0-9]+]]:gpr64sp = ADDSVL_XXI [[COPY1]], -1, implicit $vg
40+
; CHECK-AFTER-ISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDSVL_XXI]]
41+
; CHECK-AFTER-ISEL-NEXT: CBZX [[COPY2]], %bb.2
4142
; CHECK-AFTER-ISEL-NEXT: {{ $}}
4243
; CHECK-AFTER-ISEL-NEXT: bb.1.entry:
4344
; CHECK-AFTER-ISEL-NEXT: successors:
@@ -84,9 +85,10 @@ define void @foo_streaming_pass_arg(ptr %arg) #0 {
8485
; CHECK-AFTER-ISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
8586
; CHECK-AFTER-ISEL-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $sp, implicit-def $z0, implicit $vg, implicit-def $vg, implicit-def $fpmr
8687
; CHECK-AFTER-ISEL-NEXT: [[RDVLI_XI:%[0-9]+]]:gpr64 = RDVLI_XI 1, implicit $vg
87-
; CHECK-AFTER-ISEL-NEXT: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg
88-
; CHECK-AFTER-ISEL-NEXT: [[SUBXrr:%[0-9]+]]:gpr64 = SUBXrr [[RDVLI_XI]], [[RDSVLI_XI]]
89-
; CHECK-AFTER-ISEL-NEXT: CBZX [[SUBXrr]], %bb.2
88+
; CHECK-AFTER-ISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[RDVLI_XI]]
89+
; CHECK-AFTER-ISEL-NEXT: [[ADDSVL_XXI:%[0-9]+]]:gpr64sp = ADDSVL_XXI [[COPY1]], -1, implicit $vg
90+
; CHECK-AFTER-ISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDSVL_XXI]]
91+
; CHECK-AFTER-ISEL-NEXT: CBZX [[COPY2]], %bb.2
9092
; CHECK-AFTER-ISEL-NEXT: {{ $}}
9193
; CHECK-AFTER-ISEL-NEXT: bb.1.entry:
9294
; CHECK-AFTER-ISEL-NEXT: successors:
@@ -131,9 +133,10 @@ define void @foo_non_streaming_retval(ptr %ptr) {
131133
; CHECK-AFTER-ISEL-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
132134
; CHECK-AFTER-ISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
133135
; CHECK-AFTER-ISEL-NEXT: [[RDVLI_XI:%[0-9]+]]:gpr64 = RDVLI_XI 1, implicit $vg
134-
; CHECK-AFTER-ISEL-NEXT: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg
135-
; CHECK-AFTER-ISEL-NEXT: [[SUBXrr:%[0-9]+]]:gpr64 = SUBXrr [[RDVLI_XI]], [[RDSVLI_XI]]
136-
; CHECK-AFTER-ISEL-NEXT: CBZX [[SUBXrr]], %bb.2
136+
; CHECK-AFTER-ISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[RDVLI_XI]]
137+
; CHECK-AFTER-ISEL-NEXT: [[ADDSVL_XXI:%[0-9]+]]:gpr64sp = ADDSVL_XXI [[COPY1]], -1, implicit $vg
138+
; CHECK-AFTER-ISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDSVL_XXI]]
139+
; CHECK-AFTER-ISEL-NEXT: CBZX [[COPY2]], %bb.2
137140
; CHECK-AFTER-ISEL-NEXT: {{ $}}
138141
; CHECK-AFTER-ISEL-NEXT: bb.1.entry:
139142
; CHECK-AFTER-ISEL-NEXT: successors:
@@ -144,10 +147,10 @@ define void @foo_non_streaming_retval(ptr %ptr) {
144147
; CHECK-AFTER-ISEL-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $sp, implicit-def $z0, implicit $vg, implicit-def $vg, implicit-def $fpmr
145148
; CHECK-AFTER-ISEL-NEXT: BL @bar_retv_enabled, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
146149
; CHECK-AFTER-ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
147-
; CHECK-AFTER-ISEL-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z0
150+
; CHECK-AFTER-ISEL-NEXT: [[COPY3:%[0-9]+]]:zpr = COPY $z0
148151
; CHECK-AFTER-ISEL-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr
149-
; CHECK-AFTER-ISEL-NEXT: [[COPY2:%[0-9]+]]:zpr = COPY [[COPY1]]
150-
; CHECK-AFTER-ISEL-NEXT: STR_ZXI [[COPY2]], [[COPY]], 0 :: (store (<vscale x 1 x s128>) into %ir.ptr)
152+
; CHECK-AFTER-ISEL-NEXT: [[COPY4:%[0-9]+]]:zpr = COPY [[COPY3]]
153+
; CHECK-AFTER-ISEL-NEXT: STR_ZXI [[COPY4]], [[COPY]], 0 :: (store (<vscale x 1 x s128>) into %ir.ptr)
151154
; CHECK-AFTER-ISEL-NEXT: RET_ReallyLR
152155
entry:
153156
%v = tail call <vscale x 4 x i32> @bar_retv_enabled() #0
@@ -182,9 +185,10 @@ define void @foo_streaming_retval(ptr %ptr) #0 {
182185
; CHECK-AFTER-ISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
183186
; CHECK-AFTER-ISEL-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $sp, implicit-def $z0, implicit $vg, implicit-def $vg, implicit-def $fpmr
184187
; CHECK-AFTER-ISEL-NEXT: [[RDVLI_XI:%[0-9]+]]:gpr64 = RDVLI_XI 1, implicit $vg
185-
; CHECK-AFTER-ISEL-NEXT: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg
186-
; CHECK-AFTER-ISEL-NEXT: [[SUBXrr:%[0-9]+]]:gpr64 = SUBXrr [[RDVLI_XI]], [[RDSVLI_XI]]
187-
; CHECK-AFTER-ISEL-NEXT: CBZX [[SUBXrr]], %bb.2
188+
; CHECK-AFTER-ISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[RDVLI_XI]]
189+
; CHECK-AFTER-ISEL-NEXT: [[ADDSVL_XXI:%[0-9]+]]:gpr64sp = ADDSVL_XXI [[COPY1]], -1, implicit $vg
190+
; CHECK-AFTER-ISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDSVL_XXI]]
191+
; CHECK-AFTER-ISEL-NEXT: CBZX [[COPY2]], %bb.2
188192
; CHECK-AFTER-ISEL-NEXT: {{ $}}
189193
; CHECK-AFTER-ISEL-NEXT: bb.1.entry:
190194
; CHECK-AFTER-ISEL-NEXT: successors:
@@ -194,10 +198,10 @@ define void @foo_streaming_retval(ptr %ptr) #0 {
194198
; CHECK-AFTER-ISEL-NEXT: bb.2.entry:
195199
; CHECK-AFTER-ISEL-NEXT: BL @bar_retv, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
196200
; CHECK-AFTER-ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
197-
; CHECK-AFTER-ISEL-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z0
201+
; CHECK-AFTER-ISEL-NEXT: [[COPY3:%[0-9]+]]:zpr = COPY $z0
198202
; CHECK-AFTER-ISEL-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr
199-
; CHECK-AFTER-ISEL-NEXT: [[COPY2:%[0-9]+]]:zpr = COPY [[COPY1]]
200-
; CHECK-AFTER-ISEL-NEXT: STR_ZXI [[COPY2]], [[COPY]], 0 :: (store (<vscale x 1 x s128>) into %ir.ptr)
203+
; CHECK-AFTER-ISEL-NEXT: [[COPY4:%[0-9]+]]:zpr = COPY [[COPY3]]
204+
; CHECK-AFTER-ISEL-NEXT: STR_ZXI [[COPY4]], [[COPY]], 0 :: (store (<vscale x 1 x s128>) into %ir.ptr)
201205
; CHECK-AFTER-ISEL-NEXT: RET_ReallyLR
202206
entry:
203207
%v = tail call <vscale x 4 x i32> @bar_retv()

llvm/test/CodeGen/AArch64/sme-streaming-checkvl.ll

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,8 @@ define void @foo_non_streaming_pass_arg(ptr %arg) {
3636
; CHECK-NEXT: .cfi_offset b15, -96
3737
; CHECK-NEXT: addvl sp, sp, #-1
3838
; CHECK-NEXT: rdvl x8, #1
39-
; CHECK-NEXT: rdsvl x9, #1
40-
; CHECK-NEXT: cmp x8, x9
41-
; CHECK-NEXT: b.eq .LBB0_2
39+
; CHECK-NEXT: addsvl x8, x8, #-1
40+
; CHECK-NEXT: cbz x8, .LBB0_2
4241
; CHECK-NEXT: // %bb.1: // %entry
4342
; CHECK-NEXT: brk #0x1
4443
; CHECK-NEXT: .LBB0_2: // %entry
@@ -110,11 +109,10 @@ define void @foo_streaming_compatible_pass_arg(ptr %arg) #1 {
110109
; CHECK-NEXT: .cfi_offset b15, -1136
111110
; CHECK-NEXT: sub sp, sp, #1024
112111
; CHECK-NEXT: addvl sp, sp, #-1
113-
; CHECK-NEXT: mrs x19, SVCR
114112
; CHECK-NEXT: rdvl x8, #1
115-
; CHECK-NEXT: rdsvl x9, #1
116-
; CHECK-NEXT: cmp x8, x9
117-
; CHECK-NEXT: b.eq .LBB1_2
113+
; CHECK-NEXT: mrs x19, SVCR
114+
; CHECK-NEXT: addsvl x8, x8, #-1
115+
; CHECK-NEXT: cbz x8, .LBB1_2
118116
; CHECK-NEXT: // %bb.1: // %entry
119117
; CHECK-NEXT: brk #0x1
120118
; CHECK-NEXT: .LBB1_2: // %entry
@@ -195,9 +193,8 @@ define void @foo_streaming_pass_arg(ptr %arg) #0 {
195193
; CHECK-NEXT: .cfi_def_cfa_offset 2144
196194
; CHECK-NEXT: smstop sm
197195
; CHECK-NEXT: rdvl x8, #1
198-
; CHECK-NEXT: rdsvl x9, #1
199-
; CHECK-NEXT: cmp x8, x9
200-
; CHECK-NEXT: b.eq .LBB2_2
196+
; CHECK-NEXT: addsvl x8, x8, #-1
197+
; CHECK-NEXT: cbz x8, .LBB2_2
201198
; CHECK-NEXT: // %bb.1: // %entry
202199
; CHECK-NEXT: brk #0x1
203200
; CHECK-NEXT: .LBB2_2: // %entry
@@ -264,9 +261,8 @@ define void @foo_non_streaming_retval(ptr %ptr) {
264261
; CHECK-NEXT: .cfi_offset b15, -112
265262
; CHECK-NEXT: addvl sp, sp, #-1
266263
; CHECK-NEXT: rdvl x8, #1
267-
; CHECK-NEXT: rdsvl x9, #1
268-
; CHECK-NEXT: cmp x8, x9
269-
; CHECK-NEXT: b.eq .LBB3_2
264+
; CHECK-NEXT: addsvl x8, x8, #-1
265+
; CHECK-NEXT: cbz x8, .LBB3_2
270266
; CHECK-NEXT: // %bb.1: // %entry
271267
; CHECK-NEXT: brk #0x1
272268
; CHECK-NEXT: .LBB3_2: // %entry
@@ -342,11 +338,10 @@ define void @foo_streaming_compatible_retval(ptr %ptr) #1 {
342338
; CHECK-NEXT: .cfi_offset b15, -1136
343339
; CHECK-NEXT: sub sp, sp, #1024
344340
; CHECK-NEXT: addvl sp, sp, #-1
345-
; CHECK-NEXT: mrs x20, SVCR
346341
; CHECK-NEXT: rdvl x8, #1
347-
; CHECK-NEXT: rdsvl x9, #1
348-
; CHECK-NEXT: cmp x8, x9
349-
; CHECK-NEXT: b.eq .LBB4_2
342+
; CHECK-NEXT: mrs x20, SVCR
343+
; CHECK-NEXT: addsvl x8, x8, #-1
344+
; CHECK-NEXT: cbz x8, .LBB4_2
350345
; CHECK-NEXT: // %bb.1: // %entry
351346
; CHECK-NEXT: brk #0x1
352347
; CHECK-NEXT: .LBB4_2: // %entry
@@ -434,9 +429,8 @@ define void @foo_streaming_retval(ptr %ptr) #0 {
434429
; CHECK-NEXT: addvl sp, sp, #-1
435430
; CHECK-NEXT: smstop sm
436431
; CHECK-NEXT: rdvl x8, #1
437-
; CHECK-NEXT: rdsvl x9, #1
438-
; CHECK-NEXT: cmp x8, x9
439-
; CHECK-NEXT: b.eq .LBB5_2
432+
; CHECK-NEXT: addsvl x8, x8, #-1
433+
; CHECK-NEXT: cbz x8, .LBB5_2
440434
; CHECK-NEXT: // %bb.1: // %entry
441435
; CHECK-NEXT: brk #0x1
442436
; CHECK-NEXT: .LBB5_2: // %entry

llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -210,9 +210,8 @@ define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale
210210
; CHECK-NEXT: smstop sm
211211
; CHECK-NEXT: .LBB5_2:
212212
; CHECK-NEXT: rdvl x8, #1
213-
; CHECK-NEXT: rdsvl x9, #1
214-
; CHECK-NEXT: cmp x8, x9
215-
; CHECK-NEXT: b.eq .LBB5_4
213+
; CHECK-NEXT: addsvl x8, x8, #-1
214+
; CHECK-NEXT: cbz x8, .LBB5_4
216215
; CHECK-NEXT: // %bb.3:
217216
; CHECK-NEXT: brk #0x1
218217
; CHECK-NEXT: .LBB5_4:
@@ -308,9 +307,8 @@ define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x
308307
; CHECK-NEXT: smstop sm
309308
; CHECK-NEXT: .LBB6_2:
310309
; CHECK-NEXT: rdvl x8, #1
311-
; CHECK-NEXT: rdsvl x9, #1
312-
; CHECK-NEXT: cmp x8, x9
313-
; CHECK-NEXT: b.eq .LBB6_4
310+
; CHECK-NEXT: addsvl x8, x8, #-1
311+
; CHECK-NEXT: cbz x8, .LBB6_4
314312
; CHECK-NEXT: // %bb.3:
315313
; CHECK-NEXT: brk #0x1
316314
; CHECK-NEXT: .LBB6_4:

llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -377,9 +377,8 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
377377
; CHECK-NEXT: //NO_APP
378378
; CHECK-NEXT: smstop sm
379379
; CHECK-NEXT: rdvl x8, #1
380-
; CHECK-NEXT: rdsvl x9, #1
381-
; CHECK-NEXT: cmp x8, x9
382-
; CHECK-NEXT: b.eq .LBB3_2
380+
; CHECK-NEXT: addsvl x8, x8, #-1
381+
; CHECK-NEXT: cbz x8, .LBB3_2
383382
; CHECK-NEXT: // %bb.1:
384383
; CHECK-NEXT: brk #0x1
385384
; CHECK-NEXT: .LBB3_2:
@@ -480,9 +479,8 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
480479
; FP-CHECK-NEXT: //NO_APP
481480
; FP-CHECK-NEXT: smstop sm
482481
; FP-CHECK-NEXT: rdvl x8, #1
483-
; FP-CHECK-NEXT: rdsvl x9, #1
484-
; FP-CHECK-NEXT: cmp x8, x9
485-
; FP-CHECK-NEXT: b.eq .LBB3_2
482+
; FP-CHECK-NEXT: addsvl x8, x8, #-1
483+
; FP-CHECK-NEXT: cbz x8, .LBB3_2
486484
; FP-CHECK-NEXT: // %bb.1:
487485
; FP-CHECK-NEXT: brk #0x1
488486
; FP-CHECK-NEXT: .LBB3_2:

0 commit comments

Comments
 (0)