Skip to content

Commit 371598d

Browse files
committed
Add support for probed agnostic ZA allocas
Change-Id: I868b20fc09b7c971edf778924ac56a065df19772
1 parent f97a3d6 commit 371598d

File tree

2 files changed

+76
-1
lines changed

2 files changed

+76
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8298,6 +8298,15 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
82988298
SDValue SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
82998299
DAG.getConstant(1, DL, MVT::i32));
83008300
Size = DAG.getNode(ISD::MUL, DL, MVT::i64, SVL, SVL);
8301+
} else if (Attrs.hasAgnosticZAInterface()) {
8302+
RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
8303+
SDValue Callee = DAG.getExternalSymbol(
8304+
getLibcallName(LC), getPointerTy(DAG.getDataLayout()));
8305+
auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.getContext());
8306+
TargetLowering::CallLoweringInfo CLI(DAG);
8307+
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
8308+
getLibcallCallingConv(LC), RetTy, Callee, {});
8309+
std::tie(Size, Chain) = LowerCallTo(CLI);
83018310
}
83028311
if (Size) {
83038312
SDValue Buffer = DAG.getNode(
@@ -8363,7 +8372,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
83638372
Register BufferPtr =
83648373
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
83658374
FuncInfo->setSMESaveBufferAddr(BufferPtr);
8366-
Chain = DAG.getCopyToReg(Chain, DL, BufferPtr, Buffer);
8375+
Chain = DAG.getCopyToReg(Buffer.getValue(1), DL, BufferPtr, Buffer);
83678376
}
83688377
}
83698378

llvm/test/CodeGen/AArch64/sme-agnostic-za.ll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
target triple = "aarch64"
66

77
declare i64 @private_za_decl(i64)
8+
declare void @private_za()
89
declare i64 @agnostic_decl(i64) "aarch64_za_state_agnostic"
910

1011
; No calls. Test that no buffer is allocated.
@@ -361,3 +362,68 @@ define i64 @test_many_callee_arguments(
361362
i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9)
362363
ret i64 %ret
363364
}
365+
366+
; FIXME: The new lowering (CHECK-NEWLOWERING) should avoid the __arm_sme_save/__arm_sme_restore calls it currently emits inside the stack-probing loop.
367+
define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{
368+
; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
369+
; CHECK: // %bb.0:
370+
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
371+
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
372+
; CHECK-NEXT: mov x29, sp
373+
; CHECK-NEXT: bl __arm_sme_state_size
374+
; CHECK-NEXT: mov x8, sp
375+
; CHECK-NEXT: sub x19, x8, x0
376+
; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
377+
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
378+
; CHECK-NEXT: cmp sp, x19
379+
; CHECK-NEXT: b.le .LBB7_3
380+
; CHECK-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
381+
; CHECK-NEXT: str xzr, [sp]
382+
; CHECK-NEXT: b .LBB7_1
383+
; CHECK-NEXT: .LBB7_3:
384+
; CHECK-NEXT: mov sp, x19
385+
; CHECK-NEXT: ldr xzr, [sp]
386+
; CHECK-NEXT: mov x0, x19
387+
; CHECK-NEXT: bl __arm_sme_save
388+
; CHECK-NEXT: bl private_za
389+
; CHECK-NEXT: mov x0, x19
390+
; CHECK-NEXT: bl __arm_sme_restore
391+
; CHECK-NEXT: mov sp, x29
392+
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
393+
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
394+
; CHECK-NEXT: ret
395+
;
396+
; CHECK-NEWLOWERING-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
397+
; CHECK-NEWLOWERING: // %bb.0:
398+
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
399+
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
400+
; CHECK-NEWLOWERING-NEXT: mov x29, sp
401+
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
402+
; CHECK-NEWLOWERING-NEXT: mov x8, sp
403+
; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0
404+
; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
405+
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
406+
; CHECK-NEWLOWERING-NEXT: cmp sp, x19
407+
; CHECK-NEWLOWERING-NEXT: mov x0, x19
408+
; CHECK-NEWLOWERING-NEXT: mrs x8, NZCV
409+
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
410+
; CHECK-NEWLOWERING-NEXT: msr NZCV, x8
411+
; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3
412+
; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
413+
; CHECK-NEWLOWERING-NEXT: mov x0, x19
414+
; CHECK-NEWLOWERING-NEXT: str xzr, [sp]
415+
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
416+
; CHECK-NEWLOWERING-NEXT: b .LBB7_1
417+
; CHECK-NEWLOWERING-NEXT: .LBB7_3:
418+
; CHECK-NEWLOWERING-NEXT: mov sp, x19
419+
; CHECK-NEWLOWERING-NEXT: ldr xzr, [sp]
420+
; CHECK-NEWLOWERING-NEXT: bl private_za
421+
; CHECK-NEWLOWERING-NEXT: mov x0, x19
422+
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
423+
; CHECK-NEWLOWERING-NEXT: mov sp, x29
424+
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
425+
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
426+
; CHECK-NEWLOWERING-NEXT: ret
427+
call void @private_za()
428+
ret void
429+
}

0 commit comments

Comments
 (0)