Skip to content

Commit 846aba4

Browse files
committed
Add support for probed agnostic ZA allocas
Change-Id: I868b20fc09b7c971edf778924ac56a065df19772
1 parent 2553947 commit 846aba4

File tree

2 files changed

+76
-1
lines changed

2 files changed

+76
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8496,6 +8496,15 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
84968496
SDValue SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
84978497
DAG.getConstant(1, DL, MVT::i32));
84988498
Size = DAG.getNode(ISD::MUL, DL, MVT::i64, SVL, SVL);
8499+
} else if (Attrs.hasAgnosticZAInterface()) {
8500+
RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
8501+
SDValue Callee = DAG.getExternalSymbol(
8502+
getLibcallName(LC), getPointerTy(DAG.getDataLayout()));
8503+
auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.getContext());
8504+
TargetLowering::CallLoweringInfo CLI(DAG);
8505+
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
8506+
getLibcallCallingConv(LC), RetTy, Callee, {});
8507+
std::tie(Size, Chain) = LowerCallTo(CLI);
84998508
}
85008509
if (Size) {
85018510
SDValue Buffer = DAG.getNode(
@@ -8561,7 +8570,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
85618570
Register BufferPtr =
85628571
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
85638572
FuncInfo->setSMESaveBufferAddr(BufferPtr);
8564-
Chain = DAG.getCopyToReg(Chain, DL, BufferPtr, Buffer);
8573+
Chain = DAG.getCopyToReg(Buffer.getValue(1), DL, BufferPtr, Buffer);
85658574
}
85668575
}
85678576

llvm/test/CodeGen/AArch64/sme-agnostic-za.ll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
target triple = "aarch64"
66

77
declare i64 @private_za_decl(i64)
8+
declare void @private_za()
89
declare i64 @agnostic_decl(i64) "aarch64_za_state_agnostic"
910

1011
; No calls. Test that no buffer is allocated.
@@ -360,3 +361,68 @@ define i64 @test_many_callee_arguments(
360361
i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9)
361362
ret i64 %ret
362363
}
364+
365+
; FIXME: The new lowering should avoid the __arm_sme_save/__arm_sme_restore calls it currently emits inside the stack probing loop.
366+
define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{
367+
; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
368+
; CHECK: // %bb.0:
369+
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
370+
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
371+
; CHECK-NEXT: mov x29, sp
372+
; CHECK-NEXT: bl __arm_sme_state_size
373+
; CHECK-NEXT: mov x8, sp
374+
; CHECK-NEXT: sub x19, x8, x0
375+
; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
376+
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
377+
; CHECK-NEXT: cmp sp, x19
378+
; CHECK-NEXT: b.le .LBB7_3
379+
; CHECK-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
380+
; CHECK-NEXT: str xzr, [sp]
381+
; CHECK-NEXT: b .LBB7_1
382+
; CHECK-NEXT: .LBB7_3:
383+
; CHECK-NEXT: mov sp, x19
384+
; CHECK-NEXT: ldr xzr, [sp]
385+
; CHECK-NEXT: mov x0, x19
386+
; CHECK-NEXT: bl __arm_sme_save
387+
; CHECK-NEXT: bl private_za
388+
; CHECK-NEXT: mov x0, x19
389+
; CHECK-NEXT: bl __arm_sme_restore
390+
; CHECK-NEXT: mov sp, x29
391+
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
392+
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
393+
; CHECK-NEXT: ret
394+
;
395+
; CHECK-NEWLOWERING-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
396+
; CHECK-NEWLOWERING: // %bb.0:
397+
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
398+
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
399+
; CHECK-NEWLOWERING-NEXT: mov x29, sp
400+
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
401+
; CHECK-NEWLOWERING-NEXT: mov x8, sp
402+
; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0
403+
; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
404+
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
405+
; CHECK-NEWLOWERING-NEXT: cmp sp, x19
406+
; CHECK-NEWLOWERING-NEXT: mov x0, x19
407+
; CHECK-NEWLOWERING-NEXT: mrs x8, NZCV
408+
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
409+
; CHECK-NEWLOWERING-NEXT: msr NZCV, x8
410+
; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3
411+
; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
412+
; CHECK-NEWLOWERING-NEXT: mov x0, x19
413+
; CHECK-NEWLOWERING-NEXT: str xzr, [sp]
414+
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
415+
; CHECK-NEWLOWERING-NEXT: b .LBB7_1
416+
; CHECK-NEWLOWERING-NEXT: .LBB7_3:
417+
; CHECK-NEWLOWERING-NEXT: mov sp, x19
418+
; CHECK-NEWLOWERING-NEXT: ldr xzr, [sp]
419+
; CHECK-NEWLOWERING-NEXT: bl private_za
420+
; CHECK-NEWLOWERING-NEXT: mov x0, x19
421+
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
422+
; CHECK-NEWLOWERING-NEXT: mov sp, x29
423+
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
424+
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
425+
; CHECK-NEWLOWERING-NEXT: ret
426+
call void @private_za()
427+
ret void
428+
}

0 commit comments

Comments
 (0)