From 3251573e1223bc990b025c054ff6e8a425fc76b8 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Thu, 24 Jul 2025 14:39:34 +0000 Subject: [PATCH 1/4] Precommit test --- .../Inline/AArch64/sme-pstatesm-attrs.ll | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll index 6cb16928ae6ca..3c0f517501353 100644 --- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll +++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll @@ -676,4 +676,46 @@ define void @streaming_caller_multiple_streaming_compatible_callees_inline() #0 ret void } +define void @simple_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @simple_streaming_function +; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: store zeroinitializer, ptr [[PTR]], align 16 +; CHECK-NEXT: ret void +; + store zeroinitializer, ptr %ptr + ret void +} + +; Don't allow inlining a streaming function into a non-streaming function. +define void @non_streaming_caller_streaming_callee_dont_inline(ptr %ptr) #0 { +; CHECK-LABEL: define void @non_streaming_caller_streaming_callee_dont_inline +; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: store zeroinitializer, ptr [[PTR]], align 16 +; CHECK-NEXT: ret void +; + call void @simple_streaming_function(ptr %ptr) + ret void +} + +define void @simple_locally_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_body" { +; CHECK-LABEL: define void @simple_locally_streaming_function +; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: store zeroinitializer, ptr [[PTR]], align 16 +; CHECK-NEXT: ret void +; + store zeroinitializer, ptr %ptr + ret void +} + +; Don't allow inlining a locally-streaming function into a non-streaming function. 
+define void @non_streaming_caller_locally_streaming_callee_dont_inline(ptr %ptr) #0 { +; CHECK-LABEL: define void @non_streaming_caller_locally_streaming_callee_dont_inline +; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: store zeroinitializer, ptr [[PTR]], align 16 +; CHECK-NEXT: ret void +; + call void @simple_locally_streaming_function(ptr %ptr) + ret void +} + attributes #0 = { "target-features"="+sve,+sme" } From 4cfce256c0ed7902dc837fe71ff93df9bae2b238 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Thu, 24 Jul 2025 14:16:49 +0000 Subject: [PATCH 2/4] [AArch64] Dont inline streaming fn into non-streaming caller Without this change, the following test would fail to compile with `-march=armv8-a+sme`: void func1(const svuint32_t *in, svuint32_t *out) { [&]() __arm_streaming { *out = *in; }(); } But in general, it's probably better never to inline streaming functions into non-streaming functions, because they will have been marked as 'streaming' for a reason by the user. --- .../AArch64/AArch64TargetTransformInfo.cpp | 7 ++ .../sme-pstatesm-attrs-low-threshold.ll | 3 +- .../Inline/AArch64/sme-pstatesm-attrs.ll | 89 +++++++++---------- 3 files changed, 52 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 40f49dade6131..2bb61ddf6d818 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -270,6 +270,13 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { SMECallAttrs CallAttrs(*Caller, *Callee); + // Never inline a function explicitly marked as being streaming, + // into a non-streaming function. Assume it was marked as streaming + // for a reason. 
+ if (CallAttrs.caller().hasNonStreamingInterfaceAndBody() && + CallAttrs.callee().hasStreamingInterfaceOrBody()) + return false; + // When inlining, we should consider the body of the function, not the // interface. if (CallAttrs.callee().hasStreamingBody()) { diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll index b57a45fe41834..1878b62e88881 100644 --- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll +++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll @@ -25,8 +25,7 @@ define void @streaming_callee() #0 "aarch64_pstate_sm_enabled" { define void @non_streaming_caller_inline() #0 { ; CHECK-LABEL: define void @non_streaming_caller_inline ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: call void @streaming_compatible_f() -; CHECK-NEXT: call void @streaming_compatible_f() +; CHECK-NEXT: call void @streaming_callee() ; CHECK-NEXT: ret void ; call void @streaming_callee() diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll index 3c0f517501353..dc6255170b9c6 100644 --- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll +++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll @@ -86,7 +86,7 @@ entry: ; [ ] N -> SC + B define i32 @normal_caller_normal_callee_inline() #0 { ; CHECK-LABEL: define i32 @normal_caller_normal_callee_inline -; CHECK-SAME: () #[[ATTR6:[0-9]+]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -103,7 +103,7 @@ entry: ; [ ] N -> SC + B define i32 @normal_caller_streaming_callee_dont_inline() #0 { ; CHECK-LABEL: define i32 @normal_caller_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 
@streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -120,7 +120,7 @@ entry: ; [ ] N -> SC + B define i32 @normal_caller_streaming_compatible_callee_inline() #0 { ; CHECK-LABEL: define i32 @normal_caller_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -137,7 +137,7 @@ entry: ; [ ] N -> SC + B define i32 @normal_caller_locally_streaming_callee_dont_inline() #0 { ; CHECK-LABEL: define i32 @normal_caller_locally_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @locally_streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -154,7 +154,7 @@ entry: ; [x] N -> SC + B define i32 @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline() #0 { ; CHECK-LABEL: define i32 @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @streaming_compatible_locally_streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -171,7 +171,7 @@ entry: ; [ ] S -> SC + B define i32 @streaming_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i32 @streaming_caller_normal_callee_dont_inline -; CHECK-SAME: () #[[ATTR7:[0-9]+]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -188,7 +188,7 @@ entry: ; [ ] S -> SC + B define i32 @streaming_caller_streaming_callee_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i32 @streaming_caller_streaming_callee_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -205,7 +205,7 @@ entry: ; 
[ ] S -> SC + B define i32 @streaming_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i32 @streaming_caller_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -222,7 +222,7 @@ entry: ; [ ] S -> SC + B define i32 @streaming_caller_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i32 @streaming_caller_locally_streaming_callee_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -239,7 +239,7 @@ entry: ; [x] S -> SC + B define i32 @streaming_caller_streaming_compatible_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i32 @streaming_caller_streaming_compatible_locally_streaming_callee_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -256,7 +256,7 @@ entry: ; [ ] N + B -> SC + B define i32 @locally_streaming_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @locally_streaming_caller_normal_callee_dont_inline -; CHECK-SAME: () #[[ATTR8:[0-9]+]] { +; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -273,7 +273,7 @@ entry: ; [ ] N + B -> SC + B define i32 @locally_streaming_caller_streaming_callee_inline() #0 "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_callee_inline -; CHECK-SAME: () #[[ATTR8]] { +; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -290,7 +290,7 @@ 
entry: ; [ ] N + B -> SC + B define i32 @locally_streaming_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR8]] { +; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -307,7 +307,7 @@ entry: ; [ ] N + B -> SC + B define i32 @locally_streaming_caller_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @locally_streaming_caller_locally_streaming_callee_inline -; CHECK-SAME: () #[[ATTR8]] { +; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -324,7 +324,7 @@ entry: ; [x] N + B -> SC + B define i32 @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline -; CHECK-SAME: () #[[ATTR8]] { +; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -341,7 +341,7 @@ entry: ; [ ] SC -> SC + B define i32 @streaming_compatible_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: define i32 @streaming_compatible_caller_normal_callee_dont_inline -; CHECK-SAME: () #[[ATTR9:[0-9]+]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -358,7 +358,7 @@ entry: ; [ ] SC -> SC + B define i32 @streaming_compatible_caller_streaming_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR9]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] 
= call i32 @streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -375,7 +375,7 @@ entry: ; [ ] SC -> SC + B define i32 @streaming_compatible_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR9]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -392,7 +392,7 @@ entry: ; [ ] SC -> SC + B define i32 @streaming_compatible_caller_locally_streaming_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: define i32 @streaming_compatible_caller_locally_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR9]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @locally_streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -409,7 +409,7 @@ entry: ; [x] SC -> SC + B define i32 @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR9]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @streaming_compatible_locally_streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -425,7 +425,7 @@ entry: ; [ ] SC + B -> SC + B define i32 @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline -; CHECK-SAME: () #[[ATTR10:[0-9]+]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -442,7 +442,7 @@ entry: ; [ ] SC + B -> SC + B define i32 
@streaming_compatible_locally_streaming_caller_streaming_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_streaming_callee_inline -; CHECK-SAME: () #[[ATTR10]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -459,7 +459,7 @@ entry: ; [ ] SC + B -> SC + B define i32 @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR10]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -476,7 +476,7 @@ entry: ; [ ] SC + B -> SC + B define i32 @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline -; CHECK-SAME: () #[[ATTR10]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -493,7 +493,7 @@ entry: ; [x] SC + B -> SC + B define i32 @streaming_compatible_locally_streaming_caller_and_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_and_callee_inline -; CHECK-SAME: () #[[ATTR10]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -505,7 +505,7 @@ entry: define void @normal_callee_with_inlineasm() #0 { ; CHECK-LABEL: define void @normal_callee_with_inlineasm -; CHECK-SAME: () #[[ATTR6]] { +; 
CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void asm sideeffect " ; CHECK-NEXT: ret void @@ -517,7 +517,7 @@ entry: define void @streaming_caller_normal_callee_with_inlineasm_dont_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define void @streaming_caller_normal_callee_with_inlineasm_dont_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @normal_callee_with_inlineasm() ; CHECK-NEXT: ret void @@ -529,7 +529,7 @@ entry: define i64 @normal_callee_with_intrinsic_call() #0 { ; CHECK-LABEL: define i64 @normal_callee_with_intrinsic_call -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.aarch64.sve.cntb(i32 4) ; CHECK-NEXT: ret i64 [[RES]] @@ -541,7 +541,7 @@ entry: define i64 @streaming_caller_normal_callee_with_intrinsic_call_dont_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i64 @streaming_caller_normal_callee_with_intrinsic_call_dont_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i64 @normal_callee_with_intrinsic_call() ; CHECK-NEXT: ret i64 [[RES]] @@ -555,7 +555,7 @@ declare i64 @llvm.aarch64.sve.cntb(i32) define i64 @normal_callee_call_sme_state() #0 { ; CHECK-LABEL: define i64 @normal_callee_call_sme_state -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call { i64, i64 } @__arm_sme_state() ; CHECK-NEXT: [[RES_0:%.*]] = extractvalue { i64, i64 } [[RES]], 0 @@ -571,7 +571,7 @@ declare {i64, i64} @__arm_sme_state() define i64 @streaming_caller_normal_callee_call_sme_state_dont_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i64 @streaming_caller_normal_callee_call_sme_state_dont_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i64 
@normal_callee_call_sme_state() ; CHECK-NEXT: ret i64 [[RES]] @@ -587,7 +587,7 @@ declare void @streaming_body() "aarch64_pstate_sm_enabled" define void @streaming_caller_single_streaming_callee() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define void @streaming_caller_single_streaming_callee -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: call void @streaming_body() ; CHECK-NEXT: ret void ; @@ -597,7 +597,7 @@ define void @streaming_caller_single_streaming_callee() #0 "aarch64_pstate_sm_e define void @streaming_caller_multiple_streaming_callees() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: call void @streaming_body() ; CHECK-NEXT: call void @streaming_body() ; CHECK-NEXT: ret void @@ -610,8 +610,8 @@ define void @streaming_caller_multiple_streaming_callees() #0 "aarch64_pstate_s ; Allow inlining, as inline it would not increase the number of streaming-mode changes. define void @streaming_caller_single_streaming_callee_inline() #0 { ; CHECK-LABEL: define void @streaming_caller_single_streaming_callee_inline -; CHECK-SAME: () #[[ATTR6]] { -; CHECK-NEXT: call void @streaming_body() +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: call void @streaming_caller_single_streaming_callee() ; CHECK-NEXT: ret void ; call void @streaming_caller_single_streaming_callee() @@ -621,7 +621,7 @@ define void @streaming_caller_single_streaming_callee_inline() #0 { ; Prevent inlining, as inline it would lead to multiple streaming-mode changes. 
define void @streaming_caller_multiple_streaming_callees_dont_inline() #0 { ; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees_dont_inline -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: call void @streaming_caller_multiple_streaming_callees() ; CHECK-NEXT: ret void ; @@ -633,7 +633,7 @@ declare void @streaming_compatible_body() "aarch64_pstate_sm_compatible" define void @streaming_caller_single_streaming_compatible_callee() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: ret void ; @@ -643,7 +643,7 @@ define void @streaming_caller_single_streaming_compatible_callee() #0 "aarch64_ define void @streaming_caller_multiple_streaming_compatible_callees() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: ret void @@ -656,8 +656,8 @@ define void @streaming_caller_multiple_streaming_compatible_callees() #0 "aarch ; Allow inlining, as inline would remove a streaming-mode change. define void @streaming_caller_single_streaming_compatible_callee_inline() #0 { ; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR6]] { -; CHECK-NEXT: call void @streaming_compatible_body() +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: call void @streaming_caller_single_streaming_compatible_callee() ; CHECK-NEXT: ret void ; call void @streaming_caller_single_streaming_compatible_callee() @@ -667,9 +667,8 @@ define void @streaming_caller_single_streaming_compatible_callee_inline() #0 { ; Allow inlining, as inline would remove several stremaing-mode changes. 
define void @streaming_caller_multiple_streaming_compatible_callees_inline() #0 { ; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees_inline -; CHECK-SAME: () #[[ATTR6]] { -; CHECK-NEXT: call void @streaming_compatible_body() -; CHECK-NEXT: call void @streaming_compatible_body() +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: call void @streaming_caller_multiple_streaming_compatible_callees() ; CHECK-NEXT: ret void ; call void @streaming_caller_multiple_streaming_compatible_callees() @@ -690,7 +689,7 @@ define void @simple_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_enabled" define void @non_streaming_caller_streaming_callee_dont_inline(ptr %ptr) #0 { ; CHECK-LABEL: define void @non_streaming_caller_streaming_callee_dont_inline ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: store zeroinitializer, ptr [[PTR]], align 16 +; CHECK-NEXT: call void @simple_streaming_function(ptr [[PTR]]) ; CHECK-NEXT: ret void ; call void @simple_streaming_function(ptr %ptr) @@ -711,7 +710,7 @@ define void @simple_locally_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_b define void @non_streaming_caller_locally_streaming_callee_dont_inline(ptr %ptr) #0 { ; CHECK-LABEL: define void @non_streaming_caller_locally_streaming_callee_dont_inline ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: store zeroinitializer, ptr [[PTR]], align 16 +; CHECK-NEXT: call void @simple_locally_streaming_function(ptr [[PTR]]) ; CHECK-NEXT: ret void ; call void @simple_locally_streaming_function(ptr %ptr) From 443e282057dfa7db877b93186cb2bd5cf680373a Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Thu, 31 Jul 2025 12:49:52 +0000 Subject: [PATCH 3/4] Fix tests Because the behaviour now changed, I've had to modify some of the other tests to test inlining with streaming mode changes the other way around (streaming <- non-streaming instead of non-streaming <- streaming) --- .../sme-pstatesm-attrs-low-threshold.ll | 21 ++--- 
.../Inline/AArch64/sme-pstatesm-attrs.ll | 83 ++++++++++--------- 2 files changed, 53 insertions(+), 51 deletions(-) diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll index 1878b62e88881..597f0cf479d16 100644 --- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll +++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll @@ -9,8 +9,8 @@ declare void @streaming_compatible_f() #0 "aarch64_pstate_sm_compatible" ; Function @streaming_callee doesn't contain any operations that may use ZA ; state and therefore can be legally inlined into a normal function. -define void @streaming_callee() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_callee +define void @non_streaming_callee() #0 { +; CHECK-LABEL: define void @non_streaming_callee ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: call void @streaming_compatible_f() ; CHECK-NEXT: call void @streaming_compatible_f() @@ -22,24 +22,25 @@ define void @streaming_callee() #0 "aarch64_pstate_sm_enabled" { } ; Inline call to @streaming_callee to remove a streaming mode change. -define void @non_streaming_caller_inline() #0 { -; CHECK-LABEL: define void @non_streaming_caller_inline +define void @streaming_caller_inline() #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @streaming_caller_inline ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: call void @streaming_callee() +; CHECK-NEXT: call void @streaming_compatible_f() +; CHECK-NEXT: call void @streaming_compatible_f() ; CHECK-NEXT: ret void ; - call void @streaming_callee() + call void @non_streaming_callee() ret void } ; Don't inline call to @streaming_callee when the inline-threshold is set to 1, because it does not eliminate a streaming-mode change. 
-define void @streaming_caller_dont_inline() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_caller_dont_inline +define void @non_streaming_caller_dont_inline() #0 { +; CHECK-LABEL: define void @non_streaming_caller_dont_inline ; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @streaming_callee() +; CHECK-NEXT: call void @non_streaming_callee() ; CHECK-NEXT: ret void ; - call void @streaming_callee() + call void @non_streaming_callee() ret void } diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll index dc6255170b9c6..077a3aa49fb41 100644 --- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll +++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll @@ -583,57 +583,57 @@ entry: -declare void @streaming_body() "aarch64_pstate_sm_enabled" +declare void @nonstreaming_body() -define void @streaming_caller_single_streaming_callee() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_caller_single_streaming_callee -; CHECK-SAME: () #[[ATTR2]] { -; CHECK-NEXT: call void @streaming_body() +define void @nonstreaming_caller_single_nonstreaming_callee() #0 { +; CHECK-LABEL: define void @nonstreaming_caller_single_nonstreaming_callee +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: call void @nonstreaming_body() ; CHECK-NEXT: ret void ; - call void @streaming_body() + call void @nonstreaming_body() ret void } -define void @streaming_caller_multiple_streaming_callees() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees -; CHECK-SAME: () #[[ATTR2]] { -; CHECK-NEXT: call void @streaming_body() -; CHECK-NEXT: call void @streaming_body() +define void @nonstreaming_caller_multiple_nonstreaming_callees() #0 { +; CHECK-LABEL: define void @nonstreaming_caller_multiple_nonstreaming_callees +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: call void @nonstreaming_body() +; CHECK-NEXT: call void 
@nonstreaming_body() ; CHECK-NEXT: ret void ; - call void @streaming_body() - call void @streaming_body() + call void @nonstreaming_body() + call void @nonstreaming_body() ret void } ; Allow inlining, as inline it would not increase the number of streaming-mode changes. -define void @streaming_caller_single_streaming_callee_inline() #0 { -; CHECK-LABEL: define void @streaming_caller_single_streaming_callee_inline -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @streaming_caller_single_streaming_callee() +define void @streaming_caller_to_nonstreaming_callee_with_single_nonstreaming_callee_inline() #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_single_nonstreaming_callee_inline +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: call void @nonstreaming_body() ; CHECK-NEXT: ret void ; - call void @streaming_caller_single_streaming_callee() + call void @nonstreaming_caller_single_nonstreaming_callee() ret void } -; Prevent inlining, as inline it would lead to multiple streaming-mode changes. -define void @streaming_caller_multiple_streaming_callees_dont_inline() #0 { -; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees_dont_inline -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @streaming_caller_multiple_streaming_callees() +; Prevent inlining, as inlining it would lead to multiple streaming-mode changes. 
+define void @streaming_caller_to_nonstreaming_callee_with_multiple_nonstreaming_callees_dont_inline() #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_multiple_nonstreaming_callees_dont_inline +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: call void @nonstreaming_caller_multiple_nonstreaming_callees() ; CHECK-NEXT: ret void ; - call void @streaming_caller_multiple_streaming_callees() + call void @nonstreaming_caller_multiple_nonstreaming_callees() ret void } declare void @streaming_compatible_body() "aarch64_pstate_sm_compatible" -define void @streaming_caller_single_streaming_compatible_callee() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee -; CHECK-SAME: () #[[ATTR2]] { +define void @nonstreaming_caller_single_streaming_compatible_callee() #0 { +; CHECK-LABEL: define void @nonstreaming_caller_single_streaming_compatible_callee +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: ret void ; @@ -641,9 +641,9 @@ define void @streaming_caller_single_streaming_compatible_callee() #0 "aarch64_ ret void } -define void @streaming_caller_multiple_streaming_compatible_callees() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees -; CHECK-SAME: () #[[ATTR2]] { +define void @nonstreaming_caller_multiple_streaming_compatible_callees() #0 { +; CHECK-LABEL: define void @nonstreaming_caller_multiple_streaming_compatible_callees +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: ret void @@ -654,24 +654,25 @@ define void @streaming_caller_multiple_streaming_compatible_callees() #0 "aarch } ; Allow inlining, as inline would remove a streaming-mode change. 
-define void @streaming_caller_single_streaming_compatible_callee_inline() #0 { -; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @streaming_caller_single_streaming_compatible_callee() +define void @streaming_caller_to_nonstreaming_callee_with_single_streamingcompatible_callee_inline() #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_single_streamingcompatible_callee_inline +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: ret void ; - call void @streaming_caller_single_streaming_compatible_callee() + call void @nonstreaming_caller_single_streaming_compatible_callee() ret void } -; Allow inlining, as inline would remove several stremaing-mode changes. -define void @streaming_caller_multiple_streaming_compatible_callees_inline() #0 { -; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees_inline -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @streaming_caller_multiple_streaming_compatible_callees() +; Allow inlining, as inline would remove several streaming-mode changes. 
+define void @streaming_caller_to_nonstreaming_callee_with_multiple_streamingcompatible_callees_inline() #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_multiple_streamingcompatible_callees_inline +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: call void @streaming_compatible_body() +; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: ret void ; - call void @streaming_caller_multiple_streaming_compatible_callees() + call void @nonstreaming_caller_multiple_streaming_compatible_callees() ret void } From 4ca6a64b50b63989ba7401af6243c79d8381e545 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Thu, 31 Jul 2025 19:59:45 +0000 Subject: [PATCH 4/4] Update comments in test --- .../Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll index 597f0cf479d16..8a608a1b8e156 100644 --- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll +++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll @@ -7,7 +7,7 @@ target triple = "aarch64" declare void @streaming_compatible_f() #0 "aarch64_pstate_sm_compatible" -; Function @streaming_callee doesn't contain any operations that may use ZA +; Function @non_streaming_callee doesn't contain any operations that may use ZA ; state and therefore can be legally inlined into a normal function. define void @non_streaming_callee() #0 { ; CHECK-LABEL: define void @non_streaming_callee @@ -21,7 +21,7 @@ define void @non_streaming_callee() #0 { ret void } -; Inline call to @streaming_callee to remove a streaming mode change. +; Inline call to @non_streaming_callee to remove a streaming mode change. 
define void @streaming_caller_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define void @streaming_caller_inline ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { @@ -33,7 +33,7 @@ define void @streaming_caller_inline() #0 "aarch64_pstate_sm_enabled" { ret void } -; Don't inline call to @streaming_callee when the inline-threshold is set to 1, because it does not eliminate a streaming-mode change. +; Don't inline call to @non_streaming_callee when the inline-threshold is set to 1, because it does not eliminate a streaming-mode change. define void @non_streaming_caller_dont_inline() #0 { ; CHECK-LABEL: define void @non_streaming_caller_dont_inline ; CHECK-SAME: () #[[ATTR1]] {