Skip to content

Commit 90daf9c

Browse files
committed
[clang][SME] Ignore flatten for callees mismatched streaming attributes
If `__attribute__((flatten))` is used on a function don't inline any callees with incompatible streaming attributes. Without this check, clang may produce incorrect code when `flatten` is used in code with streaming functions. Note: The docs for flatten say it can be ignored when inlining is impossible: "causes calls within the attributed function to be inlined unless it is impossible to do so".
1 parent 7c8e05a commit 90daf9c

File tree

4 files changed

+143
-15
lines changed

4 files changed

+143
-15
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5112,9 +5112,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
51125112

51135113
// Some architectures (such as x86-64) have the ABI changed based on
51145114
// attribute-target/features. Give them a chance to diagnose.
5115-
CGM.getTargetCodeGenInfo().checkFunctionCallABI(
5116-
CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl),
5117-
dyn_cast_or_null<FunctionDecl>(TargetDecl), CallArgs, RetTy);
5115+
const FunctionDecl *CallerDecl = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
5116+
const FunctionDecl *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl);
5117+
CGM.getTargetCodeGenInfo().checkFunctionCallABI(CGM, Loc, CallerDecl,
5118+
CalleeDecl, CallArgs, RetTy);
51185119

51195120
// 1. Set up the arguments.
51205121

@@ -5705,7 +5706,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
57055706
// FIXME: should this really take priority over __try, below?
57065707
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
57075708
!InNoInlineAttributedStmt &&
5708-
!(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) {
5709+
!(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>()) &&
5710+
!CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI(
5711+
CallerDecl, CalleeDecl)) {
57095712
Attrs =
57105713
Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
57115714
}

clang/lib/CodeGen/TargetInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,15 @@ class TargetCodeGenInfo {
9898
const CallArgList &Args,
9999
QualType ReturnType) const {}
100100

101+
/// Returns true if inlining the function call would produce incorrect code
102+
/// for the current target and should be ignored (even with the always_inline
103+
/// or flatten attributes).
104+
virtual bool
105+
wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller,
106+
const FunctionDecl *Callee) const {
107+
return false;
108+
}
109+
101110
/// Determines the size of struct _Unwind_Exception on this platform,
102111
/// in 8-bit units. The Itanium ABI defines this as:
103112
/// struct _Unwind_Exception {

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
177177
const FunctionDecl *Callee, const CallArgList &Args,
178178
QualType ReturnType) const override;
179179

180+
bool wouldInliningViolateFunctionCallABI(
181+
const FunctionDecl *Caller, const FunctionDecl *Callee) const override;
182+
180183
private:
181184
// Diagnose calls between functions with incompatible Streaming SVE
182185
// attributes.
@@ -1143,30 +1146,62 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
11431146
}
11441147
}
11451148

1146-
void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
1147-
CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
1148-
const FunctionDecl *Callee) const {
1149-
if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
1150-
return;
1149+
enum class ArmStreamingInlinability : uint8_t {
1150+
Ok = 0,
1151+
IncompatibleStreamingModes = 1,
1152+
MismatchedStreamingCompatibility = 1 << 1,
1153+
CalleeHasNewZA = 1 << 2,
1154+
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/CalleeHasNewZA),
1155+
};
11511156

1157+
/// Determines if there are any streaming ABI issues with inlining \p Callee
1158+
/// into \p Caller. Returns the issues in the ArmStreamingInlinability bit enum
1159+
/// (multiple bits can be set).
1160+
static ArmStreamingInlinability
1161+
GetArmStreamingInlinability(const FunctionDecl *Caller,
1162+
const FunctionDecl *Callee) {
11521163
bool CallerIsStreaming =
11531164
IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
11541165
bool CalleeIsStreaming =
11551166
IsArmStreamingFunction(Callee, /*IncludeLocallyStreaming=*/true);
11561167
bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
11571168
bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
11581169

1170+
ArmStreamingInlinability Inlinability = ArmStreamingInlinability::Ok;
1171+
11591172
if (!CalleeIsStreamingCompatible &&
1160-
(CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible))
1173+
(CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
1174+
Inlinability |= ArmStreamingInlinability::MismatchedStreamingCompatibility;
1175+
if (CalleeIsStreaming)
1176+
Inlinability |= ArmStreamingInlinability::IncompatibleStreamingModes;
1177+
}
1178+
if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
1179+
if (NewAttr->isNewZA())
1180+
Inlinability |= ArmStreamingInlinability::CalleeHasNewZA;
1181+
1182+
return Inlinability;
1183+
}
1184+
1185+
void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
1186+
CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
1187+
const FunctionDecl *Callee) const {
1188+
if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
1189+
return;
1190+
1191+
ArmStreamingInlinability Inlinability =
1192+
GetArmStreamingInlinability(Caller, Callee);
1193+
1194+
if (bool(Inlinability &
1195+
ArmStreamingInlinability::MismatchedStreamingCompatibility))
11611196
CGM.getDiags().Report(
1162-
CallLoc, CalleeIsStreaming
1197+
CallLoc, bool(Inlinability &
1198+
ArmStreamingInlinability::IncompatibleStreamingModes)
11631199
? diag::err_function_always_inline_attribute_mismatch
11641200
: diag::warn_function_always_inline_attribute_mismatch)
11651201
<< Caller->getDeclName() << Callee->getDeclName() << "streaming";
1166-
if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
1167-
if (NewAttr->isNewZA())
1168-
CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
1169-
<< Callee->getDeclName();
1202+
if (bool(Inlinability & ArmStreamingInlinability::CalleeHasNewZA))
1203+
CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
1204+
<< Callee->getDeclName();
11701205
}
11711206

11721207
// If the target does not have floating-point registers, but we are using a
@@ -1200,6 +1235,13 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
12001235
checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType);
12011236
}
12021237

1238+
bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
1239+
const FunctionDecl *Caller, const FunctionDecl *Callee) const {
1240+
return Caller && Callee &&
1241+
GetArmStreamingInlinability(Caller, Callee) !=
1242+
ArmStreamingInlinability::Ok;
1243+
}
1244+
12031245
void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
12041246
unsigned Index,
12051247
raw_ostream &Out) const {
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -o - | FileCheck %s
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
extern void was_inlined(void);
6+
7+
#define __flatten __attribute__((flatten))
8+
void fn(void) { was_inlined(); }
9+
void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); }
10+
void fn_streaming(void) __arm_streaming { was_inlined(); }
11+
__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); }
12+
__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); }
13+
14+
__flatten
15+
void caller(void) {
16+
fn();
17+
fn_streaming_compatible();
18+
fn_streaming();
19+
fn_locally_streaming();
20+
fn_streaming_new_za();
21+
}
22+
// CHECK-LABEL: void @caller()
23+
// CHECK-NEXT: entry:
24+
// CHECK-NEXT: call void @was_inlined
25+
// CHECK-NEXT: call void @was_inlined
26+
// CHECK-NEXT: call void @fn_streaming
27+
// CHECK-NEXT: call void @fn_locally_streaming
28+
// CHECK-NEXT: call void @fn_streaming_new_za
29+
30+
__flatten void caller_streaming_compatible(void) __arm_streaming_compatible {
31+
fn();
32+
fn_streaming_compatible();
33+
fn_streaming();
34+
fn_locally_streaming();
35+
fn_streaming_new_za();
36+
}
37+
// CHECK-LABEL: void @caller_streaming_compatible()
38+
// CHECK-NEXT: entry:
39+
// CHECK-NEXT: call void @fn
40+
// CHECK-NEXT: call void @was_inlined
41+
// CHECK-NEXT: call void @fn_streaming
42+
// CHECK-NEXT: call void @fn_locally_streaming
43+
// CHECK-NEXT: call void @fn_streaming_new_za
44+
45+
__flatten void caller_streaming(void) __arm_streaming {
46+
fn();
47+
fn_streaming_compatible();
48+
fn_streaming();
49+
fn_locally_streaming();
50+
fn_streaming_new_za();
51+
}
52+
// CHECK-LABEL: void @caller_streaming()
53+
// CHECK-NEXT: entry:
54+
// CHECK-NEXT: call void @fn
55+
// CHECK-NEXT: call void @was_inlined
56+
// CHECK-NEXT: call void @was_inlined
57+
// CHECK-NEXT: call void @was_inlined
58+
// CHECK-NEXT: call void @fn_streaming_new_za
59+
60+
__flatten __arm_locally_streaming
61+
void caller_locally_streaming(void) {
62+
fn();
63+
fn_streaming_compatible();
64+
fn_streaming();
65+
fn_locally_streaming();
66+
fn_streaming_new_za();
67+
}
68+
// CHECK-LABEL: void @caller_locally_streaming()
69+
// CHECK-NEXT: entry:
70+
// CHECK-NEXT: call void @fn
71+
// CHECK-NEXT: call void @was_inlined
72+
// CHECK-NEXT: call void @was_inlined
73+
// CHECK-NEXT: call void @was_inlined
74+
// CHECK-NEXT: call void @fn_streaming_new_za

0 commit comments

Comments
 (0)