Skip to content

Commit 1df7b51

Browse files
[Sema] Suggest missing format attributes (#166738)
Implements the `-Wmissing-format-attribute` diagnostic as a subgroup of `-Wformat-nonliteral`. It suggests adding format attributes to function declarations that call other format functions and pass the format string to them. This is an updated implementation of #105479. --------- Co-authored-by: Budimir Arandjelovic <[email protected]>
1 parent 6b040b4 commit 1df7b51

File tree

14 files changed

+717
-60
lines changed

14 files changed

+717
-60
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,10 @@ Improvements to Clang's diagnostics
460460
- A new warning ``-Wshadow-header`` has been added to detect when a header file
461461
is found in multiple search directories (excluding system paths).
462462

463+
- Clang now detects potential missing format and format_matches attributes on function,
464+
Objective-C method and block declarations when calling format functions. It is part
465+
of the format-nonliteral diagnostic (#GH60718)
466+
463467
Improvements to Clang's time-trace
464468
----------------------------------
465469

clang/include/clang/Basic/DiagnosticGroups.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,6 @@ def MainReturnType : DiagGroup<"main-return-type">;
609609
def MaxUnsignedZero : DiagGroup<"max-unsigned-zero">;
610610
def MissingBraces : DiagGroup<"missing-braces">;
611611
def MissingDeclarations: DiagGroup<"missing-declarations">;
612-
def : DiagGroup<"missing-format-attribute">;
613612
def MissingIncludeDirs : DiagGroup<"missing-include-dirs">;
614613
def MissingNoreturn : DiagGroup<"missing-noreturn">;
615614
def MultiChar : DiagGroup<"multichar">;
@@ -1188,6 +1187,7 @@ def FormatY2K : DiagGroup<"format-y2k">;
11881187
def FormatPedantic : DiagGroup<"format-pedantic">;
11891188
def FormatSignedness : DiagGroup<"format-signedness">;
11901189
def FormatTypeConfusion : DiagGroup<"format-type-confusion">;
1190+
def MissingFormatAttribute : DiagGroup<"missing-format-attribute">;
11911191

11921192
def FormatOverflowNonKprintf: DiagGroup<"format-overflow-non-kprintf">;
11931193
def FormatOverflow: DiagGroup<"format-overflow", [FormatOverflowNonKprintf]>;
@@ -1199,7 +1199,7 @@ def Format : DiagGroup<"format",
11991199
FormatSecurity, FormatY2K, FormatInvalidSpecifier,
12001200
FormatInsufficientArgs, FormatOverflow, FormatTruncation]>,
12011201
DiagCategory<"Format String Issue">;
1202-
def FormatNonLiteral : DiagGroup<"format-nonliteral">;
1202+
def FormatNonLiteral : DiagGroup<"format-nonliteral", [MissingFormatAttribute]>;
12031203
def Format2 : DiagGroup<"format=2",
12041204
[FormatNonLiteral, FormatSecurity, FormatY2K]>;
12051205

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3496,6 +3496,10 @@ def err_format_attribute_result_not : Error<"function does not return %0">;
34963496
def err_format_attribute_implicit_this_format_string : Error<
34973497
"format attribute cannot specify the implicit this argument as the format "
34983498
"string">;
3499+
def warn_missing_format_attribute : Warning<
3500+
"diagnostic behavior may be improved by adding the '%0' attribute to the "
3501+
"declaration of %1">,
3502+
InGroup<MissingFormatAttribute>, DefaultIgnore;
34993503
def err_callback_attribute_no_callee : Error<
35003504
"'callback' attribute specifies no callback callee">;
35013505
def err_callback_attribute_invalid_callee : Error<

clang/include/clang/Sema/Sema.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3019,7 +3019,7 @@ class Sema final : public SemaBase {
30193019
llvm::SmallBitVector &CheckedVarArgs);
30203020
bool CheckFormatArguments(ArrayRef<const Expr *> Args,
30213021
FormatArgumentPassingKind FAPK,
3022-
const StringLiteral *ReferenceFormatString,
3022+
StringLiteral *ReferenceFormatString,
30233023
unsigned format_idx, unsigned firstDataArg,
30243024
FormatStringType Type, VariadicCallType CallType,
30253025
SourceLocation Loc, SourceRange range,

clang/lib/Sema/SemaChecking.cpp

Lines changed: 182 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6609,13 +6609,16 @@ static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
66096609
// If this function returns false on the arguments to a function expecting a
66106610
// format string, we will usually need to emit a warning.
66116611
// True string literals are then checked by CheckFormatString.
6612-
static StringLiteralCheckType checkFormatStringExpr(
6613-
Sema &S, const StringLiteral *ReferenceFormatString, const Expr *E,
6614-
ArrayRef<const Expr *> Args, Sema::FormatArgumentPassingKind APK,
6615-
unsigned format_idx, unsigned firstDataArg, FormatStringType Type,
6616-
VariadicCallType CallType, bool InFunctionCall,
6617-
llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
6618-
llvm::APSInt Offset, bool IgnoreStringsWithoutSpecifiers = false) {
6612+
static StringLiteralCheckType
6613+
checkFormatStringExpr(Sema &S, const StringLiteral *ReferenceFormatString,
6614+
const Expr *E, ArrayRef<const Expr *> Args,
6615+
Sema::FormatArgumentPassingKind APK, unsigned format_idx,
6616+
unsigned firstDataArg, FormatStringType Type,
6617+
VariadicCallType CallType, bool InFunctionCall,
6618+
llvm::SmallBitVector &CheckedVarArgs,
6619+
UncoveredArgHandler &UncoveredArg, llvm::APSInt Offset,
6620+
std::optional<unsigned> *CallerFormatParamIdx = nullptr,
6621+
bool IgnoreStringsWithoutSpecifiers = false) {
66196622
if (S.isConstantEvaluatedContext())
66206623
return SLCT_NotALiteral;
66216624
tryAgain:
@@ -6637,10 +6640,11 @@ static StringLiteralCheckType checkFormatStringExpr(
66376640
case Stmt::InitListExprClass:
66386641
// Handle expressions like {"foobar"}.
66396642
if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) {
6640-
return checkFormatStringExpr(
6641-
S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
6642-
Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
6643-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
6643+
return checkFormatStringExpr(S, ReferenceFormatString, SLE, Args, APK,
6644+
format_idx, firstDataArg, Type, CallType,
6645+
/*InFunctionCall*/ false, CheckedVarArgs,
6646+
UncoveredArg, Offset, CallerFormatParamIdx,
6647+
IgnoreStringsWithoutSpecifiers);
66446648
}
66456649
return SLCT_NotALiteral;
66466650
case Stmt::BinaryConditionalOperatorClass:
@@ -6672,10 +6676,11 @@ static StringLiteralCheckType checkFormatStringExpr(
66726676
if (!CheckLeft)
66736677
Left = SLCT_UncheckedLiteral;
66746678
else {
6675-
Left = checkFormatStringExpr(
6676-
S, ReferenceFormatString, C->getTrueExpr(), Args, APK, format_idx,
6677-
firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
6678-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
6679+
Left = checkFormatStringExpr(S, ReferenceFormatString, C->getTrueExpr(),
6680+
Args, APK, format_idx, firstDataArg, Type,
6681+
CallType, InFunctionCall, CheckedVarArgs,
6682+
UncoveredArg, Offset, CallerFormatParamIdx,
6683+
IgnoreStringsWithoutSpecifiers);
66796684
if (Left == SLCT_NotALiteral || !CheckRight) {
66806685
return Left;
66816686
}
@@ -6684,7 +6689,8 @@ static StringLiteralCheckType checkFormatStringExpr(
66846689
StringLiteralCheckType Right = checkFormatStringExpr(
66856690
S, ReferenceFormatString, C->getFalseExpr(), Args, APK, format_idx,
66866691
firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
6687-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
6692+
UncoveredArg, Offset, CallerFormatParamIdx,
6693+
IgnoreStringsWithoutSpecifiers);
66886694

66896695
return (CheckLeft && Left < Right) ? Left : Right;
66906696
}
@@ -6735,8 +6741,8 @@ static StringLiteralCheckType checkFormatStringExpr(
67356741
}
67366742
return checkFormatStringExpr(
67376743
S, ReferenceFormatString, Init, Args, APK, format_idx,
6738-
firstDataArg, Type, CallType,
6739-
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset);
6744+
firstDataArg, Type, CallType, /*InFunctionCall=*/false,
6745+
CheckedVarArgs, UncoveredArg, Offset, CallerFormatParamIdx);
67406746
}
67416747
}
67426748

@@ -6788,6 +6794,8 @@ static StringLiteralCheckType checkFormatStringExpr(
67886794
// format arguments, in all cases.
67896795
//
67906796
if (const auto *PV = dyn_cast<ParmVarDecl>(VD)) {
6797+
if (CallerFormatParamIdx)
6798+
*CallerFormatParamIdx = PV->getFunctionScopeIndex();
67916799
if (const auto *D = dyn_cast<Decl>(PV->getDeclContext())) {
67926800
for (const auto *PVFormatMatches :
67936801
D->specific_attrs<FormatMatchesAttr>()) {
@@ -6813,7 +6821,7 @@ static StringLiteralCheckType checkFormatStringExpr(
68136821
S, ReferenceFormatString, PVFormatMatches->getFormatString(),
68146822
Args, APK, format_idx, firstDataArg, Type, CallType,
68156823
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
6816-
Offset, IgnoreStringsWithoutSpecifiers);
6824+
Offset, CallerFormatParamIdx, IgnoreStringsWithoutSpecifiers);
68176825
}
68186826
}
68196827

@@ -6868,7 +6876,7 @@ static StringLiteralCheckType checkFormatStringExpr(
68686876
StringLiteralCheckType Result = checkFormatStringExpr(
68696877
S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
68706878
Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
6871-
Offset, IgnoreStringsWithoutSpecifiers);
6879+
Offset, CallerFormatParamIdx, IgnoreStringsWithoutSpecifiers);
68726880
if (IsFirst) {
68736881
CommonResult = Result;
68746882
IsFirst = false;
@@ -6885,15 +6893,17 @@ static StringLiteralCheckType checkFormatStringExpr(
68856893
return checkFormatStringExpr(
68866894
S, ReferenceFormatString, Arg, Args, APK, format_idx,
68876895
firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
6888-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
6896+
UncoveredArg, Offset, CallerFormatParamIdx,
6897+
IgnoreStringsWithoutSpecifiers);
68896898
}
68906899
}
68916900
}
68926901
if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E))
6893-
return checkFormatStringExpr(
6894-
S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
6895-
Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
6896-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
6902+
return checkFormatStringExpr(S, ReferenceFormatString, SLE, Args, APK,
6903+
format_idx, firstDataArg, Type, CallType,
6904+
/*InFunctionCall*/ false, CheckedVarArgs,
6905+
UncoveredArg, Offset, CallerFormatParamIdx,
6906+
IgnoreStringsWithoutSpecifiers);
68976907
return SLCT_NotALiteral;
68986908
}
68996909
case Stmt::ObjCMessageExprClass: {
@@ -6919,7 +6929,7 @@ static StringLiteralCheckType checkFormatStringExpr(
69196929
return checkFormatStringExpr(
69206930
S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
69216931
Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
6922-
Offset, IgnoreStringsWithoutSpecifiers);
6932+
Offset, CallerFormatParamIdx, IgnoreStringsWithoutSpecifiers);
69236933
}
69246934
}
69256935

@@ -7099,9 +7109,142 @@ bool Sema::CheckFormatString(const FormatMatchesAttr *Format,
70997109
return false;
71007110
}
71017111

7112+
static bool CheckMissingFormatAttribute(
7113+
Sema *S, ArrayRef<const Expr *> Args, Sema::FormatArgumentPassingKind APK,
7114+
StringLiteral *ReferenceFormatString, unsigned FormatIdx,
7115+
unsigned FirstDataArg, FormatStringType FormatType, unsigned CallerParamIdx,
7116+
SourceLocation Loc) {
7117+
if (S->getDiagnostics().isIgnored(diag::warn_missing_format_attribute, Loc))
7118+
return false;
7119+
7120+
DeclContext *DC = S->CurContext;
7121+
if (!isa<ObjCMethodDecl>(DC) && !isa<FunctionDecl>(DC) && !isa<BlockDecl>(DC))
7122+
return false;
7123+
Decl *Caller = cast<Decl>(DC)->getCanonicalDecl();
7124+
7125+
unsigned NumCallerParams = getFunctionOrMethodNumParams(Caller);
7126+
7127+
// Find the offset to convert between attribute and parameter indexes.
7128+
unsigned CallerArgumentIndexOffset =
7129+
hasImplicitObjectParameter(Caller) ? 2 : 1;
7130+
7131+
unsigned FirstArgumentIndex = -1;
7132+
switch (APK) {
7133+
case Sema::FormatArgumentPassingKind::FAPK_Fixed:
7134+
case Sema::FormatArgumentPassingKind::FAPK_Variadic: {
7135+
// As an extension, clang allows the format attribute on non-variadic
7136+
// functions.
7137+
// Caller must have fixed arguments to pass them to a fixed or variadic
7138+
// function. Try to match caller and callee arguments. If successful, then
7139+
// emit a diag with the caller idx, otherwise we can't determine the callee
7140+
// arguments.
7141+
unsigned NumCalleeArgs = Args.size() - FirstDataArg;
7142+
if (NumCalleeArgs == 0 || NumCallerParams < NumCalleeArgs) {
7143+
// There aren't enough arguments in the caller to pass to callee.
7144+
return false;
7145+
}
7146+
for (unsigned CalleeIdx = Args.size() - 1, CallerIdx = NumCallerParams - 1;
7147+
CalleeIdx >= FirstDataArg; --CalleeIdx, --CallerIdx) {
7148+
const auto *Arg =
7149+
dyn_cast<DeclRefExpr>(Args[CalleeIdx]->IgnoreParenCasts());
7150+
if (!Arg)
7151+
return false;
7152+
const auto *Param = dyn_cast<ParmVarDecl>(Arg->getDecl());
7153+
if (!Param || Param->getFunctionScopeIndex() != CallerIdx)
7154+
return false;
7155+
}
7156+
FirstArgumentIndex =
7157+
NumCallerParams + CallerArgumentIndexOffset - NumCalleeArgs;
7158+
break;
7159+
}
7160+
case Sema::FormatArgumentPassingKind::FAPK_VAList:
7161+
// Caller arguments are either variadic or a va_list.
7162+
FirstArgumentIndex = isFunctionOrMethodVariadic(Caller)
7163+
? (NumCallerParams + CallerArgumentIndexOffset)
7164+
: 0;
7165+
break;
7166+
case Sema::FormatArgumentPassingKind::FAPK_Elsewhere:
7167+
// The callee has a format_matches attribute. We will emit that instead.
7168+
if (!ReferenceFormatString)
7169+
return false;
7170+
break;
7171+
}
7172+
7173+
// Emit the diagnostic and fixit.
7174+
unsigned FormatStringIndex = CallerParamIdx + CallerArgumentIndexOffset;
7175+
StringRef FormatTypeName = S->GetFormatStringTypeName(FormatType);
7176+
NamedDecl *ND = dyn_cast<NamedDecl>(Caller);
7177+
do {
7178+
std::string Attr, Fixit;
7179+
llvm::raw_string_ostream AttrOS(Attr);
7180+
if (APK != Sema::FormatArgumentPassingKind::FAPK_Elsewhere) {
7181+
AttrOS << "format(" << FormatTypeName << ", " << FormatStringIndex << ", "
7182+
<< FirstArgumentIndex << ")";
7183+
} else {
7184+
AttrOS << "format_matches(" << FormatTypeName << ", " << FormatStringIndex
7185+
<< ", \"";
7186+
AttrOS.write_escaped(ReferenceFormatString->getString());
7187+
AttrOS << "\")";
7188+
}
7189+
AttrOS.flush();
7190+
auto DB = S->Diag(Loc, diag::warn_missing_format_attribute) << Attr;
7191+
if (ND)
7192+
DB << ND;
7193+
else
7194+
DB << "block";
7195+
7196+
// Blocks don't provide a correct end loc, so skip emitting a fixit.
7197+
if (isa<BlockDecl>(Caller))
7198+
break;
7199+
7200+
SourceLocation SL;
7201+
llvm::raw_string_ostream IS(Fixit);
7202+
// The attribute goes at the start of the declaration in C/C++ functions
7203+
// and methods, but after the declaration for Objective-C methods.
7204+
if (isa<ObjCMethodDecl>(Caller)) {
7205+
IS << ' ';
7206+
SL = Caller->getEndLoc();
7207+
}
7208+
const LangOptions &LO = S->getLangOpts();
7209+
if (LO.C23 || LO.CPlusPlus11)
7210+
IS << "[[gnu::" << Attr << "]]";
7211+
else if (LO.ObjC || LO.GNUMode)
7212+
IS << "__attribute__((" << Attr << "))";
7213+
else
7214+
break;
7215+
if (!isa<ObjCMethodDecl>(Caller)) {
7216+
IS << ' ';
7217+
SL = Caller->getBeginLoc();
7218+
}
7219+
IS.flush();
7220+
7221+
DB << FixItHint::CreateInsertion(SL, Fixit);
7222+
} while (false);
7223+
7224+
// Add implicit format or format_matches attribute.
7225+
if (APK != Sema::FormatArgumentPassingKind::FAPK_Elsewhere) {
7226+
Caller->addAttr(FormatAttr::CreateImplicit(
7227+
S->getASTContext(), &S->getASTContext().Idents.get(FormatTypeName),
7228+
FormatStringIndex, FirstArgumentIndex));
7229+
} else {
7230+
Caller->addAttr(FormatMatchesAttr::CreateImplicit(
7231+
S->getASTContext(), &S->getASTContext().Idents.get(FormatTypeName),
7232+
FormatStringIndex, ReferenceFormatString));
7233+
}
7234+
7235+
{
7236+
auto DB = S->Diag(Caller->getLocation(), diag::note_entity_declared_at);
7237+
if (ND)
7238+
DB << ND;
7239+
else
7240+
DB << "block";
7241+
}
7242+
return true;
7243+
}
7244+
71027245
bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
71037246
Sema::FormatArgumentPassingKind APK,
7104-
const StringLiteral *ReferenceFormatString,
7247+
StringLiteral *ReferenceFormatString,
71057248
unsigned format_idx, unsigned firstDataArg,
71067249
FormatStringType Type,
71077250
VariadicCallType CallType, SourceLocation Loc,
@@ -7128,11 +7271,12 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
71287271
// ObjC string uses the same format specifiers as C string, so we can use
71297272
// the same format string checking logic for both ObjC and C strings.
71307273
UncoveredArgHandler UncoveredArg;
7274+
std::optional<unsigned> CallerParamIdx;
71317275
StringLiteralCheckType CT = checkFormatStringExpr(
71327276
*this, ReferenceFormatString, OrigFormatExpr, Args, APK, format_idx,
71337277
firstDataArg, Type, CallType,
71347278
/*IsFunctionCall*/ true, CheckedVarArgs, UncoveredArg,
7135-
/*no string offset*/ llvm::APSInt(64, false) = 0);
7279+
/*no string offset*/ llvm::APSInt(64, false) = 0, &CallerParamIdx);
71367280

71377281
// Generate a diagnostic where an uncovered argument is detected.
71387282
if (UncoveredArg.hasUncoveredArg()) {
@@ -7145,11 +7289,6 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
71457289
// Literal format string found, check done!
71467290
return CT == SLCT_CheckedLiteral;
71477291

7148-
// Strftime is particular as it always uses a single 'time' argument,
7149-
// so it is safe to pass a non-literal string.
7150-
if (Type == FormatStringType::Strftime)
7151-
return false;
7152-
71537292
// Do not emit diag when the string param is a macro expansion and the
71547293
// format is either NSString or CFString. This is a hack to prevent
71557294
// diag when using the NSLocalizedString and CFCopyLocalizedString macros
@@ -7159,6 +7298,16 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
71597298
SourceMgr.isInSystemMacro(FormatLoc))
71607299
return false;
71617300

7301+
if (CallerParamIdx && CheckMissingFormatAttribute(
7302+
this, Args, APK, ReferenceFormatString, format_idx,
7303+
firstDataArg, Type, *CallerParamIdx, Loc))
7304+
return false;
7305+
7306+
// Strftime is particular as it always uses a single 'time' argument,
7307+
// so it is safe to pass a non-literal string.
7308+
if (Type == FormatStringType::Strftime)
7309+
return false;
7310+
71627311
// If there are no arguments specified, warn with -Wformat-security, otherwise
71637312
// warn only with -Wformat-nonliteral.
71647313
if (Args.size() == firstDataArg) {

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3634,7 +3634,7 @@ static void handleEnumExtensibilityAttr(Sema &S, Decl *D,
36343634
}
36353635

36363636
/// Handle __attribute__((format_arg((idx)))) attribute based on
3637-
/// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
3637+
/// https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html
36383638
static void handleFormatArgAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
36393639
const Expr *IdxExpr = AL.getArgAsExpr(0);
36403640
ParamIdx Idx;
@@ -3833,7 +3833,7 @@ struct FormatAttrCommon {
38333833
};
38343834

38353835
/// Handle __attribute__((format(type,idx,firstarg))) attributes based on
3836-
/// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
3836+
/// https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html
38373837
static bool handleFormatAttrCommon(Sema &S, Decl *D, const ParsedAttr &AL,
38383838
FormatAttrCommon *Info) {
38393839
// Checks the first two arguments of the attribute; this is shared between

0 commit comments

Comments
 (0)