Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,8 @@ Improvements to Clang's diagnostics
or continue (#GH166013)
- Clang now emits a diagnostic in case `vector_size` or `ext_vector_type`
attributes are used with a negative size (#GH165463).
- Clang now detects potential missing format attributes on function declarations
when calling format functions. (#GH60718)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#60718 seems to be closed already?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That issue was closed by #70024, but it was later reverted. Then #105479 was created to fix the issues, but never merged.

This is an updated version of #105479 with most comments addressed.


Improvements to Clang's time-trace
----------------------------------
Expand Down
1 change: 0 additions & 1 deletion clang/include/clang/Basic/DiagnosticGroups.td
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,6 @@ def MainReturnType : DiagGroup<"main-return-type">;
def MaxUnsignedZero : DiagGroup<"max-unsigned-zero">;
def MissingBraces : DiagGroup<"missing-braces">;
def MissingDeclarations: DiagGroup<"missing-declarations">;
def : DiagGroup<"missing-format-attribute">;
def MissingIncludeDirs : DiagGroup<"missing-include-dirs">;
def MissingNoreturn : DiagGroup<"missing-noreturn">;
def MultiChar : DiagGroup<"multichar">;
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -3478,6 +3478,11 @@ def err_format_attribute_result_not : Error<"function does not return %0">;
def err_format_attribute_implicit_this_format_string : Error<
"format attribute cannot specify the implicit this argument as the format "
"string">;
def warn_missing_format_attribute
: Warning<"diagnostic behavior may be improved by adding the '%0' format "
"attribute to the declaration of %1">,
InGroup<DiagGroup<"missing-format-attribute">>,
DefaultIgnore;
def err_callback_attribute_no_callee : Error<
"'callback' attribute specifies no callback callee">;
def err_callback_attribute_invalid_callee : Error<
Expand Down
125 changes: 103 additions & 22 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6517,7 +6517,8 @@ static StringLiteralCheckType checkFormatStringExpr(
unsigned format_idx, unsigned firstDataArg, FormatStringType Type,
VariadicCallType CallType, bool InFunctionCall,
llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
llvm::APSInt Offset, bool IgnoreStringsWithoutSpecifiers = false) {
llvm::APSInt Offset, std::optional<unsigned> *CallerParamIdx = nullptr,
bool IgnoreStringsWithoutSpecifiers = false) {
if (S.isConstantEvaluatedContext())
return SLCT_NotALiteral;
tryAgain:
Expand All @@ -6542,7 +6543,7 @@ static StringLiteralCheckType checkFormatStringExpr(
return checkFormatStringExpr(
S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
UncoveredArg, Offset, CallerParamIdx, IgnoreStringsWithoutSpecifiers);
}
return SLCT_NotALiteral;
case Stmt::BinaryConditionalOperatorClass:
Expand Down Expand Up @@ -6577,7 +6578,7 @@ static StringLiteralCheckType checkFormatStringExpr(
Left = checkFormatStringExpr(
S, ReferenceFormatString, C->getTrueExpr(), Args, APK, format_idx,
firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
UncoveredArg, Offset, CallerParamIdx, IgnoreStringsWithoutSpecifiers);
if (Left == SLCT_NotALiteral || !CheckRight) {
return Left;
}
Expand All @@ -6586,7 +6587,7 @@ static StringLiteralCheckType checkFormatStringExpr(
StringLiteralCheckType Right = checkFormatStringExpr(
S, ReferenceFormatString, C->getFalseExpr(), Args, APK, format_idx,
firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
UncoveredArg, Offset, CallerParamIdx, IgnoreStringsWithoutSpecifiers);

return (CheckLeft && Left < Right) ? Left : Right;
}
Expand Down Expand Up @@ -6635,10 +6636,11 @@ static StringLiteralCheckType checkFormatStringExpr(
if (InitList->isStringLiteralInit())
Init = InitList->getInit(0)->IgnoreParenImpCasts();
}
return checkFormatStringExpr(
S, ReferenceFormatString, Init, Args, APK, format_idx,
firstDataArg, Type, CallType,
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset);
return checkFormatStringExpr(S, ReferenceFormatString, Init, Args,
APK, format_idx, firstDataArg, Type,
CallType,
/*InFunctionCall*/ false, CheckedVarArgs,
UncoveredArg, Offset, CallerParamIdx);
}
}

Expand Down Expand Up @@ -6690,6 +6692,8 @@ static StringLiteralCheckType checkFormatStringExpr(
// format arguments, in all cases.
//
if (const auto *PV = dyn_cast<ParmVarDecl>(VD)) {
if (CallerParamIdx)
*CallerParamIdx = PV->getFunctionScopeIndex();
if (const auto *D = dyn_cast<Decl>(PV->getDeclContext())) {
for (const auto *PVFormatMatches :
D->specific_attrs<FormatMatchesAttr>()) {
Expand All @@ -6715,7 +6719,7 @@ static StringLiteralCheckType checkFormatStringExpr(
S, ReferenceFormatString, PVFormatMatches->getFormatString(),
Args, APK, format_idx, firstDataArg, Type, CallType,
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
Offset, IgnoreStringsWithoutSpecifiers);
Offset, CallerParamIdx, IgnoreStringsWithoutSpecifiers);
}
}

Expand Down Expand Up @@ -6770,7 +6774,7 @@ static StringLiteralCheckType checkFormatStringExpr(
StringLiteralCheckType Result = checkFormatStringExpr(
S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
Offset, IgnoreStringsWithoutSpecifiers);
Offset, CallerParamIdx, IgnoreStringsWithoutSpecifiers);
if (IsFirst) {
CommonResult = Result;
IsFirst = false;
Expand All @@ -6784,18 +6788,19 @@ static StringLiteralCheckType checkFormatStringExpr(
if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {
const Expr *Arg = CE->getArg(0);
return checkFormatStringExpr(
S, ReferenceFormatString, Arg, Args, APK, format_idx,
firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
return checkFormatStringExpr(S, ReferenceFormatString, Arg, Args, APK,
format_idx, firstDataArg, Type, CallType,
InFunctionCall, CheckedVarArgs,
UncoveredArg, Offset, CallerParamIdx,
IgnoreStringsWithoutSpecifiers);
}
}
}
if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E))
return checkFormatStringExpr(
S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
UncoveredArg, Offset, CallerParamIdx, IgnoreStringsWithoutSpecifiers);
return SLCT_NotALiteral;
}
case Stmt::ObjCMessageExprClass: {
Expand All @@ -6821,7 +6826,7 @@ static StringLiteralCheckType checkFormatStringExpr(
return checkFormatStringExpr(
S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
Offset, IgnoreStringsWithoutSpecifiers);
Offset, CallerParamIdx, IgnoreStringsWithoutSpecifiers);
}
}

Expand Down Expand Up @@ -7001,6 +7006,77 @@ bool Sema::CheckFormatString(const FormatMatchesAttr *Format,
return false;
}

static void CheckMissingFormatAttributes(Sema *S, FormatStringType FormatType,
unsigned FormatIdx, unsigned FirstArg,
ArrayRef<const Expr *> Args,
Sema::FormatArgumentPassingKind APK,
unsigned CallerParamIdx,
SourceLocation Loc) {
const FunctionDecl *Caller = S->getCurFunctionDecl();
if (!Caller)
return;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will always fail in Objective-C because ObjCMethodDecl does not inherit from FunctionDecl. Would you add an Objective-C test to ensure this works? If you're not super familiar with it, the declaration/use syntax should be:

#include <stdarg.h>

@interface Foo
-(void)myprintf:(const char *)fmt, ... __attribute__((format(printf, 1, 2)));
-(void)myvprintf:(const char *)fmt list:(va_list)ap __attribute__((format(printf, 1, 0)));
@end

void bar(Foo *f) {
    [f myprintf:"hello %i", 123];
}

void baz(Foo *f, const char *fmt, va_list ap) {
    [f myvprintf:fmt list:ap];
}

Happy to answer other questions you may have.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this also always fails for blocks, but it matters less because __attribute__((format)) is sugar so adding the attribute on the block type doesn't guarantee that the block you receive also has it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not familiar with objective-c and there doesn't seem to be much documentation regarding attributes, but I think I have a working implementation. Just one question: is there an implicit 'this' argument on obj-c methods?

I'll have to look into blocks to see how they fit in here.

Copy link
Contributor

@apple-fcloutier apple-fcloutier Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Objective-C methods have two implicit arguments (self and _cmd), but IIRC index 1 corresponds to the first explicit argument (ie, not like how it's done for C++).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added support for Objective-C. Let me know if there are other cases I should test. Also, I wasn't sure which attribute syntax is supported, so I used the GNU extension.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems to cover it, thank you!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested where Objective-C tolerates attributes, and the fixit is incorrect currently:

#include <stdarg.h>

__attribute__((format(printf, 1, 0)))
int vprintf(const char *, va_list ap);

@interface Foo
-(void)printf:(const char *)fmt, ...;
@end

@implementation Foo
-(void)printf:(const char *)fmt, ...  {
	va_list ap;
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}
@end

The fixit will suggest __attribute__((format(printf, 1, 2))) -(void)printf:(const char *)fmt, ... {, which is a syntax error. Attributes go at the end of the declaration, so it should be -(void)printf:(const char *)fmt, ... __attribute__((...)).

To make it as effective as possible, the fixit should also go to the canonical declaration, as returned by getCanonicalDecl(), which seems to do the right thing for both FunctionDecl (returns the first decl) and ObjCMethodDecl (finds the decl in the relevant @interface block).

Here's a patch for your change which I think does the trick (but that I didn't test extensively): objc-placement.patch


// Find the offset to convert between attribute and parameter indexes.
unsigned CallerArgumentIndexOffset =
hasImplicitObjectParameter(Caller) ? 2 : 1;

unsigned FirstArgumentIndex = -1;
switch (APK) {
case Sema::FormatArgumentPassingKind::FAPK_Fixed:
case Sema::FormatArgumentPassingKind::FAPK_Variadic: {
// As an extension, clang allows the format attribute on non-variadic
// functions.
// Caller must have fixed arguments to pass them to a fixed or variadic
// function. Try to match caller and callee arguments. If successful, then
// emit a diag with the caller idx, otherwise we can't determine the callee
// arguments.
unsigned NumCalleeArgs = Args.size() - FirstArg;
if (NumCalleeArgs == 0 || Caller->getNumParams() < NumCalleeArgs) {
// There aren't enough arugments in the caller to pass to callee.
return;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's possible in this case that one could use __attribute__((format_matches)) instead, which I believe I landed between your last PR and this one. I haven't thought very deeply about whether you have all the information you need here to do it, though, so there's a decent likelihood that you would look at it and decide it's not actually possible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd have to generate a format string for format_matches which I guess should be doable based on the argument types in the call, unless some non-scalar type is used.

}
for (unsigned CalleeIdx = Args.size() - 1,
CallerIdx = Caller->getNumParams() - 1;
CalleeIdx >= FirstArg; --CalleeIdx, --CallerIdx) {
const auto *Arg =
dyn_cast<DeclRefExpr>(Args[CalleeIdx]->IgnoreParenCasts());
if (!Arg)
return;
const auto *Param = dyn_cast<ParmVarDecl>(Arg->getDecl());
if (!Param || Param->getFunctionScopeIndex() != CallerIdx)
return;
}
FirstArgumentIndex =
Caller->getNumParams() + CallerArgumentIndexOffset - NumCalleeArgs;
break;
}
case Sema::FormatArgumentPassingKind::FAPK_VAList:
// Caller arguments are either variadic or a va_list.
FirstArgumentIndex =
Caller->isVariadic()
? (Caller->getNumParams() + CallerArgumentIndexOffset)
: 0;
break;
case Sema::FormatArgumentPassingKind::FAPK_Elsewhere:
// Args are not passed to the callee.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should be able to suggest a format_matches attribute here, though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking further into this, I can suggest format_matches in a diagnostic message, but there are some problematic cases for creating the actual format string parameter to format_matches from the call arguments. From the attribute docs:

While checks for the format attribute tolerate sone size mismatches that standard argument promotion renders immaterial (such as formatting an int with %hhd, which specifies a char-sized integer), checks for format_matches require specified argument sizes to match exactly.

In this case the actual format string might be "%d", but pass a char argument. That is correct because of arg promotion, but based on the arg type, I would deduce this as "%c".

Format strings expecting a variable modifier (such as %*s) are incompatible with format strings that would itemize the variable modifiers (such as %i %s), even if the two specify ABI-compatible argument lists.

This is a bigger problem as it can't be deduced only based on the arguments.

All pointer specifiers, modifiers aside, are mutually incompatible. For instance, %s is not compatible with %p, and %p is not compatible with %n, and %hhn is incompatible with %s, even if the pointers are ABI-compatible or identical on the selected platform.

Similarly, some pointer type specifiers can't be determined based on the argument type. For example const char * could be printed as '%s' or '%p'.

Should I just emit a diagnostic without specific attribute parameters in this case?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two similar issues which I think we may be mixing up. There's the issue of suggesting a format_matches attribute out of thin air, which I attribute to this comment, and this convinces me that we can't have a fixit.

The issue I wanted to point to here is FAPK_Elsewhere indicates the callee has the format_matches attribute, so in this case there is a format attribute that we can just grab out of it instead of having to make one up.

return;
}

// Emit the diagnostic and fixit.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a recovery action, should we also add an implicit format attribute to Caller? It might catch issues in the rest of the file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That changes some behavior:

void foo(const char *fmt, va_list args) {
  vprintf(fmt, args);
  vscanf(fmt, args);
}

Previously, this would emit two diagnostics to suggest a printf and scanf format attribute. If we add an implicit attr, then when looking at vscanf we would get passing 'printf' format string where 'scanf' format string is expected. Also, this would eliminate duplicate diagnostics created by multiple format function calls of the same type.

I'll see if there are any other differences before updating the PR, but these changes make sense to me.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes also make sense to me.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, one thing to mention is that we should only add the attribute if the diagnostic is enabled at the call site, otherwise people will start seeing inexplicable format warnings that cannot be silenced. This would probably be a net positive, but the respectful way to do this would be to enable the diagnostic by default.

unsigned FormatStringIndex = CallerParamIdx + CallerArgumentIndexOffset;
StringRef FormatTypeName = S->GetFormatStringTypeName(FormatType);
S->Diag(Loc, diag::warn_missing_format_attribute)
<< FormatTypeName << Caller
<< FixItHint::CreateInsertion(Caller->getFirstDecl()->getLocation(),
(llvm::Twine("__attribute__((format(") +
FormatTypeName + ", " +
llvm::Twine(FormatStringIndex) + ", " +
llvm::Twine(FirstArgumentIndex) + ")))")
.str());
S->Diag(Caller->getFirstDecl()->getLocation(), diag::note_callee_decl)
<< Caller;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When it's not implicit, you can also show the source location of the format attribute as it's often a macro and it might inspire users to use it instead of raw attribute syntax.

}

bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
Sema::FormatArgumentPassingKind APK,
const StringLiteral *ReferenceFormatString,
Expand Down Expand Up @@ -7030,11 +7106,12 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
// ObjC string uses the same format specifiers as C string, so we can use
// the same format string checking logic for both ObjC and C strings.
UncoveredArgHandler UncoveredArg;
std::optional<unsigned> CallerParamIdx;
StringLiteralCheckType CT = checkFormatStringExpr(
*this, ReferenceFormatString, OrigFormatExpr, Args, APK, format_idx,
firstDataArg, Type, CallType,
/*IsFunctionCall*/ true, CheckedVarArgs, UncoveredArg,
/*no string offset*/ llvm::APSInt(64, false) = 0);
/*no string offset*/ llvm::APSInt(64, false) = 0, &CallerParamIdx);

// Generate a diagnostic where an uncovered argument is detected.
if (UncoveredArg.hasUncoveredArg()) {
Expand All @@ -7047,11 +7124,6 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
// Literal format string found, check done!
return CT == SLCT_CheckedLiteral;

// Strftime is particular as it always uses a single 'time' argument,
// so it is safe to pass a non-literal string.
if (Type == FormatStringType::Strftime)
return false;

// Do not emit diag when the string param is a macro expansion and the
// format is either NSString or CFString. This is a hack to prevent
// diag when using the NSLocalizedString and CFCopyLocalizedString macros
Expand All @@ -7061,6 +7133,15 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
SourceMgr.isInSystemMacro(FormatLoc))
return false;

if (CallerParamIdx)
CheckMissingFormatAttributes(this, Type, format_idx, firstDataArg, Args,
APK, *CallerParamIdx, Loc);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whenever -Wmissing-format-attribute hits, you will also get -Wformat-nonliteral from line 7173, which I think we need to think about.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an exception for strftime calls which don't emit -Wformat-nonliteral because of the early return on line 7153.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, what about the other format families? I haven't built your change to play with it, but my suspicion is that if you enable -Wformat-nonliteral and -Wmissing-format-attribute, then you usually will trip both:

#include <stdarg.h>

__attribute__((format(printf, 1, 0))) void foo(const char *, va_list);

void bar(const char *fmt, ...) {
	va_list ap;
	va_start(ap, fmt);
	foo(fmt, ap); // <---- 
	va_end(ap);
}

If that's not the case (or actually, even if that's the case), I think you should have a test that shows we emit only one or the other.


// Strftime is particular as it always uses a single 'time' argument,
// so it is safe to pass a non-literal string.
if (Type == FormatStringType::Strftime)
return false;

// If there are no arguments specified, warn with -Wformat-security, otherwise
// warn only with -Wformat-nonliteral.
if (Args.size() == firstDataArg) {
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3572,7 +3572,7 @@ static void handleEnumExtensibilityAttr(Sema &S, Decl *D,
}

/// Handle __attribute__((format_arg((idx)))) attribute based on
/// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
/// https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html
static void handleFormatArgAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
const Expr *IdxExpr = AL.getArgAsExpr(0);
ParamIdx Idx;
Expand Down Expand Up @@ -3771,7 +3771,7 @@ struct FormatAttrCommon {
};

/// Handle __attribute__((format(type,idx,firstarg))) attributes based on
/// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
/// https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html
static bool handleFormatAttrCommon(Sema &S, Decl *D, const ParsedAttr &AL,
FormatAttrCommon *Info) {
// Checks the first two arguments of the attribute; this is shared between
Expand Down
Loading