Skip to content

Commit 7e15ec2

Browse files
[clang] Constant-evaluate format strings as last resort
Clang's -Wformat checker can see through an inconsistent set of operations. We can fall back to the recently-updated constant string evaluation infrastructure when Clang's initial evaluation fails for a second chance at figuring out what the format string is intended to be. This enables analyzing format strings that were built at compile-time with std::string and other constexpr-capable types in C++, as long as all pieces are also constexpr-visible, and a number of other patterns. Radar-ID: rdar://99940060
1 parent 27bc8a1 commit 7e15ec2

File tree

7 files changed

+227
-59
lines changed

7 files changed

+227
-59
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,9 @@ related warnings within the method body.
265265
``format_matches`` accepts an example valid format string as its third
266266
argument. For more information, see the Clang attributes documentation.
267267

268+
- Format string checking now supports the compile-time evaluation of format
269+
strings as a fallback mechanism.
270+
268271
- Introduced a new statement attribute ``[[clang::atomic]]`` that enables
269272
fine-grained control over atomic code generation on a per-statement basis.
270273
Supported options include ``[no_]remote_memory``,

clang/include/clang/AST/Expr.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,14 @@ class Expr : public ValueStmt {
791791
const Expr *PtrExpression, ASTContext &Ctx,
792792
EvalResult &Status) const;
793793

794-
/// If the current Expr can be evaluated to a pointer to a null-terminated
794+
/// Fill \c Into with the first characters that can be constant-evaluated
795+
/// from this \c Expr . When encountering a null character, stop and return
796+
/// \c true (the null is not returned in \c Into ). Return \c false if
797+
/// evaluation runs off the end of the constant-evaluated string before it
798+
/// encounters a null character.
799+
bool tryEvaluateString(ASTContext &Ctx, std::string &Into) const;
800+
801+
/// If the current \c Expr can be evaluated to a pointer to a null-terminated
795802
/// constant string, return the constant string (without the terminating
796803
/// null).
797804
std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const;

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10170,6 +10170,8 @@ def warn_format_bool_as_character : Warning<
1017010170
"using '%0' format specifier, but argument has boolean value">,
1017110171
InGroup<Format>;
1017210172
def note_format_string_defined : Note<"format string is defined here">;
10173+
def note_format_string_evaluated_to : Note<
10174+
"format string was constant-evaluated">;
1017310175
def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
1017410176
def note_printf_c_str: Note<"did you mean to call the %0 method?">;
1017510177
def note_format_security_fixit: Note<

clang/lib/AST/ExprConstant.cpp

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17945,15 +17945,36 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
1794517945

1794617946
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
1794717947
EvalInfo &Info, std::string *StringResult) {
17948-
if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
17948+
QualType Ty = E->getType();
17949+
if (!E->isPRValue())
1794917950
return false;
1795017951

1795117952
LValue String;
17952-
17953-
if (!EvaluatePointer(E, String, Info))
17953+
QualType CharTy;
17954+
if (Ty->canDecayToPointerType()) {
17955+
if (E->isGLValue()) {
17956+
if (!EvaluateLValue(E, String, Info))
17957+
return false;
17958+
} else {
17959+
APValue &Value = Info.CurrentCall->createTemporary(
17960+
E, Ty, ScopeKind::FullExpression, String);
17961+
if (!EvaluateInPlace(Value, Info, String, E))
17962+
return false;
17963+
}
17964+
// The result is a pointer to the first element of the array.
17965+
auto *AT = Info.Ctx.getAsArrayType(Ty);
17966+
CharTy = AT->getElementType();
17967+
if (auto *CAT = dyn_cast<ConstantArrayType>(AT))
17968+
String.addArray(Info, E, CAT);
17969+
else
17970+
String.addUnsizedArray(Info, E, CharTy);
17971+
} else if (Ty->hasPointerRepresentation()) {
17972+
if (!EvaluatePointer(E, String, Info))
17973+
return false;
17974+
CharTy = Ty->getPointeeType();
17975+
} else {
1795417976
return false;
17955-
17956-
QualType CharTy = E->getType()->getPointeeType();
17977+
}
1795717978

1795817979
// Fast path: if it's a string literal, search the string value.
1795917980
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
@@ -17995,13 +18016,16 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
1799518016
}
1799618017
}
1799718018

17998-
std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
18019+
bool Expr::tryEvaluateString(ASTContext &Ctx, std::string &StringResult) const {
1799918020
Expr::EvalStatus Status;
1800018021
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
1800118022
uint64_t Result;
18002-
std::string StringResult;
18023+
return EvaluateBuiltinStrLen(this, Result, Info, &StringResult);
18024+
}
1800318025

18004-
if (EvaluateBuiltinStrLen(this, Result, Info, &StringResult))
18026+
std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
18027+
std::string StringResult;
18028+
if (tryEvaluateString(Ctx, StringResult))
1800518029
return StringResult;
1800618030
return {};
1800718031
}

clang/lib/Sema/SemaChecking.cpp

Lines changed: 94 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@
9898
#include "llvm/Support/Locale.h"
9999
#include "llvm/Support/MathExtras.h"
100100
#include "llvm/Support/SaveAndRestore.h"
101+
#include "llvm/Support/SmallVectorMemoryBuffer.h"
101102
#include "llvm/Support/raw_ostream.h"
102103
#include "llvm/TargetParser/RISCVTargetParser.h"
103104
#include "llvm/TargetParser/Triple.h"
@@ -5935,8 +5936,14 @@ static void CheckFormatString(
59355936
llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
59365937
bool IgnoreStringsWithoutSpecifiers);
59375938

5938-
static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
5939-
const Expr *E);
5939+
enum StringLiteralConstEvalResult {
5940+
SLCER_NotEvaluated,
5941+
SLCER_NotNullTerminated,
5942+
SLCER_Evaluated,
5943+
};
5944+
5945+
static StringLiteralConstEvalResult
5946+
constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL);
59405947

59415948
// Determine if an expression is a string literal or constant string.
59425949
// If this function returns false on the arguments to a function expecting a
@@ -5968,14 +5975,9 @@ static StringLiteralCheckType checkFormatStringExpr(
59685975

59695976
switch (E->getStmtClass()) {
59705977
case Stmt::InitListExprClass:
5971-
// Handle expressions like {"foobar"}.
5972-
if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) {
5973-
return checkFormatStringExpr(
5974-
S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
5975-
Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
5976-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
5977-
}
5978-
return SLCT_NotALiteral;
5978+
// try to constant-evaluate the string
5979+
break;
5980+
59795981
case Stmt::BinaryConditionalOperatorClass:
59805982
case Stmt::ConditionalOperatorClass: {
59815983
// The expression is a literal if both sub-expressions were, and it was
@@ -6066,10 +6068,9 @@ static StringLiteralCheckType checkFormatStringExpr(
60666068
if (InitList->isStringLiteralInit())
60676069
Init = InitList->getInit(0)->IgnoreParenImpCasts();
60686070
}
6069-
return checkFormatStringExpr(
6070-
S, ReferenceFormatString, Init, Args, APK, format_idx,
6071-
firstDataArg, Type, CallType,
6072-
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset);
6071+
InFunctionCall = false;
6072+
E = Init;
6073+
goto tryAgain;
60736074
}
60746075
}
60756076

@@ -6142,11 +6143,9 @@ static StringLiteralCheckType checkFormatStringExpr(
61426143
}
61436144
return SLCT_UncheckedLiteral;
61446145
}
6145-
return checkFormatStringExpr(
6146-
S, ReferenceFormatString, PVFormatMatches->getFormatString(),
6147-
Args, APK, format_idx, firstDataArg, Type, CallType,
6148-
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
6149-
Offset, IgnoreStringsWithoutSpecifiers);
6146+
E = PVFormatMatches->getFormatString();
6147+
InFunctionCall = false;
6148+
goto tryAgain;
61506149
}
61516150
}
61526151

@@ -6214,20 +6213,13 @@ static StringLiteralCheckType checkFormatStringExpr(
62146213
unsigned BuiltinID = FD->getBuiltinID();
62156214
if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
62166215
BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {
6217-
const Expr *Arg = CE->getArg(0);
6218-
return checkFormatStringExpr(
6219-
S, ReferenceFormatString, Arg, Args, APK, format_idx,
6220-
firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
6221-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
6216+
E = CE->getArg(0);
6217+
goto tryAgain;
62226218
}
62236219
}
62246220
}
6225-
if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E))
6226-
return checkFormatStringExpr(
6227-
S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
6228-
Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
6229-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
6230-
return SLCT_NotALiteral;
6221+
// try to constant-evaluate the string
6222+
break;
62316223
}
62326224
case Stmt::ObjCMessageExprClass: {
62336225
const auto *ME = cast<ObjCMessageExpr>(E);
@@ -6248,11 +6240,8 @@ static StringLiteralCheckType checkFormatStringExpr(
62486240
IgnoreStringsWithoutSpecifiers = true;
62496241
}
62506242

6251-
const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex());
6252-
return checkFormatStringExpr(
6253-
S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
6254-
Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
6255-
Offset, IgnoreStringsWithoutSpecifiers);
6243+
E = ME->getArg(FA->getFormatIdx().getASTIndex());
6244+
goto tryAgain;
62566245
}
62576246
}
62586247

@@ -6314,7 +6303,8 @@ static StringLiteralCheckType checkFormatStringExpr(
63146303
}
63156304
}
63166305

6317-
return SLCT_NotALiteral;
6306+
// try to constant-evaluate the string
6307+
break;
63186308
}
63196309
case Stmt::UnaryOperatorClass: {
63206310
const UnaryOperator *UnaOp = cast<UnaryOperator>(E);
@@ -6331,26 +6321,79 @@ static StringLiteralCheckType checkFormatStringExpr(
63316321
}
63326322
}
63336323

6334-
return SLCT_NotALiteral;
6324+
// try to constant-evaluate the string
6325+
break;
63356326
}
63366327

63376328
default:
6329+
// try to constant-evaluate the string
6330+
break;
6331+
}
6332+
6333+
const StringLiteral *FakeLiteral = nullptr;
6334+
switch (constEvalStringAsLiteral(S, E, FakeLiteral)) {
6335+
case SLCER_NotEvaluated:
63386336
return SLCT_NotALiteral;
6337+
6338+
case SLCER_NotNullTerminated:
6339+
S.Diag(Args[format_idx]->getBeginLoc(),
6340+
diag::warn_printf_format_string_not_null_terminated)
6341+
<< Args[format_idx]->getSourceRange();
6342+
if (!InFunctionCall)
6343+
S.Diag(E->getBeginLoc(), diag::note_format_string_defined);
6344+
// Stop checking, as this might just mean we're missing a chunk of the
6345+
// format string and there would be other spurious format issues.
6346+
return SLCT_UncheckedLiteral;
6347+
6348+
case SLCER_Evaluated:
6349+
InFunctionCall = false;
6350+
E = FakeLiteral;
6351+
goto tryAgain;
63396352
}
63406353
}
63416354

6342-
// If this expression can be evaluated at compile-time,
6343-
// check if the result is a StringLiteral and return it
6344-
// otherwise return nullptr
6345-
static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
6346-
const Expr *E) {
6355+
static StringLiteralConstEvalResult
6356+
constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL) {
6357+
// As a last resort, try to constant-evaluate the format string. If it
6358+
// evaluates to a string literal in the first place, we can point to that
6359+
// string literal in source and use that.
63476360
Expr::EvalResult Result;
6348-
if (E->EvaluateAsRValue(Result, Context) && Result.Val.isLValue()) {
6361+
if (E->EvaluateAsRValue(Result, S.Context) && Result.Val.isLValue()) {
63496362
const auto *LVE = Result.Val.getLValueBase().dyn_cast<const Expr *>();
6350-
if (isa_and_nonnull<StringLiteral>(LVE))
6351-
return LVE;
6363+
if (auto *BaseSL = dyn_cast_or_null<StringLiteral>(LVE)) {
6364+
SL = BaseSL;
6365+
return SLCER_Evaluated;
6366+
}
63526367
}
6353-
return nullptr;
6368+
6369+
// Otherwise, try to evaluate the expression as a string constant.
6370+
std::string FormatString;
6371+
if (!E->tryEvaluateString(S.Context, FormatString)) {
6372+
return FormatString.empty() ? SLCER_NotEvaluated : SLCER_NotNullTerminated;
6373+
}
6374+
6375+
std::unique_ptr<llvm::MemoryBuffer> MemBuf;
6376+
{
6377+
llvm::SmallString<80> EscapedString;
6378+
{
6379+
llvm::raw_svector_ostream OS(EscapedString);
6380+
OS << '"';
6381+
OS.write_escaped(FormatString);
6382+
OS << '"';
6383+
}
6384+
MemBuf.reset(new llvm::SmallVectorMemoryBuffer(std::move(EscapedString),
6385+
"<scratch space>", true));
6386+
}
6387+
6388+
// Plop that string into a scratch buffer, create a string literal and then
6389+
// go with that.
6390+
auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
6391+
SourceLocation Begin = S.getSourceManager().getLocForStartOfFile(ScratchFile);
6392+
QualType SLType = S.Context.getStringLiteralArrayType(S.Context.CharTy,
6393+
FormatString.length());
6394+
SL = StringLiteral::Create(S.Context, FormatString,
6395+
StringLiteralKind::Ordinary, false, SLType, Begin);
6396+
return SLCER_Evaluated;
63546397
}
63556398

63566399
StringRef Sema::GetFormatStringTypeName(Sema::FormatStringType FST) {
@@ -6973,10 +7016,11 @@ void CheckFormatHandler::EmitFormatDiagnostic(
69737016
S.Diag(IsStringLocation ? ArgumentExpr->getExprLoc() : Loc, PDiag)
69747017
<< ArgumentExpr->getSourceRange();
69757018

6976-
const Sema::SemaDiagnosticBuilder &Note =
6977-
S.Diag(IsStringLocation ? Loc : StringRange.getBegin(),
6978-
diag::note_format_string_defined);
6979-
7019+
SourceLocation DiagLoc = IsStringLocation ? Loc : StringRange.getBegin();
7020+
unsigned DiagID = S.getSourceManager().isWrittenInScratchSpace(DiagLoc)
7021+
? diag::note_format_string_evaluated_to
7022+
: diag::note_format_string_defined;
7023+
const Sema::SemaDiagnosticBuilder &Note = S.Diag(DiagLoc, DiagID);
69807024
Note << StringRange;
69817025
Note << FixIt;
69827026
}

clang/test/Sema/format-strings.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-unknown-fuchsia %s
44
// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-linux-android %s
55

6+
// expected-note@-5{{format string was constant-evaluated}}
7+
// ^^^ there will be a <scratch space> SourceLocation caused by the
8+
// test_consteval_init_array test, that -verify treats as if it showed up at
9+
// line 1 of this file.
10+
611
#include <stdarg.h>
712
#include <stddef.h>
813
#define __need_wint_t
@@ -900,3 +905,12 @@ void test_promotion(void) {
900905
// pointers
901906
printf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
902907
}
908+
909+
void test_consteval_init_array(void) {
910+
const char buf_not_terminated[] = {'%', 55 * 2 + 5, '\n'}; // expected-note{{format string is defined here}}
911+
printf(buf_not_terminated, "hello"); // expected-warning{{format string is not null-terminated}}
912+
913+
const char buf[] = {'%', 55 * 2 + 5, '\n', 0};
914+
printf(buf, "hello"); // no-warning
915+
printf(buf, 123); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
916+
}

0 commit comments

Comments
 (0)