Skip to content

Commit be63894

Browse files
[clang] Constant-evaluate format strings as last resort
Clang's -Wformat checker can see through an inconsistent set of operations. We can fall back to the recently-updated constant string evaluation infrastructure when Clang's initial evaluation fails for a second chance at figuring out what the format string is intended to be. This enables analyzing format strings that were built at compile-time with std::string and other constexpr-capable types in C++, as long as all pieces are also constexpr-visible, and a number of other patterns. Radar-ID: rdar://99940060
1 parent 650dca5 commit be63894

File tree

7 files changed

+227
-59
lines changed

7 files changed

+227
-59
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,9 @@ related warnings within the method body.
382382
``format_matches`` accepts an example valid format string as its third
383383
argument. For more information, see the Clang attributes documentation.
384384

385+
- Format string checking now supports the compile-time evaluation of format
386+
strings as a fallback mechanism.
387+
385388
- Introduced a new statement attribute ``[[clang::atomic]]`` that enables
386389
fine-grained control over atomic code generation on a per-statement basis.
387390
Supported options include ``[no_]remote_memory``,

clang/include/clang/AST/Expr.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,14 @@ class Expr : public ValueStmt {
791791
const Expr *PtrExpression, ASTContext &Ctx,
792792
EvalResult &Status) const;
793793

794-
/// If the current Expr can be evaluated to a pointer to a null-terminated
794+
/// Fill \c Into with the first characters that can be constant-evaluated
795+
/// from this \c Expr . When encountering a null character, stop and return
796+
/// \c true (the null is not returned in \c Into ). Return \c false if
797+
/// evaluation runs off the end of the constant-evaluated string before it
798+
/// encounters a null character.
799+
bool tryEvaluateString(ASTContext &Ctx, std::string &Into) const;
800+
801+
/// If the current \c Expr can be evaluated to a pointer to a null-terminated
795802
/// constant string, return the constant string (without the terminating
796803
/// null).
797804
std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const;

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10354,6 +10354,8 @@ def warn_format_bool_as_character : Warning<
1035410354
"using '%0' format specifier, but argument has boolean value">,
1035510355
InGroup<Format>;
1035610356
def note_format_string_defined : Note<"format string is defined here">;
10357+
def note_format_string_evaluated_to : Note<
10358+
"format string was constant-evaluated">;
1035710359
def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
1035810360
def note_printf_c_str: Note<"did you mean to call the %0 method?">;
1035910361
def note_format_security_fixit: Note<

clang/lib/AST/ExprConstant.cpp

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17952,15 +17952,36 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
1795217952

1795317953
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
1795417954
EvalInfo &Info, std::string *StringResult) {
17955-
if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
17955+
QualType Ty = E->getType();
17956+
if (!E->isPRValue())
1795617957
return false;
1795717958

1795817959
LValue String;
17959-
17960-
if (!EvaluatePointer(E, String, Info))
17960+
QualType CharTy;
17961+
if (Ty->canDecayToPointerType()) {
17962+
if (E->isGLValue()) {
17963+
if (!EvaluateLValue(E, String, Info))
17964+
return false;
17965+
} else {
17966+
APValue &Value = Info.CurrentCall->createTemporary(
17967+
E, Ty, ScopeKind::FullExpression, String);
17968+
if (!EvaluateInPlace(Value, Info, String, E))
17969+
return false;
17970+
}
17971+
// The result is a pointer to the first element of the array.
17972+
auto *AT = Info.Ctx.getAsArrayType(Ty);
17973+
CharTy = AT->getElementType();
17974+
if (auto *CAT = dyn_cast<ConstantArrayType>(AT))
17975+
String.addArray(Info, E, CAT);
17976+
else
17977+
String.addUnsizedArray(Info, E, CharTy);
17978+
} else if (Ty->hasPointerRepresentation()) {
17979+
if (!EvaluatePointer(E, String, Info))
17980+
return false;
17981+
CharTy = Ty->getPointeeType();
17982+
} else {
1796117983
return false;
17962-
17963-
QualType CharTy = E->getType()->getPointeeType();
17984+
}
1796417985

1796517986
// Fast path: if it's a string literal, search the string value.
1796617987
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
@@ -18002,13 +18023,16 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
1800218023
}
1800318024
}
1800418025

18005-
std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
18026+
bool Expr::tryEvaluateString(ASTContext &Ctx, std::string &StringResult) const {
1800618027
Expr::EvalStatus Status;
1800718028
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
1800818029
uint64_t Result;
18009-
std::string StringResult;
18030+
return EvaluateBuiltinStrLen(this, Result, Info, &StringResult);
18031+
}
1801018032

18011-
if (EvaluateBuiltinStrLen(this, Result, Info, &StringResult))
18033+
std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
18034+
std::string StringResult;
18035+
if (tryEvaluateString(Ctx, StringResult))
1801218036
return StringResult;
1801318037
return {};
1801418038
}

clang/lib/Sema/SemaChecking.cpp

Lines changed: 94 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
#include "llvm/Support/Locale.h"
100100
#include "llvm/Support/MathExtras.h"
101101
#include "llvm/Support/SaveAndRestore.h"
102+
#include "llvm/Support/SmallVectorMemoryBuffer.h"
102103
#include "llvm/Support/raw_ostream.h"
103104
#include "llvm/TargetParser/RISCVTargetParser.h"
104105
#include "llvm/TargetParser/Triple.h"
@@ -6047,8 +6048,14 @@ static void CheckFormatString(
60476048
llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
60486049
bool IgnoreStringsWithoutSpecifiers);
60496050

6050-
static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
6051-
const Expr *E);
6051+
enum StringLiteralConstEvalResult {
6052+
SLCER_NotEvaluated,
6053+
SLCER_NotNullTerminated,
6054+
SLCER_Evaluated,
6055+
};
6056+
6057+
static StringLiteralConstEvalResult
6058+
constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL);
60526059

60536060
// Determine if an expression is a string literal or constant string.
60546061
// If this function returns false on the arguments to a function expecting a
@@ -6080,14 +6087,9 @@ static StringLiteralCheckType checkFormatStringExpr(
60806087

60816088
switch (E->getStmtClass()) {
60826089
case Stmt::InitListExprClass:
6083-
// Handle expressions like {"foobar"}.
6084-
if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) {
6085-
return checkFormatStringExpr(
6086-
S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
6087-
Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
6088-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
6089-
}
6090-
return SLCT_NotALiteral;
6090+
// try to constant-evaluate the string
6091+
break;
6092+
60916093
case Stmt::BinaryConditionalOperatorClass:
60926094
case Stmt::ConditionalOperatorClass: {
60936095
// The expression is a literal if both sub-expressions were, and it was
@@ -6178,10 +6180,9 @@ static StringLiteralCheckType checkFormatStringExpr(
61786180
if (InitList->isStringLiteralInit())
61796181
Init = InitList->getInit(0)->IgnoreParenImpCasts();
61806182
}
6181-
return checkFormatStringExpr(
6182-
S, ReferenceFormatString, Init, Args, APK, format_idx,
6183-
firstDataArg, Type, CallType,
6184-
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset);
6183+
InFunctionCall = false;
6184+
E = Init;
6185+
goto tryAgain;
61856186
}
61866187
}
61876188

@@ -6254,11 +6255,9 @@ static StringLiteralCheckType checkFormatStringExpr(
62546255
}
62556256
return SLCT_UncheckedLiteral;
62566257
}
6257-
return checkFormatStringExpr(
6258-
S, ReferenceFormatString, PVFormatMatches->getFormatString(),
6259-
Args, APK, format_idx, firstDataArg, Type, CallType,
6260-
/*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
6261-
Offset, IgnoreStringsWithoutSpecifiers);
6258+
E = PVFormatMatches->getFormatString();
6259+
InFunctionCall = false;
6260+
goto tryAgain;
62626261
}
62636262
}
62646263

@@ -6326,20 +6325,13 @@ static StringLiteralCheckType checkFormatStringExpr(
63266325
unsigned BuiltinID = FD->getBuiltinID();
63276326
if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
63286327
BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {
6329-
const Expr *Arg = CE->getArg(0);
6330-
return checkFormatStringExpr(
6331-
S, ReferenceFormatString, Arg, Args, APK, format_idx,
6332-
firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
6333-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
6328+
E = CE->getArg(0);
6329+
goto tryAgain;
63346330
}
63356331
}
63366332
}
6337-
if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E))
6338-
return checkFormatStringExpr(
6339-
S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
6340-
Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
6341-
UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
6342-
return SLCT_NotALiteral;
6333+
// try to constant-evaluate the string
6334+
break;
63436335
}
63446336
case Stmt::ObjCMessageExprClass: {
63456337
const auto *ME = cast<ObjCMessageExpr>(E);
@@ -6360,11 +6352,8 @@ static StringLiteralCheckType checkFormatStringExpr(
63606352
IgnoreStringsWithoutSpecifiers = true;
63616353
}
63626354

6363-
const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex());
6364-
return checkFormatStringExpr(
6365-
S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
6366-
Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
6367-
Offset, IgnoreStringsWithoutSpecifiers);
6355+
E = ME->getArg(FA->getFormatIdx().getASTIndex());
6356+
goto tryAgain;
63686357
}
63696358
}
63706359

@@ -6426,7 +6415,8 @@ static StringLiteralCheckType checkFormatStringExpr(
64266415
}
64276416
}
64286417

6429-
return SLCT_NotALiteral;
6418+
// try to constant-evaluate the string
6419+
break;
64306420
}
64316421
case Stmt::UnaryOperatorClass: {
64326422
const UnaryOperator *UnaOp = cast<UnaryOperator>(E);
@@ -6443,26 +6433,79 @@ static StringLiteralCheckType checkFormatStringExpr(
64436433
}
64446434
}
64456435

6446-
return SLCT_NotALiteral;
6436+
// try to constant-evaluate the string
6437+
break;
64476438
}
64486439

64496440
default:
6441+
// try to constant-evaluate the string
6442+
break;
6443+
}
6444+
6445+
const StringLiteral *FakeLiteral = nullptr;
6446+
switch (constEvalStringAsLiteral(S, E, FakeLiteral)) {
6447+
case SLCER_NotEvaluated:
64506448
return SLCT_NotALiteral;
6449+
6450+
case SLCER_NotNullTerminated:
6451+
S.Diag(Args[format_idx]->getBeginLoc(),
6452+
diag::warn_printf_format_string_not_null_terminated)
6453+
<< Args[format_idx]->getSourceRange();
6454+
if (!InFunctionCall)
6455+
S.Diag(E->getBeginLoc(), diag::note_format_string_defined);
6456+
// Stop checking, as this might just mean we're missing a chunk of the
6457+
// format string and there would be other spurious format issues.
6458+
return SLCT_UncheckedLiteral;
6459+
6460+
case SLCER_Evaluated:
6461+
InFunctionCall = false;
6462+
E = FakeLiteral;
6463+
goto tryAgain;
64516464
}
64526465
}
64536466

6454-
// If this expression can be evaluated at compile-time,
6455-
// check if the result is a StringLiteral and return it
6456-
// otherwise return nullptr
6457-
static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
6458-
const Expr *E) {
6467+
static StringLiteralConstEvalResult
6468+
constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL) {
6469+
// As a last resort, try to constant-evaluate the format string. If it
6470+
// evaluates to a string literal in the first place, we can point to that
6471+
// string literal in source and use that.
64596472
Expr::EvalResult Result;
6460-
if (E->EvaluateAsRValue(Result, Context) && Result.Val.isLValue()) {
6473+
if (E->EvaluateAsRValue(Result, S.Context) && Result.Val.isLValue()) {
64616474
const auto *LVE = Result.Val.getLValueBase().dyn_cast<const Expr *>();
6462-
if (isa_and_nonnull<StringLiteral>(LVE))
6463-
return LVE;
6475+
if (auto *BaseSL = dyn_cast_or_null<StringLiteral>(LVE)) {
6476+
SL = BaseSL;
6477+
return SLCER_Evaluated;
6478+
}
64646479
}
6465-
return nullptr;
6480+
6481+
// Otherwise, try to evaluate the expression as a string constant.
6482+
std::string FormatString;
6483+
if (!E->tryEvaluateString(S.Context, FormatString)) {
6484+
return FormatString.empty() ? SLCER_NotEvaluated : SLCER_NotNullTerminated;
6485+
}
6486+
6487+
std::unique_ptr<llvm::MemoryBuffer> MemBuf;
6488+
{
6489+
llvm::SmallString<80> EscapedString;
6490+
{
6491+
llvm::raw_svector_ostream OS(EscapedString);
6492+
OS << '"';
6493+
OS.write_escaped(FormatString);
6494+
OS << '"';
6495+
}
6496+
MemBuf.reset(new llvm::SmallVectorMemoryBuffer(std::move(EscapedString),
6497+
"<scratch space>", true));
6498+
}
6499+
6500+
// Plop that string into a scratch buffer, create a string literal and then
6501+
// go with that.
6502+
auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
6503+
SourceLocation Begin = S.getSourceManager().getLocForStartOfFile(ScratchFile);
6504+
QualType SLType = S.Context.getStringLiteralArrayType(S.Context.CharTy,
6505+
FormatString.length());
6506+
SL = StringLiteral::Create(S.Context, FormatString,
6507+
StringLiteralKind::Ordinary, false, SLType, Begin);
6508+
return SLCER_Evaluated;
64666509
}
64676510

64686511
StringRef Sema::GetFormatStringTypeName(FormatStringType FST) {
@@ -7086,10 +7129,11 @@ void CheckFormatHandler::EmitFormatDiagnostic(
70867129
S.Diag(IsStringLocation ? ArgumentExpr->getExprLoc() : Loc, PDiag)
70877130
<< ArgumentExpr->getSourceRange();
70887131

7089-
const Sema::SemaDiagnosticBuilder &Note =
7090-
S.Diag(IsStringLocation ? Loc : StringRange.getBegin(),
7091-
diag::note_format_string_defined);
7092-
7132+
SourceLocation DiagLoc = IsStringLocation ? Loc : StringRange.getBegin();
7133+
unsigned DiagID = S.getSourceManager().isWrittenInScratchSpace(DiagLoc)
7134+
? diag::note_format_string_evaluated_to
7135+
: diag::note_format_string_defined;
7136+
const Sema::SemaDiagnosticBuilder &Note = S.Diag(DiagLoc, DiagID);
70937137
Note << StringRange;
70947138
Note << FixIt;
70957139
}

clang/test/Sema/format-strings.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-unknown-fuchsia %s
44
// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-linux-android %s
55

6+
// expected-note@-5{{format string was constant-evaluated}}
7+
// ^^^ there will be a <scratch space> SourceLocation caused by the
8+
// test_consteval_init_array test, that -verify treats as if it showed up at
9+
// line 1 of this file.
10+
611
#include <stdarg.h>
712
#include <stddef.h>
813
#define __need_wint_t
@@ -900,3 +905,12 @@ void test_promotion(void) {
900905
// pointers
901906
printf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
902907
}
908+
909+
void test_consteval_init_array(void) {
910+
const char buf_not_terminated[] = {'%', 55 * 2 + 5, '\n'}; // expected-note{{format string is defined here}}
911+
printf(buf_not_terminated, "hello"); // expected-warning{{format string is not null-terminated}}
912+
913+
const char buf[] = {'%', 55 * 2 + 5, '\n', 0};
914+
printf(buf, "hello"); // no-warning
915+
printf(buf, 123); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
916+
}

0 commit comments

Comments
 (0)