Skip to content

Commit e93fac1

Browse files
author
serge-sans-paille
committed
New static checks for __builtin___sprintf_chk
Perform some pessimistic checks on buffer size based on an analysis of the format string.
1 parent 4be286b commit e93fac1

File tree

3 files changed

+327
-10
lines changed

3 files changed

+327
-10
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,12 @@ def warn_fortify_source_size_mismatch : Warning<
741741
"'%0' size argument is too large; destination buffer has size %1,"
742742
" but size argument is %2">, InGroup<FortifySource>;
743743

744+
def warn_fortify_source_format_overflow : Warning<
745+
"'%0' will always overflow; destination buffer has size %1,"
746+
" but format string expands to at least %2">,
747+
InGroup<FortifySource>;
748+
749+
744750
/// main()
745751
// static main() is not an error in C, just in C++.
746752
def warn_static_main : Warning<"'main' should not be declared static">,

clang/lib/Sema/SemaChecking.cpp

Lines changed: 234 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -390,13 +390,194 @@ static bool SemaBuiltinCallWithStaticChain(Sema &S, CallExpr *BuiltinCall) {
390390
return false;
391391
}
392392

393+
namespace {
394+
395+
class EstimateSizeFormatHandler
396+
: public analyze_format_string::FormatStringHandler {
397+
size_t Size;
398+
399+
public:
400+
EstimateSizeFormatHandler(StringRef Format)
401+
: Size(std::min(Format.find(0), Format.size()) +
402+
1 /* null byte always written by sprintf */) {}
403+
404+
bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
405+
const char *, unsigned SpecifierLen) override {
406+
407+
const size_t FieldWidth = computeFieldWidth(FS);
408+
const size_t Precision = computePrecision(FS);
409+
410+
// The actual format.
411+
switch (FS.getConversionSpecifier().getKind()) {
412+
// Just a char.
413+
case analyze_format_string::ConversionSpecifier::cArg:
414+
case analyze_format_string::ConversionSpecifier::CArg:
415+
Size += std::max(FieldWidth, (size_t)1);
416+
break;
417+
// Just an integer.
418+
case analyze_format_string::ConversionSpecifier::dArg:
419+
case analyze_format_string::ConversionSpecifier::DArg:
420+
case analyze_format_string::ConversionSpecifier::iArg:
421+
case analyze_format_string::ConversionSpecifier::oArg:
422+
case analyze_format_string::ConversionSpecifier::OArg:
423+
case analyze_format_string::ConversionSpecifier::uArg:
424+
case analyze_format_string::ConversionSpecifier::UArg:
425+
case analyze_format_string::ConversionSpecifier::xArg:
426+
case analyze_format_string::ConversionSpecifier::XArg:
427+
Size += std::max(FieldWidth, Precision);
428+
break;
429+
430+
// %g style conversion switches between %f or %e style dynamically.
431+
// %f always takes less space, so default to it.
432+
case analyze_format_string::ConversionSpecifier::gArg:
433+
case analyze_format_string::ConversionSpecifier::GArg:
434+
435+
// Floating point number in the form '[+]ddd.ddd'.
436+
case analyze_format_string::ConversionSpecifier::fArg:
437+
case analyze_format_string::ConversionSpecifier::FArg:
438+
Size += std::max(FieldWidth, 1 /* integer part */ +
439+
(Precision ? 1 + Precision
440+
: 0) /* period + decimal */);
441+
break;
442+
443+
// Floating point number in the form '[-]d.ddde[+-]dd'.
444+
case analyze_format_string::ConversionSpecifier::eArg:
445+
case analyze_format_string::ConversionSpecifier::EArg:
446+
Size +=
447+
std::max(FieldWidth,
448+
1 /* integer part */ +
449+
(Precision ? 1 + Precision : 0) /* period + decimal */ +
450+
1 /* e or E letter */ + 2 /* exponent */);
451+
break;
452+
453+
// Floating point number in the form '[-]0xh.hhhhp±dd'.
454+
case analyze_format_string::ConversionSpecifier::aArg:
455+
case analyze_format_string::ConversionSpecifier::AArg:
456+
Size +=
457+
std::max(FieldWidth,
458+
2 /* 0x */ + 1 /* integer part */ +
459+
(Precision ? 1 + Precision : 0) /* period + decimal */ +
460+
1 /* p or P letter */ + 1 /* + or - */ + 1 /* value */);
461+
break;
462+
463+
// Just a string.
464+
case analyze_format_string::ConversionSpecifier::sArg:
465+
case analyze_format_string::ConversionSpecifier::SArg:
466+
Size += FieldWidth;
467+
break;
468+
469+
// Just a pointer in the form '0xddd'.
470+
case analyze_format_string::ConversionSpecifier::pArg:
471+
Size += std::max(FieldWidth, 2 /* leading 0x */ + Precision);
472+
break;
473+
474+
// A plain percent.
475+
case analyze_format_string::ConversionSpecifier::PercentArg:
476+
Size += 1;
477+
break;
478+
479+
default:
480+
break;
481+
}
482+
483+
Size += FS.hasPlusPrefix() || FS.hasSpacePrefix();
484+
485+
if (FS.hasAlternativeForm()) {
486+
switch (FS.getConversionSpecifier().getKind()) {
487+
default:
488+
break;
489+
// Force a leading '0'.
490+
case analyze_format_string::ConversionSpecifier::oArg:
491+
Size += 1;
492+
break;
493+
// Force a leading '0x'.
494+
case analyze_format_string::ConversionSpecifier::xArg:
495+
case analyze_format_string::ConversionSpecifier::XArg:
496+
Size += 2;
497+
break;
498+
// Force a period '.' before decimal, even if precision is 0.
499+
case analyze_format_string::ConversionSpecifier::aArg:
500+
case analyze_format_string::ConversionSpecifier::AArg:
501+
case analyze_format_string::ConversionSpecifier::eArg:
502+
case analyze_format_string::ConversionSpecifier::EArg:
503+
case analyze_format_string::ConversionSpecifier::fArg:
504+
case analyze_format_string::ConversionSpecifier::FArg:
505+
case analyze_format_string::ConversionSpecifier::gArg:
506+
case analyze_format_string::ConversionSpecifier::GArg:
507+
Size += (Precision ? 0 : 1);
508+
break;
509+
}
510+
}
511+
assert(SpecifierLen <= Size && "no underflow");
512+
Size -= SpecifierLen;
513+
return true;
514+
}
515+
516+
size_t getSizeLowerBound() const { return Size; }
517+
518+
private:
519+
static size_t computeFieldWidth(const analyze_printf::PrintfSpecifier &FS) {
520+
const analyze_format_string::OptionalAmount &FW = FS.getFieldWidth();
521+
size_t FieldWidth = 0;
522+
if (FW.getHowSpecified() == analyze_format_string::OptionalAmount::Constant)
523+
FieldWidth = FW.getConstantAmount();
524+
return FieldWidth;
525+
}
526+
527+
static size_t computePrecision(const analyze_printf::PrintfSpecifier &FS) {
528+
const analyze_format_string::OptionalAmount &FW = FS.getPrecision();
529+
size_t Precision = 0;
530+
531+
// See man 3 printf for default precision value based on the specifier.
532+
switch (FW.getHowSpecified()) {
533+
case analyze_format_string::OptionalAmount::NotSpecified:
534+
switch (FS.getConversionSpecifier().getKind()) {
535+
default:
536+
break;
537+
case analyze_format_string::ConversionSpecifier::dArg: // %d
538+
case analyze_format_string::ConversionSpecifier::DArg: // %D
539+
case analyze_format_string::ConversionSpecifier::iArg: // %i
540+
Precision = 1;
541+
break;
542+
case analyze_format_string::ConversionSpecifier::oArg: // %d
543+
case analyze_format_string::ConversionSpecifier::OArg: // %D
544+
case analyze_format_string::ConversionSpecifier::uArg: // %d
545+
case analyze_format_string::ConversionSpecifier::UArg: // %D
546+
case analyze_format_string::ConversionSpecifier::xArg: // %d
547+
case analyze_format_string::ConversionSpecifier::XArg: // %D
548+
Precision = 1;
549+
break;
550+
case analyze_format_string::ConversionSpecifier::fArg: // %f
551+
case analyze_format_string::ConversionSpecifier::FArg: // %F
552+
case analyze_format_string::ConversionSpecifier::eArg: // %e
553+
case analyze_format_string::ConversionSpecifier::EArg: // %E
554+
case analyze_format_string::ConversionSpecifier::gArg: // %g
555+
case analyze_format_string::ConversionSpecifier::GArg: // %G
556+
Precision = 6;
557+
break;
558+
case analyze_format_string::ConversionSpecifier::pArg: // %d
559+
Precision = 1;
560+
break;
561+
}
562+
break;
563+
case analyze_format_string::OptionalAmount::Constant:
564+
Precision = FW.getConstantAmount();
565+
break;
566+
default:
567+
break;
568+
}
569+
return Precision;
570+
}
571+
};
572+
573+
} // namespace
574+
393575
/// Check a call to BuiltinID for buffer overflows. If BuiltinID is a
394576
/// __builtin_*_chk function, then use the object size argument specified in the
395577
/// source. Otherwise, infer the object size using __builtin_object_size.
396578
void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
397579
CallExpr *TheCall) {
398580
// FIXME: There are some more useful checks we could be doing here:
399-
// - Analyze the format string of sprintf to see how much of buffer is used.
400581
// - Evaluate strlen of strcpy arguments, use as object size.
401582

402583
if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
@@ -407,12 +588,55 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
407588
if (!BuiltinID)
408589
return;
409590

591+
const TargetInfo &TI = getASTContext().getTargetInfo();
592+
unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
593+
410594
unsigned DiagID = 0;
411595
bool IsChkVariant = false;
596+
Optional<llvm::APSInt> UsedSize;
412597
unsigned SizeIndex, ObjectIndex;
413598
switch (BuiltinID) {
414599
default:
415600
return;
601+
case Builtin::BIsprintf:
602+
case Builtin::BI__builtin___sprintf_chk: {
603+
size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
604+
auto *FormatExpr = TheCall->getArg(FormatIndex)->IgnoreParenImpCasts();
605+
606+
if (auto *Format = dyn_cast<StringLiteral>(FormatExpr)) {
607+
608+
if (!Format->isAscii() && !Format->isUTF8())
609+
return;
610+
611+
StringRef FormatStrRef = Format->getString();
612+
EstimateSizeFormatHandler H(FormatStrRef);
613+
const char *FormatBytes = FormatStrRef.data();
614+
const ConstantArrayType *T =
615+
Context.getAsConstantArrayType(Format->getType());
616+
assert(T && "String literal not of constant array type!");
617+
size_t TypeSize = T->getSize().getZExtValue();
618+
619+
// In case there's a null byte somewhere.
620+
size_t StrLen =
621+
std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0));
622+
if (!analyze_format_string::ParsePrintfString(
623+
H, FormatBytes, FormatBytes + StrLen, getLangOpts(),
624+
Context.getTargetInfo(), false)) {
625+
DiagID = diag::warn_fortify_source_format_overflow;
626+
UsedSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
627+
.extOrTrunc(SizeTypeWidth);
628+
if (BuiltinID == Builtin::BI__builtin___sprintf_chk) {
629+
IsChkVariant = true;
630+
ObjectIndex = 2;
631+
} else {
632+
IsChkVariant = false;
633+
ObjectIndex = 0;
634+
}
635+
break;
636+
}
637+
}
638+
return;
639+
}
416640
case Builtin::BI__builtin___memcpy_chk:
417641
case Builtin::BI__builtin___memmove_chk:
418642
case Builtin::BI__builtin___memset_chk:
@@ -505,19 +729,19 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
505729
if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
506730
return;
507731
// Get the object size in the target's size_t width.
508-
const TargetInfo &TI = getASTContext().getTargetInfo();
509-
unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
510732
ObjectSize = llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
511733
}
512734

513735
// Evaluate the number of bytes of the object that this call will use.
514-
Expr::EvalResult Result;
515-
Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
516-
if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
517-
return;
518-
llvm::APSInt UsedSize = Result.Val.getInt();
736+
if (!UsedSize) {
737+
Expr::EvalResult Result;
738+
Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
739+
if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
740+
return;
741+
UsedSize = Result.Val.getInt().extOrTrunc(SizeTypeWidth);
742+
}
519743

520-
if (UsedSize.ule(ObjectSize))
744+
if (UsedSize.getValue().ule(ObjectSize))
521745
return;
522746

523747
StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
@@ -533,7 +757,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
533757
DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
534758
PDiag(DiagID)
535759
<< FunctionName << ObjectSize.toString(/*Radix=*/10)
536-
<< UsedSize.toString(/*Radix=*/10));
760+
<< UsedSize.getValue().toString(/*Radix=*/10));
537761
}
538762

539763
static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,

0 commit comments

Comments
 (0)