@@ -390,13 +390,194 @@ static bool SemaBuiltinCallWithStaticChain(Sema &S, CallExpr *BuiltinCall) {
390390 return false;
391391}
392392
393+ namespace {
394+
395+ class EstimateSizeFormatHandler
396+ : public analyze_format_string::FormatStringHandler {
397+ size_t Size;
398+
399+ public:
400+ EstimateSizeFormatHandler(StringRef Format)
401+ : Size(std::min(Format.find(0), Format.size()) +
402+ 1 /* null byte always written by sprintf */) {}
403+
404+ bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
405+ const char *, unsigned SpecifierLen) override {
406+
407+ const size_t FieldWidth = computeFieldWidth(FS);
408+ const size_t Precision = computePrecision(FS);
409+
410+ // The actual format.
411+ switch (FS.getConversionSpecifier().getKind()) {
412+ // Just a char.
413+ case analyze_format_string::ConversionSpecifier::cArg:
414+ case analyze_format_string::ConversionSpecifier::CArg:
415+ Size += std::max(FieldWidth, (size_t)1);
416+ break;
417+ // Just an integer.
418+ case analyze_format_string::ConversionSpecifier::dArg:
419+ case analyze_format_string::ConversionSpecifier::DArg:
420+ case analyze_format_string::ConversionSpecifier::iArg:
421+ case analyze_format_string::ConversionSpecifier::oArg:
422+ case analyze_format_string::ConversionSpecifier::OArg:
423+ case analyze_format_string::ConversionSpecifier::uArg:
424+ case analyze_format_string::ConversionSpecifier::UArg:
425+ case analyze_format_string::ConversionSpecifier::xArg:
426+ case analyze_format_string::ConversionSpecifier::XArg:
427+ Size += std::max(FieldWidth, Precision);
428+ break;
429+
430+ // %g style conversion switches between %f or %e style dynamically.
431+ // %f always takes less space, so default to it.
432+ case analyze_format_string::ConversionSpecifier::gArg:
433+ case analyze_format_string::ConversionSpecifier::GArg:
434+
435+ // Floating point number in the form '[+]ddd.ddd'.
436+ case analyze_format_string::ConversionSpecifier::fArg:
437+ case analyze_format_string::ConversionSpecifier::FArg:
438+ Size += std::max(FieldWidth, 1 /* integer part */ +
439+ (Precision ? 1 + Precision
440+ : 0) /* period + decimal */);
441+ break;
442+
443+ // Floating point number in the form '[-]d.ddde[+-]dd'.
444+ case analyze_format_string::ConversionSpecifier::eArg:
445+ case analyze_format_string::ConversionSpecifier::EArg:
446+ Size +=
447+ std::max(FieldWidth,
448+ 1 /* integer part */ +
449+ (Precision ? 1 + Precision : 0) /* period + decimal */ +
450+ 1 /* e or E letter */ + 2 /* exponent */);
451+ break;
452+
453+ // Floating point number in the form '[-]0xh.hhhhp±dd'.
454+ case analyze_format_string::ConversionSpecifier::aArg:
455+ case analyze_format_string::ConversionSpecifier::AArg:
456+ Size +=
457+ std::max(FieldWidth,
458+ 2 /* 0x */ + 1 /* integer part */ +
459+ (Precision ? 1 + Precision : 0) /* period + decimal */ +
460+ 1 /* p or P letter */ + 1 /* + or - */ + 1 /* value */);
461+ break;
462+
463+ // Just a string.
464+ case analyze_format_string::ConversionSpecifier::sArg:
465+ case analyze_format_string::ConversionSpecifier::SArg:
466+ Size += FieldWidth;
467+ break;
468+
469+ // Just a pointer in the form '0xddd'.
470+ case analyze_format_string::ConversionSpecifier::pArg:
471+ Size += std::max(FieldWidth, 2 /* leading 0x */ + Precision);
472+ break;
473+
474+ // A plain percent.
475+ case analyze_format_string::ConversionSpecifier::PercentArg:
476+ Size += 1;
477+ break;
478+
479+ default:
480+ break;
481+ }
482+
483+ Size += FS.hasPlusPrefix() || FS.hasSpacePrefix();
484+
485+ if (FS.hasAlternativeForm()) {
486+ switch (FS.getConversionSpecifier().getKind()) {
487+ default:
488+ break;
489+ // Force a leading '0'.
490+ case analyze_format_string::ConversionSpecifier::oArg:
491+ Size += 1;
492+ break;
493+ // Force a leading '0x'.
494+ case analyze_format_string::ConversionSpecifier::xArg:
495+ case analyze_format_string::ConversionSpecifier::XArg:
496+ Size += 2;
497+ break;
498+ // Force a period '.' before decimal, even if precision is 0.
499+ case analyze_format_string::ConversionSpecifier::aArg:
500+ case analyze_format_string::ConversionSpecifier::AArg:
501+ case analyze_format_string::ConversionSpecifier::eArg:
502+ case analyze_format_string::ConversionSpecifier::EArg:
503+ case analyze_format_string::ConversionSpecifier::fArg:
504+ case analyze_format_string::ConversionSpecifier::FArg:
505+ case analyze_format_string::ConversionSpecifier::gArg:
506+ case analyze_format_string::ConversionSpecifier::GArg:
507+ Size += (Precision ? 0 : 1);
508+ break;
509+ }
510+ }
511+ assert(SpecifierLen <= Size && "no underflow");
512+ Size -= SpecifierLen;
513+ return true;
514+ }
515+
516+ size_t getSizeLowerBound() const { return Size; }
517+
518+ private:
519+ static size_t computeFieldWidth(const analyze_printf::PrintfSpecifier &FS) {
520+ const analyze_format_string::OptionalAmount &FW = FS.getFieldWidth();
521+ size_t FieldWidth = 0;
522+ if (FW.getHowSpecified() == analyze_format_string::OptionalAmount::Constant)
523+ FieldWidth = FW.getConstantAmount();
524+ return FieldWidth;
525+ }
526+
527+ static size_t computePrecision(const analyze_printf::PrintfSpecifier &FS) {
528+ const analyze_format_string::OptionalAmount &FW = FS.getPrecision();
529+ size_t Precision = 0;
530+
531+ // See man 3 printf for default precision value based on the specifier.
532+ switch (FW.getHowSpecified()) {
533+ case analyze_format_string::OptionalAmount::NotSpecified:
534+ switch (FS.getConversionSpecifier().getKind()) {
535+ default:
536+ break;
537+ case analyze_format_string::ConversionSpecifier::dArg: // %d
538+ case analyze_format_string::ConversionSpecifier::DArg: // %D
539+ case analyze_format_string::ConversionSpecifier::iArg: // %i
540+ Precision = 1;
541+ break;
542+ case analyze_format_string::ConversionSpecifier::oArg: // %d
543+ case analyze_format_string::ConversionSpecifier::OArg: // %D
544+ case analyze_format_string::ConversionSpecifier::uArg: // %d
545+ case analyze_format_string::ConversionSpecifier::UArg: // %D
546+ case analyze_format_string::ConversionSpecifier::xArg: // %d
547+ case analyze_format_string::ConversionSpecifier::XArg: // %D
548+ Precision = 1;
549+ break;
550+ case analyze_format_string::ConversionSpecifier::fArg: // %f
551+ case analyze_format_string::ConversionSpecifier::FArg: // %F
552+ case analyze_format_string::ConversionSpecifier::eArg: // %e
553+ case analyze_format_string::ConversionSpecifier::EArg: // %E
554+ case analyze_format_string::ConversionSpecifier::gArg: // %g
555+ case analyze_format_string::ConversionSpecifier::GArg: // %G
556+ Precision = 6;
557+ break;
558+ case analyze_format_string::ConversionSpecifier::pArg: // %d
559+ Precision = 1;
560+ break;
561+ }
562+ break;
563+ case analyze_format_string::OptionalAmount::Constant:
564+ Precision = FW.getConstantAmount();
565+ break;
566+ default:
567+ break;
568+ }
569+ return Precision;
570+ }
571+ };
572+
573+ } // namespace
574+
393575/// Check a call to BuiltinID for buffer overflows. If BuiltinID is a
394576/// __builtin_*_chk function, then use the object size argument specified in the
395577/// source. Otherwise, infer the object size using __builtin_object_size.
396578void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
397579 CallExpr *TheCall) {
398580 // FIXME: There are some more useful checks we could be doing here:
399- // - Analyze the format string of sprintf to see how much of buffer is used.
400581 // - Evaluate strlen of strcpy arguments, use as object size.
401582
402583 if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
@@ -407,12 +588,55 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
407588 if (!BuiltinID)
408589 return;
409590
591+ const TargetInfo &TI = getASTContext().getTargetInfo();
592+ unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
593+
410594 unsigned DiagID = 0;
411595 bool IsChkVariant = false;
596+ Optional<llvm::APSInt> UsedSize;
412597 unsigned SizeIndex, ObjectIndex;
413598 switch (BuiltinID) {
414599 default:
415600 return;
601+ case Builtin::BIsprintf:
602+ case Builtin::BI__builtin___sprintf_chk: {
603+ size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
604+ auto *FormatExpr = TheCall->getArg(FormatIndex)->IgnoreParenImpCasts();
605+
606+ if (auto *Format = dyn_cast<StringLiteral>(FormatExpr)) {
607+
608+ if (!Format->isAscii() && !Format->isUTF8())
609+ return;
610+
611+ StringRef FormatStrRef = Format->getString();
612+ EstimateSizeFormatHandler H(FormatStrRef);
613+ const char *FormatBytes = FormatStrRef.data();
614+ const ConstantArrayType *T =
615+ Context.getAsConstantArrayType(Format->getType());
616+ assert(T && "String literal not of constant array type!");
617+ size_t TypeSize = T->getSize().getZExtValue();
618+
619+ // In case there's a null byte somewhere.
620+ size_t StrLen =
621+ std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0));
622+ if (!analyze_format_string::ParsePrintfString(
623+ H, FormatBytes, FormatBytes + StrLen, getLangOpts(),
624+ Context.getTargetInfo(), false)) {
625+ DiagID = diag::warn_fortify_source_format_overflow;
626+ UsedSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
627+ .extOrTrunc(SizeTypeWidth);
628+ if (BuiltinID == Builtin::BI__builtin___sprintf_chk) {
629+ IsChkVariant = true;
630+ ObjectIndex = 2;
631+ } else {
632+ IsChkVariant = false;
633+ ObjectIndex = 0;
634+ }
635+ break;
636+ }
637+ }
638+ return;
639+ }
416640 case Builtin::BI__builtin___memcpy_chk:
417641 case Builtin::BI__builtin___memmove_chk:
418642 case Builtin::BI__builtin___memset_chk:
@@ -505,19 +729,19 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
505729 if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
506730 return;
507731 // Get the object size in the target's size_t width.
508- const TargetInfo &TI = getASTContext().getTargetInfo();
509- unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
510732 ObjectSize = llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
511733 }
512734
513735 // Evaluate the number of bytes of the object that this call will use.
514- Expr::EvalResult Result;
515- Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
516- if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
517- return;
518- llvm::APSInt UsedSize = Result.Val.getInt();
736+ if (!UsedSize) {
737+ Expr::EvalResult Result;
738+ Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
739+ if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
740+ return;
741+ UsedSize = Result.Val.getInt().extOrTrunc(SizeTypeWidth);
742+ }
519743
520- if (UsedSize.ule(ObjectSize))
744+ if (UsedSize.getValue(). ule(ObjectSize))
521745 return;
522746
523747 StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
@@ -533,7 +757,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
533757 DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
534758 PDiag(DiagID)
535759 << FunctionName << ObjectSize.toString(/*Radix=*/10)
536- << UsedSize.toString(/*Radix=*/10));
760+ << UsedSize.getValue(). toString(/*Radix=*/10));
537761}
538762
539763static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
0 commit comments