-
Notifications
You must be signed in to change notification settings - Fork 1
Bugprone unsafe format string #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 14 commits
bad2fdc
e752e54
688c582
9947b3c
a5f6717
dd5eaca
73a64ff
3902f9a
a3b3546
e6199a5
fc91d74
69c6a13
7540250
a6c6ad7
77e2d97
795834a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,165 @@ | ||
| //===--- UnsafeFormatStringCheck.cpp - clang-tidy -----------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "UnsafeFormatStringCheck.h" | ||
| #include "clang/ASTMatchers/ASTMatchFinder.h" | ||
| #include "clang/Lex/Lexer.h" | ||
| #include "llvm/Support/ConvertUTF.h" | ||
| #include "llvm/Support/raw_ostream.h" | ||
|
|
||
| using namespace clang::ast_matchers; | ||
|
|
||
| namespace clang::tidy::bugprone { | ||
|
|
||
| UnsafeFormatStringCheck::UnsafeFormatStringCheck(StringRef Name, | ||
| ClangTidyContext *Context) | ||
| : ClangTidyCheck(Name, Context) {} | ||
|
|
||
| void UnsafeFormatStringCheck::registerMatchers(MatchFinder *Finder) { | ||
| // Match vulnerable format string functions | ||
dkrupp marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| auto VulnerableFunctions = hasAnyName( | ||
| "sprintf", "vsprintf", "scanf", "fscanf", "sscanf", "vscanf", "vfscanf", | ||
| "vsscanf", "wscanf", "fwscanf", "swscanf", "vwscanf", "vfwscanf", "vswscanf"); | ||
dkrupp marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| Finder->addMatcher( | ||
| callExpr(callee(functionDecl(VulnerableFunctions)), | ||
| anyOf(hasArgument(0, stringLiteral().bind("format")), | ||
| hasArgument(1, stringLiteral().bind("format")))) | ||
|
Comment on lines
+32
to
+33
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This logic is a bit hacky -- it would be more elegant if the checker knew that e.g. the format string of |
||
| .bind("call"), | ||
| this); | ||
| } | ||
|
|
||
| void UnsafeFormatStringCheck::check(const MatchFinder::MatchResult &Result) { | ||
| const auto *Call = Result.Nodes.getNodeAs<CallExpr>("call"); | ||
| const auto *Format = Result.Nodes.getNodeAs<StringLiteral>("format"); | ||
|
|
||
| if (!Call || !Format) | ||
| return; | ||
|
|
||
| std::string FormatString; | ||
| if (Format->getCharByteWidth() == 1) { | ||
| FormatString = Format->getString().str(); | ||
| } else if (Format->getCharByteWidth() == 2) { | ||
| // Handle wide strings by converting to narrow string for analysis | ||
| convertUTF16ToUTF8String(Format->getBytes(), FormatString); | ||
| } else if (Format->getCharByteWidth() == 4) { | ||
| // Handle wide strings by converting to narrow string for analysis | ||
| convertUTF32ToUTF8String(Format->getBytes(), FormatString); | ||
| } | ||
|
|
||
| const auto *Callee = cast<FunctionDecl>(Call->getCalleeDecl()); | ||
| StringRef FunctionName = Callee->getName(); | ||
|
|
||
| bool IsScanfFamily = FunctionName.contains("scanf"); | ||
|
|
||
| if (!hasUnboundedStringSpecifier(FormatString, IsScanfFamily)) | ||
| return; | ||
|
|
||
| auto Diag = diag(Call->getBeginLoc(), | ||
| IsScanfFamily | ||
| ? "format specifier '%%s' without field width may cause buffer overflow; consider using '%%Ns' where N limits input length" | ||
| : "format specifier '%%s' without precision may cause buffer overflow; consider using '%%.Ns' where N limits output length") | ||
| << Call->getSourceRange(); | ||
|
|
||
| std::string SafeAlternative = getSafeAlternative(FunctionName); | ||
| if (!SafeAlternative.empty()) { | ||
| Diag << FixItHint::CreateInsertion(Call->getBeginLoc(), | ||
| "/* Consider using " + SafeAlternative + " */ "); | ||
| } | ||
dkrupp marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
|
|
||
| bool UnsafeFormatStringCheck::hasUnboundedStringSpecifier(StringRef FormatString, bool IsScanfFamily) { | ||
dkrupp marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| size_t Pos = 0; | ||
| while ((Pos = FormatString.find('%', Pos)) != StringRef::npos) { | ||
| if (Pos + 1 >= FormatString.size()) | ||
dkrupp marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| break; | ||
|
|
||
| // Skip %% | ||
| if (FormatString[Pos + 1] == '%') { | ||
| Pos += 2; | ||
| continue; | ||
| } | ||
|
|
||
| size_t SpecPos = Pos + 1; | ||
|
|
||
| // Skip flags | ||
| while (SpecPos < FormatString.size() && | ||
| (FormatString[SpecPos] == '-' || FormatString[SpecPos] == '+' || | ||
| FormatString[SpecPos] == ' ' || FormatString[SpecPos] == '#' || | ||
| FormatString[SpecPos] == '0')) { | ||
| SpecPos++; | ||
| } | ||
|
|
||
| // Check for field width | ||
| bool HasFieldWidth = false; | ||
| if (SpecPos < FormatString.size() && FormatString[SpecPos] == '*') { | ||
| HasFieldWidth = true; | ||
| SpecPos++; | ||
| } else { | ||
| while (SpecPos < FormatString.size() && isdigit(FormatString[SpecPos])) { | ||
| HasFieldWidth = true; | ||
| SpecPos++; | ||
| } | ||
| } | ||
|
|
||
| // Check for precision | ||
| bool HasPrecision = false; | ||
| if (SpecPos < FormatString.size() && FormatString[SpecPos] == '.') { | ||
| SpecPos++; | ||
| if (SpecPos < FormatString.size() && FormatString[SpecPos] == '*') { | ||
| HasPrecision = true; | ||
| SpecPos++; | ||
| } else { | ||
| while (SpecPos < FormatString.size() && isdigit(FormatString[SpecPos])) { | ||
| HasPrecision = true; | ||
| SpecPos++; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Skip length modifiers | ||
| while (SpecPos < FormatString.size() && | ||
| (FormatString[SpecPos] == 'h' || FormatString[SpecPos] == 'l' || | ||
| FormatString[SpecPos] == 'L' || FormatString[SpecPos] == 'z' || | ||
| FormatString[SpecPos] == 'j' || FormatString[SpecPos] == 't')) { | ||
| SpecPos++; | ||
| } | ||
|
|
||
| // Check for 's' specifier | ||
| if (SpecPos < FormatString.size() && FormatString[SpecPos] == 's') { | ||
| if (IsScanfFamily) { | ||
| // For scanf family, field width provides protection | ||
| if (!HasFieldWidth) { | ||
| return true; | ||
| } | ||
| } else { | ||
| // For sprintf family, only precision provides protection | ||
| if (!HasPrecision) { | ||
| return true; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Pos = SpecPos + 1; | ||
| } | ||
|
|
||
| return false; | ||
| } | ||
|
|
||
| std::string UnsafeFormatStringCheck::getSafeAlternative(StringRef FunctionName) { | ||
| if (FunctionName == "sprintf") | ||
| return "snprintf"; | ||
| if (FunctionName == "vsprintf") | ||
| return "vsnprintf"; | ||
| if (FunctionName.starts_with("scanf") || FunctionName.ends_with("scanf")) | ||
| return "add field width to %s specifiers"; | ||
| return ""; | ||
| } | ||
|
|
||
| } // namespace clang::tidy::bugprone | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| //===--- UnsafeFormatStringCheck.h - clang-tidy ---------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNSAFEFORMATSTRINGCHECK_H | ||
| #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNSAFEFORMATSTRINGCHECK_H | ||
|
|
||
| #include "../ClangTidyCheck.h" | ||
|
|
||
| namespace clang::tidy::bugprone { | ||
|
|
||
| /// Detects usage of vulnerable format string functions with unbounded %s | ||
| /// specifiers that can cause buffer overflows. | ||
| /// | ||
| /// For the user-facing documentation see: | ||
| /// http://clang.llvm.org/extra/clang-tidy/checks/bugprone/unsafe-format-string.html | ||
| class UnsafeFormatStringCheck : public ClangTidyCheck { | ||
| public: | ||
| UnsafeFormatStringCheck(StringRef Name, ClangTidyContext *Context); | ||
| void registerMatchers(ast_matchers::MatchFinder *Finder) override; | ||
| void check(const ast_matchers::MatchFinder::MatchResult &Result) override; | ||
|
|
||
| private: | ||
| bool hasUnboundedStringSpecifier(StringRef FormatString, bool IsScanfFamily); | ||
| std::string getSafeAlternative(StringRef FunctionName); | ||
| }; | ||
|
|
||
| } // namespace clang::tidy::bugprone | ||
|
|
||
| #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNSAFEFORMATSTRINGCHECK_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| .. title:: clang-tidy - bugprone-unsafe-format-string | ||
|
|
||
| bugprone-unsafe-format-string | ||
| ============================== | ||
|
|
||
| Detects usage of vulnerable format string functions with unbounded ``%s`` | ||
| specifiers that can cause buffer overflows. | ||
|
|
||
| The check identifies calls to format string functions like ``sprintf``, ``scanf``, | ||
| and their variants that use ``%s`` format specifiers without proper limits. | ||
| This can lead to buffer overflow vulnerabilities when the input string is longer | ||
| than the destination buffer. | ||
|
|
||
| Format Specifier Behavior | ||
| -------------------------- | ||
|
|
||
| The check distinguishes between different function families: | ||
|
|
||
| **scanf family functions**: Field width limits input length | ||
|
|
||
| - ``%s`` - unsafe (no limit) | ||
| - ``%99s`` - safe (reads at most 99 characters; the buffer must hold at least 100 bytes for the terminating null) | ||
|
|
||
| **sprintf family functions**: Precision limits output length | ||
|
|
||
| - ``%s`` - unsafe (no limit) | ||
| - ``%99s`` - unsafe (minimum width, no maximum) | ||
| - ``%.99s`` - safe (outputs at most 99 characters) | ||
| - ``%10.99s`` - safe (minimum 10 chars, maximum 99 chars) | ||
|
|
||
| Examples | ||
| -------- | ||
|
|
||
| .. code-block:: c | ||
|
|
||
| char buffer[100]; | ||
| const char* input = "user input"; | ||
|
|
||
| // Unsafe sprintf usage | ||
| sprintf(buffer, "%s", input); // No limit | ||
| sprintf(buffer, "%99s", input); // Field width is minimum, not maximum | ||
|
|
||
| // Safe sprintf usage | ||
| sprintf(buffer, "%.99s", input); // Precision limits to 99 chars | ||
| sprintf(buffer, "%10.99s", input); // Min 10, max 99 chars | ||
|
|
||
| // Unsafe scanf usage | ||
| scanf("%s", buffer); // No limit | ||
|
|
||
| // Safe scanf usage | ||
| scanf("%99s", buffer); // Field width limits to 99 chars | ||
|
|
||
| // Safe alternative: use safer functions | ||
| snprintf(buffer, sizeof(buffer), "%s", input); | ||
|
|
||
| Checked Functions | ||
| ----------------- | ||
|
|
||
| The check detects unsafe format strings in these functions: | ||
|
|
||
| **sprintf family** (precision ``.N`` provides safety): | ||
|
|
||
| * ``sprintf``, ``vsprintf`` | ||
|
|
||
| **scanf family** (field width ``N`` provides safety): | ||
|
|
||
| * ``scanf``, ``fscanf``, ``sscanf`` | ||
| * ``vscanf``, ``vfscanf``, ``vsscanf`` | ||
| * ``wscanf``, ``fwscanf``, ``swscanf`` | ||
| * ``vwscanf``, ``vfwscanf``, ``vswscanf`` | ||
|
|
||
| Recommendations | ||
| --------------- | ||
|
|
||
| * For ``sprintf`` family: Use precision specifiers (``%.Ns``) or ``snprintf`` | ||
| * For ``scanf`` family: Use field width specifiers (``%Ns``) | ||
| * Consider using safer string handling functions when possible |
Uh oh!
There was an error while loading. Please reload this page.