Skip to content
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
#include "UnintendedCharOstreamOutputCheck.h"
#include "UniquePtrArrayMismatchCheck.h"
#include "UnsafeFunctionsCheck.h"
#include "UnsafeFormatStringCheck.h"
#include "UnusedLocalNonTrivialVariableCheck.h"
#include "UnusedRaiiCheck.h"
#include "UnusedReturnValueCheck.h"
Expand Down Expand Up @@ -287,6 +288,8 @@ class BugproneModule : public ClangTidyModule {
"bugprone-crtp-constructor-accessibility");
CheckFactories.registerCheck<UnsafeFunctionsCheck>(
"bugprone-unsafe-functions");
CheckFactories.registerCheck<UnsafeFormatStringCheck>(
"bugprone-unsafe-format-string");
CheckFactories.registerCheck<UnusedLocalNonTrivialVariableCheck>(
"bugprone-unused-local-non-trivial-variable");
CheckFactories.registerCheck<UnusedRaiiCheck>("bugprone-unused-raii");
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ add_clang_library(clangTidyBugproneModule STATIC
UnhandledSelfAssignmentCheck.cpp
UniquePtrArrayMismatchCheck.cpp
UnsafeFunctionsCheck.cpp
UnsafeFormatStringCheck.cpp
UnusedLocalNonTrivialVariableCheck.cpp
UnusedRaiiCheck.cpp
UnusedReturnValueCheck.cpp
Expand Down
149 changes: 149 additions & 0 deletions clang-tools-extra/clang-tidy/bugprone/UnsafeFormatStringCheck.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
//===--- UnsafeFormatStringCheck.cpp - clang-tidy -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "UnsafeFormatStringCheck.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "llvm/Support/ConvertUTF.h"

using namespace clang::ast_matchers;

namespace clang::tidy::bugprone {

/// Constructs the check. The name and context are passed straight through to
/// the ClangTidyCheck base class; no additional state is initialised here.
UnsafeFormatStringCheck::UnsafeFormatStringCheck(StringRef Name,
                                                 ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context) {}

/// Registers the AST matcher that selects the calls inspected by check().
///
/// A call expression is bound as "call" when its callee is one of the listed
/// sprintf/scanf family functions and its first or second argument is a string
/// literal, which is bound as "format". Calls whose format argument is not a
/// string literal are not matched.
void UnsafeFormatStringCheck::registerMatchers(MatchFinder *Finder) {
// Matches sprintf and scanf family functions in std namespace in C++ and
// globally in C.
auto VulnerableFunctions =
hasAnyName("sprintf", "vsprintf", "scanf", "fscanf", "sscanf", "vscanf",
"vfscanf", "vsscanf", "wscanf", "fwscanf", "swscanf",
"vwscanf", "vfwscanf", "vswscanf");
Finder->addMatcher(
callExpr(callee(functionDecl(VulnerableFunctions,
// NOTE(review): hasParent(translationUnitDecl()) presumably rejects
// declarations nested inside a linkage specification block -- confirm
// that global declarations from C headers included in C++ still match.
anyOf(isInStdNamespace(),
hasParent(translationUnitDecl())))),
// The format string position is not tracked per function; a string literal
// at either argument index 0 or argument index 1 is accepted as the format.
anyOf(hasArgument(0, stringLiteral().bind("format")),
hasArgument(1, stringLiteral().bind("format"))))
Comment on lines +32 to +33

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic is a bit hacky -- it would be more elegant if the checker knew that e.g. the format string of sprintf is always at argument index 1, while the format string of scanf is always at argument index 0. However, code that would confuse this check is wildly incorrect, won't occur in the wild and would produce compiler errors (or at least severe warnings), so this is not a serious issue.

.bind("call"),
this);
}

/// Handles one matched call: reads the bound format string literal, converts
/// it to a narrow string, and emits a warning on the call when the string
/// contains a '%s' conversion without a bound.
///
/// \param Result match result carrying the "call" and "format" bindings.
void UnsafeFormatStringCheck::check(const MatchFinder::MatchResult &Result) {
  const auto *Call = Result.Nodes.getNodeAs<CallExpr>("call");
  const auto *Format = Result.Nodes.getNodeAs<StringLiteral>("format");

  if (!Call || !Format)
    return;

  // The specifier scan operates on narrow characters, so 2-byte and 4-byte
  // literals are transcoded to UTF-8 first. Any other character width leaves
  // FormatString empty, and an empty string never produces a diagnostic.
  std::string FormatString;
  if (Format->getCharByteWidth() == 1) {
    FormatString = Format->getString().str();
  } else if (Format->getCharByteWidth() == 2) {
    convertUTF16ToUTF8String(Format->getBytes(), FormatString);
  } else if (Format->getCharByteWidth() == 4) {
    convertUTF32ToUTF8String(Format->getBytes(), FormatString);
  }

  const auto *Callee = cast<FunctionDecl>(Call->getCalleeDecl());
  StringRef FunctionName = Callee->getName();

  // Every matched scanf variant has "scanf" in its name; the remaining matched
  // functions are sprintf and vsprintf.
  bool IsScanfFamily = FunctionName.contains("scanf");

  if (!hasUnboundedStringSpecifier(FormatString, IsScanfFamily))
    return;

  // The diagnostic is emitted when the temporary builder is destroyed at the
  // end of the statement, so it does not need to be bound to a name.
  diag(Call->getBeginLoc(),
       IsScanfFamily
           ? "format specifier '%%s' without field width may cause buffer overflow; consider using '%%Ns' where N limits input length"
           : "format specifier '%%s' without precision may cause buffer overflow; consider using '%%.Ns' where N limits output length")
      << Call->getSourceRange();
}

/// Scans a printf/scanf style format string for a '%s' conversion that puts no
/// upper bound on the number of characters transferred.
///
/// \param Fmt the format string as narrow characters.
/// \param IsScanfFamily true to apply scanf rules, where a field width bounds
///        the conversion; false to apply sprintf rules, where only a precision
///        bounds it and a field width is ignored.
/// \returns true as soon as one unbounded '%s' conversion is found, false if
///          the string contains none.
bool UnsafeFormatStringCheck::hasUnboundedStringSpecifier(StringRef Fmt,
                                                          bool IsScanfFamily) {
  // Plain range comparison instead of isdigit(): isdigit() is undefined for
  // negative char values, which occur for UTF-8 bytes of transcoded literals.
  const auto IsDigit = [](char C) { return C >= '0' && C <= '9'; };
  const auto IsFlag = [](char C) {
    return C == '-' || C == '+' || C == ' ' || C == '#' || C == '0';
  };
  const auto IsLengthModifier = [](char C) {
    return C == 'h' || C == 'l' || C == 'L' || C == 'z' || C == 'j' ||
           C == 't';
  };

  const size_t N = Fmt.size();
  size_t Pos = 0;
  while ((Pos = Fmt.find('%', Pos)) != StringRef::npos) {
    // A trailing '%' has no conversion after it.
    if (Pos + 1 >= N)
      break;

    // "%%" is a literal percent sign, not a conversion.
    if (Fmt[Pos + 1] == '%') {
      Pos += 2;
      continue;
    }

    size_t SpecPos = Pos + 1;

    // Flags.
    while (SpecPos < N && IsFlag(Fmt[SpecPos]))
      ++SpecPos;

    // Field width: either '*' or a run of digits.
    bool HasFieldWidth = false;
    if (SpecPos < N && Fmt[SpecPos] == '*') {
      HasFieldWidth = true;
      ++SpecPos;
    } else {
      while (SpecPos < N && IsDigit(Fmt[SpecPos])) {
        HasFieldWidth = true;
        ++SpecPos;
      }
    }

    // Precision: '.' followed by '*', by digits, or by nothing. A '.' on its
    // own denotes a precision of zero, so the conversion is bounded as soon as
    // the '.' is present.
    bool HasPrecision = false;
    if (SpecPos < N && Fmt[SpecPos] == '.') {
      HasPrecision = true;
      ++SpecPos;
      if (SpecPos < N && Fmt[SpecPos] == '*') {
        ++SpecPos;
      } else {
        while (SpecPos < N && IsDigit(Fmt[SpecPos]))
          ++SpecPos;
      }
    }

    // Length modifiers.
    while (SpecPos < N && IsLengthModifier(Fmt[SpecPos]))
      ++SpecPos;

    // Only the 's' conversion is examined. For the scanf family a field width
    // bounds it; for the sprintf family only a precision does.
    if (SpecPos < N && Fmt[SpecPos] == 's') {
      const bool IsBounded = IsScanfFamily ? HasFieldWidth : HasPrecision;
      if (!IsBounded)
        return true;
    }

    // Resume the search after the conversion character.
    Pos = SpecPos + 1;
  }

  return false;
}

} // namespace clang::tidy::bugprone
34 changes: 34 additions & 0 deletions clang-tools-extra/clang-tidy/bugprone/UnsafeFormatStringCheck.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//===--- UnsafeFormatStringCheck.h - clang-tidy ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNSAFEFORMATSTRINGCHECK_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNSAFEFORMATSTRINGCHECK_H

#include "../ClangTidyCheck.h"

namespace clang::tidy::bugprone {

/// Detects usage of vulnerable format string functions with unbounded %s
/// specifiers that can cause buffer overflows.
///
/// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone/unsafe-format-string.html
class UnsafeFormatStringCheck : public ClangTidyCheck {
public:
  UnsafeFormatStringCheck(StringRef Name, ClangTidyContext *Context);
  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;

private:
  /// Returns true if \p FormatString contains a '%s' conversion without a
  /// bound. With \p IsScanfFamily set, a field width counts as the bound;
  /// otherwise a precision does.
  bool hasUnboundedStringSpecifier(StringRef FormatString, bool IsScanfFamily);
};

} // namespace clang::tidy::bugprone

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNSAFEFORMATSTRINGCHECK_H
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
.. title:: clang-tidy - bugprone-unsafe-format-string

bugprone-unsafe-format-string
=============================

Detects usage of vulnerable format string functions with unbounded ``%s``
specifiers that can cause buffer overflows.

The check identifies calls to format string functions like ``sprintf``, ``scanf``,
and their variants that use ``%s`` format specifiers without proper limits.
This can lead to buffer overflow vulnerabilities when the input string is longer
than the destination buffer.

Format Specifier Behavior
-------------------------

The check distinguishes between different function families:

**scanf family functions**: Field width limits input length

- ``%s`` - unsafe (no limit)
- ``%99s`` - safe (reads at most 99 characters)

**sprintf family functions**: Precision limits output length

- ``%s`` - unsafe (no limit)
- ``%99s`` - unsafe (minimum width, no maximum)
- ``%.99s`` - safe (outputs at most 99 characters)
- ``%10.99s`` - safe (minimum 10 chars, maximum 99 chars)

Examples
--------

.. code-block:: c

char buffer[100];
const char* input = "user input";

// Unsafe sprintf usage
sprintf(buffer, "%s", input); // No limit
sprintf(buffer, "%99s", input); // Field width is minimum, not maximum

// Safe sprintf usage
sprintf(buffer, "%.99s", input); // Precision limits to 99 chars
sprintf(buffer, "%10.99s", input); // Min 10, max 99 chars

// Unsafe scanf usage
scanf("%s", buffer); // No limit

// Safe scanf usage
scanf("%99s", buffer); // Field width limits to 99 chars

// Safe alternative: use safer functions
snprintf(buffer, sizeof(buffer), "%s", input);

Checked Functions
-----------------

The check detects unsafe format strings in these functions:

**sprintf family** (precision ``.N`` provides safety):

* ``sprintf``, ``vsprintf``

**scanf family** (field width ``N`` provides safety):

* ``scanf``, ``fscanf``, ``sscanf``
* ``vscanf``, ``vfscanf``, ``vsscanf``
* ``wscanf``, ``fwscanf``, ``swscanf``
* ``vwscanf``, ``vfwscanf``, ``vswscanf``

Recommendations
---------------

* For ``sprintf`` family: Use precision specifiers (``%.Ns``) or ``snprintf``
* For ``scanf`` family: Use field width specifiers (``%Ns``)
* Consider using safer string handling functions when possible
Loading