-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[clang-tidy] Add readability-string-view-substr check #120055
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 12 commits
8b2dc9a
64931c6
9ac11d9
5b81a4d
7941cb6
aa01bc0
f60ec8e
9a7be8e
e48db34
5ffdd87
0617301
afabddb
e4dc98f
b8d4702
1736884
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,217 @@ | ||
| //===--- StringViewSubstrCheck.cpp - clang-tidy------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "StringViewSubstrCheck.h" | ||
| #include "clang/AST/ASTContext.h" | ||
| #include "clang/ASTMatchers/ASTMatchFinder.h" | ||
| #include "clang/Lex/Lexer.h" | ||
|
|
||
| using namespace clang::ast_matchers; | ||
|
|
||
| namespace clang::tidy::readability { | ||
|
|
||
| void StringViewSubstrCheck::registerMatchers(MatchFinder *Finder) { | ||
| // Match string_view type | ||
| const auto StringViewDecl = recordDecl(hasName("::std::basic_string_view")); | ||
| const auto IsStringView = qualType( | ||
| hasUnqualifiedDesugaredType(recordType(hasDeclaration(StringViewDecl)))); | ||
|
|
||
| // Match substr() call on string_views | ||
| const auto SubstrCall = cxxMemberCallExpr( | ||
| callee(memberExpr(hasDeclaration(cxxMethodDecl(hasName("substr"))))), | ||
| on(expr(hasType(IsStringView)).bind("source"))); | ||
|
|
||
| // Match assignment to string_view's substr, but exclude assignments in | ||
| // expressions | ||
| Finder->addMatcher( | ||
| stmt(cxxOperatorCallExpr( | ||
| unless(isInTemplateInstantiation()), | ||
| hasOverloadedOperatorName("="), | ||
| hasArgument(0, expr(hasType(IsStringView)).bind("target")), | ||
| hasArgument(1, SubstrCall.bind("substr_call"))) | ||
| .bind("assignment"), | ||
| // Exclude assignments used in larger expressions | ||
| unless(hasAncestor(varDecl())), unless(hasAncestor(callExpr()))), | ||
| this); | ||
| } | ||
|
|
||
| void StringViewSubstrCheck::check(const MatchFinder::MatchResult &Result) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please try to do as much of the checking inside the matchers. I think everything can be done inside the matcher.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. all other feedback addressed, and pushed, will go and kind of mentally restart this, to get the most logic moved to matchers() 🙈 |
||
| const auto *Assignment = | ||
| Result.Nodes.getNodeAs<CXXOperatorCallExpr>("assignment"); | ||
| const auto *Target = Result.Nodes.getNodeAs<Expr>("target"); | ||
| const auto *Source = Result.Nodes.getNodeAs<Expr>("source"); | ||
| const auto *SubstrCall = | ||
| Result.Nodes.getNodeAs<CXXMemberCallExpr>("substr_call"); | ||
|
|
||
| if (!Assignment || !Target || !Source || !SubstrCall) { | ||
| return; | ||
| } | ||
|
|
||
| // Get the DeclRefExpr for the target and source | ||
| const auto *TargetDRE = dyn_cast<DeclRefExpr>(Target->IgnoreParenImpCasts()); | ||
| const auto *SourceDRE = dyn_cast<DeclRefExpr>(Source->IgnoreParenImpCasts()); | ||
|
|
||
| if (!TargetDRE || !SourceDRE) { | ||
| return; | ||
| } | ||
|
|
||
| const Expr *StartArg = SubstrCall->getArg(0); | ||
| const Expr *LengthArg = | ||
| SubstrCall->getNumArgs() > 1 ? SubstrCall->getArg(1) : nullptr; | ||
|
|
||
| std::string StartText = | ||
| Lexer::getSourceText( | ||
| CharSourceRange::getTokenRange(StartArg->getSourceRange()), | ||
| *Result.SourceManager, Result.Context->getLangOpts()) | ||
| .str(); | ||
|
|
||
| const bool IsSameVar = (TargetDRE->getDecl() == SourceDRE->getDecl()); | ||
|
|
||
| // Case 1: Check for remove_prefix pattern | ||
| if (!LengthArg || isa<CXXDefaultArgExpr>(LengthArg)) { | ||
| if (IsSameVar) { | ||
| std::string Replacement = TargetDRE->getNameInfo().getAsString() + | ||
| ".remove_prefix(" + StartText + ")"; | ||
| diag(Assignment->getBeginLoc(), "prefer 'remove_prefix' over 'substr' " | ||
| "for removing characters from the start") | ||
| << FixItHint::CreateReplacement(Assignment->getSourceRange(), | ||
| Replacement); | ||
| } | ||
| return; | ||
| } | ||
|
|
||
| // Check if StartArg resolves to 0 | ||
| bool IsZero = false; | ||
|
|
||
| // Handle cases where StartArg is an integer literal | ||
| if (const auto *IL = | ||
| dyn_cast<IntegerLiteral>(StartArg->IgnoreParenImpCasts())) { | ||
| IsZero = IL->getValue() == 0; | ||
| } | ||
|
|
||
| // Optional: Handle cases where StartArg evaluates to zero | ||
| if (!IsZero) { | ||
| // Add logic for other constant evaluation (e.g., constexpr evaluation) | ||
| const auto &EvalResult = StartArg->EvaluateKnownConstInt(*Result.Context); | ||
| IsZero = !EvalResult.isNegative() && EvalResult == 0; | ||
| } | ||
|
|
||
| // If StartArg resolves to 0, handle the case | ||
| if (IsZero) { | ||
hjanuschka marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| bool isFullCopy = false; | ||
hjanuschka marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| // Check for length() or length() - expr pattern | ||
| if (const auto *BinOp = dyn_cast<BinaryOperator>(LengthArg)) { | ||
| if (BinOp->getOpcode() == BO_Sub) { | ||
| const Expr *LHS = BinOp->getLHS(); | ||
| const Expr *RHS = BinOp->getRHS(); | ||
|
|
||
| // Check for length() call | ||
| if (const auto *LengthCall = dyn_cast<CXXMemberCallExpr>(LHS)) { | ||
| if (const auto *LengthMethod = | ||
| dyn_cast<CXXMethodDecl>(LengthCall->getDirectCallee())) { | ||
| if (LengthMethod->getName() == "length" || | ||
| LengthMethod->getName() == "size") { | ||
| const Expr *LengthObject = | ||
| LengthCall->getImplicitObjectArgument(); | ||
| const auto *LengthDRE = | ||
| dyn_cast<DeclRefExpr>(LengthObject->IgnoreParenImpCasts()); | ||
|
|
||
| if (!LengthDRE || LengthDRE->getDecl() != SourceDRE->getDecl()) { | ||
| return; | ||
| } | ||
|
|
||
| // Check if RHS is 0 or evaluates to 0 | ||
| bool IsZero = false; | ||
| if (const auto *IL = | ||
| dyn_cast<IntegerLiteral>(RHS->IgnoreParenImpCasts())) { | ||
| IsZero = IL->getValue() == 0; | ||
| } | ||
|
|
||
| if (IsZero) { | ||
| isFullCopy = true; | ||
| } else { | ||
| // remove_suffix case (works for both self and different vars) | ||
| std::string RHSText = | ||
| Lexer::getSourceText( | ||
| CharSourceRange::getTokenRange(RHS->getSourceRange()), | ||
| *Result.SourceManager, Result.Context->getLangOpts()) | ||
| .str(); | ||
|
|
||
| std::string TargetName = TargetDRE->getNameInfo().getAsString(); | ||
| std::string SourceName = SourceDRE->getNameInfo().getAsString(); | ||
|
|
||
| std::string Replacement = | ||
| IsSameVar | ||
| ? (TargetName + ".remove_suffix(" + RHSText + ")") | ||
| : (TargetName + " = " + SourceName + ";\n" + | ||
| TargetName + ".remove_suffix(" + RHSText + ")"); | ||
|
|
||
| diag(Assignment->getBeginLoc(), | ||
| IsSameVar | ||
| ? "prefer 'remove_suffix' over 'substr' for removing " | ||
| "characters from the end" | ||
| : "prefer assignment and remove_suffix over substr") | ||
| << FixItHint::CreateReplacement( | ||
| Assignment->getSourceRange(), Replacement); | ||
| return; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } else if (const auto *LengthCall = | ||
| dyn_cast<CXXMemberCallExpr>(LengthArg)) { | ||
| // Handle direct length() or size() call | ||
| if (const auto *LengthMethod = | ||
| dyn_cast<CXXMethodDecl>(LengthCall->getDirectCallee())) { | ||
| if (LengthMethod->getName() == "length" || | ||
| LengthMethod->getName() == "size") { | ||
| const Expr *LengthObject = LengthCall->getImplicitObjectArgument(); | ||
| const auto *LengthDRE = | ||
| dyn_cast<DeclRefExpr>(LengthObject->IgnoreParenImpCasts()); | ||
|
|
||
| if (LengthDRE && LengthDRE->getDecl() == SourceDRE->getDecl()) { | ||
| isFullCopy = true; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| if (isFullCopy) { | ||
| if (IsSameVar) { | ||
| // Remove redundant self-copy, including the semicolon | ||
| SourceLocation EndLoc = Assignment->getEndLoc(); | ||
| while (EndLoc.isValid()) { | ||
| const char *endPtr = Result.SourceManager->getCharacterData(EndLoc); | ||
| if (*endPtr == ';') | ||
| break; | ||
| EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager, | ||
| Result.Context->getLangOpts()); | ||
| } | ||
| if (EndLoc.isValid()) { | ||
| diag(Assignment->getBeginLoc(), "redundant self-copy") | ||
| << FixItHint::CreateRemoval(CharSourceRange::getCharRange( | ||
| Assignment->getBeginLoc(), | ||
| EndLoc.getLocWithOffset( | ||
| 1))); // +1 to include the semicolon. | ||
| } | ||
| } else { | ||
| // Simplify copy between different variables | ||
| std::string Replacement = TargetDRE->getNameInfo().getAsString() + | ||
| " = " + | ||
| SourceDRE->getNameInfo().getAsString(); | ||
| diag(Assignment->getBeginLoc(), "prefer direct copy over substr") | ||
| << FixItHint::CreateReplacement(Assignment->getSourceRange(), | ||
| Replacement); | ||
| } | ||
| return; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| } // namespace clang::tidy::readability | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| //===--- StringViewSubstrCheck.h - clang-tidy---------------------*- C++ | ||
| //-*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STRINGVIEWSUBSTRCHECK_H | ||
| #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STRINGVIEWSUBSTRCHECK_H | ||
|
|
||
| #include "../ClangTidyCheck.h" | ||
|
|
||
| namespace clang::tidy::readability { | ||
|
|
||
| /// Finds string_view substr() calls that can be replaced with remove_prefix() | ||
| /// or remove_suffix(). | ||
| /// | ||
| /// For the user-facing documentation see: | ||
| /// https://clang.llvm.org/extra/clang-tidy/checks/readability/string-view-substr.html | ||
| /// | ||
| /// The check matches two patterns: | ||
| /// sv = sv.substr(N) -> sv.remove_prefix(N) | ||
| /// sv = sv.substr(0, sv.length() - N) -> sv.remove_suffix(N) | ||
hjanuschka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| /// sv = sv.substr(0, sv.length()) -> // Remove redundant self-copy | ||
| /// sv1 = sv2.substr(0, sv2.length()) -> sv1 = sv2 | ||
| /// sv1 = sv2.substr(0, sv2.length() - N) -> sv1 = sv2; sv1.remove_suffix(N) | ||
| /// | ||
| /// These replacements make the intent clearer and are more efficient as they | ||
| /// modify the string_view in place rather than creating a new one. | ||
| class StringViewSubstrCheck : public ClangTidyCheck { | ||
| public: | ||
| StringViewSubstrCheck(StringRef Name, ClangTidyContext *Context) | ||
| : ClangTidyCheck(Name, Context) {} | ||
| void registerMatchers(ast_matchers::MatchFinder *Finder) override; | ||
| void check(const ast_matchers::MatchFinder::MatchResult &Result) override; | ||
| bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { | ||
| return LangOpts.CPlusPlus17; | ||
| } | ||
| }; | ||
hjanuschka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| } // namespace clang::tidy::readability | ||
|
|
||
| #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STRINGVIEWSUBSTRCHECK_H | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| .. title:: clang-tidy - readability-string-view-substr | ||
|
|
||
| readability-string-view-substr | ||
| ============================== | ||
|
|
||
| Finds ``string_view substr()`` calls that can be replaced with clearer alternatives | ||
| using ``remove_prefix()`` or ``remove_suffix()``. | ||
|
|
||
| The check suggests the following transformations: | ||
|
|
||
| =========================================== ======================================= | ||
| Expression Replacement | ||
| =========================================== ======================================= | ||
| ``sv = sv.substr(n)`` ``sv.remove_prefix(n)`` | ||
| ``sv = sv.substr(0, sv.length()-n)`` ``sv.remove_suffix(n)`` | ||
| ``sv = sv.substr(0, sv.length())`` Redundant self-copy | ||
| ``sv1 = sv.substr(0, sv.length())`` ``sv1 = sv`` | ||
| ``sv1 = sv2.substr(0, sv2.length()-n)`` ``sv1 = sv2;`` ``sv1.remove_suffix(n)`` | ||
| =========================================== ======================================= |
Uh oh!
There was an error while loading. Please reload this page.