Skip to content

Conversation

@nicovank
Copy link
Contributor

@nicovank nicovank commented Oct 11, 2024

Still work-in-progress, pushed by accident. Will open new PR later on.

@llvmbot
Copy link
Member

llvmbot commented Oct 11, 2024

@llvm/pr-subscribers-clang-tools-extra

@llvm/pr-subscribers-clang-tidy

Author: Nicolas van Kempen (nicovank)

Changes

Summary:

Test Plan:


Patch is 35.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112051.diff

10 Files Affected:

  • (modified) clang-tools-extra/clang-tidy/performance/CMakeLists.txt (+1)
  • (added) clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.cpp (+104)
  • (added) clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.h (+37)
  • (modified) clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp (+3)
  • (modified) clang-tools-extra/docs/ReleaseNotes.rst (+5)
  • (modified) clang-tools-extra/docs/clang-tidy/checks/list.rst (+1)
  • (added) clang-tools-extra/docs/clang-tidy/checks/performance/expensive-flat-container-operation.rst (+82)
  • (removed) clang-tools-extra/test/.clang-format (-1)
  • (added) clang-tools-extra/test/clang-tidy/checkers/performance/expensive-flat-container-operation-warn-outside-loops.cpp (+478)
  • (added) clang-tools-extra/test/clang-tidy/checkers/performance/expensive-flat-container-operation.cpp (+91)
diff --git a/clang-tools-extra/clang-tidy/performance/CMakeLists.txt b/clang-tools-extra/clang-tidy/performance/CMakeLists.txt
index c6e547c5089fb0..2def785be0465b 100644
--- a/clang-tools-extra/clang-tidy/performance/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/performance/CMakeLists.txt
@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
 add_clang_library(clangTidyPerformanceModule STATIC
   AvoidEndlCheck.cpp
   EnumSizeCheck.cpp
+  ExpensiveFlatContainerOperationCheck.cpp
   FasterStringFindCheck.cpp
   ForRangeCopyCheck.cpp
   ImplicitConversionInLoopCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.cpp b/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.cpp
new file mode 100644
index 00000000000000..c06cbfd5d4271c
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.cpp
@@ -0,0 +1,104 @@
+//===--- ExpensiveFlatContainerOperationCheck.cpp - clang-tidy ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ExpensiveFlatContainerOperationCheck.h"
+
+#include "../utils/OptionsUtils.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang::tidy::performance {
+
+namespace {
+// TODO: folly::heap_vector_map?
+const auto DefaultFlatContainers =
+    "::std::flat_map; ::std::flat_multimap;"
+    "::std::flat_set; ::std::flat_multiset;"
+    "::boost::container::flat_map; ::boost::container::flat_multimap;"
+    "::boost::container::flat_set; ::boost::container::flat_multiset;"
+    "::folly::sorted_vector_map; ::folly::sorted_vector_set;";
+} // namespace
+
+ExpensiveFlatContainerOperationCheck::ExpensiveFlatContainerOperationCheck(
+    StringRef Name, ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context),
+      WarnOutsideLoops(Options.get("WarnOutsideLoops", false)),
+      FlatContainers(utils::options::parseStringList(
+          Options.get("FlatContainers", DefaultFlatContainers))) {}
+
+void ExpensiveFlatContainerOperationCheck::registerMatchers(
+    MatchFinder *Finder) {
+  const auto OnSoughtFlatContainer =
+      callee(cxxMethodDecl(ofClass(cxxRecordDecl(hasAnyName(FlatContainers)))));
+
+  // Any emplace-style or insert_or_assign call is a single-element operation.
+  const auto HasEmplaceOrInsertorAssignCall = callee(cxxMethodDecl(hasAnyName(
+      "emplace", "emplace_hint", "try_emplace", "insert_or_assign")));
+
+  // Erase calls with a single argument are single-element operations.
+  const auto HasEraseCallWithOneArgument = cxxMemberCallExpr(
+      argumentCountIs(1), callee(cxxMethodDecl(hasName("erase"))));
+
+  // TODO: insert.
+
+  const auto SoughtFlatContainerOperation =
+      cxxMemberCallExpr(
+          OnSoughtFlatContainer,
+          anyOf(HasEmplaceOrInsertorAssignCall, HasEraseCallWithOneArgument))
+          .bind("call");
+
+  if (WarnOutsideLoops) {
+    Finder->addMatcher(SoughtFlatContainerOperation, this);
+    return;
+  }
+
+  Finder->addMatcher(
+      mapAnyOf(whileStmt, forStmt, cxxForRangeStmt, doStmt)
+          .with(stmt(
+              stmt().bind("loop"),
+              forEachDescendant(cxxMemberCallExpr(
+                  SoughtFlatContainerOperation,
+                  // Common false positive: variable is declared directly within
+                  // the loop. Note that this won't catch cases where the
+                  // container is a member of a class declared in the loop.
+                  // More robust lifetime analysis would be required to catch
+                  // those cases, but this should filter out the most common
+                  // false positives.
+                  unless(onImplicitObjectArgument(declRefExpr(hasDeclaration(
+                      decl(hasAncestor(stmt(equalsBoundNode("loop")))))))))))),
+      this);
+}
+
+void ExpensiveFlatContainerOperationCheck::check(
+    const MatchFinder::MatchResult &Result) {
+  const auto *Call = Result.Nodes.getNodeAs<CXXMemberCallExpr>("call");
+
+  diag(Call->getExprLoc(),
+       "Single element operations are expensive for flat containers. "
+       "Consider using available bulk operations instead, aggregating values "
+       "beforehand if needed.");
+}
+
+void ExpensiveFlatContainerOperationCheck::storeOptions(
+    ClangTidyOptions::OptionMap &Opts) {
+  Options.store(Opts, "WarnOutsideLoops", WarnOutsideLoops);
+  Options.store(Opts, "FlatContainers",
+                utils::options::serializeStringList(FlatContainers));
+}
+
+bool ExpensiveFlatContainerOperationCheck::isLanguageVersionSupported(
+    const LangOptions &LangOpts) const {
+  return LangOpts.CPlusPlus;
+}
+
+std::optional<TraversalKind>
+ExpensiveFlatContainerOperationCheck::getCheckTraversalKind() const {
+  return TK_IgnoreUnlessSpelledInSource;
+}
+} // namespace clang::tidy::performance
diff --git a/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.h b/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.h
new file mode 100644
index 00000000000000..b64ea87b573ffd
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/performance/ExpensiveFlatContainerOperationCheck.h
@@ -0,0 +1,37 @@
+//===--- ExpensiveFlatContainerOperationCheck.h - clang-tidy ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_EXPENSIVEFLATCONTAINEROPERATIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_EXPENSIVEFLATCONTAINEROPERATIONCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang::tidy::performance {
+
+/// Warns when calling an O(N) operation on a flat container.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/performance/expensive-flat-container-operation.html
+class ExpensiveFlatContainerOperationCheck : public ClangTidyCheck {
+public:
+  ExpensiveFlatContainerOperationCheck(StringRef Name,
+                                       ClangTidyContext *Context);
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+  void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
+  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override;
+  std::optional<TraversalKind> getCheckTraversalKind() const override;
+
+private:
+  bool WarnOutsideLoops;
+  std::vector<StringRef> FlatContainers;
+};
+
+} // namespace clang::tidy::performance
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_EXPENSIVEFLATCONTAINEROPERATIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp b/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp
index 9e0fa6f88b36a0..f6b0ed02389e36 100644
--- a/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp
@@ -11,6 +11,7 @@
 #include "../ClangTidyModuleRegistry.h"
 #include "AvoidEndlCheck.h"
 #include "EnumSizeCheck.h"
+#include "ExpensiveFlatContainerOperationCheck.h"
 #include "FasterStringFindCheck.h"
 #include "ForRangeCopyCheck.h"
 #include "ImplicitConversionInLoopCheck.h"
@@ -37,6 +38,8 @@ class PerformanceModule : public ClangTidyModule {
   void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
     CheckFactories.registerCheck<AvoidEndlCheck>("performance-avoid-endl");
     CheckFactories.registerCheck<EnumSizeCheck>("performance-enum-size");
+    CheckFactories.registerCheck<ExpensiveFlatContainerOperationCheck>(
+        "performance-expensive-flat-container-operation");
     CheckFactories.registerCheck<FasterStringFindCheck>(
         "performance-faster-string-find");
     CheckFactories.registerCheck<ForRangeCopyCheck>(
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 3f7bcde1eb3014..db396fd61ea636 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -121,6 +121,11 @@ New checks
   Gives warnings for tagged unions, where the number of tags is
   different from the number of data members inside the union.
 
+- New :doc:`performance-expensive-flat-container-operation
+  <clang-tidy/checks/performance/expensive-flat-container-operation>` check.
+
+  Warns when calling an O(N) operation on a flat container.
+
 - New :doc:`portability-template-virtual-member-function
   <clang-tidy/checks/portability/template-virtual-member-function>` check.
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 0082234f5ed31b..eaaf23bd5c535b 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -328,6 +328,7 @@ Clang-Tidy Checks
    :doc:`openmp-use-default-none <openmp/use-default-none>`,
    :doc:`performance-avoid-endl <performance/avoid-endl>`, "Yes"
    :doc:`performance-enum-size <performance/enum-size>`,
+   :doc:`performance-expensive-flat-container-operation <performance/expensive-flat-container-operation>`,
    :doc:`performance-faster-string-find <performance/faster-string-find>`, "Yes"
    :doc:`performance-for-range-copy <performance/for-range-copy>`, "Yes"
    :doc:`performance-implicit-conversion-in-loop <performance/implicit-conversion-in-loop>`,
diff --git a/clang-tools-extra/docs/clang-tidy/checks/performance/expensive-flat-container-operation.rst b/clang-tools-extra/docs/clang-tidy/checks/performance/expensive-flat-container-operation.rst
new file mode 100644
index 00000000000000..259c89fdcd7678
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/performance/expensive-flat-container-operation.rst
@@ -0,0 +1,82 @@
+.. title:: clang-tidy - performance-expensive-flat-container-operation
+
+performance-expensive-flat-container-operation
+==============================================
+
+Warns when calling an O(N) operation on a flat container.
+
+This check operates on vector-based flat containers such as
+``std::flat_(map|set)``, ``boost::container::flat_(map|set)``, or
+``folly::sorted_vector_(map|set)``. While these containers' behavior is
+identical to usual maps/sets, the insert and erase operations are O(N). This
+check flags such operations, which are a common bad pattern, notably in loops.
+
+Below is an example of a typical bad pattern: inserting some values one by one
+into a flat container. This is O(N^2), as the container will need to shift
+elements right after each insertion.
+
+.. code-block:: c++
+
+    std::random_device generator;
+    std::uniform_int_distribution<int> distribution;
+
+    std::flat_set<int> set;
+    for (auto i = 0; i < N; ++i) {
+        set.insert(distribution(generator));
+    }
+
+The code above can be improved using a temporary vector, later inserting all
+values at once into the ``flat_set``.
+
+.. code-block:: c++
+
+    std::vector<int> temporary;
+    for (auto i = 0; i < N; ++i) {
+        temporary.push_back(distribution(generator));
+    }
+    std::flat_set<int> set(temporary.begin(), temporary.end());
+
+    // Or even better when possible, moving the temporary container:
+    // std::flat_set<int> set(std::move(temporary));
+
+For expensive-to-copy objects, ``std::move_iterator`` should be used.
+When possible, the temporary container can be moved directly into the flat
+container. When it is known that the inserted keys are sorted and uniqued, such
+as cases when they come from another flat container, ``std::sorted_unique`` can
+be used when inserting to save more cycles. Finally, if order is not important,
+hash-based containers can provide better performance.
+
+Limitations
+-----------
+
+This check is not capable of flagging insertions into a map via ``operator[]``,
+as it is not possible at compile-time to know whether it will trigger an
+insertion or a simple lookup. These cases have to be detected using dynamic
+profiling.
+
+This check is also of course not able to detect single element operations in
+loops crossing function boundaries. A more robust static analysis would be
+necessary to detect these cases.
+
+Options
+-------
+
+.. option:: WarnOutsideLoops
+
+    When disabled, the check will only warn when the single element operation is
+    directly enclosed by a loop, hence directly actionable. At the very least,
+    these cases can be improved using some temporary container.
+
+    When enabled, all insert and erase operations will be flagged.
+
+    Default is `false`.
+
+.. option:: FlatContainers
+
+    A semicolon-separated list of flat containers, with ``insert``, ``emplace``
+    and/or ``erase`` operations.
+
+    Default includes ``std::flat_(map|set)``, ``flat_multi(map|set)``,
+    ``boost::container::flat_(map|set)``,
+    ``boost::container::flat_multi(map|set)``, and
+    ``folly::sorted_vector_(map|set)``.
diff --git a/clang-tools-extra/test/.clang-format b/clang-tools-extra/test/.clang-format
deleted file mode 100644
index e3845288a2aece..00000000000000
--- a/clang-tools-extra/test/.clang-format
+++ /dev/null
@@ -1 +0,0 @@
-DisableFormat: true
diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/expensive-flat-container-operation-warn-outside-loops.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance/expensive-flat-container-operation-warn-outside-loops.cpp
new file mode 100644
index 00000000000000..0a258a0e48da1f
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/performance/expensive-flat-container-operation-warn-outside-loops.cpp
@@ -0,0 +1,478 @@
+// RUN: %check_clang_tidy %s performance-expensive-flat-container-operation %t -- \
+// RUN:   -config="{CheckOptions: \
+// RUN:             [{key: performance-expensive-flat-container-operation.WarnOutsideLoops, \
+// RUN:               value: true}] \
+// RUN:             }"
+
+#include <stddef.h>
+
+namespace std {
+template <class T1, class T2> struct pair { pair(T1, T2); };
+
+template <class T> struct initializer_list {};
+
+template <class T> struct remove_reference { typedef T type; };
+template <class T> struct remove_reference<T &> { typedef T type; };
+template <class T> struct remove_reference<T &&> { typedef T type; };
+
+template <class T>
+typename std::remove_reference<T>::type &&move(T &&) noexcept;
+
+struct sorted_unique_t {};
+inline constexpr sorted_unique_t sorted_unique{};
+struct sorted_equivalent_t {};
+inline constexpr sorted_equivalent_t sorted_equivalent{};
+
+template <class Key, class T> struct flat_map {
+  using key_type = Key;
+  using mapped_type = T;
+  using value_type = pair<key_type, mapped_type>;
+  using reference = pair<const key_type &, mapped_type &>;
+  using const_reference = pair<const key_type &, const mapped_type &>;
+  using size_type = size_t;
+  using iterator = struct {};
+  using const_iterator = struct {};
+
+  const_iterator begin() const noexcept;
+  const_iterator end() const noexcept;
+
+  template <class... Args> pair<iterator, bool> emplace(Args &&...args);
+  template <class... Args>
+  iterator emplace_hint(const_iterator position, Args &&...args);
+
+  pair<iterator, bool> insert(const value_type &x);
+  pair<iterator, bool> insert(value_type &&x);
+  iterator insert(const_iterator position, const value_type &x);
+  iterator insert(const_iterator position, value_type &&x);
+
+  // template <class P> pair<iterator, bool> insert(P &&x);
+  // template <class P> iterator insert(const_iterator position, P &&);
+  template <class InputIter> void insert(InputIter first, InputIter last);
+  template <class InputIter>
+  void insert(sorted_unique_t, InputIter first, InputIter last);
+  template <class R> void insert_range(R &&rg);
+
+  void insert(initializer_list<value_type> il);
+  void insert(sorted_unique_t s, initializer_list<value_type> il);
+
+  template <class... Args>
+  pair<iterator, bool> try_emplace(const key_type &k, Args &&...args);
+  template <class... Args>
+  pair<iterator, bool> try_emplace(key_type &&k, Args &&...args);
+  template <class K, class... Args>
+  pair<iterator, bool> try_emplace(K &&k, Args &&...args);
+  template <class... Args>
+  iterator try_emplace(const_iterator hint, const key_type &k, Args &&...args);
+  template <class... Args>
+  iterator try_emplace(const_iterator hint, key_type &&k, Args &&...args);
+  template <class K, class... Args>
+  iterator try_emplace(const_iterator hint, K &&k, Args &&...args);
+  template <class M>
+  pair<iterator, bool> insert_or_assign(const key_type &k, M &&obj);
+  template <class M>
+  pair<iterator, bool> insert_or_assign(key_type &&k, M &&obj);
+  template <class K, class M>
+  pair<iterator, bool> insert_or_assign(K &&k, M &&obj);
+  template <class M>
+  iterator insert_or_assign(const_iterator hint, const key_type &k, M &&obj);
+  template <class M>
+  iterator insert_or_assign(const_iterator hint, key_type &&k, M &&obj);
+  template <class K, class M>
+  iterator insert_or_assign(const_iterator hint, K &&k, M &&obj);
+
+  iterator erase(iterator position);
+  iterator erase(const_iterator position);
+  size_type erase(const key_type &x);
+  template <class K> size_type erase(K &&x);
+  iterator erase(const_iterator first, const_iterator last);
+};
+
+template <class Key, class T> struct flat_multimap {
+  using key_type = Key;
+  using mapped_type = T;
+  using value_type = pair<key_type, mapped_type>;
+  using reference = pair<const key_type &, mapped_type &>;
+  using const_reference = pair<const key_type &, const mapped_type &>;
+  using size_type = size_t;
+  using iterator = struct {};
+  using const_iterator = struct {};
+
+  const_iterator begin() const noexcept;
+  const_iterator end() const noexcept;
+
+  template <class... Args> iterator emplace(Args &&...args);
+  template <class... Args>
+  iterator emplace_hint(const_iterator position, Args &&...args);
+
+  iterator insert(const value_type &x);
+  iterator insert(value_type &&x);
+  iterator insert(const_iterator position, const value_type &x);
+  iterator insert(const_iterator position, value_type &&x);
+
+  // template <class P> iterator insert(P &&x);
+  // template <class P> iterator insert(const_iterator position, P &&);
+  template <class InputIter> void insert(InputIter first, InputIter last);
+  template <class InputIter>
+  void insert(sorted_equivalent_t, InputIter first, InputIter last);
+  template <class R> void insert_range(R &&rg);
+
+  void insert(initializer_list<value_type> il);
+  void insert(sorted_equivalent_t s, initializer_list<value_type> il);
+
+  iterator erase(iterator position);
+  iterator erase(const_iterator position);
+  size_type erase(const key_type &x);
+  template <class K> size_type erase(K &&x);
+  iterator erase(const_iterator first, const_iterator last);
+
+  void swap(flat_multimap &) noexcept;
+  void clear() noexcept;
+};
+
+template <class Key> struct flat_set {
+  using key_type = Key;
+  using value_type = Key;
+  using reference = value_type &;
+  using const_reference = const value_type &;
+  using size_type = size_t;
+  using iterator = struct {};
+  using const_iterator = struct {};
+
+  const_iterator begin() const noexcept;
+  const_iterator end() const noexcept;
+
+  template <class... Args> pair<iterator, bool> emplace(Args &&...args);
+  template <class... Args>
+  iterator emplace_hint(const_iterator position, Args &&...args);
+
+  pair<iterator, bool> insert(const value_type &x);
+  pair<iterator, bool> insert(value_type &&x);
+  template <class K> pair<iterator, bool> insert(K &&x);
+  iterator insert(const_iterator position, const value_type &x);
+  iterator insert(const_iterator position, value_type &&x);
+  template <class K> iterator insert(const_iterator hint, K &&x);
+
+  template <class InputIter> void insert(InputIter first, InputIter last);
+  template <class InputIter>
+  void insert(sorted_unique_t, InputIter first, InputIter last);
+  template <class R> void ...
[truncated]

@nicovank nicovank deleted the pr112051 branch October 11, 2024 21:46
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants