Skip to content

Conversation

@vitalybuka
Copy link
Collaborator

This commit introduces SpecialCaseList::Match, a small type (an alias for
std::pair<StringRef, unsigned>) that holds the matched rule and its line
number. This simplifies the match methods by allowing them to return a
single value instead of invoking a callback.

Created using spr 1.3.7
@llvmbot
Copy link
Member

llvmbot commented Nov 1, 2025

@llvm/pr-subscribers-llvm-support

Author: Vitaly Buka (vitalybuka)

Changes

This commit introduces SpecialCaseList::Match, a small type (an alias for
std::pair<StringRef, unsigned>) that holds the matched rule and its line
number. This simplifies the match methods by allowing them to return a
single value instead of invoking a callback.


Full diff: https://github.com/llvm/llvm-project/pull/165943.diff

2 Files Affected:

  • (modified) llvm/include/llvm/Support/SpecialCaseList.h (+9-15)
  • (modified) llvm/lib/Support/SpecialCaseList.cpp (+36-35)
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index cb8e568de02e0..5ed7adeaf6c92 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -126,15 +126,16 @@ class SpecialCaseList {
   SpecialCaseList &operator=(SpecialCaseList const &) = delete;
 
 private:
+  using Match = std::pair<StringRef, unsigned>;
+  static constexpr Match NotMatched = {"", 0};
+
   // Lagacy v1 matcher.
   class RegexMatcher {
   public:
     LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
     LLVM_ABI void preprocess(bool BySize);
 
-    LLVM_ABI void
-    match(StringRef Query,
-          llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
+    LLVM_ABI Match match(StringRef Query) const;
 
     struct Reg {
       Reg(StringRef Name, unsigned LineNo, Regex &&Rg)
@@ -152,9 +153,7 @@ class SpecialCaseList {
     LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
     LLVM_ABI void preprocess(bool BySize);
 
-    LLVM_ABI void
-    match(StringRef Query,
-          llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
+    LLVM_ABI Match match(StringRef Query) const;
 
     struct Glob {
       Glob(StringRef Name, unsigned LineNo, GlobPattern &&Pattern)
@@ -168,11 +167,10 @@ class SpecialCaseList {
 
     RadixTree<iterator_range<StringRef::const_iterator>,
               RadixTree<iterator_range<StringRef::const_reverse_iterator>,
-                        SmallVector<const GlobMatcher::Glob *, 1>>>
+                        SmallVector<int, 1>>>
         PrefixSuffixToGlob;
 
-    RadixTree<iterator_range<StringRef::const_iterator>,
-              SmallVector<const GlobMatcher::Glob *, 1>>
+    RadixTree<iterator_range<StringRef::const_iterator>, SmallVector<int, 1>>
         SubstrToGlob;
   };
 
@@ -184,14 +182,10 @@ class SpecialCaseList {
     LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
     LLVM_ABI void preprocess(bool BySize);
 
-    LLVM_ABI void
-    match(StringRef Query,
-          llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
+    LLVM_ABI Match match(StringRef Query) const;
 
     LLVM_ABI bool matchAny(StringRef Query) const {
-      bool R = false;
-      match(Query, [&](StringRef, unsigned) { R = true; });
-      return R;
+      return match(Query) != NotMatched;
     }
 
     std::variant<RegexMatcher, GlobMatcher> M;
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 246d90cce3a43..8e6e9f34a73f3 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -15,11 +15,13 @@
 
 #include "llvm/Support/SpecialCaseList.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <limits>
 #include <memory>
@@ -63,12 +65,12 @@ void SpecialCaseList::RegexMatcher::preprocess(bool BySize) {
   }
 }
 
-void SpecialCaseList::RegexMatcher::match(
-    StringRef Query,
-    llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
+SpecialCaseList::Match
+SpecialCaseList::RegexMatcher::match(StringRef Query) const {
   for (const auto &R : reverse(RegExes))
     if (R.Rg.match(Query))
-      return Cb(R.Name, R.LineNo);
+      return {R.Name, R.LineNo};
+  return NotMatched;
 }
 
 Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern,
@@ -90,7 +92,7 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
     });
   }
 
-  for (const auto &G : reverse(Globs)) {
+  for (const auto &[Idx, G] : enumerate(Globs)) {
     StringRef Prefix = G.Pattern.prefix();
     StringRef Suffix = G.Pattern.suffix();
 
@@ -102,26 +104,29 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
         // But only if substring is not empty. Searching this tree is more
         // expensive.
         auto &V = SubstrToGlob.emplace(Substr).first->second;
-        V.emplace_back(&G);
+        V.emplace_back(Idx);
         continue;
       }
     }
 
     auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second;
     auto &V = SToGlob.emplace(reverse(Suffix)).first->second;
-    V.emplace_back(&G);
+    V.emplace_back(Idx);
   }
 }
 
-void SpecialCaseList::GlobMatcher::match(
-    StringRef Query,
-    llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
+SpecialCaseList::Match
+SpecialCaseList::GlobMatcher::match(StringRef Query) const {
+  int Best = -1;
   if (!PrefixSuffixToGlob.empty()) {
     for (const auto &[_, SToGlob] : PrefixSuffixToGlob.find_prefixes(Query)) {
       for (const auto &[_, V] : SToGlob.find_prefixes(reverse(Query))) {
-        for (const auto *G : V) {
-          if (G->Pattern.match(Query)) {
-            Cb(G->Name, G->LineNo);
+        for (int Idx : reverse(V)) {
+          if (Best > Idx)
+            break;
+          const GlobMatcher::Glob &G = Globs[Idx];
+          if (G.Pattern.match(Query)) {
+            Best = Idx;
             // As soon as we find a match in the vector, we can break for this
             // vector, since the globs are already sorted by priority within the
             // prefix group. However, we continue searching other prefix groups
@@ -138,9 +143,12 @@ void SpecialCaseList::GlobMatcher::match(
     // possibilities. In most cases search will fail on first characters.
     for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) {
       for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) {
-        for (const auto *G : V) {
-          if (G->Pattern.match(Query)) {
-            Cb(G->Name, G->LineNo);
+        for (int Idx : reverse(V)) {
+          if (Best > Idx)
+            break;
+          const GlobMatcher::Glob &G = Globs[Idx];
+          if (G.Pattern.match(Query)) {
+            Best = Idx;
             // As soon as we find a match in the vector, we can break for this
             // vector, since the globs are already sorted by priority within the
             // prefix group. However, we continue searching other prefix groups
@@ -151,6 +159,9 @@ void SpecialCaseList::GlobMatcher::match(
       }
     }
   }
+  if (Best < 0)
+    return NotMatched;
+  return {Globs[Best].Name, Globs[Best].LineNo};
 }
 
 SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
@@ -169,12 +180,11 @@ void SpecialCaseList::Matcher::preprocess(bool BySize) {
   return std::visit([&](auto &V) { return V.preprocess(BySize); }, M);
 }
 
-void SpecialCaseList::Matcher::match(
-    StringRef Query,
-    llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
+SpecialCaseList::Match SpecialCaseList::Matcher::match(StringRef Query) const {
   if (RemoveDotSlash)
     Query = llvm::sys::path::remove_leading_dotslash(Query);
-  return std::visit([&](auto &V) { return V.match(Query, Cb); }, M);
+  return std::visit(
+      [&](auto &V) -> SpecialCaseList::Match { return V.match(Query); }, M);
 }
 
 // TODO: Refactor this to return Expected<...>
@@ -371,26 +381,17 @@ LLVM_ABI void SpecialCaseList::Section::preprocess(bool OrderBySize) {
 unsigned SpecialCaseList::Section::getLastMatch(StringRef Prefix,
                                                 StringRef Query,
                                                 StringRef Category) const {
-  unsigned LastLine = 0;
-  if (const Matcher *M = findMatcher(Prefix, Category)) {
-    M->match(Query, [&](StringRef, unsigned LineNo) {
-      LastLine = std::max(LastLine, LineNo);
-    });
-  }
-  return LastLine;
+  if (const Matcher *M = findMatcher(Prefix, Category))
+    return M->match(Query).second;
+  return 0;
 }
 
 StringRef SpecialCaseList::Section::getLongestMatch(StringRef Prefix,
                                                     StringRef Query,
                                                     StringRef Category) const {
-  StringRef LongestRule;
-  if (const Matcher *M = findMatcher(Prefix, Category)) {
-    M->match(Query, [&](StringRef Rule, unsigned) {
-      if (LongestRule.size() < Rule.size())
-        LongestRule = Rule;
-    });
-  }
-  return LongestRule;
+  if (const Matcher *M = findMatcher(Prefix, Category))
+    return M->match(Query).first;
+  return {};
 }
 
 } // namespace llvm

Copy link
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull Request Overview

This PR refactors the SpecialCaseList matching API by replacing callback-based pattern matching with a return-value-based approach. The change simplifies the API by returning a Match pair (pattern name and line number) directly instead of invoking callbacks for each match.

Key changes:

  • Refactored RegexMatcher::match(), GlobMatcher::match(), and Matcher::match() to return Match (pair of StringRef and unsigned) instead of using callbacks
  • Changed internal data structures in GlobMatcher from storing pointers to Glob objects to storing indices into the Globs vector
  • Simplified getLastMatch() and getLongestMatch() methods to directly use the returned match result

Reviewed Changes

Copilot reviewed 2 out of 2 changed files in this pull request and generated 4 comments.

| File | Description |
| --- | --- |
| llvm/include/llvm/Support/SpecialCaseList.h | Updated matcher method signatures to return Match type and changed internal storage from glob pointers to indices |
| llvm/lib/Support/SpecialCaseList.cpp | Implemented new matching logic with index-based tracking and added unused header includes |

💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.

@vitalybuka vitalybuka requested review from Copilot November 1, 2025 19:18
Copy link
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull Request Overview

Copilot reviewed 2 out of 2 changed files in this pull request and generated 1 comment.


💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.

Copy link
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copilot encountered an error and was unable to review this pull request. You can try again by re-requesting a review.

vitalybuka added a commit to vitalybuka/llvm-project that referenced this pull request Nov 10, 2025
This commit introduces `SpecialCaseList::Match`, a small struct to hold
the matched rule and its line number. This simplifies the `match`
methods by allowing them to return a single value instead of using a
callback.

Pull Request: llvm#165943
Created using spr 1.3.7
vitalybuka added a commit to vitalybuka/llvm-project that referenced this pull request Nov 10, 2025
This commit introduces `SpecialCaseList::Match`, a small struct to hold
the matched rule and its line number. This simplifies the `match`
methods by allowing them to return a single value instead of using a
callback.

Pull Request: llvm#165943
@vitalybuka vitalybuka requested a review from qinkunbao November 10, 2025 18:53
Created using spr 1.3.7
@vitalybuka vitalybuka enabled auto-merge (squash) November 10, 2025 21:37
@vitalybuka vitalybuka requested a review from Copilot November 10, 2025 21:45
Copy link
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull Request Overview

Copilot reviewed 2 out of 2 changed files in this pull request and generated no new comments.


💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.

@vitalybuka vitalybuka merged commit a1934ee into main Nov 10, 2025
15 of 16 checks passed
@vitalybuka vitalybuka deleted the users/vitalybuka/spr/nfcspecialcaselist-replace-callback-with-return-value branch November 10, 2025 22:04
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 11, 2025

LLVM Buildbot has detected a new failure on builder ppc64le-mlir-rhel-clang running on ppc64le-mlir-rhel-test while building llvm at step 6 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/129/builds/32833

Here is the relevant piece of the build log, for reference:
Step 6 (test-build-check-mlir-build-only-check-mlir) failure: 1200 seconds without output running [b'ninja', b'check-mlir'], attempting to kill
...
PASS: MLIR :: mlir-tblgen/op-interface.td (3632 of 3643)
PASS: MLIR :: mlir-tblgen/attr-or-type-format.td (3633 of 3643)
PASS: MLIR-Unit :: Interfaces/./MLIRInterfacesTests/13/22 (3634 of 3643)
PASS: MLIR :: mlir-tblgen/llvm-intrinsics.td (3635 of 3643)
PASS: MLIR :: mlir-reduce/dce-test.mlir (3636 of 3643)
PASS: MLIR :: mlir-tblgen/cpp-class-comments.td (3637 of 3643)
PASS: MLIR :: Pass/pipeline-options-parsing.mlir (3638 of 3643)
PASS: MLIR-Unit :: Interfaces/./MLIRInterfacesTests/11/22 (3639 of 3643)
PASS: MLIR-Unit :: IR/./MLIRIRTests/38/130 (3640 of 3643)
PASS: MLIR-Unit :: IR/./MLIRIRTests/0/130 (3641 of 3643)
command timed out: 1200 seconds without output running [b'ninja', b'check-mlir'], attempting to kill
process killed by signal 9
program finished with exit code -1
elapsedTime=2154.704047

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants