Skip to content

Commit eb0efff

Browse files
authored
chore: use stringmatchlen in GlobMatcher (#5547)
GlobMatcher constructor is extremely slow sometimes as it requires to compile the regex produced from the glob expression. A simple example is the small glob tested in the benchmark: a*a*a*.pt which takes roughly 70ms to compile, diminishing any of the performance gains produced by the fast matching. * disable reflex * fall back to stringmatch for GlobMatcher Signed-off-by: kostas <[email protected]>
1 parent 31d1144 commit eb0efff

File tree

4 files changed

+52
-17
lines changed

4 files changed

+52
-17
lines changed

src/core/dfly_core_test.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,4 +660,11 @@ BENCHMARK(BM_MatchPcre2JitExp);
660660

661661
#endif
662662

663+
static void BM_MatchGlobSlow(benchmark::State& state) {
664+
GlobMatcher matcher("a*a*a*a*a*.pt", false);
665+
while (state.KeepRunning()) {
666+
DoNotOptimize(GlobMatcher("a*a*a*a*a*.pt", false));
667+
}
668+
}
669+
BENCHMARK(BM_MatchGlobSlow);
663670
} // namespace dfly

src/core/glob_matcher.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ GlobMatcher::GlobMatcher(string_view pattern, bool case_sensitive)
251251
regex.append(Glob2Regex(pattern));
252252
}
253253
matcher_.pattern(regex);
254-
#elif USE_PCRE2
254+
#elif defined(USE_PCRE2)
255255
string regex("(?s"); // dotall mode
256256
if (!case_sensitive) {
257257
regex.push_back('i');
@@ -300,7 +300,7 @@ bool GlobMatcher::Matches(std::string_view str) const {
300300
}
301301

302302
return true;
303-
#elif USE_PCRE2
303+
#elif defined(USE_PCRE2)
304304
if (!re_ || str.size() < 16) {
305305
return stringmatchlen(glob_.data(), glob_.size(), str.data(), str.size(), !case_sensitive_);
306306
}
@@ -319,7 +319,7 @@ bool GlobMatcher::Matches(std::string_view str) const {
319319

320320
GlobMatcher::~GlobMatcher() {
321321
#ifdef REFLEX_PERFORMANCE
322-
#elif USE_PCRE2
322+
#elif defined(USE_PCRE2)
323323
if (re_) {
324324
pcre2_code_free(re_);
325325
pcre2_match_data_free(match_data_);

src/core/glob_matcher.h

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,19 @@
1010

1111
// We opt for using Reflex library for glob matching.
1212
// While I find PCRE2 faster, it's not substantially faster to justify the shared lib dependency.
13-
#define REFLEX_PERFORMANCE
13+
14+
// For some regex, Reflex (and pcre2) have extremely slow compile times(70+ms).
15+
// This latency is significant for the hot path and therefore both are disabled
16+
// and we fall back to the plain old stringmatchlen. For more info, refer to #5547 on gh.
17+
//#define REFLEX_PERFORMANCE
1418

1519
#ifndef REFLEX_PERFORMANCE
1620
#ifdef USE_PCRE2
17-
#define PCRE2_CODE_UNIT_WIDTH 8
18-
#include <pcre2.h>
21+
#define PCRE2_CODE_UNIT_WIDTH 8
22+
#include <pcre2.h>
1923
#endif
2024
#endif
2125

22-
2326
namespace dfly {
2427

2528
class GlobMatcher {
@@ -36,21 +39,21 @@ class GlobMatcher {
3639
static std::string Glob2Regex(std::string_view glob);
3740

3841
private:
39-
// TODO: we fix the problem of stringmatchlen being much
40-
// faster when the result is immediately known to be false, for example: "a*" vs "bxxxxx".
41-
// The goal is to demonstrate on-par performance for the following case:
42-
// > debug populate 5000000 keys 32 RAND
43-
// > while true; do time valkey-cli scan 0 match 'foo*bar'; done
44-
// Also demonstrate that the "improved" performance via SCAN command and not only via
45-
// micro-benchmark.
46-
// The performance of naive algorithm becomes worse in cases where string is long enough,
47-
// and the pattern has a star at the start (or it matches at first).
42+
// TODO: we fix the problem of stringmatchlen being much
43+
// faster when the result is immediately known to be false, for example: "a*" vs "bxxxxx".
44+
// The goal is to demonstrate on-par performance for the following case:
45+
// > debug populate 5000000 keys 32 RAND
46+
// > while true; do time valkey-cli scan 0 match 'foo*bar'; done
47+
// Also demonstrate that the "improved" performance via SCAN command and not only via
48+
// micro-benchmark.
49+
// The performance of naive algorithm becomes worse in cases where string is long enough,
50+
// and the pattern has a star at the start (or it matches at first).
4851
#ifdef REFLEX_PERFORMANCE
4952
mutable reflex::Matcher matcher_;
5053

5154
bool starts_with_star_ = false;
5255
bool ends_with_star_ = false;
53-
#elif USE_PCRE2
56+
#elif defined(USE_PCRE2)
5457
pcre2_code_8* re_ = nullptr;
5558
pcre2_match_data_8* match_data_ = nullptr;
5659
#endif

tests/dragonfly/set_test.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import pytest
2+
from redis import asyncio as aioredis
3+
from .instance import DflyInstance, DflyInstanceFactory
4+
import logging
5+
import asyncio
6+
7+
8+
@pytest.mark.asyncio
9+
async def test_sscan_regression(df_factory: DflyInstanceFactory):
10+
df = df_factory.create(
11+
proactor_threads=2,
12+
)
13+
df.start()
14+
15+
client = df.client()
16+
17+
await client.execute_command(f"SADD key el1 el2")
18+
19+
element = "a*" * 3
20+
21+
cursor = await client.execute_command(f"SSCAN key 0 match {element}.pt")
22+
length = len(cursor[1])
23+
# Takes 3 seconds
24+
res = await client.execute_command("SLOWLOG GET 100")
25+
assert res == []

0 commit comments

Comments
 (0)