Skip to content

Commit 42fcb4f

Browse files
committed
Enhance fuzzy matcher performance
1 parent 5545174 commit 42fcb4f

File tree

2 files changed

+141
-135
lines changed

2 files changed

+141
-135
lines changed

src/Explorer/FuzzyMatcher.cpp

Lines changed: 113 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -1,113 +1,75 @@
1-
/*
2-
The MIT License (MIT)
3-
4-
Copyright (c) 2019 funap
5-
6-
Permission is hereby granted, free of charge, to any person obtaining a copy
7-
of this software and associated documentation files (the "Software"), to deal
8-
in the Software without restriction, including without limitation the rights
9-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10-
copies of the Software, and to permit persons to whom the Software is
11-
furnished to do so, subject to the following conditions:
12-
13-
The above copyright notice and this permission notice shall be included in
14-
all copies or substantial portions of the Software.
15-
16-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22-
THE SOFTWARE.
23-
*/
1+
// The MIT License (MIT)
2+
//
3+
// Copyright (c) 2019-2024 funap
4+
//
5+
// Permission is hereby granted, free of charge, to any person obtaining a copy
6+
// of this software and associated documentation files (the "Software"), to deal
7+
// in the Software without restriction, including without limitation the rights
8+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
// copies of the Software, and to permit persons to whom the Software is
10+
// furnished to do so, subject to the following conditions:
11+
//
12+
// The above copyright notice and this permission notice shall be included in
13+
// all copies or substantial portions of the Software.
14+
//
15+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
// THE SOFTWARE.
2422

2523
#include "FuzzyMatcher.h"
2624

2725
#include <cwctype>
2826
#include <memory>
2927
#include <algorithm>
3028

31-
FuzzyMatcher::FuzzyMatcher(std::wstring_view pattern)
32-
: pattern_(pattern)
33-
{
34-
}
35-
36-
FuzzyMatcher::~FuzzyMatcher()
37-
{
38-
}
39-
40-
int FuzzyMatcher::ScoreMatch(std::wstring_view target, std::vector<size_t>* positions)
41-
{
42-
if (0 == pattern_.length()) {
43-
return 0;
44-
}
45-
if (0 == target.length()) {
46-
return 0;
47-
}
48-
if (pattern_.length() > target.length()) {
49-
return 0;
29+
namespace {
30+
// Bonus values that FuzzyMatcher::CalculateScore adds to the score of a single
// matched character. All values are compile-time constants.
struct ScoringConstants {
31+
static constexpr int CHARACTER_MATCH_BONUS = 1; // any case-insensitive character match
32+
static constexpr int SAME_CASE_BONUS = 1; // pattern and target characters are also equal in case
33+
static constexpr int FIRST_LETTER_BONUS = 8; // the match is at target index 0
34+
static constexpr int CONSECUTIVE_MATCH_BONUS = 5; // multiplied by the length of the preceding match run
35+
static constexpr int START_OF_EXTENSION_BONUS = 3; // the character before the match is '.'
36+
static constexpr int CAMEL_CASE_BONUS = 4; // lowercase character followed by an uppercase match
37+
static constexpr int SEPARATOR_BONUS = 4; // the character before the match is ' ' or '_'
38+
static constexpr int DIRECTORY_SEPARATOR_BONUS = 5; // the character before the match is '\\'
39+
};
40+
41+
// Returns true when scoring can proceed: neither view is empty and the pattern
// is not longer than the target. Returns false in every other case.
bool ValidateInputs(const std::wstring_view& pattern, const std::wstring_view& target)
42+
{
43+
return !(pattern.empty() || target.empty() || pattern.length() > target.length());
5044
}
5145

52-
auto scoreMatrix = std::make_unique<int[]>(pattern_.length() * target.length());
53-
auto matcheMatrix = std::make_unique<int[]>(pattern_.length() * target.length());
46+
// Rebuilds the matched target indices by walking matchMatrix backwards from
// its last cell (patternLength - 1, targetLength - 1).
//   positions     - output vector; when null the function does nothing.
//                   Found indices are appended and the vector is then reversed.
//   matchMatrix   - flat array addressed as patternIndex * targetLength + targetIndex;
//                   a value of 0 means "no match recorded at this cell".
//   patternLength - number of rows in matchMatrix.
//   targetLength  - number of columns in matchMatrix.
void RestoreMatchPositions(std::vector<size_t>* positions,
47+
const int* matchMatrix,
48+
size_t patternLength,
49+
size_t targetLength)
50+
{
51+
if (!positions) return;
5452

55-
for (size_t patternIndex = 0; patternIndex < pattern_.length(); ++patternIndex) {
56-
const bool patternIsFirstIndex = (0 == patternIndex);
57-
const size_t patternIndexOffset = patternIndex * target.length();
58-
const size_t patternIndexPreviousOffset = patternIndexOffset - target.length();
59-
60-
for (size_t targetIndex = 0; targetIndex < target.length(); ++targetIndex) {
61-
const bool targetIsFirstIndex = (0 == targetIndex);
62-
const size_t currentIndex = patternIndexOffset + targetIndex;
63-
const size_t leftIndex = currentIndex - 1;
64-
const size_t diagIndex = patternIndexPreviousOffset + (targetIndex - 1);
65-
66-
const int leftScore = targetIsFirstIndex ? 0 : scoreMatrix[leftIndex];
67-
const int diagScore = (patternIsFirstIndex || targetIsFirstIndex) ? 0 : scoreMatrix[diagIndex];
68-
const int matchesSequenceLength = (patternIsFirstIndex || targetIsFirstIndex) ? 0 : matcheMatrix[diagIndex];
69-
70-
int score;
71-
if (!diagScore && !patternIsFirstIndex) {
72-
score = 0;
73-
}
74-
else {
75-
score = CalculateScore(pattern_[patternIndex], target, targetIndex, matchesSequenceLength);
76-
}
77-
78-
if (score && (leftScore <= diagScore + score)) {
79-
matcheMatrix[currentIndex] = matchesSequenceLength + 1;
80-
scoreMatrix[currentIndex] = diagScore + score;
81-
}
82-
else {
83-
matcheMatrix[currentIndex] = 0;
84-
scoreMatrix[currentIndex] = leftScore;
85-
}
86-
}
87-
}
88-
const int result = scoreMatrix[pattern_.length() * target.length() - 1];
53+
// NOTE(review): both subtractions wrap around if a length is 0 (size_t).
// The visible caller, ScoreMatch, returns early via ValidateInputs before
// calling this helper, so that case is not reached from there.
size_t patternIndex = patternLength - 1;
54+
size_t targetIndex = targetLength - 1;
8955

90-
// Restore Positions
91-
if (positions) {
92-
size_t patternIndex = pattern_.length() - 1;
93-
size_t targetIndex = target.length() - 1;
9456
// NOTE(review): both indices are size_t, so `0 <= x` is always true; the
// loop actually terminates through the break statements below.
while ((0 <= patternIndex) && (0 <= targetIndex)) {
95-
const size_t currentIndex = patternIndex * target.length() + targetIndex;
96-
const int match = matcheMatrix[currentIndex];
57+
const size_t currentIndex = patternIndex * targetLength + targetIndex;
58+
const int match = matchMatrix[currentIndex];
59+
9760
// No match at this cell: step left along the same pattern row, or stop at column 0.
if (0 == match) {
9861
if (0 < targetIndex) {
99-
--targetIndex; // go left
62+
--targetIndex;
10063
}
10164
else {
10265
break;
10366
}
10467
}
10568
else {
10669
// Match at this cell: record the target index, then step diagonally
// (previous pattern character, previous target character) or stop at an edge.
positions->emplace_back(targetIndex);
107-
10870
if ((0 < patternIndex) && (0 < targetIndex)) {
10971
--patternIndex;
110-
--targetIndex; // go up and left
72+
--targetIndex;
11173
}
11274
else {
11375
break;
@@ -116,63 +78,103 @@ int FuzzyMatcher::ScoreMatch(std::wstring_view target, std::vector<size_t>* posi
11678
}
11779
// Indices were collected from the end of the target towards its start;
// reversing puts the newly collected ones in ascending order.
// NOTE(review): the whole vector is reversed, so entries already present in
// *positions before the call are reordered as well.
std::reverse(positions->begin(), positions->end());
11880
}
81+
} // namespace
11982

120-
return result;
83+
// Stores the pattern view and value-initializes both matrix members.
// NOTE(review): pattern_ is declared as std::wstring_view, so only the view is
// kept, not a copy of the characters - presumably the caller must keep the
// pattern's storage alive while this object is used; confirm at call sites.
FuzzyMatcher::FuzzyMatcher(std::wstring_view pattern)
84+
: pattern_(pattern)
85+
, scoreMatrix_()
86+
, matchMatrix_()
87+
{
12188
}
12289

90+
// Defaulted destructor: no custom cleanup is performed here.
FuzzyMatcher::~FuzzyMatcher() = default;
12391

124-
int FuzzyMatcher::CalculateScore(wchar_t patternChar, const std::wstring_view &target, size_t targetIndex, int matchesSequenceLength)
92+
// Scores how well `target` matches pattern_ and optionally reports which
// target indices were matched.
//   target    - text to match against.
//   positions - may be null; it is forwarded to RestoreMatchPositions together
//               with the match matrix.
// Returns 0 when ValidateInputs rejects the pair (empty input, or pattern
// longer than target); otherwise returns the value in the last cell of the
// score matrix, which can itself be 0.
// Both matrices are flat arrays addressed as patternIndex * target.length() + targetIndex.
int FuzzyMatcher::ScoreMatch(std::wstring_view target, std::vector<size_t>* positions)
12593
{
126-
int score = 0;
94+
if (!ValidateInputs(pattern_, target)) {
95+
return 0;
96+
}
12797

128-
constexpr int CHARACTER_MATCH_BONUS = 1;
129-
constexpr int SAME_CASE_BONUS = 1;
130-
constexpr int FIRST_LETTER_BONUS = 8;
131-
constexpr int CONSECUTIVE_MATCH_BONUS = 5;
132-
constexpr int START_OF_EXTENSION_BONUS = 3;
133-
constexpr int CAMEL_CASE_BONUS = 4;
134-
constexpr int SEPARATOR_BONUS = 4;
135-
constexpr int DIRECTORY_SEPARATOR_BONUS = 5;
98+
// The buffers are members sized to one cell per (pattern, target) pair;
// every cell is assigned in the loops below before it is read.
scoreMatrix_.resize(pattern_.length() * target.length());
99+
matchMatrix_.resize(pattern_.length() * target.length());
100+
for (size_t patternIndex = 0; patternIndex < pattern_.length(); ++patternIndex) {
101+
const bool patternIsFirstIndex = (0 == patternIndex);
102+
const size_t patternIndexOffset = patternIndex * target.length();
103+
// For patternIndex == 0 this subtraction wraps (size_t). The value only feeds
// diagIndex, which is read only when neither index is the first one.
const size_t patternIndexPreviousOffset = patternIndexOffset - target.length();
104+
105+
for (size_t targetIndex = 0; targetIndex < target.length(); ++targetIndex) {
106+
const bool targetIsFirstIndex = (0 == targetIndex);
107+
const size_t currentIndex = patternIndexOffset + targetIndex;
108+
// leftIndex and diagIndex may wrap on the first column/row; the
// first-index flags below keep those values from being used as subscripts.
const size_t leftIndex = currentIndex - 1;
109+
const size_t diagIndex = patternIndexPreviousOffset + (targetIndex - 1);
110+
111+
const int leftScore = targetIsFirstIndex ? 0 : scoreMatrix_[leftIndex];
112+
const int diagScore = (patternIsFirstIndex || targetIsFirstIndex) ? 0 : scoreMatrix_[diagIndex];
113+
const int matchesSequenceLength = (patternIsFirstIndex || targetIsFirstIndex) ? 0 : matchMatrix_[diagIndex];
114+
115+
// After the first pattern row, a character is only scored when the
// diagonal cell already carries a non-zero score.
const int score = (!diagScore && !patternIsFirstIndex)
116+
? 0
117+
: CalculateScore(pattern_[patternIndex], target, targetIndex, matchesSequenceLength);
118+
119+
// Take the match when it is at least as good as carrying the left score;
// matchMatrix_ then stores the extended run length.
if (score && (leftScore <= diagScore + score)) {
120+
matchMatrix_[currentIndex] = matchesSequenceLength + 1;
121+
scoreMatrix_[currentIndex] = diagScore + score;
122+
}
123+
else {
124+
// Otherwise carry the left score; 0 marks "no match at this cell".
matchMatrix_[currentIndex] = 0;
125+
scoreMatrix_[currentIndex] = leftScore;
126+
}
127+
}
128+
}
129+
130+
// The final score is the last cell: last pattern row, last target column.
const int result = scoreMatrix_[pattern_.length() * target.length() - 1];
131+
RestoreMatchPositions(positions, matchMatrix_.data(), pattern_.length(), target.length());
132+
return result;
133+
}
134+
135+
// Returns the score for matching patternChar against target[targetIndex].
//   patternChar           - pattern character being matched.
//   target                - full target text; the character before targetIndex
//                           is inspected when targetIndex is not 0.
//   targetIndex           - index into target of the candidate character.
//   matchesSequenceLength - match run length supplied by the caller; each unit
//                           adds CONSECUTIVE_MATCH_BONUS.
// Returns 0 when the two characters differ after std::towlower; otherwise the
// sum of the applicable ScoringConstants bonuses.
int FuzzyMatcher::CalculateScore(wchar_t patternChar, const std::wstring_view& target, size_t targetIndex, int matchesSequenceLength)
136+
{
137+
int score = 0;
136138

137139
const wchar_t patternLowerChar = std::towlower(patternChar);
138140
const wchar_t targetLowerChar = std::towlower(target[targetIndex]);
141+
139142
// Characters that differ even when lowercased do not match: score stays 0.
if (patternLowerChar != targetLowerChar) {
140-
return score; // no match
143+
return score;
141144
}
142-
score += CHARACTER_MATCH_BONUS;
145+
146+
score += ScoringConstants::CHARACTER_MATCH_BONUS;
143147

144148
if (0 < matchesSequenceLength) {
145-
score += (matchesSequenceLength * CONSECUTIVE_MATCH_BONUS);
149+
score += (matchesSequenceLength * ScoringConstants::CONSECUTIVE_MATCH_BONUS);
146150
}
147151

148152
// Extra point when the characters are also identical in case.
if (patternChar == target[targetIndex]) {
149-
score += SAME_CASE_BONUS;
153+
score += ScoringConstants::SAME_CASE_BONUS;
150154
}
151155

152156
if (0 == targetIndex) {
153-
score += FIRST_LETTER_BONUS;
157+
score += ScoringConstants::FIRST_LETTER_BONUS;
154158
}
155159
else {
156160
// The remaining bonus depends on the character just before the match.
// Only a backslash has the directory-separator case; '/' has no case of
// its own and falls through to default.
switch (target[targetIndex - 1]) {
157161
case '\\':
158-
score += DIRECTORY_SEPARATOR_BONUS;
162+
score += ScoringConstants::DIRECTORY_SEPARATOR_BONUS;
159163
break;
160164
case ' ':
161165
case '_':
162-
score += SEPARATOR_BONUS;
166+
score += ScoringConstants::SEPARATOR_BONUS;
163167
break;
164168
case '.':
165-
score += START_OF_EXTENSION_BONUS;
169+
score += ScoringConstants::START_OF_EXTENSION_BONUS;
166170
break;
167171
default:
168-
if (std::iswlower(target[targetIndex - 1])) {
169-
if (std::iswupper(target[targetIndex])) {
170-
score += CAMEL_CASE_BONUS;
171-
}
172+
// Lowercase character followed by an uppercase match: camelCase boundary.
if (std::iswlower(target[targetIndex - 1]) && std::iswupper(target[targetIndex])) {
173+
score += ScoringConstants::CAMEL_CASE_BONUS;
172174
}
173175
break;
174176
}
175177
}
176178

177179
return score;
178-
};
180+
}

src/Explorer/FuzzyMatcher.h

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,29 @@
1-
/*
2-
The MIT License (MIT)
3-
4-
Copyright (c) 2019 funap
5-
6-
Permission is hereby granted, free of charge, to any person obtaining a copy
7-
of this software and associated documentation files (the "Software"), to deal
8-
in the Software without restriction, including without limitation the rights
9-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10-
copies of the Software, and to permit persons to whom the Software is
11-
furnished to do so, subject to the following conditions:
12-
13-
The above copyright notice and this permission notice shall be included in
14-
all copies or substantial portions of the Software.
15-
16-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22-
THE SOFTWARE.
23-
*/
1+
// The MIT License (MIT)
2+
//
3+
// Copyright (c) 2019-2024 funap
4+
//
5+
// Permission is hereby granted, free of charge, to any person obtaining a copy
6+
// of this software and associated documentation files (the "Software"), to deal
7+
// in the Software without restriction, including without limitation the rights
8+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
// copies of the Software, and to permit persons to whom the Software is
10+
// furnished to do so, subject to the following conditions:
11+
//
12+
// The above copyright notice and this permission notice shall be included in
13+
// all copies or substantial portions of the Software.
14+
//
15+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
// THE SOFTWARE.
2422

25-
#pragma once
23+
#ifndef FUZZY_MATCHER_H_
24+
#define FUZZY_MATCHER_H_
2625

26+
#include <memory>
2727
#include <string_view>
2828
#include <vector>
2929

@@ -37,4 +37,8 @@ class FuzzyMatcher
3737
private:
3838
int CalculateScore(wchar_t patternChar, const std::wstring_view& target, size_t targetIndex, int matchesSequenceLength);
3939
std::wstring_view pattern_;
40+
std::vector<int> scoreMatrix_;
41+
std::vector<int> matchMatrix_;
4042
};
43+
44+
#endif // FUZZY_MATCHER_H_

0 commit comments

Comments
 (0)