@@ -12,7 +12,7 @@ public static class StringMatcher
12
12
{
13
13
public static MatchOption DefaultMatchOption = new MatchOption ( ) ;
14
14
15
- public static int UserSettingSearchPrecision { get ; set ; }
15
+ public static SearchPrecisionScore UserSettingSearchPrecision { get ; set ; }
16
16
17
17
public static bool ShouldUsePinyin { get ; set ; }
18
18
@@ -41,7 +41,15 @@ public static MatchResult FuzzySearch(string query, string stringToCompare)
41
41
}
42
42
43
43
/// <summary>
44
- /// refer to https://github.com/mattyork/fuzzy
44
+ /// Current method:
45
+ /// Character matching + substring matching;
46
+ /// 1. Query search string is split into substrings, separator is whitespace.
47
+ /// 2. Check each query substring's characters against full compare string,
48
+ /// 3. If a character in the substring is matched, loop back to verify the previous characters.
49
+ /// 4. If previous character also matches, and is the start of the substring, update list.
50
+ /// 5. Once the previous character is verified, move on to the next character in the query substring.
51
+ /// 6. Move onto the next substring's characters until all substrings are checked.
52
+ /// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
45
53
/// </summary>
46
54
public static MatchResult FuzzySearch ( string query , string stringToCompare , MatchOption opt )
47
55
{
@@ -52,107 +60,93 @@ public static MatchResult FuzzySearch(string query, string stringToCompare, Matc
52
60
var fullStringToCompareWithoutCase = opt . IgnoreCase ? stringToCompare . ToLower ( ) : stringToCompare ;
53
61
54
62
var queryWithoutCase = opt . IgnoreCase ? query . ToLower ( ) : query ;
63
+
64
+ var querySubstrings = queryWithoutCase . Split ( new [ ] { ' ' } , StringSplitOptions . RemoveEmptyEntries ) ;
65
+ int currentQuerySubstringIndex = 0 ;
66
+ var currentQuerySubstring = querySubstrings [ currentQuerySubstringIndex ] ;
67
+ var currentQuerySubstringCharacterIndex = 0 ;
55
68
56
- int currentQueryToCompareIndex = 0 ;
57
- var queryToCompareSeparated = queryWithoutCase . Split ( ' ' ) ;
58
- var currentQueryToCompare = queryToCompareSeparated [ currentQueryToCompareIndex ] ;
59
-
60
- var patternIndex = 0 ;
61
69
var firstMatchIndex = - 1 ;
62
70
var firstMatchIndexInWord = - 1 ;
63
71
var lastMatchIndex = 0 ;
64
- bool allMatched = false ;
65
- bool isFullWordMatched = false ;
66
- bool allWordsFullyMatched = true ;
72
+ bool allQuerySubstringsMatched = false ;
73
+ bool matchFoundInPreviousLoop = false ;
74
+ bool allSubstringsContainedInCompareString = true ;
67
75
68
76
var indexList = new List < int > ( ) ;
69
77
70
- for ( var index = 0 ; index < fullStringToCompareWithoutCase . Length ; index ++ )
78
+ for ( var compareStringIndex = 0 ; compareStringIndex < fullStringToCompareWithoutCase . Length ; compareStringIndex ++ )
71
79
{
72
- var ch = stringToCompare [ index ] ;
73
- if ( fullStringToCompareWithoutCase [ index ] == currentQueryToCompare [ patternIndex ] )
80
+ if ( fullStringToCompareWithoutCase [ compareStringIndex ] != currentQuerySubstring [ currentQuerySubstringCharacterIndex ] )
74
81
{
75
- if ( firstMatchIndex < 0 )
76
- { // first matched char will become the start of the compared string
77
- firstMatchIndex = index ;
78
- }
82
+ matchFoundInPreviousLoop = false ;
83
+ continue ;
84
+ }
79
85
80
- if ( patternIndex == 0 )
81
- { // first letter of current word
82
- isFullWordMatched = true ;
83
- firstMatchIndexInWord = index ;
84
- }
85
- else if ( ! isFullWordMatched )
86
- { // we want to verify that there is not a better match if this is not a full word
87
- // in order to do so we need to verify all previous chars are part of the pattern
88
- int startIndexToVerify = index - patternIndex ;
89
- bool allMatch = true ;
90
- for ( int indexToCheck = 0 ; indexToCheck < patternIndex ; indexToCheck ++ )
91
- {
92
- if ( fullStringToCompareWithoutCase [ startIndexToVerify + indexToCheck ] !=
93
- currentQueryToCompare [ indexToCheck ] )
94
- {
95
- allMatch = false ;
96
- }
97
- }
98
-
99
- if ( allMatch )
100
- { // update to this as a full word
101
- isFullWordMatched = true ;
102
- if ( currentQueryToCompareIndex == 0 )
103
- { // first word so we need to update start index
104
- firstMatchIndex = startIndexToVerify ;
105
- }
106
-
107
- indexList . RemoveAll ( x => x >= firstMatchIndexInWord ) ;
108
- for ( int indexToCheck = 0 ; indexToCheck < patternIndex ; indexToCheck ++ )
109
- { // update the index list
110
- indexList . Add ( startIndexToVerify + indexToCheck ) ;
111
- }
112
- }
113
- }
86
+ if ( firstMatchIndex < 0 )
87
+ {
88
+ // first matched char will become the start of the compared string
89
+ firstMatchIndex = compareStringIndex ;
90
+ }
114
91
115
- lastMatchIndex = index + 1 ;
116
- indexList . Add ( index ) ;
92
+ if ( currentQuerySubstringCharacterIndex == 0 )
93
+ {
94
+ // first letter of current word
95
+ matchFoundInPreviousLoop = true ;
96
+ firstMatchIndexInWord = compareStringIndex ;
97
+ }
98
+ else if ( ! matchFoundInPreviousLoop )
99
+ {
100
+ // we want to verify that there is not a better match if this is not a full word
101
+ // in order to do so we need to verify all previous chars are part of the pattern
102
+ var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex ;
117
103
118
- // increase the pattern matched index and check if everything was matched
119
- if ( ++ patternIndex == currentQueryToCompare . Length )
104
+ if ( AllPreviousCharsMatched ( startIndexToVerify , currentQuerySubstringCharacterIndex , fullStringToCompareWithoutCase , currentQuerySubstring ) )
120
105
{
121
- if ( ++ currentQueryToCompareIndex >= queryToCompareSeparated . Length )
122
- { // moved over all the words
123
- allMatched = true ;
124
- break ;
125
- }
126
-
127
- // otherwise move to the next word
128
- currentQueryToCompare = queryToCompareSeparated [ currentQueryToCompareIndex ] ;
129
- patternIndex = 0 ;
130
- if ( ! isFullWordMatched )
131
- { // if any of the words was not fully matched all are not fully matched
132
- allWordsFullyMatched = false ;
133
- }
106
+ matchFoundInPreviousLoop = true ;
107
+
108
+ // if it's the beginning character of the first query substring that is matched then we need to update start index
109
+ firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex ;
110
+
111
+ indexList = GetUpdatedIndexList ( startIndexToVerify , currentQuerySubstringCharacterIndex , firstMatchIndexInWord , indexList ) ;
134
112
}
135
113
}
136
- else
114
+
115
+ lastMatchIndex = compareStringIndex + 1 ;
116
+ indexList . Add ( compareStringIndex ) ;
117
+
118
+ currentQuerySubstringCharacterIndex ++ ;
119
+
120
+ // if finished looping through every character in the current substring
121
+ if ( currentQuerySubstringCharacterIndex == currentQuerySubstring . Length )
137
122
{
138
- isFullWordMatched = false ;
139
- }
140
- }
123
+ // if any of the substrings was not matched then consider as all are not matched
124
+ allSubstringsContainedInCompareString = ! matchFoundInPreviousLoop ? false : allSubstringsContainedInCompareString ;
125
+
126
+ currentQuerySubstringIndex ++ ;
141
127
128
+ allQuerySubstringsMatched = AllQuerySubstringsMatched ( currentQuerySubstringIndex , querySubstrings . Length ) ;
129
+ if ( allQuerySubstringsMatched )
130
+ break ;
142
131
143
- // return rendered string if we have a match for every char or all substring without whitespaces matched
144
- if ( allMatched )
132
+ // otherwise move to the next query substring
133
+ currentQuerySubstring = querySubstrings [ currentQuerySubstringIndex ] ;
134
+ currentQuerySubstringCharacterIndex = 0 ;
135
+ }
136
+ }
137
+
138
+ // proceed to calculate score if every char or substring without whitespaces matched
139
+ if ( allQuerySubstringsMatched )
145
140
{
146
- // check if all query string was contained in string to compare
147
- bool containedFully = lastMatchIndex - firstMatchIndex == queryWithoutCase . Length ;
148
- var score = CalculateSearchScore ( query , stringToCompare , firstMatchIndex , lastMatchIndex - firstMatchIndex , containedFully , allWordsFullyMatched ) ;
141
+ var score = CalculateSearchScore ( query , stringToCompare , firstMatchIndex , lastMatchIndex - firstMatchIndex , allSubstringsContainedInCompareString ) ;
149
142
var pinyinScore = ScoreForPinyin ( stringToCompare , query ) ;
150
143
151
144
var result = new MatchResult
152
145
{
153
146
Success = true ,
154
147
MatchData = indexList ,
155
- RawScore = Math . Max ( score , pinyinScore )
148
+ RawScore = Math . Max ( score , pinyinScore ) ,
149
+ AllSubstringsContainedInCompareString = allSubstringsContainedInCompareString
156
150
} ;
157
151
158
152
return result ;
@@ -161,8 +155,44 @@ public static MatchResult FuzzySearch(string query, string stringToCompare, Matc
161
155
return new MatchResult { Success = false } ;
162
156
}
163
157
164
- private static int CalculateSearchScore ( string query , string stringToCompare , int firstIndex , int matchLen ,
165
- bool isFullyContained , bool allWordsFullyMatched )
158
/// <summary>
/// Checks whether the leading characters of <paramref name="currentQuerySubstring"/> appear as one
/// contiguous run in <paramref name="fullStringToCompareWithoutCase"/>, starting at
/// <paramref name="startIndexToVerify"/>.
/// </summary>
/// <param name="startIndexToVerify">Index in the compare string where the run is expected to begin.</param>
/// <param name="currentQuerySubstringCharacterIndex">Number of leading query substring characters to verify.</param>
/// <param name="fullStringToCompareWithoutCase">The string being searched, as supplied by the caller.</param>
/// <param name="currentQuerySubstring">The query substring whose leading characters are verified.</param>
/// <returns>
/// <c>true</c> when each verified character equals the compare string character at the same offset
/// (including when there is nothing to verify); otherwise <c>false</c>.
/// </returns>
private static bool AllPreviousCharsMatched(int startIndexToVerify, int currentQuerySubstringCharacterIndex,
    string fullStringToCompareWithoutCase, string currentQuerySubstring)
{
    for (var indexToCheck = 0; indexToCheck < currentQuerySubstringCharacterIndex; indexToCheck++)
    {
        if (fullStringToCompareWithoutCase[startIndexToVerify + indexToCheck] != currentQuerySubstring[indexToCheck])
        {
            // A single mismatch settles the answer, so stop here instead of scanning the rest of the run.
            return false;
        }
    }

    return true;
}
173
+
174
/// <summary>
/// Builds the match index list to use after a contiguous run has been verified for the current query substring.
/// </summary>
/// <param name="startIndexToVerify">First index of the verified contiguous run.</param>
/// <param name="currentQuerySubstringCharacterIndex">Number of consecutive indexes to append, beginning at the start index.</param>
/// <param name="firstMatchIndexInWord">Entries greater than or equal to this value are discarded from the existing list.</param>
/// <param name="indexList">Existing match indexes; note that the discarded entries are removed from this list in place.</param>
/// <returns>A new list holding the retained entries followed by the indexes of the contiguous run.</returns>
private static List<int> GetUpdatedIndexList(int startIndexToVerify, int currentQuerySubstringCharacterIndex, int firstMatchIndexInWord, List<int> indexList)
{
    // Discard everything recorded from the first match of the current word onwards (mutates the argument).
    indexList.RemoveAll(matchedIndex => matchedIndex >= firstMatchIndexInWord);

    // Copy what is left into a fresh list that is handed back to the caller.
    var refreshedIndexes = new List<int>(indexList);

    // Append the contiguous run: start, start + 1, ... for the requested number of characters.
    var nextIndex = startIndexToVerify;
    for (var remaining = currentQuerySubstringCharacterIndex; remaining > 0; remaining--)
    {
        refreshedIndexes.Add(nextIndex);
        nextIndex++;
    }

    return refreshedIndexes;
}
189
+
190
/// <summary>
/// Tells whether the query substring index has moved past the last query substring.
/// </summary>
/// <param name="currentQuerySubstringIndex">Index of the query substring that would be processed next.</param>
/// <param name="querySubstringsLength">Total number of query substrings.</param>
/// <returns><c>true</c> when the index is at or beyond the number of substrings; otherwise <c>false</c>.</returns>
private static bool AllQuerySubstringsMatched(int currentQuerySubstringIndex, int querySubstringsLength)
{
    var substringsRemain = currentQuerySubstringIndex < querySubstringsLength;
    return !substringsRemain;
}
194
+
195
+ private static int CalculateSearchScore ( string query , string stringToCompare , int firstIndex , int matchLen , bool allSubstringsContainedInCompareString )
166
196
{
167
197
// A match found near the beginning of a string is scored more than a match found near the end
168
198
// A match is scored more if the characters in the patterns are closer to each other,
@@ -179,15 +209,8 @@ private static int CalculateSearchScore(string query, string stringToCompare, in
179
209
score += 10 ;
180
210
}
181
211
182
- if ( isFullyContained )
183
- {
184
- score += 20 ; // honestly I'm not sure what would be a good number here or should it factor the size of the pattern
185
- }
186
-
187
- if ( allWordsFullyMatched )
188
- {
189
- score += 20 ;
190
- }
212
+ if ( allSubstringsContainedInCompareString )
213
+ score += 10 * string . Concat ( query . Where ( c => ! char . IsWhiteSpace ( c ) ) ) . Count ( ) ;
191
214
192
215
return score ;
193
216
}
@@ -256,6 +279,11 @@ public int RawScore
256
279
}
257
280
}
258
281
282
+ /// <summary>
283
+ /// Indicates if all query's substrings are contained in the string to compare
284
+ /// </summary>
285
+ public bool AllSubstringsContainedInCompareString { get ; set ; }
286
+
259
287
/// <summary>
260
288
/// Matched data to highlight.
261
289
/// </summary>
@@ -268,7 +296,7 @@ public bool IsSearchPrecisionScoreMet()
268
296
269
297
private bool IsSearchPrecisionScoreMet ( int score )
270
298
{
271
- return score >= UserSettingSearchPrecision ;
299
+ return score >= ( int ) UserSettingSearchPrecision ;
272
300
}
273
301
274
302
private int ApplySearchPrecisionFilter ( int score )
0 commit comments