Skip to content

Commit 07310c7

Browse files
jjw24theClueless
andcommitted
Fix pinyin fuzzysearch (#131)
* fix typo * make function obsolete, as it is not used in the code * rewrite the function that converts chinese chars to pinyin 1. The only difference in this rewrite is that instead of returning a 2D array, it returns a single combined string of all the possible pinyin combinations. Since fuzzy search does character matching, this shouldn't be a problem. 2. Added a function that returns a custom language converter, in this case a Pinyin converter. New converters can be added. * Use new language converter param + strip out ScoreForPinyin method * update * Change parameter name * fix failing tests * WIP * Remove todo There should be some distinction between the score after the precision filter and the actual raw score derived from FuzzySearch. Although so far RawScore is used in testing, it seems to describe the structure. Originally it was to avoid assigning the score directly, as it would be hard to reason about what the output of the FuzzySearch score is. * Add constructors, remove default to enforce required properties * remove setting rawscore in SearchPrecision * Change method name to reflect intention * Change parameter name + update comment * update * Remove params comment Co-authored-by: theClueless <[email protected]>
1 parent ee93f7e commit 07310c7

File tree

7 files changed

+132
-121
lines changed

7 files changed

+132
-121
lines changed

Wox.Infrastructure/Alphabet.cs

Lines changed: 50 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,35 @@
22
using System.Collections.Concurrent;
33
using System.Collections.Generic;
44
using System.Linq;
5+
using System.Text;
56
using hyjiacan.util.p4n;
67
using hyjiacan.util.p4n.format;
8+
using JetBrains.Annotations;
79
using Wox.Infrastructure.Logger;
810
using Wox.Infrastructure.Storage;
911
using Wox.Infrastructure.UserSettings;
1012

1113
namespace Wox.Infrastructure
1214
{
13-
public static class Alphabet
15+
public interface IAlphabet
1416
{
15-
private static readonly HanyuPinyinOutputFormat Format = new HanyuPinyinOutputFormat();
16-
private static ConcurrentDictionary<string, string[][]> PinyinCache;
17-
private static BinaryStorage<ConcurrentDictionary<string, string[][]>> _pinyinStorage;
18-
private static Settings _settings;
17+
string Translate(string stringToTranslate);
18+
}
19+
20+
public class Alphabet : IAlphabet
21+
{
22+
private readonly HanyuPinyinOutputFormat Format = new HanyuPinyinOutputFormat();
23+
private ConcurrentDictionary<string, string[][]> PinyinCache;
24+
private BinaryStorage<ConcurrentDictionary<string, string[][]>> _pinyinStorage;
25+
private Settings _settings;
1926

20-
public static void Initialize(Settings settings)
27+
public void Initialize([NotNull] Settings settings)
2128
{
22-
_settings = settings;
29+
_settings = settings ?? throw new ArgumentNullException(nameof(settings));
2330
InitializePinyinHelpers();
2431
}
2532

26-
private static void InitializePinyinHelpers()
33+
private void InitializePinyinHelpers()
2734
{
2835
Format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
2936

@@ -38,7 +45,35 @@ private static void InitializePinyinHelpers()
3845
Log.Info($"|Wox.Infrastructure.Alphabet.Initialize|Number of preload pinyin combination<{PinyinCache.Count}>");
3946
}
4047

41-
public static void Save()
48+
public string Translate(string str)
49+
{
50+
return ConvertChineseCharactersToPinyin(str);
51+
}
52+
53+
public string ConvertChineseCharactersToPinyin(string source)
54+
{
55+
if (!_settings.ShouldUsePinyin)
56+
return source;
57+
58+
if (string.IsNullOrEmpty(source))
59+
return source;
60+
61+
if (!ContainsChinese(source))
62+
return source;
63+
64+
var combination = PinyinCombination(source);
65+
66+
var pinyinArray=combination.Select(x => string.Join("", x));
67+
var acronymArray = combination.Select(Acronym).Distinct();
68+
69+
var joinedSingleStringCombination = new StringBuilder();
70+
var all = acronymArray.Concat(pinyinArray);
71+
all.ToList().ForEach(x => joinedSingleStringCombination.Append(x));
72+
73+
return joinedSingleStringCombination.ToString();
74+
}
75+
76+
public void Save()
4277
{
4378
if (!_settings.ShouldUsePinyin)
4479
{
@@ -50,11 +85,12 @@ public static void Save()
5085
private static string[] EmptyStringArray = new string[0];
5186
private static string[][] Empty2DStringArray = new string[0][];
5287

88+
[Obsolete("Not accurate, eg 音乐 will not return yinyue but returns yinle ")]
5389
/// <summary>
5490
/// replace chinese character with pinyin, non chinese character won't be modified
5591
/// <param name="word"> should be word or sentence, instead of single character. e.g. 微软 </param>
5692
/// </summary>
57-
public static string[] Pinyin(string word)
93+
public string[] Pinyin(string word)
5894
{
5995
if (!_settings.ShouldUsePinyin)
6096
{
@@ -76,7 +112,7 @@ public static string[] Pinyin(string word)
76112
/// e.g. 音乐 will return yinyue and yinle
77113
/// <param name="characters"> should be word or sentence, instead of single character. e.g. 微软 </param>
78114
/// </summary>
79-
public static string[][] PinyinComination(string characters)
115+
public string[][] PinyinCombination(string characters)
80116
{
81117
if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(characters))
82118
{
@@ -111,13 +147,13 @@ public static string[][] PinyinComination(string characters)
111147
}
112148
}
113149

114-
public static string Acronym(string[] pinyin)
150+
public string Acronym(string[] pinyin)
115151
{
116152
var acronym = string.Join("", pinyin.Select(p => p[0]));
117153
return acronym;
118154
}
119155

120-
public static bool ContainsChinese(string word)
156+
public bool ContainsChinese(string word)
121157
{
122158
if (!_settings.ShouldUsePinyin)
123159
{
@@ -135,7 +171,7 @@ public static bool ContainsChinese(string word)
135171
return chinese;
136172
}
137173

138-
private static string[] Combination(string[] array1, string[] array2)
174+
private string[] Combination(string[] array1, string[] array2)
139175
{
140176
if (!_settings.ShouldUsePinyin)
141177
{

Wox.Infrastructure/FuzzyMatcher.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ private FuzzyMatcher(string query, MatchOption opt)
1616

1717
public static FuzzyMatcher Create(string query)
1818
{
19-
return new FuzzyMatcher(query, StringMatcher.DefaultMatchOption);
19+
return new FuzzyMatcher(query, new MatchOption());
2020
}
2121

2222
public static FuzzyMatcher Create(string query, MatchOption opt)
@@ -26,7 +26,7 @@ public static FuzzyMatcher Create(string query, MatchOption opt)
2626

2727
public MatchResult Evaluate(string str)
2828
{
29-
return StringMatcher.FuzzySearch(query, str, opt);
29+
return StringMatcher.Instance.FuzzyMatch(query, str, opt);
3030
}
3131
}
3232
}

Wox.Infrastructure/StringMatcher.cs

Lines changed: 53 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,46 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.ComponentModel;
34
using System.Linq;
4-
using System.Text;
5-
using Wox.Infrastructure.Logger;
6-
using Wox.Infrastructure.UserSettings;
75
using static Wox.Infrastructure.StringMatcher;
86

97
namespace Wox.Infrastructure
108
{
11-
public static class StringMatcher
9+
public class StringMatcher
1210
{
13-
public static MatchOption DefaultMatchOption = new MatchOption();
11+
private readonly MatchOption _defaultMatchOption = new MatchOption();
1412

15-
public static SearchPrecisionScore UserSettingSearchPrecision { get; set; }
13+
public SearchPrecisionScore UserSettingSearchPrecision { get; set; }
1614

17-
public static bool ShouldUsePinyin { get; set; }
15+
private readonly IAlphabet _alphabet;
16+
17+
public StringMatcher(IAlphabet alphabet = null)
18+
{
19+
_alphabet = alphabet;
20+
}
21+
22+
public static StringMatcher Instance { get; internal set; }
1823

1924
[Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")]
2025
public static int Score(string source, string target)
2126
{
22-
if (!string.IsNullOrEmpty(source) && !string.IsNullOrEmpty(target))
23-
{
24-
return FuzzySearch(target, source, DefaultMatchOption).Score;
25-
}
26-
else
27-
{
28-
return 0;
29-
}
27+
return FuzzySearch(target, source).Score;
3028
}
3129

3230
[Obsolete("This method is obsolete and should not be used. Please use the static function StringMatcher.FuzzySearch")]
3331
public static bool IsMatch(string source, string target)
3432
{
35-
return FuzzySearch(target, source, DefaultMatchOption).Score > 0;
33+
return Score(source, target) > 0;
3634
}
3735

3836
public static MatchResult FuzzySearch(string query, string stringToCompare)
3937
{
40-
return FuzzySearch(query, stringToCompare, DefaultMatchOption);
38+
return Instance.FuzzyMatch(query, stringToCompare);
39+
}
40+
41+
public MatchResult FuzzyMatch(string query, string stringToCompare)
42+
{
43+
return FuzzyMatch(query, stringToCompare, _defaultMatchOption);
4144
}
4245

4346
/// <summary>
@@ -51,12 +54,18 @@ public static MatchResult FuzzySearch(string query, string stringToCompare)
5154
/// 6. Move onto the next substring's characters until all substrings are checked.
5255
/// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
5356
/// </summary>
54-
public static MatchResult FuzzySearch(string query, string stringToCompare, MatchOption opt)
57+
public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt)
5558
{
56-
if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query)) return new MatchResult { Success = false };
59+
if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query)) return new MatchResult (false, UserSettingSearchPrecision);
5760

5861
query = query.Trim();
5962

63+
if (_alphabet != null)
64+
{
65+
query = _alphabet.Translate(query);
66+
stringToCompare = _alphabet.Translate(stringToCompare);
67+
}
68+
6069
var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
6170

6271
var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;
@@ -139,19 +148,11 @@ public static MatchResult FuzzySearch(string query, string stringToCompare, Matc
139148
if (allQuerySubstringsMatched)
140149
{
141150
var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);
142-
var pinyinScore = ScoreForPinyin(stringToCompare, query);
143151

144-
var result = new MatchResult
145-
{
146-
Success = true,
147-
MatchData = indexList,
148-
RawScore = Math.Max(score, pinyinScore)
149-
};
150-
151-
return result;
152+
return new MatchResult(true, UserSettingSearchPrecision, indexList, score);
152153
}
153154

154-
return new MatchResult { Success = false };
155+
return new MatchResult (false, UserSettingSearchPrecision);
155156
}
156157

157158
private static bool AllPreviousCharsMatched(int startIndexToVerify, int currentQuerySubstringCharacterIndex,
@@ -224,46 +225,28 @@ public enum SearchPrecisionScore
224225
Low = 20,
225226
None = 0
226227
}
228+
}
227229

228-
public static int ScoreForPinyin(string source, string target)
230+
public class MatchResult
231+
{
232+
public MatchResult(bool success, SearchPrecisionScore searchPrecision)
229233
{
230-
if (!ShouldUsePinyin)
231-
{
232-
return 0;
233-
}
234+
Success = success;
235+
SearchPrecision = searchPrecision;
236+
}
234237

235-
if (!string.IsNullOrEmpty(source) && !string.IsNullOrEmpty(target))
236-
{
237-
if (Alphabet.ContainsChinese(source))
238-
{
239-
var combination = Alphabet.PinyinComination(source);
240-
var pinyinScore = combination
241-
.Select(pinyin => FuzzySearch(target, string.Join("", pinyin)).Score)
242-
.Max();
243-
var acronymScore = combination.Select(Alphabet.Acronym)
244-
.Select(pinyin => FuzzySearch(target, pinyin).Score)
245-
.Max();
246-
var score = Math.Max(pinyinScore, acronymScore);
247-
return score;
248-
}
249-
else
250-
{
251-
return 0;
252-
}
253-
}
254-
else
255-
{
256-
return 0;
257-
}
238+
public MatchResult(bool success, SearchPrecisionScore searchPrecision, List<int> matchData, int rawScore)
239+
{
240+
Success = success;
241+
SearchPrecision = searchPrecision;
242+
MatchData = matchData;
243+
RawScore = rawScore;
258244
}
259-
}
260245

261-
public class MatchResult
262-
{
263246
public bool Success { get; set; }
264247

265248
/// <summary>
266-
/// The final score of the match result with all search precision filters applied.
249+
/// The final score of the match result with search precision filters applied.
267250
/// </summary>
268251
public int Score { get; private set; }
269252

@@ -278,7 +261,7 @@ public int RawScore
278261
set
279262
{
280263
_rawScore = value;
281-
Score = ApplySearchPrecisionFilter(_rawScore);
264+
Score = ScoreAfterSearchPrecisionFilter(_rawScore);
282265
}
283266
}
284267

@@ -287,19 +270,21 @@ public int RawScore
287270
/// </summary>
288271
public List<int> MatchData { get; set; }
289272

273+
public SearchPrecisionScore SearchPrecision { get; set; }
274+
290275
public bool IsSearchPrecisionScoreMet()
291276
{
292-
return IsSearchPrecisionScoreMet(Score);
277+
return IsSearchPrecisionScoreMet(_rawScore);
293278
}
294279

295-
private bool IsSearchPrecisionScoreMet(int score)
280+
private bool IsSearchPrecisionScoreMet(int rawScore)
296281
{
297-
return score >= (int)UserSettingSearchPrecision;
282+
return rawScore >= (int)SearchPrecision;
298283
}
299284

300-
private int ApplySearchPrecisionFilter(int score)
285+
private int ScoreAfterSearchPrecisionFilter(int rawScore)
301286
{
302-
return IsSearchPrecisionScoreMet(score) ? score : 0;
287+
return IsSearchPrecisionScoreMet(rawScore) ? rawScore : 0;
303288
}
304289
}
305290

@@ -319,4 +304,4 @@ public class MatchOption
319304

320305
public bool IgnoreCase { get; set; } = true;
321306
}
322-
}
307+
}

Wox.Infrastructure/UserSettings/Settings.cs

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,11 @@ public class Settings : BaseModel
2121
public string ResultFontWeight { get; set; }
2222
public string ResultFontStretch { get; set; }
2323

24+
2425
/// <summary>
2526
/// when false, the Alphabet service skips pinyin conversion (e.g. Translate returns its input unchanged)
2627
/// </summary>
27-
private bool _shouldUsePinyin = true;
28-
public bool ShouldUsePinyin
29-
{
30-
get { return _shouldUsePinyin; }
31-
set
32-
{
33-
_shouldUsePinyin = value;
34-
StringMatcher.ShouldUsePinyin = value;
35-
}
36-
}
28+
public bool ShouldUsePinyin { get; set; } = true;
3729

3830

3931
internal StringMatcher.SearchPrecisionScore QuerySearchPrecision { get; private set; } = StringMatcher.SearchPrecisionScore.Regular;
@@ -49,14 +41,14 @@ public string QuerySearchPrecisionString
4941
.Parse(typeof(StringMatcher.SearchPrecisionScore), value);
5042

5143
QuerySearchPrecision = precisionScore;
52-
StringMatcher.UserSettingSearchPrecision = precisionScore;
44+
StringMatcher.Instance.UserSettingSearchPrecision = precisionScore;
5345
}
5446
catch (ArgumentException e)
5547
{
5648
Logger.Log.Exception(nameof(Settings), "Failed to load QuerySearchPrecisionString value from Settings file", e);
5749

5850
QuerySearchPrecision = StringMatcher.SearchPrecisionScore.Regular;
59-
StringMatcher.UserSettingSearchPrecision = StringMatcher.SearchPrecisionScore.Regular;
51+
StringMatcher.Instance.UserSettingSearchPrecision = StringMatcher.SearchPrecisionScore.Regular;
6052

6153
throw;
6254
}

0 commit comments

Comments
 (0)