Skip to content

Commit 1bdfe5a

Browse files
committed
拼写检查支持检查字符串中的内容
1 parent 219767f commit 1bdfe5a

File tree

5 files changed

+143
-10
lines changed

5 files changed

+143
-10
lines changed

CodeService/src/Spell/CodeSpellChecker.cpp

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "CodeService/Spell/CodeSpellChecker.h"
22
#include "LuaParser/LuaTokenTypeDetail.h"
33
#include "Util/format.h"
4+
#include "CodeService/Spell/TextParser.h"
45

56
CodeSpellChecker::CodeSpellChecker()
67
: _symSpell(std::make_shared<SymSpell>(SymSpell::Strategy::LazyLoaded))
@@ -29,6 +30,10 @@ void CodeSpellChecker::Analysis(DiagnosisContext& ctx, const CustomDictionary& c
2930
{
3031
IdentifyAnalysis(ctx, token, customDict);
3132
}
33+
else if (token.TokenType == TK_STRING)
34+
{
35+
TextAnalysis(ctx, token, customDict);
36+
}
3237
}
3338
}
3439

@@ -182,7 +187,62 @@ void CodeSpellChecker::IdentifyAnalysis(DiagnosisContext& ctx, LuaToken& token,
182187
token.Range.StartOffset + word.Range.Start + word.Range.Count - 1
183188
);
184189
std::string originText(token.Text.substr(word.Range.Start, word.Range.Count));
185-
ctx.PushDiagnosis(Util::format("Typo in identifier '{}'", originText), range, DiagnosisType::Spell, originText);
190+
ctx.PushDiagnosis(Util::format("Typo in identifier '{}'", originText), range, DiagnosisType::Spell,
191+
originText);
192+
}
193+
}
194+
}
195+
196+
// Spell-checks the words found inside a string token.
//
// token.Text is split by spell::TextParser into identifier-like runs. Each run
// is split into words by spell::IdentifyParser; the parsed result is memoised
// in _caches, keyed by the run text. Every non-empty word that is neither
// accepted by _symSpell nor present in customDict is reported through
// ctx.PushDiagnosis with the message "Typo in string '<word>'".
//
// ctx        - receives the diagnostics.
// token      - the string token being checked; token.Range.StartOffset anchors
//              the reported ranges.
// customDict - user supplied words; a run or a word found here is never reported.
void CodeSpellChecker::TextAnalysis(DiagnosisContext& ctx, LuaToken& token, const CustomDictionary& customDict)
{
    // The text parser is only needed for the duration of this call, so it is a
    // plain local object instead of a heap-allocated shared_ptr.
    spell::TextParser parser(token.Text);
    parser.Parse();

    for (auto& identifier : parser.GetIdentifiers())
    {
        const auto& text = identifier.Item;

        // A run that is whitelisted as a whole is skipped without splitting it.
        if (customDict.count(text) != 0)
        {
            continue;
        }

        // Reuse a previously parsed result for the same run when available.
        std::shared_ptr<spell::IdentifyParser> identifierParser;
        if (auto cached = _caches.find(text); cached != _caches.end())
        {
            identifierParser = cached->second;
        }
        else
        {
            identifierParser = std::make_shared<spell::IdentifyParser>(text);
            identifierParser->Parse();
            _caches.insert({text, identifierParser});
        }

        for (auto& word : identifierParser->GetWords())
        {
            const bool isTypo = !word.Item.empty()
                && !_symSpell->IsCorrectWord(word.Item)
                && customDict.count(word.Item) == 0;
            if (!isTypo)
            {
                continue;
            }

            // Word offsets are relative to the run, and run offsets are
            // relative to the token text.
            const auto offsetInToken = identifier.Range.Start + word.Range.Start;

            // The end offset passed to TextRange is the last character of the word.
            auto range = TextRange(token.Range.StartOffset + offsetInToken,
                                   token.Range.StartOffset + offsetInToken + word.Range.Count - 1);

            // Report the text exactly as written in the source.
            std::string originText(token.Text.substr(offsetInToken, word.Range.Count));
            ctx.PushDiagnosis(Util::format("Typo in string '{}'", originText), range, DiagnosisType::Spell,
                              originText);
        }
    }
}

CodeService/src/Spell/IdentifyParser.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,12 @@ int IdentifyParser::GetCurrentChar()
168168

169169
void IdentifyParser::PushWords(WordRange range)
170170
{
171+
// Ranges this short are very likely abbreviations, so they are skipped.
172+
if (range.Count <= 3)
173+
{
174+
return;
175+
}
176+
171177
std::string_view wordView = _source.substr(range.Start, range.Count);
172178
std::string word;
173179
word.resize(wordView.size());
Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,70 @@
11
#include "CodeService/Spell/TextParser.h"
2+
#include "CodeService/Spell/IdentifyParser.h"
23

34
using namespace spell;
4-
// TODO implement later
5+
6+
// Returns true when `ch` can be part of an identifier-like run: an ASCII
// letter, an ASCII digit or an underscore.
//
// NUL and every byte outside the 7-bit ASCII range (for example the bytes of a
// multi-byte UTF-8 sequence) are rejected. The range test is done on the
// unsigned byte value so the result does not depend on whether plain `char` is
// signed on the target platform, and std::isalnum never receives a negative
// argument.
bool IsIdentifier(char ch)
{
    const auto byte = static_cast<unsigned char>(ch);
    if (byte == 0 || byte > 0x7F)
    {
        return false;
    }
    return std::isalnum(byte) != 0 || ch == '_';
}
10+
11+
TextParser::TextParser(std::string_view source)
12+
: _source(source)
13+
{
14+
}
15+
16+
void TextParser::Parse()
17+
{
18+
enum class ParseState
19+
{
20+
Unknown,
21+
Identify
22+
} state = ParseState::Unknown;
23+
24+
std::size_t start = 0;
25+
for (std::size_t i = 0; i != _source.size(); i++)
26+
{
27+
char ch = _source[i];
28+
switch (state)
29+
{
30+
case ParseState::Unknown:
31+
{
32+
if (IsIdentifier(ch))
33+
{
34+
state = ParseState::Identify;
35+
start = i;
36+
}
37+
break;
38+
}
39+
case ParseState::Identify:
40+
{
41+
if (!IsIdentifier(ch))
42+
{
43+
state = ParseState::Unknown;
44+
PushIdentifier(WordRange(start, i - start));
45+
}
46+
break;
47+
}
48+
}
49+
}
50+
if (state == ParseState::Identify)
51+
{
52+
PushIdentifier(WordRange(start, _source.size() - start));
53+
}
54+
}
55+
56+
std::vector<Word>& TextParser::GetIdentifiers()
57+
{
58+
return _identifiers;
59+
}
60+
61+
// Records the run described by `range` together with a copy of its text.
// Runs of three characters or fewer are dropped.
void TextParser::PushIdentifier(spell::WordRange range)
{
    if (range.Count > 3)
    {
        _identifiers.emplace_back(range, std::string(_source.substr(range.Start, range.Count)));
    }
}

include/CodeService/Spell/CodeSpellChecker.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ class CodeSpellChecker
2626
private:
2727
void IdentifyAnalysis(DiagnosisContext& ctx, LuaToken& token, const CustomDictionary& customDict);
2828

29+
void TextAnalysis(DiagnosisContext& ctx, LuaToken& token, const CustomDictionary& customDict);
30+
2931
std::shared_ptr<SymSpell> _symSpell;
3032
std::unordered_map<std::string, std::shared_ptr<spell::IdentifyParser>> _caches;
3133
};
34+

include/CodeService/Spell/TextParser.h

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,17 @@ namespace spell {
88
class TextParser
99
{
1010
public:
11-
enum class TextType
12-
{
13-
Unknown,
14-
End,
15-
};
16-
1711
TextParser(std::string_view source);
1812

1913
void Parse();
2014

21-
std::vector<spell::Word>& GetWords();
15+
std::vector<Word>& GetIdentifiers();
2216

2317
private:
24-
TextType Lex();
18+
19+
20+
void PushIdentifier(spell::WordRange range);
21+
std::string_view _source;
22+
std::vector<Word> _identifiers;
2523
};
2624
}

0 commit comments

Comments
 (0)