Skip to content

Commit 3f9b935

Browse files
committed
update
1 parent b3630ac commit 3f9b935

File tree

18 files changed

+913
-915
lines changed

18 files changed

+913
-915
lines changed

CodeFormatCore/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@ target_sources(CodeFormatCore
4848
src/Diagnostic/NameStyle/NameStyleRuleMatcher.cpp
4949
# diagnostic/spell
5050
src/Diagnostic/Spell/CodeSpellChecker.cpp
51-
src/Diagnostic/Spell/IdentifyParser.cpp
52-
src/Diagnostic/Spell/TextParser.cpp
51+
src/Diagnostic/Spell/Util.cpp
5352
# diagnostic/codestyle
5453
src/Diagnostic/CodeStyle/CodeStyleChecker.cpp
5554
)

CodeFormatCore/include/CodeFormatCore/Diagnostic/Spell/CodeSpellChecker.h

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#pragma once
22

3+
#include "LuaParser/Ast/LuaSyntaxTree.h"
4+
#include "Util/StringUtil.h"
5+
#include "Util/SymSpell/SymSpell.h"
6+
#include "Util.h"
37
#include <memory>
4-
#include <vector>
5-
#include <unordered_map>
68
#include <set>
7-
#include "Util/SymSpell/SymSpell.h"
8-
#include "IdentifyParser.h"
9-
#include "Util/StringUtil.h"
10-
#include "LuaParser/Ast/LuaSyntaxTree.h"
9+
#include <unordered_map>
10+
#include <vector>
1111

1212
class DiagnosticBuilder;
1313

@@ -21,7 +21,7 @@ class CodeSpellChecker {
2121

2222
void LoadDictionaryFromBuffer(std::string_view buffer);
2323

24-
void SetCustomDictionary(const CustomDictionary& dictionary);
24+
void SetCustomDictionary(const CustomDictionary &dictionary);
2525

2626
void Analyze(DiagnosticBuilder &d, const LuaSyntaxTree &t);
2727

@@ -34,7 +34,5 @@ class CodeSpellChecker {
3434
void TextAnalyze(DiagnosticBuilder &d, LuaSyntaxNode &token, const LuaSyntaxTree &t);
3535

3636
std::shared_ptr<SymSpell> _symSpell;
37-
std::unordered_map<std::string, std::shared_ptr<spell::IdentifyParser>> _caches;
3837
CustomDictionary _dictionary;
3938
};
40-
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#pragma once
2+
3+
#include "LuaParser/Lexer/TextReader.h"
4+
#include "LuaParser/Types/TextRange.h"
5+
#include <cinttypes>
6+
#include <string>
7+
#include <string_view>
8+
#include <vector>
9+
10+
namespace spell {
11+
12+
struct Word {
13+
TextRange Range;
14+
std::string_view Item;
15+
16+
Word(TextRange range, std::string_view item)
17+
: Range(range),
18+
Item(item) {
19+
}
20+
};
21+
22+
namespace identify {
23+
enum class IdentifyType {
24+
Unknown,
25+
Ascii,
26+
Ignore,
27+
End,
28+
};
29+
30+
std::vector<Word> ParseToWords(std::string_view identify);
31+
32+
IdentifyType Lex(TextReader &reader);
33+
34+
};// namespace identify
35+
36+
namespace text {
37+
enum class TextType {
38+
Unknown,
39+
Identify,
40+
Ignore,
41+
End,
42+
};
43+
44+
std::vector<Word> ParseToIdentifies(std::string_view text);
45+
46+
TextType Lex(TextReader &reader);
47+
48+
}// namespace text
49+
50+
}// namespace spell

CodeFormatCore/src/Diagnostic/Spell/CodeSpellChecker.cpp

Lines changed: 26 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#include "CodeFormatCore/Diagnostic/Spell/CodeSpellChecker.h"
2-
#include "Util/format.h"
3-
#include "CodeFormatCore/Diagnostic/Spell/TextParser.h"
4-
#include "LuaParser/Lexer/LuaToken.h"
2+
#include "CodeFormatCore/Diagnostic/DiagnosticBuilder.h"
53
#include "CodeFormatCore/Diagnostic/DiagnosticType.h"
4+
#include "CodeFormatCore/Diagnostic/Spell/Util.h"
5+
#include "LuaParser/Lexer/LuaToken.h"
66
#include "LuaParser/Lexer/LuaTokenTypeDetail.h"
7-
#include "CodeFormatCore/Diagnostic/DiagnosticBuilder.h"
7+
#include "Util/format.h"
88

99
CodeSpellChecker::CodeSpellChecker()
10-
: _symSpell(std::make_shared<SymSpell>(SymSpell::Strategy::LazyLoaded)) {
10+
: _symSpell(std::make_shared<SymSpell>(SymSpell::Strategy::LazyLoaded)) {
1111
}
1212

1313
void CodeSpellChecker::LoadDictionary(std::string_view path) {
@@ -49,7 +49,7 @@ std::vector<SuggestItem> CodeSpellChecker::GetSuggests(std::string word) {
4949
if (std::isupper(c)) {
5050
state = ParseState::AllUpper;
5151
c = static_cast<char>(std::tolower(c));
52-
} else // lower
52+
} else// lower
5353
{
5454
state = ParseState::Lower;
5555
}
@@ -115,79 +115,59 @@ std::vector<SuggestItem> CodeSpellChecker::GetSuggests(std::string word) {
115115
return suggests;
116116
}
117117

118+
// Returns a copy of `source` with every character passed through
// std::tolower (current C locale).
//
// Each char is converted to unsigned char before the call: handing a
// negative char value (any non-ASCII byte on platforms where char is signed)
// to std::tolower is undefined behaviour.
std::string lowerString(std::string_view source) {
    std::string lowerItem(source);
    std::transform(lowerItem.begin(), lowerItem.end(), lowerItem.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return lowerItem;
}
123+
118124
void CodeSpellChecker::IdentifyAnalyze(DiagnosticBuilder &d, LuaSyntaxNode &token, const LuaSyntaxTree &t) {
119-
std::shared_ptr<spell::IdentifyParser> parser = nullptr;
120125
std::string text(token.GetText(t));
121126

122127
auto &customDict = _dictionary;
123128
if (customDict.count(text) != 0) {
124129
return;
125130
}
126131

127-
auto it = _caches.find(text);
128-
if (it != _caches.end()) {
129-
parser = it->second;
130-
} else {
131-
parser = std::make_shared<spell::IdentifyParser>(text);
132-
parser->Parse();
133-
_caches.insert({text, parser});
134-
}
135-
136-
auto &words = parser->GetWords();
132+
auto words = spell::identify::ParseToWords(text);
137133
if (words.empty()) {
138134
return;
139135
}
140-
141136
for (auto &word: words) {
142-
if (!word.Item.empty() && !_symSpell->IsCorrectWord(word.Item) && customDict.count(word.Item) == 0) {
137+
auto lowerItem = lowerString(word.Item);
138+
if (!word.Item.empty() && customDict.count(lowerItem) == 0 && !_symSpell->IsCorrectWord(lowerItem)) {
143139
auto tokenRange = token.GetTextRange(t);
144-
auto range = TextRange(tokenRange.StartOffset + word.Range.Start,
145-
word.Range.Count
146-
);
147-
std::string originText(text.substr(word.Range.Start, word.Range.Count));
140+
auto range = TextRange(tokenRange.StartOffset + word.Range.StartOffset,
141+
word.Range.Length);
142+
std::string originText(text.substr(word.Range.StartOffset, word.Range.Length));
148143
d.PushDiagnostic(DiagnosticType::Spell, range,
149144
util::format("Typo in identifier '{}'", originText), originText);
150145
}
151146
}
152147
}
153148

154149
void CodeSpellChecker::TextAnalyze(DiagnosticBuilder &d, LuaSyntaxNode &token, const LuaSyntaxTree &t) {
155-
auto identifiers = spell::TextParser::ParseToWords(token.GetText(t));
150+
auto identifiers = spell::text::ParseToIdentifies(token.GetText(t));
156151
if (identifiers.empty()) {
157152
return;
158153
}
159154
auto &customDict = _dictionary;
160155

161156
for (auto &identifier: identifiers) {
162-
auto &text = identifier.Item;
163-
164-
if (customDict.count(text) != 0) {
165-
continue;
166-
}
167-
std::shared_ptr<spell::IdentifyParser> identifierParser = nullptr;
168-
169-
auto it = _caches.find(text);
170-
if (it != _caches.end()) {
171-
identifierParser = it->second;
172-
} else {
173-
identifierParser = std::make_shared<spell::IdentifyParser>(text);
174-
identifierParser->Parse();
175-
_caches.insert({text, identifierParser});
176-
}
177-
178-
auto &words = identifierParser->GetWords();
157+
auto identifyText = identifier.Item;
158+
auto words = spell::identify::ParseToWords(identifyText);
179159
if (words.empty()) {
180160
continue;
181161
}
182162

183163
auto tokenRange = token.GetTextRange(t);
184164
for (auto &word: words) {
185-
if (!word.Item.empty() && !_symSpell->IsCorrectWord(word.Item) && customDict.count(word.Item) == 0) {
186-
auto range = TextRange(tokenRange.StartOffset + identifier.Range.Start + word.Range.Start,
187-
word.Range.Count
188-
);
165+
auto lowerItem = lowerString(word.Item);
166+
if (!word.Item.empty() && !_symSpell->IsCorrectWord(lowerItem) && customDict.count(lowerItem) == 0) {
167+
auto range = TextRange(tokenRange.StartOffset + identifier.Range.StartOffset + word.Range.StartOffset,
168+
word.Range.Length);
189169
std::string originText(
190-
token.GetText(t).substr(identifier.Range.Start + word.Range.Start, word.Range.Count));
170+
token.GetText(t).substr(identifier.Range.StartOffset + word.Range.StartOffset, word.Range.Length));
191171
d.PushDiagnostic(DiagnosticType::Spell, range,
192172
util::format("Typo in string '{}'", originText), originText);
193173
}
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#include "CodeFormatCore/Diagnostic/Spell/Util.h"
2+
#include "Util/StringUtil.h"
3+
4+
using namespace spell;
5+
const int EOZ = -1;
6+
7+
std::vector<Word> identify::ParseToWords(std::string_view identify) {
8+
TextReader reader(identify);
9+
std::vector<Word> words;
10+
while (true) {
11+
switch (Lex(reader)) {
12+
case IdentifyType::Unknown: {
13+
words.clear();
14+
goto endLoop;
15+
}
16+
case IdentifyType::Ascii: {
17+
auto range = reader.GetTokenRange();
18+
if (range.Length > 3) {
19+
auto text = reader.GetSaveText();
20+
words.emplace_back(range, text);
21+
}
22+
break;
23+
}
24+
case IdentifyType::Ignore: {
25+
break;
26+
}
27+
default:// end
28+
{
29+
goto endLoop;
30+
}
31+
}
32+
}
33+
endLoop:
34+
return words;
35+
}
36+
37+
// Reads one lexeme of an identifier from `reader`.
//
// Leading underscores are skipped without being saved. After that:
// - end of input                                  -> End
// - a character above CHAR_MAX or not alnum       -> Unknown
// - a lowercase letter plus following lowercase   -> Ascii
// - a digit plus following digits                 -> Ignore
// - an uppercase letter, then any further uppercase letters, then any
//   lowercase letters: Ignore when further uppercase letters were eaten,
//   otherwise Ascii (a single capital followed by lowercase).
identify::IdentifyType identify::Lex(TextReader &reader) {
    reader.ResetBuffer();

    // Underscores only separate words; step over them.
    auto ch = reader.GetCurrentChar();
    while (ch == '_') {
        reader.NextChar();
        ch = reader.GetCurrentChar();
    }

    if (ch == EOZ) {
        return IdentifyType::End;
    }
    if (ch > CHAR_MAX || !std::isalnum(ch)) {
        return IdentifyType::Unknown;
    }

    reader.SaveAndNext();

    if (islower(ch)) {
        reader.EatWhile(islower);
        return IdentifyType::Ascii;
    }

    if (isdigit(ch)) {
        reader.EatWhile(isdigit);
        return IdentifyType::Ignore;
    }

    if (isupper(ch)) {
        // NOTE(review): EatWhile's result is treated as the number of
        // characters it consumed -- confirm against TextReader.
        auto extraUpper = reader.EatWhile(isupper);
        reader.EatWhile(islower);
        if (extraUpper > 0) {
            return IdentifyType::Ignore;
        }
        return IdentifyType::Ascii;
    }

    return IdentifyType::Unknown;
}
67+
68+
std::vector<Word> text::ParseToIdentifies(std::string_view text) {
69+
std::vector<Word> words;
70+
if (text.length() < 3) {
71+
return words;
72+
}
73+
74+
// give up check long string
75+
if (text.front() == '\'' || text.front() == '\"') {
76+
text = text.substr(1, text.size() - 2);
77+
} else {
78+
return words;
79+
}
80+
81+
TextReader reader(text);
82+
83+
while (true) {
84+
switch (Lex(reader)) {
85+
case TextType::Unknown: {
86+
words.clear();
87+
goto endLoop;
88+
}
89+
case TextType::Identify: {
90+
auto range = reader.GetTokenRange();
91+
if (range.Length > 3) {
92+
range.StartOffset++;
93+
words.emplace_back(range, reader.GetSaveText());
94+
}
95+
break;
96+
}
97+
case TextType::Ignore: {
98+
break;
99+
}
100+
default:// end
101+
{
102+
goto endLoop;
103+
}
104+
}
105+
}
106+
endLoop:
107+
return words;
108+
}
109+
110+
// Reads one whitespace-delimited chunk of a string literal from `reader`.
//
// Before a chunk starts, a backslash and the character after it are skipped,
// and whitespace is skipped. Returns End when the input is exhausted during
// that skipping; otherwise saves the first character, eats everything up to
// the next whitespace and returns Identify.
text::TextType text::Lex(TextReader &reader) {
    reader.ResetBuffer();

    for (;;) {
        auto ch = reader.GetCurrentChar();

        if (ch == EOZ) {
            return TextType::End;
        }

        if (ch == '\\') {
            // Drop the backslash together with the character it escapes.
            reader.NextChar();
            reader.NextChar();
            continue;
        }

        if (::isspace(ch)) {
            reader.NextChar();
            continue;
        }

        break;
    }

    reader.SaveAndNext();
    reader.EatWhile([](int c) {
        return !isspace(c);
    });
    return TextType::Identify;
}

0 commit comments

Comments
 (0)