@@ -63,28 +63,49 @@ std::vector<MisspelledRange> HunspellSpellchecker::CheckSpelling(const uint16_t
6363
6464 std::vector<char > utf8_buffer (256 );
6565
66- size_t word_start = 0 ;
67- bool within_word = false ;
68- for (size_t i = 0 ; i < utf16_length; i++) {
66+ enum {
67+ unknown,
68+ in_separator,
69+ in_word,
70+ } state = in_separator;
71+
72+ for (size_t word_start = 0 , i = 0 ; i < utf16_length; i++) {
6973 uint16_t c = utf16_text[i];
70- bool is_word_character = iswalpha (c);
71- if (within_word) {
72- if (!is_word_character) {
73- within_word = false ;
74-
75- bool converted = TranscodeUTF16ToUTF8 (transcoder, (char *)utf8_buffer.data (), utf8_buffer.size (), utf16_text + word_start, i - word_start);
76- if (converted) {
77- if (hunspell->spell (utf8_buffer.data ()) == 0 ) {
78- MisspelledRange range;
79- range.start = word_start;
80- range.end = i;
81- result.push_back (range);
74+
75+ switch (state) {
76+ case unknown:
77+ if (iswpunct (c) || iswspace (c)) {
78+ state = in_separator;
79+ }
80+ break ;
81+
82+ case in_separator:
83+ if (iswalpha (c)) {
84+ word_start = i;
85+ state = in_word;
86+ } else if (!iswpunct (c) && !iswspace (c)) {
87+ state = unknown;
88+ }
89+ break ;
90+
91+ case in_word:
92+ if (c == ' \' ' && iswalpha (utf16_text[i + 1 ])) {
93+ i++;
94+ } else if (c == 0 || iswpunct (c) || iswspace (c)) {
95+ state = in_separator;
96+ bool converted = TranscodeUTF16ToUTF8 (transcoder, (char *)utf8_buffer.data (), utf8_buffer.size (), utf16_text + word_start, i - word_start);
97+ if (converted) {
98+ if (hunspell->spell (utf8_buffer.data ()) == 0 ) {
99+ MisspelledRange range;
100+ range.start = word_start;
101+ range.end = i;
102+ result.push_back (range);
103+ }
82104 }
105+ } else if (!iswalpha (c)) {
106+ state = unknown;
83107 }
84- }
85- } else if (is_word_character) {
86- word_start = i;
87- within_word = true ;
108+ break ;
88109 }
89110 }
90111
0 commit comments