Skip to content

Commit c0bbc0b

Browse files
committed
updated for version 7.4.088
Problem: When spell checking is enabled, Asian characters are always marked as errors. Solution: When 'spelllang' contains "cjk", do not mark Asian characters as errors. (Ken Takata)
1 parent 6f49667 commit c0bbc0b

File tree

7 files changed

+72
-33
lines changed

7 files changed

+72
-33
lines changed

runtime/doc/options.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6555,6 +6555,9 @@ A jump table for the options with a short description can be found at |Q_op|.
65556555
region by listing them: "en_us,en_ca" supports both US and Canadian
65566556
English, but not words specific for Australia, New Zealand or Great
65576557
Britain.
6558+
If the name "cjk" is included, East Asian characters are excluded from
6559+
spell checking. This is useful when editing text that also has Asian
6560+
words.
65586561
*E757*
65596562
As a special case the name of a .spl file can be given as-is. The
65606563
first "_xx" in the name is removed and used as the region name

runtime/doc/spell.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,13 @@ In a table:
269269
latin1 yi transliterated Yiddish
270270
utf-8 yi-tr transliterated Yiddish
271271

272+
*spell-cjk*
273+
Chinese, Japanese and other East Asian characters are normally marked as
274+
errors, because spell checking of these characters is not supported. If
275+
'spelllang' includes "cjk", these characters are not marked as errors. This
276+
is useful when editing text with spell checking while some Asian words are
277+
present.
278+
272279

273280
SPELL FILES *spell-load*
274281

src/mbyte.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -947,8 +947,8 @@ dbcs_class(lead, trail)
947947
{
948948
case 0x2121: /* ZENKAKU space */
949949
return 0;
950-
case 0x2122: /* KU-TEN (Japanese comma) */
951-
case 0x2123: /* TOU-TEN (Japanese period) */
950+
case 0x2122: /* TOU-TEN (Japanese comma) */
951+
case 0x2123: /* KU-TEN (Japanese period) */
952952
case 0x2124: /* ZENKAKU comma */
953953
case 0x2125: /* ZENKAKU period */
954954
return 1;
@@ -2477,9 +2477,9 @@ utf_class(c)
24772477
/* sorted list of non-overlapping intervals */
24782478
static struct clinterval
24792479
{
2480-
unsigned short first;
2481-
unsigned short last;
2482-
unsigned short class;
2480+
unsigned int first;
2481+
unsigned int last;
2482+
unsigned int class;
24832483
} classes[] =
24842484
{
24852485
{0x037e, 0x037e, 1}, /* Greek question mark */
@@ -2544,6 +2544,10 @@ utf_class(c)
25442544
{0xff1a, 0xff20, 1}, /* half/fullwidth ASCII */
25452545
{0xff3b, 0xff40, 1}, /* half/fullwidth ASCII */
25462546
{0xff5b, 0xff65, 1}, /* half/fullwidth ASCII */
2547+
{0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */
2548+
{0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */
2549+
{0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */
2550+
{0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */
25472551
};
25482552
int bot = 0;
25492553
int top = sizeof(classes) / sizeof(struct clinterval) - 1;
@@ -2563,9 +2567,9 @@ utf_class(c)
25632567
while (top >= bot)
25642568
{
25652569
mid = (bot + top) / 2;
2566-
if (classes[mid].last < c)
2570+
if (classes[mid].last < (unsigned int)c)
25672571
bot = mid + 1;
2568-
else if (classes[mid].first > c)
2572+
else if (classes[mid].first > (unsigned int)c)
25692573
top = mid - 1;
25702574
else
25712575
return (int)classes[mid].class;

src/option.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7122,18 +7122,22 @@ did_set_string_option(opt_idx, varp, new_value_alloced, oldval, errbuf,
71227122
if (varp == &(curwin->w_s->b_p_spl))
71237123
{
71247124
char_u fname[200];
7125+
char_u *q = curwin->w_s->b_p_spl;
7126+
7127+
/* Skip the first name if it is "cjk". */
7128+
if (STRNCMP(q, "cjk,", 4) == 0)
7129+
q += 4;
71257130

71267131
/*
71277132
* Source the spell/LANG.vim in 'runtimepath'.
71287133
* They could set 'spellcapcheck' depending on the language.
71297134
* Use the first name in 'spelllang' up to '_region' or
71307135
* '.encoding'.
71317136
*/
7132-
for (p = curwin->w_s->b_p_spl; *p != NUL; ++p)
7137+
for (p = q; *p != NUL; ++p)
71337138
if (vim_strchr((char_u *)"_.,", *p) != NULL)
71347139
break;
7135-
vim_snprintf((char *)fname, 200, "spell/%.*s.vim",
7136-
(int)(p - curwin->w_s->b_p_spl), curwin->w_s->b_p_spl);
7140+
vim_snprintf((char *)fname, 200, "spell/%.*s.vim", (int)(p - q), q);
71377141
source_runtime(fname, TRUE);
71387142
}
71397143
#endif

src/spell.c

Lines changed: 39 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -754,9 +754,9 @@ static int did_set_spelltab;
754754
static void clear_spell_chartab __ARGS((spelltab_T *sp));
755755
static int set_spell_finish __ARGS((spelltab_T *new_st));
756756
static int spell_iswordp __ARGS((char_u *p, win_T *wp));
757-
static int spell_iswordp_nmw __ARGS((char_u *p));
757+
static int spell_iswordp_nmw __ARGS((char_u *p, win_T *wp));
758758
#ifdef FEAT_MBYTE
759-
static int spell_mb_isword_class __ARGS((int cl));
759+
static int spell_mb_isword_class __ARGS((int cl, win_T *wp));
760760
static int spell_iswordp_w __ARGS((int *p, win_T *wp));
761761
#endif
762762
static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
@@ -1149,7 +1149,7 @@ spell_check(wp, ptr, attrp, capcol, docount)
11491149

11501150
/* When we are at a non-word character there is no error, just
11511151
* skip over the character (try looking for a word after it). */
1152-
else if (!spell_iswordp_nmw(ptr))
1152+
else if (!spell_iswordp_nmw(ptr, wp))
11531153
{
11541154
if (capcol != NULL && wp->w_s->b_cap_prog != NULL)
11551155
{
@@ -1561,7 +1561,7 @@ find_word(mip, mode)
15611561
* accept a no-caps word, even when the dictionary
15621562
* word specifies ONECAP. */
15631563
mb_ptr_back(mip->mi_word, p);
1564-
if (spell_iswordp_nmw(p)
1564+
if (spell_iswordp_nmw(p, mip->mi_win)
15651565
? capflags == WF_ONECAP
15661566
: (flags & WF_ONECAP) != 0
15671567
&& capflags != WF_ONECAP)
@@ -4234,14 +4234,22 @@ did_set_spelllang(wp)
42344234
if (spl_copy == NULL)
42354235
goto theend;
42364236

4237-
/* loop over comma separated language names. */
4237+
wp->w_s->b_cjk = 0;
4238+
4239+
/* Loop over comma separated language names. */
42384240
for (splp = spl_copy; *splp != NUL; )
42394241
{
42404242
/* Get one language name. */
42414243
copy_option_part(&splp, lang, MAXWLEN, ",");
42424244
region = NULL;
42434245
len = (int)STRLEN(lang);
42444246

4247+
if (STRCMP(lang, "cjk") == 0)
4248+
{
4249+
wp->w_s->b_cjk = 1;
4250+
continue;
4251+
}
4252+
42454253
/* If the name ends in ".spl" use it as the name of the spell file.
42464254
* If there is a region name let "region" point to it and remove it
42474255
* from the name. */
@@ -4601,7 +4609,7 @@ captype(word, end)
46014609
int past_second = FALSE; /* past second word char */
46024610

46034611
/* find first letter */
4604-
for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p))
4612+
for (p = word; !spell_iswordp_nmw(p, curwin); mb_ptr_adv(p))
46054613
if (end == NULL ? *p == NUL : p >= end)
46064614
return 0; /* only non-word characters, illegal word */
46074615
#ifdef FEAT_MBYTE
@@ -4617,7 +4625,7 @@ captype(word, end)
46174625
* But a word with an upper char only at start is a ONECAP.
46184626
*/
46194627
for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
4620-
if (spell_iswordp_nmw(p))
4628+
if (spell_iswordp_nmw(p, curwin))
46214629
{
46224630
c = PTR2CHAR(p);
46234631
if (!SPELL_ISUPPER(c))
@@ -9907,7 +9915,7 @@ spell_iswordp(p, wp)
99079915

99089916
c = mb_ptr2char(s);
99099917
if (c > 255)
9910-
return spell_mb_isword_class(mb_get_class(s));
9918+
return spell_mb_isword_class(mb_get_class(s), wp);
99119919
return spelltab.st_isw[c];
99129920
}
99139921
#endif
@@ -9920,8 +9928,9 @@ spell_iswordp(p, wp)
99209928
* Unlike spell_iswordp() this doesn't check for "midword" characters.
99219929
*/
99229930
static int
9923-
spell_iswordp_nmw(p)
9931+
spell_iswordp_nmw(p, wp)
99249932
char_u *p;
9933+
win_T *wp;
99259934
{
99269935
#ifdef FEAT_MBYTE
99279936
int c;
@@ -9930,7 +9939,7 @@ spell_iswordp_nmw(p)
99309939
{
99319940
c = mb_ptr2char(p);
99329941
if (c > 255)
9933-
return spell_mb_isword_class(mb_get_class(p));
9942+
return spell_mb_isword_class(mb_get_class(p), wp);
99349943
return spelltab.st_isw[c];
99359944
}
99369945
#endif
@@ -9942,11 +9951,16 @@ spell_iswordp_nmw(p)
99429951
* Return TRUE if word class indicates a word character.
99439952
* Only for characters above 255.
99449953
* Unicode subscript and superscript are not considered word characters.
9954+
* See also dbcs_class() and utf_class() in mbyte.c.
99459955
*/
99469956
static int
9947-
spell_mb_isword_class(cl)
9948-
int cl;
9957+
spell_mb_isword_class(cl, wp)
9958+
int cl;
9959+
win_T *wp;
99499960
{
9961+
if (wp->w_s->b_cjk)
9962+
/* East Asian characters are not considered word characters. */
9963+
return cl == 2 || cl == 0x2800;
99509964
return cl >= 2 && cl != 0x2070 && cl != 0x2080;
99519965
}
99529966

@@ -9971,9 +9985,10 @@ spell_iswordp_w(p, wp)
99719985
if (*s > 255)
99729986
{
99739987
if (enc_utf8)
9974-
return spell_mb_isword_class(utf_class(*s));
9988+
return spell_mb_isword_class(utf_class(*s), wp);
99759989
if (enc_dbcs)
9976-
return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2;
9990+
return spell_mb_isword_class(
9991+
dbcs_class((unsigned)*s >> 8, *s & 0xff), wp);
99779992
return 0;
99789993
}
99799994
return spelltab.st_isw[*s];
@@ -10193,13 +10208,13 @@ spell_suggest(count)
1019310208
line = ml_get_curline();
1019410209
p = line + curwin->w_cursor.col;
1019510210
/* Backup to before start of word. */
10196-
while (p > line && spell_iswordp_nmw(p))
10211+
while (p > line && spell_iswordp_nmw(p, curwin))
1019710212
mb_ptr_back(line, p);
1019810213
/* Forward to start of word. */
10199-
while (*p != NUL && !spell_iswordp_nmw(p))
10214+
while (*p != NUL && !spell_iswordp_nmw(p, curwin))
1020010215
mb_ptr_adv(p);
1020110216

10202-
if (!spell_iswordp_nmw(p)) /* No word found. */
10217+
if (!spell_iswordp_nmw(p, curwin)) /* No word found. */
1020310218
{
1020410219
beep_flush();
1020510220
return;
@@ -10436,7 +10451,7 @@ check_need_cap(lnum, col)
1043610451
for (;;)
1043710452
{
1043810453
mb_ptr_back(line, p);
10439-
if (p == line || spell_iswordp_nmw(p))
10454+
if (p == line || spell_iswordp_nmw(p, curwin))
1044010455
break;
1044110456
if (vim_regexec(&regmatch, p, 0)
1044210457
&& regmatch.endp[0] == line + endcol)
@@ -11645,7 +11660,7 @@ suggest_trie_walk(su, lp, fword, soundfold)
1164511660

1164611661
/* When appending a compound word after a word character don't
1164711662
* use Onecap. */
11648-
if (p != NULL && spell_iswordp_nmw(p))
11663+
if (p != NULL && spell_iswordp_nmw(p, curwin))
1164911664
c &= ~WF_ONECAP;
1165011665
make_case_word(tword + sp->ts_splitoff,
1165111666
preword + sp->ts_prewordlen, c);
@@ -11895,7 +11910,8 @@ suggest_trie_walk(su, lp, fword, soundfold)
1189511910
* character when the word ends. But only when the
1189611911
* good word can end. */
1189711912
if (((!try_compound && !spell_iswordp_nmw(fword
11898-
+ sp->ts_fidx))
11913+
+ sp->ts_fidx,
11914+
curwin))
1189911915
|| fword_ends)
1190011916
&& fword[sp->ts_fidx] != NUL
1190111917
&& goodword_ends)
@@ -14226,7 +14242,7 @@ spell_soundfold_sal(slang, inword, res)
1422614242
}
1422714243
else
1422814244
{
14229-
if (spell_iswordp_nmw(s))
14245+
if (spell_iswordp_nmw(s, curwin))
1423014246
*t++ = *s;
1423114247
++s;
1423214248
}
@@ -14521,7 +14537,7 @@ spell_soundfold_wsal(slang, inword, res)
1452114537
else
1452214538
{
1452314539
did_white = FALSE;
14524-
if (!spell_iswordp_nmw(t))
14540+
if (!spell_iswordp_nmw(t, curwin))
1452514541
continue;
1452614542
}
1452714543
}
@@ -16045,7 +16061,7 @@ spell_word_start(startcol)
1604516061
for (p = line + startcol; p > line; )
1604616062
{
1604716063
mb_ptr_back(line, p);
16048-
if (spell_iswordp_nmw(p))
16064+
if (spell_iswordp_nmw(p, curwin))
1604916065
break;
1605016066
}
1605116067

src/structs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,6 +1310,9 @@ typedef struct {
13101310
regprog_T *b_cap_prog; /* program for 'spellcapcheck' */
13111311
char_u *b_p_spf; /* 'spellfile' */
13121312
char_u *b_p_spl; /* 'spelllang' */
1313+
# ifdef FEAT_MBYTE
1314+
int b_cjk; /* all CJK letters as OK */
1315+
# endif
13131316
#endif
13141317
#if !defined(FEAT_SYN_HL) && !defined(FEAT_SPELL)
13151318
int dummy;

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,8 @@ static char *(features[]) =
738738

739739
static int included_patches[] =
740740
{ /* Add new patch number below this line */
741+
/**/
742+
88,
741743
/**/
742744
87,
743745
/**/

0 commit comments

Comments
 (0)