@@ -46,9 +46,7 @@ struct casing_context {
46
46
When run on a buffer, syntax_prefix_flag_p is taken into account when
47
47
determining the inword flag. */
48
48
bool inbuffer ;
49
- /* Conceptually, this denotes whether we are inside of a word except
50
- that if flag is CASE_UP it’s always false and if flag is CASE_DOWN
51
- this is always true. */
49
+ /* Whether we are inside of a word. */
52
50
bool inword ;
53
51
};
54
52
@@ -59,7 +57,7 @@ prepare_casing_context (struct casing_context *ctx,
59
57
{
60
58
ctx -> flag = flag ;
61
59
ctx -> inbuffer = inbuffer ;
62
- ctx -> inword = flag == CASE_DOWN ;
60
+ ctx -> inword = false ;
63
61
ctx -> titlecase_char_table = (int )flag < (int )CASE_CAPITALIZE ? Qnil :
64
62
uniprop_table (intern_c_string ("titlecase" ));
65
63
ctx -> specialcase_char_tables [CASE_UP ] = flag == CASE_DOWN ? Qnil :
@@ -101,15 +99,16 @@ case_character_impl (struct casing_str_buf *buf,
101
99
102
100
/* Update inword state */
103
101
was_inword = ctx -> inword ;
104
- if ((int ) ctx -> flag >= (int ) CASE_CAPITALIZE )
105
- ctx -> inword = SYNTAX (ch ) == Sword &&
106
- (!ctx -> inbuffer || was_inword || !syntax_prefix_flag_p (ch ));
102
+ ctx -> inword = SYNTAX (ch ) == Sword &&
103
+ (!ctx -> inbuffer || was_inword || !syntax_prefix_flag_p (ch ));
107
104
108
105
/* Normalise flag so it's one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */
109
- if (! was_inword )
110
- flag = ctx -> flag == CASE_UP ? CASE_UP : CASE_CAPITALIZE ;
106
+ if (ctx -> flag == CASE_CAPITALIZE )
107
+ flag = ( enum case_action )(( int ) ctx -> flag - was_inword ) ;
111
108
else if (ctx -> flag != CASE_CAPITALIZE_UP )
112
- flag = CASE_DOWN ;
109
+ flag = ctx -> flag ;
110
+ else if (!was_inword )
111
+ flag = CASE_CAPITALIZE ;
113
112
else
114
113
{
115
114
cased = ch ;
@@ -150,7 +149,18 @@ case_character_impl (struct casing_str_buf *buf,
150
149
buf -> len_bytes = CHAR_STRING (cased , buf -> data );
151
150
return cased != ch ;
152
151
}
152
+
153
+ /* In Greek, lower case sigma has two forms: one when used in the middle and one
154
+ when used at the end of a word. Below is to help handle those cases when
155
+ casing.
156
+
157
+ The rule does not conflict with any other casing rules so while it is
158
+ a conditional one, it is independent of language. */
153
159
160
+ #define CAPITAL_SIGMA 0x03A3
161
+ #define SMALL_SIGMA 0x03C3
162
+ #define SMALL_FINAL_SIGMA 0x03C2
163
+
154
164
/* Based on CTX, case character CH accordingly. Update CTX as necessary.
155
165
Return cased character.
156
166
@@ -164,12 +174,34 @@ case_single_character (struct casing_context *ctx, int ch)
164
174
}
165
175
166
176
/* Save in BUF result of casing character CH. Return whether casing changed the
167
- character. This is like case_single_character but also handles one-to-many
168
- casing rules. */
169
- static inline bool
170
- case_character (struct casing_str_buf * buf , struct casing_context * ctx , int ch )
177
+ character.
178
+
179
+ If non-NULL, NEXT points to the next character in the cased string. If NULL,
180
+ it is assumed the current character is the last one being cased. This is used to
181
+ apply some rules which depend on the character that follows.
182
+
183
+ This is like case_single_character but also handles one-to-many casing
184
+ rules. */
185
+ static bool
186
+ case_character (struct casing_str_buf * buf , struct casing_context * ctx ,
187
+ int ch , const unsigned char * next )
171
188
{
172
- return case_character_impl (buf , ctx , ch );
189
+ bool changed , was_inword ;
190
+
191
+ was_inword = ctx -> inword ;
192
+ changed = case_character_impl (buf , ctx , ch );
193
+
194
+ /* If we have just down-cased a capital sigma and the next character no longer
195
+ has a word syntax (i.e. current character is end of word), use final
196
+ sigma. */
197
+ if (was_inword && ch == CAPITAL_SIGMA && changed &&
198
+ (!next || SYNTAX (STRING_CHAR (next )) != Sword ))
199
+ {
200
+ buf -> len_bytes = CHAR_STRING (SMALL_FINAL_SIGMA , buf -> data );
201
+ buf -> len_chars = 1 ;
202
+ }
203
+
204
+ return changed ;
173
205
}
174
206
175
207
static Lisp_Object
@@ -231,7 +263,7 @@ do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj)
231
263
if (dst_end - o < sizeof (struct casing_str_buf ))
232
264
string_overflow ();
233
265
ch = STRING_CHAR_ADVANCE (src );
234
- case_character ((void * )o , ctx , ch );
266
+ case_character ((void * )o , ctx , ch , size > 1 ? src : NULL );
235
267
n += ((struct casing_str_buf * )o )-> len_chars ;
236
268
o += ((struct casing_str_buf * )o )-> len_bytes ;
237
269
}
@@ -382,12 +414,17 @@ do_casify_multibyte_region (struct casing_context *ctx,
382
414
ptrdiff_t pos = * startp , pos_byte = CHAR_TO_BYTE (pos ), size = * endp - pos ;
383
415
ptrdiff_t opoint = PT , added = 0 ;
384
416
struct casing_str_buf buf ;
385
- int ch , cased , len ;
417
+ bool changed ;
418
+ int ch , len ;
386
419
387
420
for (; size ; -- size )
388
421
{
389
422
ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte ), len );
390
- if (!case_character (& buf , ctx , ch ))
423
+ changed = case_character (
424
+ & buf , ctx , ch ,
425
+ size > 1 ? BYTE_POS_ADDR (pos_byte + len ) : NULL );
426
+
427
+ if (!changed )
391
428
{
392
429
pos_byte += len ;
393
430
++ pos ;
0 commit comments