@@ -33,51 +33,61 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
3333enum case_action {CASE_UP , CASE_DOWN , CASE_CAPITALIZE , CASE_CAPITALIZE_UP };
3434
3535/* State for casing individual characters. */
36- struct casing_context {
36+ struct casing_context
37+ {
3738 /* A char-table with title-case character mappings or nil. Non-nil implies
3839 flag is CASE_CAPITALIZE or CASE_CAPITALIZE_UP. */
3940 Lisp_Object titlecase_char_table ;
41+
4042 /* The unconditional special-casing Unicode property char tables for upper
41- casing, lower casing and title casing respectively. */
43+ casing, lower casing and title casing respectively. */
4244 Lisp_Object specialcase_char_tables [3 ];
43- /* User-requested action. */
45+
46+ /* User-requested action. */
4447 enum case_action flag ;
45- /* If true, function operates on a buffer as opposed to a string or character.
46- When run on a buffer, syntax_prefix_flag_p is taken into account when
47- determined inword flag. */
48+
49+ /* If true, the function operates on a buffer as opposed to a string
50+ or character. When run on a buffer, syntax_prefix_flag_p is
51+ taken into account when determining whether the context is within
52+ a word. */
4853 bool inbuffer ;
49- /* Whether we are inside of a word. */
54+
55+ /* Whether the context is within a word. */
5056 bool inword ;
5157};
5258
53- /* Initialise CTX structure for casing characters. */
59+ /* Initialize CTX structure for casing characters. */
5460static void
5561prepare_casing_context (struct casing_context * ctx ,
5662 enum case_action flag , bool inbuffer )
5763{
5864 ctx -> flag = flag ;
5965 ctx -> inbuffer = inbuffer ;
6066 ctx -> inword = false;
61- ctx -> titlecase_char_table = (int )flag < (int )CASE_CAPITALIZE ? Qnil :
62- uniprop_table (intern_c_string ("titlecase" ));
63- ctx -> specialcase_char_tables [CASE_UP ] = flag == CASE_DOWN ? Qnil :
64- uniprop_table (intern_c_string ("special-uppercase" ));
65- ctx -> specialcase_char_tables [CASE_DOWN ] = flag == CASE_UP ? Qnil :
66- uniprop_table (intern_c_string ("special-lowercase" ));
67- ctx -> specialcase_char_tables [CASE_CAPITALIZE ] =
68- (int )flag < (int )CASE_CAPITALIZE ? Qnil :
69- uniprop_table (intern_c_string ("special-titlecase" ));
67+ ctx -> titlecase_char_table
68+ = (flag < CASE_CAPITALIZE ? Qnil
69+ : uniprop_table (intern_c_string ("titlecase" )));
70+ ctx -> specialcase_char_tables [CASE_UP ]
71+ = (flag == CASE_DOWN ? Qnil
72+ : uniprop_table (intern_c_string ("special-uppercase" )));
73+ ctx -> specialcase_char_tables [CASE_DOWN ]
74+ = (flag == CASE_UP ? Qnil
75+ : uniprop_table (intern_c_string ("special-lowercase" )));
76+ ctx -> specialcase_char_tables [CASE_CAPITALIZE ]
77+ = (flag < CASE_CAPITALIZE ? Qnil
78+ : uniprop_table (intern_c_string ("special-titlecase" )));
7079
7180 /* If the case table is flagged as modified, rescan it. */
7281 if (NILP (XCHAR_TABLE (BVAR (current_buffer , downcase_table ))-> extras [1 ]))
7382 Fset_case_table (BVAR (current_buffer , downcase_table ));
7483
75- if (inbuffer && ( int ) flag >= ( int ) CASE_CAPITALIZE )
84+ if (inbuffer && flag >= CASE_CAPITALIZE )
7685 SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */
7786}
7887
79- struct casing_str_buf {
80- unsigned char data [MAX_MULTIBYTE_LENGTH > 6 ? MAX_MULTIBYTE_LENGTH : 6 ];
88+ struct casing_str_buf
89+ {
90+ unsigned char data [max (6 , MAX_MULTIBYTE_LENGTH )];
8191 unsigned char len_chars ;
8292 unsigned char len_bytes ;
8393};
@@ -87,24 +97,23 @@ struct casing_str_buf {
8797 character has been changed.
8898
8999 Since the meaning of the return value depends on the arguments, it’s more convenient to
90- use case_single_character or case_character instead. */
100+ use case_single_character or case_character instead. */
91101static int
92102case_character_impl (struct casing_str_buf * buf ,
93103 struct casing_context * ctx , int ch )
94104{
95105 enum case_action flag ;
96106 Lisp_Object prop ;
97- bool was_inword ;
98107 int cased ;
99108
100109 /* Update inword state */
101- was_inword = ctx -> inword ;
110+ bool was_inword = ctx -> inword ;
102111 ctx -> inword = SYNTAX (ch ) == Sword &&
103112 (!ctx -> inbuffer || was_inword || !syntax_prefix_flag_p (ch ));
104113
105- /* Normalise flag so its one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */
114+ /* Normalize flag so it is one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */
106115 if (ctx -> flag == CASE_CAPITALIZE )
107- flag = ( enum case_action )(( int ) ctx -> flag - was_inword ) ;
116+ flag = ctx -> flag - was_inword ;
108117 else if (ctx -> flag != CASE_CAPITALIZE_UP )
109118 flag = ctx -> flag ;
110119 else if (!was_inword )
@@ -115,33 +124,34 @@ case_character_impl (struct casing_str_buf *buf,
115124 goto done ;
116125 }
117126
118- /* Look through the special casing entries. */
119- if (buf && !NILP (ctx -> specialcase_char_tables [(int )flag ]))
127+ /* Look through the special casing entries. */
128+ if (buf && !NILP (ctx -> specialcase_char_tables [(int )flag ]))
120129 {
121- prop = CHAR_TABLE_REF (ctx -> specialcase_char_tables [(int )flag ], ch );
122- if (STRINGP (prop ))
130+ prop = CHAR_TABLE_REF (ctx -> specialcase_char_tables [(int )flag ], ch );
131+ if (STRINGP (prop ))
123132 {
124- struct Lisp_String * str = XSTRING (prop );
125- if (STRING_BYTES (str ) <= sizeof buf -> data )
133+ struct Lisp_String * str = XSTRING (prop );
134+ if (STRING_BYTES (str ) <= sizeof buf -> data )
126135 {
127136 buf -> len_chars = str -> size ;
128- buf -> len_bytes = STRING_BYTES (str );
129- memcpy (buf -> data , str -> data , buf -> len_bytes );
137+ buf -> len_bytes = STRING_BYTES (str );
138+ memcpy (buf -> data , str -> data , buf -> len_bytes );
130139 return 1 ;
131140 }
132141 }
133142 }
134143
135- /* Handle simple, one-to-one case. */
144+ /* Handle simple, one-to-one case. */
136145 if (flag == CASE_DOWN )
137146 cased = downcase (ch );
138- else if (!NILP (ctx -> titlecase_char_table ) &&
139- CHARACTERP (prop = CHAR_TABLE_REF (ctx -> titlecase_char_table , ch )))
147+ else if (!NILP (ctx -> titlecase_char_table )
148+ && CHARACTERP (prop
149+ = CHAR_TABLE_REF (ctx -> titlecase_char_table , ch )))
140150 cased = XFASTINT (prop );
141151 else
142- cased = upcase (ch );
152+ cased = upcase (ch );
143153
144- /* And we’re done. */
154+ /* And we’re done. */
145155 done :
146156 if (!buf )
147157 return cased ;
@@ -155,18 +165,17 @@ case_character_impl (struct casing_str_buf *buf,
155165 casing.
156166
157167 The rule does not conflict with any other casing rules so while it is
158- a conditional one, it is independent on language. */
168+ a conditional one, it is independent of language. */
159169
160- #define CAPITAL_SIGMA 0x03A3
161- #define SMALL_SIGMA 0x03C3
162- #define SMALL_FINAL_SIGMA 0x03C2
170+ enum { GREEK_CAPITAL_LETTER_SIGMA = 0x03A3 }; /* Σ (U+03A3) */
171+ enum { GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2 }; /* ς */
163172
164173/* Based on CTX, case character CH accordingly. Update CTX as necessary.
165174 Return cased character.
166175
167176 Special casing rules (such as upcase(fi) = FI) are not handled. For
168177 characters whose casing results in multiple code points, the character is
169- returned unchanged. */
178+ returned unchanged. */
170179static inline int
171180case_single_character (struct casing_context * ctx , int ch )
172181{
@@ -181,23 +190,21 @@ case_single_character (struct casing_context *ctx, int ch)
181190 apply some rules which depend on the preceding state.
182191
183192 This is like case_single_character but also handles one-to-many casing
184- rules. */
193+ rules. */
185194static bool
186195case_character (struct casing_str_buf * buf , struct casing_context * ctx ,
187196 int ch , const unsigned char * next )
188197{
189- bool changed , was_inword ;
190-
191- was_inword = ctx -> inword ;
192- changed = case_character_impl (buf , ctx , ch );
198+ bool was_inword = ctx -> inword ;
199+ bool changed = case_character_impl (buf , ctx , ch );
193200
194201 /* If we have just down-cased a capital sigma and the next character no longer
195202 has a word syntax (i.e. current character is end of word), use final
196- sigma. */
197- if (was_inword && ch == CAPITAL_SIGMA && changed &&
198- (!next || SYNTAX (STRING_CHAR (next )) != Sword ))
203+ sigma. */
204+ if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed
205+ && (!next || SYNTAX (STRING_CHAR (next )) != Sword ))
199206 {
200- buf -> len_bytes = CHAR_STRING (SMALL_FINAL_SIGMA , buf -> data );
207+ buf -> len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA , buf -> data );
201208 buf -> len_chars = 1 ;
202209 }
203210
@@ -209,63 +216,64 @@ do_casify_natnum (struct casing_context *ctx, Lisp_Object obj)
209216{
210217 int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
211218 | CHAR_SHIFT | CHAR_CTL | CHAR_META );
212- int flags , ch = XFASTINT (obj ), cased ;
213- bool multibyte ;
219+ int ch = XFASTINT (obj );
214220
215221 /* If the character has higher bits set above the flags, return it unchanged.
216222 It is not a real character. */
217223 if (UNSIGNED_CMP (ch , > , flagbits ))
218224 return obj ;
219225
220- flags = ch & flagbits ;
226+ int flags = ch & flagbits ;
221227 ch = ch & ~flagbits ;
222228
223229 /* FIXME: Even if enable-multibyte-characters is nil, we may manipulate
224230 multibyte chars. This means we have a bug for latin-1 chars since when we
225231 receive an int 128-255 we can't tell whether it's an eight-bit byte or
226232 a latin-1 char. */
227- multibyte = ch >= 256
228- || !NILP (BVAR (current_buffer , enable_multibyte_characters ));
233+ bool multibyte = (ch >= 256
234+ || !NILP (BVAR (current_buffer ,
235+ enable_multibyte_characters )));
229236 if (! multibyte )
230237 MAKE_CHAR_MULTIBYTE (ch );
231- cased = case_single_character (ctx , ch );
238+ int cased = case_single_character (ctx , ch );
232239 if (cased == ch )
233240 return obj ;
234241
235242 if (! multibyte )
236243 MAKE_CHAR_UNIBYTE (cased );
237- XSETFASTINT (obj , cased | flags );
238- return obj ;
244+ return make_natnum (cased | flags );
239245}
240246
241247static Lisp_Object
242248do_casify_multibyte_string (struct casing_context * ctx , Lisp_Object obj )
243249{
244- /* We assume data is the first member of casing_str_buf structure so that if
245- we cast a (char *) into (struct casing_str_buf *) the representation of the
246- character is at the beginning of the buffer. This is why we don’t need
247- separate struct casing_str_buf object but rather write directly to o. */
248- typedef char static_assertion [offsetof(struct casing_str_buf , data ) ? -1 : 1 ];
250+ /* Verify that ‘data’ is the first member of struct casing_str_buf
251+ so that when casting char * to struct casing_str_buf *, the
252+ representation of the character is at the beginning of the
253+ buffer. This is why we don’t need a separate struct
254+ casing_str_buf object, and can write directly to the destination. */
255+ verify (offsetof (struct casing_str_buf , data ) == 0 );
249256
250257 ptrdiff_t size = SCHARS (obj ), n ;
251- int ch ;
252258 USE_SAFE_ALLOCA ;
253- if (INT_MULTIPLY_WRAPV (size , MAX_MULTIBYTE_LENGTH , & n ) ||
254- INT_ADD_WRAPV (n , sizeof (struct casing_str_buf ), & n ))
259+ if (INT_MULTIPLY_WRAPV (size , MAX_MULTIBYTE_LENGTH , & n )
260+ || INT_ADD_WRAPV (n , sizeof (struct casing_str_buf ), & n ))
255261 n = PTRDIFF_MAX ;
256- unsigned char * const dst = SAFE_ALLOCA (n ), * const dst_end = dst + n ;
262+ unsigned char * dst = SAFE_ALLOCA (n );
263+ unsigned char * dst_end = dst + n ;
257264 unsigned char * o = dst ;
258265
259266 const unsigned char * src = SDATA (obj );
260267
261268 for (n = 0 ; size ; -- size )
262269 {
263- if (dst_end - o < sizeof (struct casing_str_buf ))
270+ if (dst_end - o < sizeof (struct casing_str_buf ))
264271 string_overflow ();
265- ch = STRING_CHAR_ADVANCE (src );
266- case_character ((void * )o , ctx , ch , size > 1 ? src : NULL );
267- n += ((struct casing_str_buf * )o )-> len_chars ;
268- o += ((struct casing_str_buf * )o )-> len_bytes ;
272+ int ch = STRING_CHAR_ADVANCE (src );
273+ case_character ((struct casing_str_buf * ) o , ctx , ch ,
274+ size > 1 ? src : NULL );
275+ n += ((struct casing_str_buf * ) o )-> len_chars ;
276+ o += ((struct casing_str_buf * ) o )-> len_bytes ;
269277 }
270278 eassert (o <= dst_end );
271279 obj = make_multibyte_string ((char * ) dst , n , o - dst );
@@ -288,7 +296,8 @@ do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj)
288296 if (ch == cased )
289297 continue ;
290298 MAKE_CHAR_UNIBYTE (cased );
291- /* If the char can't be converted to a valid byte, just don't change it */
299+ /* If the char can't be converted to a valid byte, just don't
300+ change it. */
292301 if (cased >= 0 && cased < 256 )
293302 SSET (obj , i , cased );
294303 }
@@ -369,21 +378,20 @@ cased, e.g. fi, are returned unchanged. */)
369378 *ENDP unspecified.
370379
371380 Always return 0. This is so that the interface of this function is the same as
372- do_casify_multibyte_region. */
381+ do_casify_multibyte_region. */
373382static ptrdiff_t
374383do_casify_unibyte_region (struct casing_context * ctx ,
375384 ptrdiff_t * startp , ptrdiff_t * endp )
376385{
377- ptrdiff_t first = -1 , last = -1 ; /* Position of first and last changes. */
378- ptrdiff_t pos = * startp , end = * endp ;
379- int ch , cased ;
386+ ptrdiff_t first = -1 , last = -1 ; /* Position of first and last changes. */
387+ ptrdiff_t end = * endp ;
380388
381- for (; pos < end ; ++ pos )
389+ for (ptrdiff_t pos = * startp ; pos < end ; ++ pos )
382390 {
383- ch = FETCH_BYTE (pos );
391+ int ch = FETCH_BYTE (pos );
384392 MAKE_CHAR_MULTIBYTE (ch );
385393
386- cased = case_single_character (ctx , ch );
394+ int cased = case_single_character (ctx , ch );
387395 if (cased == ch )
388396 continue ;
389397
@@ -405,26 +413,22 @@ do_casify_unibyte_region (struct casing_context *ctx,
405413 Return number of added characters (may be negative if more characters were
406414 deleted than inserted), save first and last positions that have changed in
407415 *STARTP and *ENDP respectively. If no characters were changed, return 0,
408- save -1 to *STARTP and leave *ENDP unspecified. */
416+ save -1 to *STARTP and leave *ENDP unspecified. */
409417static ptrdiff_t
410418do_casify_multibyte_region (struct casing_context * ctx ,
411419 ptrdiff_t * startp , ptrdiff_t * endp )
412420{
413- ptrdiff_t first = -1 , last = -1 ; /* Position of first and last changes. */
421+ ptrdiff_t first = -1 , last = -1 ; /* Position of first and last changes. */
414422 ptrdiff_t pos = * startp , pos_byte = CHAR_TO_BYTE (pos ), size = * endp - pos ;
415423 ptrdiff_t opoint = PT , added = 0 ;
416- struct casing_str_buf buf ;
417- bool changed ;
418- int ch , len ;
419424
420425 for (; size ; -- size )
421426 {
422- ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte ), len );
423- changed = case_character (
424- & buf , ctx , ch ,
425- size > 1 ? BYTE_POS_ADDR (pos_byte + len ) : NULL );
426-
427- if (!changed )
427+ int len ;
428+ int ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte ), len );
429+ struct casing_str_buf buf ;
430+ if (!case_character (& buf , ctx , ch ,
431+ size > 1 ? BYTE_POS_ADDR (pos_byte + len ) : NULL ))
428432 {
429433 pos_byte += len ;
430434 ++ pos ;
@@ -468,19 +472,19 @@ do_casify_multibyte_region (struct casing_context *ctx,
468472static ptrdiff_t
469473casify_region (enum case_action flag , Lisp_Object b , Lisp_Object e )
470474{
471- ptrdiff_t start , end , orig_end , added ;
475+ ptrdiff_t added ;
472476 struct casing_context ctx ;
473477
474478 validate_region (& b , & e );
475- start = XFASTINT (b );
476- end = XFASTINT (e );
479+ ptrdiff_t start = XFASTINT (b );
480+ ptrdiff_t end = XFASTINT (e );
477481 if (start == end )
478- /* Not modifying because nothing marked */
482+ /* Not modifying because nothing marked. */
479483 return end ;
480484 modify_text (start , end );
481485 prepare_casing_context (& ctx , flag , true);
482486
483- orig_end = end ;
487+ ptrdiff_t orig_end = end ;
484488 record_delete (start , make_buffer_string (start , end , true), false);
485489 if (NILP (BVAR (current_buffer , enable_multibyte_characters )))
486490 {
0 commit comments