Skip to content

Commit 451a0c8

Browse files
committed
Minor casefiddle.c cleanups
* src/casefiddle.c: Redo recent changes to match GNU style, and prefer C99-style decls within blocks. (GREEK_CAPITAL_LETTER_SIGMA): Rename from CAPITAL_SIGMA, so that we are merely using the Unicode name, and make it a constant rather than a macro. All uses changed. (SMALL_SIGMA): Remove; unused. (GREEK_SMALL_LETTER_FINAL_SIGMA): Rename from SMALL_FINAL_SIGMA, and make it a constant rather than a macro. All uses changed. (do_casify_multibyte_string): Use ‘verify’ rather than an unportable static_assertion local.
1 parent f3a0f3a commit 451a0c8

File tree

1 file changed

+102
-98
lines changed

1 file changed

+102
-98
lines changed

src/casefiddle.c

Lines changed: 102 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -33,51 +33,61 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
3333
enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
3434

3535
/* State for casing individual characters. */
36-
struct casing_context {
36+
struct casing_context
37+
{
3738
/* A char-table with title-case character mappings or nil. Non-nil implies
3839
flag is CASE_CAPITALIZE or CASE_CAPITALIZE_UP. */
3940
Lisp_Object titlecase_char_table;
41+
4042
/* The unconditional special-casing Unicode property char tables for upper
41-
casing, lower casing and title casing respectively. */
43+
casing, lower casing and title casing respectively. */
4244
Lisp_Object specialcase_char_tables[3];
43-
/* User-requested action. */
45+
46+
/* User-requested action. */
4447
enum case_action flag;
45-
/* If true, function operates on a buffer as opposed to a string or character.
46-
When run on a buffer, syntax_prefix_flag_p is taken into account when
47-
determined inword flag. */
48+
49+
/* If true, the function operates on a buffer as opposed to a string
50+
or character. When run on a buffer, syntax_prefix_flag_p is
51+
taken into account when determining whether the context is within
52+
a word. */
4853
bool inbuffer;
49-
/* Whether we are inside of a word. */
54+
55+
/* Whether the context is within a word. */
5056
bool inword;
5157
};
5258

53-
/* Initialise CTX structure for casing characters. */
59+
/* Initialize CTX structure for casing characters. */
5460
static void
5561
prepare_casing_context (struct casing_context *ctx,
5662
enum case_action flag, bool inbuffer)
5763
{
5864
ctx->flag = flag;
5965
ctx->inbuffer = inbuffer;
6066
ctx->inword = false;
61-
ctx->titlecase_char_table = (int)flag < (int)CASE_CAPITALIZE ? Qnil :
62-
uniprop_table (intern_c_string ("titlecase"));
63-
ctx->specialcase_char_tables[CASE_UP] = flag == CASE_DOWN ? Qnil :
64-
uniprop_table (intern_c_string ("special-uppercase"));
65-
ctx->specialcase_char_tables[CASE_DOWN] = flag == CASE_UP ? Qnil :
66-
uniprop_table (intern_c_string ("special-lowercase"));
67-
ctx->specialcase_char_tables[CASE_CAPITALIZE] =
68-
(int)flag < (int)CASE_CAPITALIZE ? Qnil :
69-
uniprop_table (intern_c_string ("special-titlecase"));
67+
ctx->titlecase_char_table
68+
= (flag < CASE_CAPITALIZE ? Qnil
69+
: uniprop_table (intern_c_string ("titlecase")));
70+
ctx->specialcase_char_tables[CASE_UP]
71+
= (flag == CASE_DOWN ? Qnil
72+
: uniprop_table (intern_c_string ("special-uppercase")));
73+
ctx->specialcase_char_tables[CASE_DOWN]
74+
= (flag == CASE_UP ? Qnil
75+
: uniprop_table (intern_c_string ("special-lowercase")));
76+
ctx->specialcase_char_tables[CASE_CAPITALIZE]
77+
= (flag < CASE_CAPITALIZE ? Qnil
78+
: uniprop_table (intern_c_string ("special-titlecase")));
7079

7180
/* If the case table is flagged as modified, rescan it. */
7281
if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
7382
Fset_case_table (BVAR (current_buffer, downcase_table));
7483

75-
if (inbuffer && (int) flag >= (int) CASE_CAPITALIZE)
84+
if (inbuffer && flag >= CASE_CAPITALIZE)
7685
SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */
7786
}
7887

79-
struct casing_str_buf {
80-
unsigned char data[MAX_MULTIBYTE_LENGTH > 6 ? MAX_MULTIBYTE_LENGTH : 6];
88+
struct casing_str_buf
89+
{
90+
unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)];
8191
unsigned char len_chars;
8292
unsigned char len_bytes;
8393
};
@@ -87,24 +97,23 @@ struct casing_str_buf {
8797
character has been changed.
8898
8999
Since meaning of return value depends on arguments, it’s more convenient to
90-
use case_single_character or case_character instead. */
100+
use case_single_character or case_character instead. */
91101
static int
92102
case_character_impl (struct casing_str_buf *buf,
93103
struct casing_context *ctx, int ch)
94104
{
95105
enum case_action flag;
96106
Lisp_Object prop;
97-
bool was_inword;
98107
int cased;
99108

100109
/* Update inword state */
101-
was_inword = ctx->inword;
110+
bool was_inword = ctx->inword;
102111
ctx->inword = SYNTAX (ch) == Sword &&
103112
(!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch));
104113

105-
/* Normalise flag so its one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */
114+
/* Normalize flag so it's one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */
106115
if (ctx->flag == CASE_CAPITALIZE)
107-
flag = (enum case_action)((int)ctx->flag - was_inword);
116+
flag = ctx->flag - was_inword;
108117
else if (ctx->flag != CASE_CAPITALIZE_UP)
109118
flag = ctx->flag;
110119
else if (!was_inword)
@@ -115,33 +124,34 @@ case_character_impl (struct casing_str_buf *buf,
115124
goto done;
116125
}
117126

118-
/* Look through the special casing entries. */
119-
if (buf && !NILP(ctx->specialcase_char_tables[(int)flag]))
127+
/* Look through the special casing entries. */
128+
if (buf && !NILP (ctx->specialcase_char_tables[(int)flag]))
120129
{
121-
prop = CHAR_TABLE_REF(ctx->specialcase_char_tables[(int)flag], ch);
122-
if (STRINGP(prop))
130+
prop = CHAR_TABLE_REF (ctx->specialcase_char_tables[(int)flag], ch);
131+
if (STRINGP (prop))
123132
{
124-
struct Lisp_String *str = XSTRING(prop);
125-
if (STRING_BYTES(str) <= sizeof buf->data)
133+
struct Lisp_String *str = XSTRING (prop);
134+
if (STRING_BYTES (str) <= sizeof buf->data)
126135
{
127136
buf->len_chars = str->size;
128-
buf->len_bytes = STRING_BYTES(str);
129-
memcpy(buf->data, str->data, buf->len_bytes);
137+
buf->len_bytes = STRING_BYTES (str);
138+
memcpy (buf->data, str->data, buf->len_bytes);
130139
return 1;
131140
}
132141
}
133142
}
134143

135-
/* Handle simple, one-to-one case. */
144+
/* Handle simple, one-to-one case. */
136145
if (flag == CASE_DOWN)
137146
cased = downcase (ch);
138-
else if (!NILP (ctx->titlecase_char_table) &&
139-
CHARACTERP (prop = CHAR_TABLE_REF (ctx->titlecase_char_table, ch)))
147+
else if (!NILP (ctx->titlecase_char_table)
148+
&& CHARACTERP (prop
149+
= CHAR_TABLE_REF (ctx->titlecase_char_table, ch)))
140150
cased = XFASTINT (prop);
141151
else
142-
cased = upcase(ch);
152+
cased = upcase (ch);
143153

144-
/* And we’re done. */
154+
/* And we’re done. */
145155
done:
146156
if (!buf)
147157
return cased;
@@ -155,18 +165,17 @@ case_character_impl (struct casing_str_buf *buf,
155165
casing.
156166
157167
The rule does not conflict with any other casing rules so while it is
158-
a conditional one, it is independent on language. */
168+
a conditional one, it is independent of language. */
159169

160-
#define CAPITAL_SIGMA 0x03A3
161-
#define SMALL_SIGMA 0x03C3
162-
#define SMALL_FINAL_SIGMA 0x03C2
170+
enum { GREEK_CAPITAL_LETTER_SIGMA = 0x03A3 }; /* Σ, U+03A3 */
171+
enum { GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2 }; /* ς, U+03C2 */
163172

164173
/* Based on CTX, case character CH accordingly. Update CTX as necessary.
165174
Return cased character.
166175
167176
Special casing rules (such as upcase(fi) = FI) are not handled. For
168177
characters whose casing results in multiple code points, the character is
169-
returned unchanged. */
178+
returned unchanged. */
170179
static inline int
171180
case_single_character (struct casing_context *ctx, int ch)
172181
{
@@ -181,23 +190,21 @@ case_single_character (struct casing_context *ctx, int ch)
181190
apply some rules which depend on preceding state.
182191
183192
This is like case_single_character but also handles one-to-many casing
184-
rules. */
193+
rules. */
185194
static bool
186195
case_character (struct casing_str_buf *buf, struct casing_context *ctx,
187196
int ch, const unsigned char *next)
188197
{
189-
bool changed, was_inword;
190-
191-
was_inword = ctx->inword;
192-
changed = case_character_impl (buf, ctx, ch);
198+
bool was_inword = ctx->inword;
199+
bool changed = case_character_impl (buf, ctx, ch);
193200

194201
/* If we have just down-cased a capital sigma and the next character no longer
195202
has a word syntax (i.e. current character is end of word), use final
196-
sigma. */
197-
if (was_inword && ch == CAPITAL_SIGMA && changed &&
198-
(!next || SYNTAX (STRING_CHAR (next)) != Sword))
203+
sigma. */
204+
if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed
205+
&& (!next || SYNTAX (STRING_CHAR (next)) != Sword))
199206
{
200-
buf->len_bytes = CHAR_STRING (SMALL_FINAL_SIGMA, buf->data);
207+
buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data);
201208
buf->len_chars = 1;
202209
}
203210

@@ -209,63 +216,64 @@ do_casify_natnum (struct casing_context *ctx, Lisp_Object obj)
209216
{
210217
int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
211218
| CHAR_SHIFT | CHAR_CTL | CHAR_META);
212-
int flags, ch = XFASTINT (obj), cased;
213-
bool multibyte;
219+
int ch = XFASTINT (obj);
214220

215221
/* If the character has higher bits set above the flags, return it unchanged.
216222
It is not a real character. */
217223
if (UNSIGNED_CMP (ch, >, flagbits))
218224
return obj;
219225

220-
flags = ch & flagbits;
226+
int flags = ch & flagbits;
221227
ch = ch & ~flagbits;
222228

223229
/* FIXME: Even if enable-multibyte-characters is nil, we may manipulate
224230
multibyte chars. This means we have a bug for latin-1 chars since when we
225231
receive an int 128-255 we can't tell whether it's an eight-bit byte or
226232
a latin-1 char. */
227-
multibyte = ch >= 256
228-
|| !NILP (BVAR (current_buffer, enable_multibyte_characters));
233+
bool multibyte = (ch >= 256
234+
|| !NILP (BVAR (current_buffer,
235+
enable_multibyte_characters)));
229236
if (! multibyte)
230237
MAKE_CHAR_MULTIBYTE (ch);
231-
cased = case_single_character (ctx, ch);
238+
int cased = case_single_character (ctx, ch);
232239
if (cased == ch)
233240
return obj;
234241

235242
if (! multibyte)
236243
MAKE_CHAR_UNIBYTE (cased);
237-
XSETFASTINT (obj, cased | flags);
238-
return obj;
244+
return make_natnum (cased | flags);
239245
}
240246

241247
static Lisp_Object
242248
do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj)
243249
{
244-
/* We assume data is the first member of casing_str_buf structure so that if
245-
we cast a (char *) into (struct casing_str_buf *) the representation of the
246-
character is at the beginning of the buffer. This is why we don’t need
247-
separate struct casing_str_buf object but rather write directly to o. */
248-
typedef char static_assertion[offsetof(struct casing_str_buf, data) ? -1 : 1];
250+
/* Verify that ‘data’ is the first member of struct casing_str_buf
251+
so that when casting char * to struct casing_str_buf *, the
252+
representation of the character is at the beginning of the
253+
buffer. This is why we don’t need a separate struct
254+
casing_str_buf object, and can write directly to the destination. */
255+
verify (offsetof (struct casing_str_buf, data) == 0);
249256

250257
ptrdiff_t size = SCHARS (obj), n;
251-
int ch;
252258
USE_SAFE_ALLOCA;
253-
if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &n) ||
254-
INT_ADD_WRAPV (n, sizeof(struct casing_str_buf), &n))
259+
if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &n)
260+
|| INT_ADD_WRAPV (n, sizeof (struct casing_str_buf), &n))
255261
n = PTRDIFF_MAX;
256-
unsigned char *const dst = SAFE_ALLOCA (n), *const dst_end = dst + n;
262+
unsigned char *dst = SAFE_ALLOCA (n);
263+
unsigned char *dst_end = dst + n;
257264
unsigned char *o = dst;
258265

259266
const unsigned char *src = SDATA (obj);
260267

261268
for (n = 0; size; --size)
262269
{
263-
if (dst_end - o < sizeof(struct casing_str_buf))
270+
if (dst_end - o < sizeof (struct casing_str_buf))
264271
string_overflow ();
265-
ch = STRING_CHAR_ADVANCE (src);
266-
case_character ((void *)o, ctx, ch, size > 1 ? src : NULL);
267-
n += ((struct casing_str_buf *)o)->len_chars;
268-
o += ((struct casing_str_buf *)o)->len_bytes;
272+
int ch = STRING_CHAR_ADVANCE (src);
273+
case_character ((struct casing_str_buf *) o, ctx, ch,
274+
size > 1 ? src : NULL);
275+
n += ((struct casing_str_buf *) o)->len_chars;
276+
o += ((struct casing_str_buf *) o)->len_bytes;
269277
}
270278
eassert (o <= dst_end);
271279
obj = make_multibyte_string ((char *) dst, n, o - dst);
@@ -288,7 +296,8 @@ do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj)
288296
if (ch == cased)
289297
continue;
290298
MAKE_CHAR_UNIBYTE (cased);
291-
/* If the char can't be converted to a valid byte, just don't change it */
299+
/* If the char can't be converted to a valid byte, just don't
300+
change it. */
292301
if (cased >= 0 && cased < 256)
293302
SSET (obj, i, cased);
294303
}
@@ -369,21 +378,20 @@ cased, e.g. fi, are returned unchanged. */)
369378
*ENDP unspecified.
370379
371380
Always return 0. This is so that interface of this function is the same as
372-
do_casify_multibyte_region. */
381+
do_casify_multibyte_region. */
373382
static ptrdiff_t
374383
do_casify_unibyte_region (struct casing_context *ctx,
375384
ptrdiff_t *startp, ptrdiff_t *endp)
376385
{
377-
ptrdiff_t first = -1, last = -1; /* Position of first and last changes. */
378-
ptrdiff_t pos = *startp, end = *endp;
379-
int ch, cased;
386+
ptrdiff_t first = -1, last = -1; /* Position of first and last changes. */
387+
ptrdiff_t end = *endp;
380388

381-
for (; pos < end; ++pos)
389+
for (ptrdiff_t pos = *startp; pos < end; ++pos)
382390
{
383-
ch = FETCH_BYTE (pos);
391+
int ch = FETCH_BYTE (pos);
384392
MAKE_CHAR_MULTIBYTE (ch);
385393

386-
cased = case_single_character (ctx, ch);
394+
int cased = case_single_character (ctx, ch);
387395
if (cased == ch)
388396
continue;
389397

@@ -405,26 +413,22 @@ do_casify_unibyte_region (struct casing_context *ctx,
405413
Return number of added characters (may be negative if more characters were
406414
deleted than inserted), save first and last positions that have changed in
407415
*STARTP and *ENDP respectively. If no characters were changed, return 0,
408-
save -1 to *STARTP and leave *ENDP unspecified. */
416+
save -1 to *STARTP and leave *ENDP unspecified. */
409417
static ptrdiff_t
410418
do_casify_multibyte_region (struct casing_context *ctx,
411419
ptrdiff_t *startp, ptrdiff_t *endp)
412420
{
413-
ptrdiff_t first = -1, last = -1; /* Position of first and last changes. */
421+
ptrdiff_t first = -1, last = -1; /* Position of first and last changes. */
414422
ptrdiff_t pos = *startp, pos_byte = CHAR_TO_BYTE (pos), size = *endp - pos;
415423
ptrdiff_t opoint = PT, added = 0;
416-
struct casing_str_buf buf;
417-
bool changed;
418-
int ch, len;
419424

420425
for (; size; --size)
421426
{
422-
ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte), len);
423-
changed = case_character (
424-
&buf, ctx, ch,
425-
size > 1 ? BYTE_POS_ADDR (pos_byte + len) : NULL);
426-
427-
if (!changed)
427+
int len;
428+
int ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte), len);
429+
struct casing_str_buf buf;
430+
if (!case_character (&buf, ctx, ch,
431+
size > 1 ? BYTE_POS_ADDR (pos_byte + len) : NULL))
428432
{
429433
pos_byte += len;
430434
++pos;
@@ -468,19 +472,19 @@ do_casify_multibyte_region (struct casing_context *ctx,
468472
static ptrdiff_t
469473
casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
470474
{
471-
ptrdiff_t start, end, orig_end, added;
475+
ptrdiff_t added;
472476
struct casing_context ctx;
473477

474478
validate_region (&b, &e);
475-
start = XFASTINT (b);
476-
end = XFASTINT (e);
479+
ptrdiff_t start = XFASTINT (b);
480+
ptrdiff_t end = XFASTINT (e);
477481
if (start == end)
478-
/* Not modifying because nothing marked */
482+
/* Not modifying because nothing marked. */
479483
return end;
480484
modify_text (start, end);
481485
prepare_casing_context (&ctx, flag, true);
482486

483-
orig_end = end;
487+
ptrdiff_t orig_end = end;
484488
record_delete (start, make_buffer_string (start, end, true), false);
485489
if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
486490
{

0 commit comments

Comments
 (0)