Skip to content

Commit 13d813b

Browse files
committed
Add support for title-casing letters (bug#24603)
* src/casefiddle.c (struct casing_context, prepare_casing_context): Add titlecase_char_table member. It’s set to the ‘titlecase’ Unicode property table if capitalisation has been requested. (case_character): Make use of the titlecase_char_table to title-case initial characters when capitalising. * test/src/casefiddle-tests.el (casefiddle-tests--characters, casefiddle-tests-casing): Update test cases which are now passing.
1 parent 8e5b909 commit 13d813b

File tree

3 files changed

+46
-20
lines changed

3 files changed

+46
-20
lines changed

etc/NEWS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ same as in modes where the character is not whitespace.
355355
Instead of only checking the modification time, Emacs now also checks
356356
the file's actual content before prompting the user.
357357

358-
** Title case characters are properly converted to upper case.
358+
** Title case characters are properly cased (from and into).
359359
'upcase', 'upcase-region' et al. convert title case characters (such
360360
as the single character "Dz") into their upper case form (such as "DZ").
361361
As a downside, 'capitalize' and 'upcase-initials' produce awkward

src/casefiddle.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
3333

3434
/* State for casing individual characters. */
3535
struct casing_context {
36+
/* A char-table with title-case character mappings or nil. Non-nil implies
37+
flag is CASE_CAPITALIZE or CASE_CAPITALIZE_UP. */
38+
Lisp_Object titlecase_char_table;
3639
/* User-requested action. */
3740
enum case_action flag;
3841
/* If true, function operates on a buffer as opposed to a string or character.
@@ -53,6 +56,8 @@ prepare_casing_context (struct casing_context *ctx,
5356
ctx->flag = flag;
5457
ctx->inbuffer = inbuffer;
5558
ctx->inword = flag == CASE_DOWN;
59+
ctx->titlecase_char_table = (int)flag < (int)CASE_CAPITALIZE ? Qnil :
60+
uniprop_table (intern_c_string ("titlecase"));
5661

5762
/* If the case table is flagged as modified, rescan it. */
5863
if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
@@ -67,10 +72,16 @@ prepare_casing_context (struct casing_context *ctx,
6772
static int
6873
case_character (struct casing_context *ctx, int ch)
6974
{
75+
Lisp_Object prop;
76+
7077
if (ctx->inword)
7178
ch = ctx->flag == CASE_CAPITALIZE_UP ? ch : downcase (ch);
79+
else if (!NILP (ctx->titlecase_char_table) &&
80+
CHARACTERP (prop = CHAR_TABLE_REF (ctx->titlecase_char_table, ch)))
81+
ch = XFASTINT (prop);
7282
else
7383
ch = upcase(ch);
84+
7485
if ((int) ctx->flag >= (int) CASE_CAPITALIZE)
7586
ctx->inword = SYNTAX (ch) == Sword &&
7687
(!ctx->inbuffer || ctx->inword || !syntax_prefix_flag_p (ch));
@@ -198,8 +209,8 @@ The argument object is not altered--the value is a copy. */)
198209

199210
DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
200211
doc: /* Convert argument to capitalized form and return that.
201-
This means that each word's first character is upper case
202-
and the rest is lower case.
212+
This means that each word's first character is converted to either
213+
title case or upper case, and the rest to lower case.
203214
The argument may be a character or string. The result has the same type.
204215
The argument object is not altered--the value is a copy. */)
205216
(Lisp_Object obj)
@@ -211,7 +222,8 @@ The argument object is not altered--the value is a copy. */)
211222

212223
DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
213224
doc: /* Convert the initial of each word in the argument to upper case.
214-
Do not change the other letters of each word.
225+
This means that each word's first character is converted to either
226+
title case or upper case, and the rest are left unchanged.
215227
The argument may be a character or string. The result has the same type.
216228
The argument object is not altered--the value is a copy. */)
217229
(Lisp_Object obj)
@@ -375,8 +387,8 @@ point and the mark is operated on. */)
375387

376388
DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
377389
doc: /* Convert the region to capitalized form.
378-
Capitalized form means each word's first character is upper case
379-
and the rest of it is lower case.
390+
This means that each word's first character is converted to either
391+
title case or upper case, and the rest to lower case.
380392
In programs, give two arguments, the starting and ending
381393
character positions to operate on. */)
382394
(Lisp_Object beg, Lisp_Object end)
@@ -390,7 +402,8 @@ character positions to operate on. */)
390402
DEFUN ("upcase-initials-region", Fupcase_initials_region,
391403
Supcase_initials_region, 2, 2, "r",
392404
doc: /* Upcase the initial of each word in the region.
393-
Subsequent letters of each word are not changed.
405+
This means that each word's first character is converted to either
406+
title case or upper case, and the rest are left unchanged.
394407
In programs, give two arguments, the starting and ending
395408
character positions to operate on. */)
396409
(Lisp_Object beg, Lisp_Object end)

test/src/casefiddle-tests.el

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,9 @@
6363
( )
6464
( )
6565

66-
;; FIXME(bug#24603): Commented ones are what we want.
67-
;;(?DŽ ?DŽ ?dž ?Dž)
68-
( )
69-
;;(?Dž ?DŽ ?dž ?Dž)
70-
( )
71-
;;(?dž ?DŽ ?dž ?Dž)
72-
( )
66+
( )
67+
( )
68+
( )
7369

7470
( )
7571
( )
@@ -186,19 +182,19 @@
186182
;; input upper lower capitalize up-initials
187183
'(("Foo baR" "FOO BAR" "foo bar" "Foo Bar" "Foo BaR")
188184
("Ⅷ ⅷ" "Ⅷ Ⅷ" "ⅷ ⅷ" "Ⅷ Ⅷ" "Ⅷ Ⅷ")
185+
;; "DžUNGLA" is an unfortunate result but it’s really the best we can
186+
;; do while still being consistent. Hopefully, users only ever
187+
;; use upcase-initials on camelCase identifiers not real words.
188+
("DŽUNGLA" "DŽUNGLA" "džungla" "Džungla" "DžUNGLA")
189+
("Džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
190+
("džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
189191
;; FIXME(bug#24603): Everything below is broken at the moment.
190192
;; Here’s what should happen:
191-
;;("DŽUNGLA" "DŽUNGLA" "džungla" "Džungla" "DžUNGLA")
192-
;;("Džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
193-
;;("džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
194193
;;("define" "DEFINE" "define" "Define" "Define")
195194
;;("fish" "FIsh" "fish" "Fish" "Fish")
196195
;;("Straße" "STRASSE" "straße" "Straße" "Straße")
197196
;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος")
198197
;; And here’s what is actually happening:
199-
("DŽUNGLA" "DŽUNGLA" "džungla" "DŽungla" "DŽUNGLA")
200-
("Džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla")
201-
("džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla")
202198
("define" "DEfiNE" "define" "Define" "Define")
203199
("fish" "fiSH" "fish" "fish" "fish")
204200
("Straße" "STRAßE" "straße" "Straße" "Straße")
@@ -243,4 +239,21 @@
243239
"\xef\xff\xef Zażółć GĘŚlą \xcf\xcf")))))))
244240

245241

242+
(ert-deftest casefiddle-tests-char-casing ()
243+
;; input upcase downcase [titlecase]
244+
(dolist (test '((?a ?A ?a) (?A ?A ?a)
245+
( ) ( )
246+
( ) (?ẞ ?ẞ )
247+
(?ⅷ ?Ⅷ ?ⅷ) (?Ⅷ ?Ⅷ ?ⅷ)
248+
( ) ( ) ( )))
249+
(let ((ch (car test))
250+
(up (nth 1 test))
251+
(lo (nth 2 test))
252+
(tc (or (nth 3 test) (nth 1 test))))
253+
(should (eq up (upcase ch)))
254+
(should (eq lo (downcase ch)))
255+
(should (eq tc (capitalize ch)))
256+
(should (eq tc (upcase-initials ch))))))
257+
258+
246259
;;; casefiddle-tests.el ends here

0 commit comments

Comments
 (0)