Skip to content

Commit bdb4afe

Browse files
author
José Valim
committed
String.capitalize now considers Unicode's titlecase property
1 parent 6df5dfc commit bdb4afe

File tree

3 files changed

+36
-15
lines changed

3 files changed

+36
-15
lines changed

lib/elixir/lib/string.ex

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -179,23 +179,25 @@ defmodule String do
179179
defdelegate downcase(binary), to: String.Unicode
180180

181181
@doc """
182-
Convert the first character on the given string to uppercase
183-
and the remaining to downcase.
182+
Converts the first character in the given string to
183+
titlecase and the remaining characters to lowercase.
184+
185+
This relies on the titlecase information provided
186+
by the Unicode Standard. Note this function makes
187+
no attempt at capitalizing all words in the string
188+
(usually known as titlecase).
184189
185190
## Examples
186191
187192
String.capitalize("abcd") #=> "Abcd"
188-
String.capitalize("ab 123 xpto") #=> "Ab 123 xpto"
193+
String.capitalize("fin") #=> "Fin"
189194
String.capitalize("josé") #=> "José"
190195
191196
"""
192197
@spec capitalize(t) :: t
193-
194198
def capitalize(string) when is_binary(string) do
195-
case next_grapheme(string) do
196-
{ char, rest } -> upcase(char) <> downcase(rest)
197-
:no_grapheme -> ""
198-
end
199+
{ char, rest } = String.Unicode.titlecase_once(string)
200+
char <> downcase(rest)
199201
end
200202

201203
@doc """

lib/elixir/priv/unicode.ex

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,13 @@ defmodule String.Unicode do
2323
_class, bidi, _decomposition,
2424
_numeric_1, _numeric_2, _numeric_3,
2525
_bidi_mirror, _unicode_1, _iso,
26-
upper, lower, _title ] = :binary.split(line, ";", [:global])
26+
upper, lower, title ] = :binary.split(line, ";", [:global])
27+
28+
title = :binary.part(title, 0, size(title) - 1)
2729

2830
cond do
29-
upper != "" or lower != "" ->
30-
{ [{ to_binary.(codepoint), to_binary.(upper), to_binary.(lower) } | cacc], wacc }
31+
upper != "" or lower != "" or title != "" ->
32+
{ [{ to_binary.(codepoint), to_binary.(upper), to_binary.(lower), to_binary.(title) } | cacc], wacc }
3133
bidi in ["B", "S", "WS"] ->
3234
{ cacc, [to_binary.(codepoint) | wacc] }
3335
true ->
@@ -38,9 +40,9 @@ defmodule String.Unicode do
3840
special_path = Path.expand("../SpecialCasing.txt", __FILE__)
3941

4042
codes = Enum.reduce File.iterator!(special_path), codes, fn(line, acc) ->
41-
[ codepoint, lower, _title, upper, _comment ] = :binary.split(line, "; ", [:global])
43+
[ codepoint, lower, title, upper, _comment ] = :binary.split(line, "; ", [:global])
4244
key = to_binary.(codepoint)
43-
:lists.keystore(key, 1, acc, { key, to_binary.(upper), to_binary.(lower) })
45+
:lists.keystore(key, 1, acc, { key, to_binary.(upper), to_binary.(lower), to_binary.(title) })
4446
end
4547

4648
seqs_path = Path.expand("../NamedSequences.txt", __FILE__)
@@ -55,7 +57,7 @@ defmodule String.Unicode do
5557

5658
# Downcase
5759

58-
lc { codepoint, _upper, lower } inlist codes, lower && lower != codepoint do
60+
lc { codepoint, _upper, lower, _title } inlist codes, lower && lower != codepoint do
5961
args = quote do: [unquote(codepoint) <> t]
6062
code = quote do: unquote(lower) <> downcase(t)
6163
def :downcase, args, [], do: code
@@ -71,7 +73,7 @@ defmodule String.Unicode do
7173

7274
# Upcase
7375

74-
lc { codepoint, upper, _lower } inlist codes, upper && upper != codepoint do
76+
lc { codepoint, upper, _lower, _title } inlist codes, upper && upper != codepoint do
7577
args = quote do: [unquote(codepoint) <> t]
7678
code = quote do: unquote(upper) <> upcase(t)
7779
def :upcase, args, [], do: code
@@ -85,6 +87,22 @@ defmodule String.Unicode do
8587
<< >>
8688
end
8789

90+
# Titlecase once
91+
92+
lc { codepoint, _upper, _lower, title } inlist codes, title && title != codepoint do
93+
args = quote do: [unquote(codepoint) <> t]
94+
code = quote do: { unquote(title), t }
95+
def :titlecase_once, args, [], do: code
96+
end
97+
98+
def titlecase_once(<< h, t :: binary >>) do
99+
{ <<h>>, t }
100+
end
101+
102+
def titlecase_once(<< >>) do
103+
{ <<>>, <<>> }
104+
end
105+
88106
# Strip
89107

90108
def lstrip(""), do: ""

lib/elixir/test/elixir/string_test.exs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ defmodule StringTest do
7474
assert String.capitalize("ÂÁÀ") == "Âáà"
7575
assert String.capitalize("òóôõö") == "Òóôõö"
7676
assert String.capitalize("ÒÓÔÕÖ") == "Òóôõö"
77+
assert String.capitalize("fin") == "Fin"
7778
end
7879

7980
test :rstrip do

0 commit comments

Comments
 (0)