Skip to content

Commit 1828888

Browse files
author
José Valim
committed
Provide :greek and :ascii mappings in String upcase/downcase/capitalize
Closes #7105.
1 parent afd51e8 commit 1828888

File tree

3 files changed

+116
-39
lines changed

3 files changed

+116
-39
lines changed

lib/elixir/lib/string.ex

Lines changed: 84 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ defmodule String do
208208
@type grapheme :: t
209209
@type pattern :: t | [t] | :binary.cp()
210210

211+
@conditional_mappings [:greek]
212+
211213
@doc """
212214
Checks if a string contains only printable characters.
213215
@@ -604,7 +606,12 @@ defmodule String do
604606
defdelegate normalize(string, form), to: String.Normalizer
605607

606608
@doc """
607-
Converts all characters in the given string to uppercase.
609+
Converts all characters in the given string to uppercase according to `mode`.
610+
611+
`mode` may be `:default`, `:ascii` or `:greek`. The `:default` mode considers
612+
all non-conditional transformations outlined in the Unicode standard. `:ascii`
613+
uppercases only the letters a to z. `:greek` includes the context sensitive
614+
mappings found in Greek.
608615
609616
## Examples
610617
@@ -617,12 +624,38 @@ defmodule String do
617624
iex> String.upcase("olá")
618625
"OLÁ"
619626
627+
The `:ascii` mode ignores Unicode characters and provides a more
628+
performant implementation when you know the string contains only
629+
ASCII characters:
630+
631+
iex> String.upcase("olá", :ascii)
632+
"OLá"
633+
620634
"""
621-
@spec upcase(t) :: t
622-
defdelegate upcase(binary), to: String.Casing
635+
@spec upcase(t, :default | :ascii | :greek) :: t
636+
def upcase(string, mode \\ :default)
637+
638+
def upcase(string, :default) when is_binary(string) do
639+
String.Casing.upcase(string, "", :default)
640+
end
641+
642+
def upcase(string, :ascii) when is_binary(string) do
643+
for <<x <- string>>,
644+
do: if(x >= ?a and x <= ?z, do: <<x - 32>>, else: <<x>>),
645+
into: ""
646+
end
647+
648+
def upcase(string, mode) when mode in @conditional_mappings do
649+
String.Casing.upcase(string, "", mode)
650+
end
623651

624652
@doc """
625-
Converts all characters in the given string to lowercase.
653+
Converts all characters in the given string to lowercase according to `mode`.
654+
655+
`mode` may be `:default`, `:ascii` or `:greek`. The `:default` mode considers
656+
all non-conditional transformations outlined in the Unicode standard. `:ascii`
657+
lowercases only the letters A to Z. `:greek` includes the context sensitive
658+
mappings found in Greek.
626659
627660
## Examples
628661
@@ -635,18 +668,47 @@ defmodule String do
635668
iex> String.downcase("OLÁ")
636669
"olá"
637670
671+
The `:ascii` mode ignores Unicode characters and provides a more
672+
performant implementation when you know the string contains only
673+
ASCII characters:
674+
675+
iex> String.downcase("OLÁ", :ascii)
676+
"olÁ"
677+
678+
The `:greek` mode properly handles the context sensitive sigma in Greek:
679+
680+
iex> String.downcase("ΣΣ")
681+
"ςς"
682+
683+
iex> String.downcase("ΣΣ", :greek)
684+
"σς"
685+
638686
"""
639-
@spec downcase(t) :: t
640-
defdelegate downcase(binary), to: String.Casing
687+
@spec downcase(t, :default | :ascii | :greek) :: t
688+
def downcase(string, mode \\ :default)
689+
690+
def downcase(string, :default) when is_binary(string) do
691+
String.Casing.downcase(string, "", :default)
692+
end
693+
694+
def downcase(string, :ascii) when is_binary(string) do
695+
for <<x <- string>>,
696+
do: if(x >= ?A and x <= ?Z, do: <<x + 32>>, else: <<x>>),
697+
into: ""
698+
end
699+
700+
def downcase(string, mode) when mode in @conditional_mappings do
701+
String.Casing.downcase(string, "", mode)
702+
end
641703

642704
@doc """
643705
Converts the first character in the given string to
644-
uppercase and the remainder to lowercase.
706+
uppercase and the remainder to lowercase according to `mode`.
645707
646-
This relies on the titlecase information provided
647-
by the Unicode Standard. Note this function makes
648-
no attempt to capitalize all words in the string
649-
(usually known as titlecase).
708+
`mode` may be `:default`, `:ascii` or `:greek`. The `:default` mode considers
709+
all non-conditional transformations outlined in the Unicode standard. `:ascii`
710+
changes the case of ASCII letters only. `:greek` includes the context sensitive
711+
mappings found in Greek.
650712
651713
## Examples
652714
@@ -660,10 +722,17 @@ defmodule String do
660722
"Olá"
661723
662724
"""
663-
@spec capitalize(t) :: t
664-
def capitalize(string) when is_binary(string) do
665-
{char, rest} = String.Casing.titlecase_once(string)
666-
char <> downcase(rest)
725+
@spec capitalize(t, :default | :ascii | :greek) :: t
726+
def capitalize(string, mode \\ :default)
727+
728+
def capitalize(<<char, rest::binary>>, :ascii) do
729+
char = if char >= ?a and char <= ?z, do: char - 32, else: char
730+
<<char>> <> downcase(rest, :ascii)
731+
end
732+
733+
def capitalize(string, mode) when is_binary(string) do
734+
{char, rest} = String.Casing.titlecase_once(string, mode)
735+
char <> downcase(rest, mode)
667736
end
668737

669738
@doc false

lib/elixir/test/elixir/string_test.exs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,15 @@ defmodule StringTest do
164164
end
165165

166166
test "downcase/1 with greek final sigma" do
167-
assert String.downcase("ΣΣ") == "σς"
168-
assert String.downcase("ΣΣ ΣΣ") == "σς σς"
167+
assert String.downcase("ΣΣ") == "ςς"
168+
assert String.downcase("ΣΣ ΣΣ") == "ςς ςς"
169+
170+
assert String.downcase("ΣΣ", :greek) == "σς"
171+
assert String.downcase("ΣΣ ΣΣ", :greek) == "σς σς"
172+
end
173+
174+
test "downcase/1 with ascii" do
175+
assert String.downcase("OLÁ", :ascii) == "olÁ"
169176
end
170177

171178
test "capitalize/1" do
@@ -188,6 +195,11 @@ defmodule StringTest do
188195
assert String.capitalize("fin") == "Fin"
189196
end
190197

198+
test "capitalize/1 with ascii" do
199+
assert String.capitalize("àáâ", :ascii) == "àáâ"
200+
assert String.capitalize("aáA", :ascii) == "Aáa"
201+
end
202+
191203
test "replace_leading/3" do
192204
assert String.replace_leading("aa abc ", "a", "b") == "bb abc "
193205
assert String.replace_leading("__ abc ", "_", "b") == "bb abc "

lib/elixir/unicode/properties.ex

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -105,33 +105,31 @@ defmodule String.Casing do
105105

106106
# Downcase
107107

108-
def downcase(string) when is_binary(string), do: downcase(string, "")
109-
110-
defp downcase(<<0x03A3::utf8, codepoint::utf8, rest::bits>>, acc) do
108+
def downcase(<<0x03A3::utf8, codepoint::utf8, rest::bits>>, acc, :greek) do
111109
downcased =
112110
case letter?(codepoint) do
113111
true -> 0x03C3
114112
false -> 0x03C2
115113
end
116114

117-
downcase(<<codepoint::utf8, rest::bits>>, <<acc::binary, downcased::utf8>>)
115+
downcase(<<codepoint::utf8, rest::bits>>, <<acc::binary, downcased::utf8>>, :greek)
118116
end
119117

120-
defp downcase(<<0x03A3::utf8, rest::bits>>, acc) do
121-
downcase(rest, <<acc::binary, 0x03C2::utf8>>)
118+
def downcase(<<0x03A3::utf8, rest::bits>>, acc, mode) do
119+
downcase(rest, <<acc::binary, 0x03C2::utf8>>, mode)
122120
end
123121

124122
for {codepoint, _upper, lower, _title} <- codes, lower && lower != codepoint do
125-
defp downcase(<<unquote(codepoint), rest::bits>>, acc) do
126-
downcase(rest, acc <> unquote(lower))
123+
def downcase(<<unquote(codepoint), rest::bits>>, acc, mode) do
124+
downcase(rest, acc <> unquote(lower), mode)
127125
end
128126
end
129127

130-
defp downcase(<<char, rest::bits>>, acc) do
131-
downcase(rest, <<acc::binary, char>>)
128+
def downcase(<<char, rest::bits>>, acc, mode) do
129+
downcase(rest, <<acc::binary, char>>, mode)
132130
end
133131

134-
defp downcase("", acc), do: acc
132+
def downcase("", acc, _mode), do: acc
135133

136134
# Sigma handling
137135

@@ -149,35 +147,33 @@ defmodule String.Casing do
149147

150148
# Upcase
151149

152-
def upcase(string) when is_binary(string), do: upcase(string, "")
153-
154150
for {codepoint, upper, _lower, _title} <- codes, upper && upper != codepoint do
155-
defp upcase(<<unquote(codepoint), rest::bits>>, acc) do
156-
upcase(rest, acc <> unquote(upper))
151+
def upcase(<<unquote(codepoint), rest::bits>>, acc, mode) do
152+
upcase(rest, acc <> unquote(upper), mode)
157153
end
158154
end
159155

160-
defp upcase(<<char, rest::bits>>, acc) do
161-
upcase(rest, <<acc::binary, char>>)
156+
def upcase(<<char, rest::bits>>, acc, mode) do
157+
upcase(rest, <<acc::binary, char>>, mode)
162158
end
163159

164-
defp upcase("", acc), do: acc
160+
def upcase("", acc, _mode), do: acc
165161

166162
# Titlecase once
167163

168-
def titlecase_once(""), do: {"", ""}
164+
def titlecase_once("", _mode), do: {"", ""}
169165

170166
for {codepoint, _upper, _lower, title} <- codes, title && title != codepoint do
171-
def titlecase_once(unquote(codepoint) <> rest) do
167+
def titlecase_once(unquote(codepoint) <> rest, _mode) do
172168
{unquote(title), rest}
173169
end
174170
end
175171

176-
def titlecase_once(<<char::utf8, rest::binary>>) do
172+
def titlecase_once(<<char::utf8, rest::binary>>, _mode) do
177173
{<<char::utf8>>, rest}
178174
end
179175

180-
def titlecase_once(<<char, rest::binary>>) do
176+
def titlecase_once(<<char, rest::binary>>, _mode) do
181177
{<<char>>, rest}
182178
end
183179
end

0 commit comments

Comments
 (0)