Skip to content

Commit d0dd0bd

Browse files
author
José Valim
committed
Clean up unicode range parsing
Signed-off-by: José Valim <[email protected]>
1 parent 77b5fe0 commit d0dd0bd

File tree

1 file changed

+14
-17
lines changed

1 file changed

+14
-17
lines changed

lib/elixir/unicode/unicode.ex

Lines changed: 14 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -20,9 +20,9 @@ defmodule String.Unicode do
2020
<<first::4-bytes, "..", last::4-bytes, _::binary>> ->
2121
first = String.to_integer(first, 16)
2222
last = String.to_integer(last, 16)
23-
Enum.map(first..last, &to_binary.(Integer.to_string(&1, 16))) ++ acc
23+
Enum.map(first..last, fn int -> <<int::utf8>> end) ++ acc
2424
<<single::4-bytes, _::binary>> ->
25-
[to_binary.(single) | acc]
25+
[<<String.to_integer(single, 16)::utf8>> | acc]
2626
end
2727
end
2828

@@ -222,24 +222,21 @@ defmodule String.Graphemes do
222222
cluster_path = Path.join(__DIR__, "GraphemeBreakProperty.txt")
223223
regex = ~r/(?:^([0-9A-F]+)(?:\.\.([0-9A-F]+))?)\s+;\s(\w+)/m
224224

225-
to_range = fn
226-
first, "" ->
227-
[<<String.to_integer(first, 16)::utf8>>]
228-
first, last ->
229-
range = String.to_integer(first, 16)..String.to_integer(last, 16)
230-
Enum.map(range, fn(int) -> <<int::utf8>> end)
231-
end
232-
233225
cluster = Enum.reduce File.stream!(cluster_path), %{}, fn(line, dict) ->
234226
[_full, first, last, class] = Regex.run(regex, line)
235227

236-
# Skip surrogates
237-
if first == "D800" and last == "DFFF" do
238-
dict
239-
else
240-
list = to_range.(first, last)
241-
Map.update(dict, class, list, &(&1 ++ list))
242-
end
228+
codepoints =
229+
case {first, last} do
230+
{"D800", "DFFF"} ->
231+
[]
232+
{first, ""} ->
233+
[<<String.to_integer(first, 16)::utf8>>]
234+
{first, last} ->
235+
range = String.to_integer(first, 16)..String.to_integer(last, 16)
236+
Enum.map(range, fn int -> <<int::utf8>> end)
237+
end
238+
239+
Map.update(dict, class, codepoints, &(&1 ++ codepoints))
243240
end
244241

245242
# There is no codepoint marked as Prepend by Unicode 6.3.0

0 commit comments

Comments (0)