@@ -20,9 +20,9 @@ defmodule String.Unicode do
20
20
<< first :: 4 - bytes , ".." , last :: 4 - bytes , _ :: binary >> ->
21
21
first = String . to_integer ( first , 16 )
22
22
last = String . to_integer ( last , 16 )
23
- Enum . map ( first .. last , & to_binary . ( Integer . to_string ( & 1 , 16 ) ) ) ++ acc
23
+ Enum . map ( first .. last , fn int -> << int :: utf8 >> end ) ++ acc
24
24
<< single :: 4 - bytes , _ :: binary >> ->
25
- [ to_binary . ( single ) | acc ]
25
+ [ << String . to_integer ( single , 16 ) :: utf8 >> | acc ]
26
26
end
27
27
end
28
28
@@ -222,24 +222,21 @@ defmodule String.Graphemes do
222
222
cluster_path = Path . join ( __DIR__ , "GraphemeBreakProperty.txt" )
223
223
regex = ~r/ (?:^([0-9A-F]+)(?:\. \. ([0-9A-F]+))?)\s +;\s (\w +)/ m
224
224
225
- to_range = fn
226
- first , "" ->
227
- [ << String . to_integer ( first , 16 ) :: utf8 >> ]
228
- first , last ->
229
- range = String . to_integer ( first , 16 ) .. String . to_integer ( last , 16 )
230
- Enum . map ( range , fn ( int ) -> << int :: utf8 >> end )
231
- end
232
-
233
225
cluster = Enum . reduce File . stream! ( cluster_path ) , % { } , fn ( line , dict ) ->
234
226
[ _full , first , last , class ] = Regex . run ( regex , line )
235
227
236
- # Skip surrogates
237
- if first == "D800" and last == "DFFF" do
238
- dict
239
- else
240
- list = to_range . ( first , last )
241
- Map . update ( dict , class , list , & ( & 1 ++ list ) )
242
- end
228
# Build the list of UTF-8 encoded codepoints described by this line's
# `first` / `last` hex captures.
+ codepoints =
229
+ case { first , last } do
230
# D800..DFFF (the surrogate range the previous version also skipped)
# contributes no codepoints.
+ { "D800" , "DFFF" } ->
231
+ [ ]
232
# `last` is "" - presumably when the line has no `..last` part - so the
# line names one codepoint: parse it as hex and encode it as UTF-8.
+ { first , "" } ->
233
+ [ << String . to_integer ( first , 16 ) :: utf8 >> ]
234
# Otherwise both ends are hex strings: expand the inclusive range and
# encode every integer in it as a UTF-8 binary.
+ { first , last } ->
235
+ range = String . to_integer ( first , 16 ) .. String . to_integer ( last , 16 )
236
+ Enum . map ( range , fn int -> << int :: utf8 >> end )
237
+ end
238
+
239
# Store the list under `class` when the key is absent; otherwise append it
# to the list already accumulated for that class.
+ Map . update ( dict , class , codepoints , & ( & 1 ++ codepoints ) )
243
240
end
244
241
245
242
# There is no codepoint marked as Prepend by Unicode 6.3.0
0 commit comments