@@ -490,18 +490,12 @@ defmodule String.Normalizer do
490
490
normalize_nfd ( rest , acc <> binary )
491
491
end
492
492
493
- for { binary , decomposition } <- decompositions do
494
- defp normalize_nfd ( unquote ( binary ) <> rest , acc ) do
495
- normalize_nfd ( unquote ( IO . iodata_to_binary ( decomposition ) ) <> rest , acc )
496
- end
497
- end
498
-
499
493
# Fallback NFD clause: consumes one grapheme cluster per step and appends its
# decomposed, canonically ordered form to `acc`.
#
# `String.Unicode.next_grapheme_size/1` gives the byte size `n` of the leading
# grapheme together with the remaining binary. A grapheme that is exactly one
# byte long is a single ASCII codepoint, so it is copied through untouched;
# anything longer is handed to `canonical_order/2` with an empty accumulator.
defp normalize_nfd(binary, acc) do
  {n, rest} = String.Unicode.next_grapheme_size(binary)
  part = :binary.part(binary, 0, n)

  normalized =
    case n do
      1 -> part
      _ -> canonical_order(part, [])
    end

  # Both branches must keep recursing in NFD mode. Continuing with
  # normalize_nfc/2 after a one-byte grapheme would process the remainder
  # of the string under the wrong normalization form.
  normalize_nfd(rest, acc <> normalized)
end
507
501
@@ -520,11 +514,21 @@ defmodule String.Normalizer do
520
514
end
521
515
end
522
516
523
# Rewrites one grapheme cluster into decomposed, canonically ordered form.
#
# The cluster is consumed from the front of the binary while `acc` collects
# `{codepoint, combining_class}` pairs, newest first.

# Generated clauses: when the input starts with a known precomposed sequence,
# that prefix is replaced by its decomposition and scanning starts over.
for {composed, parts} <- decompositions do
  expansion = IO.iodata_to_binary(parts)

  defp canonical_order(unquote(composed) <> rest, acc) do
    canonical_order(unquote(expansion) <> rest, acc)
  end
end

# Plain codepoint: remember it together with its combining class.
defp canonical_order(<<cp::utf8, tail::binary>>, acc) do
  canonical_order(tail, [{cp, combining_class(cp)} | acc])
end

# Input exhausted with exactly one codepoint collected: nothing to sort.
defp canonical_order(<<>>, [{cp, _class}]), do: <<cp::utf8>>

# Input exhausted: restore the original order (pairs were prepended), sort by
# combining class with :lists.keysort/2, then re-encode the codepoints.
defp canonical_order(<<>>, acc) do
  sorted = :lists.keysort(2, Enum.reverse(acc))

  for {cp, _class} <- sorted, into: "", do: <<cp::utf8>>
end
529
533
530
534
for { codepoint , class } <- combining_classes do
@@ -533,8 +537,6 @@ defmodule String.Normalizer do
533
537
534
538
defp combining_class ( _ ) , do: 0
535
539
536
- defp compose ( << _ :: utf8 >> = binary ) , do: binary
537
-
538
540
defp compose ( << lead :: utf8 , vowel :: utf8 , rest :: binary >> ) when lead in 0x1100 .. 0x1112 and vowel in 0x1161 .. 0x1175 do
539
541
codepoint = 0xAC00 + ( ( lead - 0x1100 ) * 588 ) + ( ( vowel - 0x1161 ) * 28 )
540
542
case rest do
@@ -545,29 +547,28 @@ defmodule String.Normalizer do
545
547
end
546
548
end
547
549
548
- for { composition , [ _ , _ ] = binary } <- compositions do
549
- defp compose ( unquote ( IO . iodata_to_binary ( binary ) ) ) , do: unquote ( composition )
550
- end
551
-
552
# Fallback composition clause.
#
# First asks compose_one/1 whether the whole binary is a known composable
# sequence. If it is not, the first codepoint becomes the base and the remaining
# codepoints are folded in one at a time by compose_many/4. The initial
# "last class" is one below the first codepoint's combining class, so the
# `last_class < part_class` test in compose_many/4 succeeds for a following
# codepoint of that same class or higher.
defp compose(binary) do
  whole = compose_one(binary)

  if whole do
    whole
  else
    <<starter::utf8, marks::binary>> = binary
    compose_many(marks, <<starter::utf8>>, "", combining_class(starter) - 1)
  end
end
555
556
556
# Folds the remaining codepoints of a cluster into `base`.
#
#   * `base`       - the binary composed so far
#   * `accents`    - codepoints that could not be merged into `base`, in order
#   * `last_class` - combining class of the most recent unmerged codepoint
#
# Returns `base <> accents` once the input is exhausted.
defp compose_many("", base, accents, _last_class), do: base <> accents

defp compose_many(<<cp::utf8, rest::binary>>, base, accents, last_class) do
  cp_class = combining_class(cp)
  candidate = <<base::binary, cp::utf8>>

  # A codepoint may only merge when its class is strictly greater than the
  # last unmerged one; compose_one/1 returns nil when no composition exists.
  merged = last_class < cp_class && compose_one(candidate)

  if merged do
    compose_many(rest, merged, accents, last_class)
  else
    compose_many(rest, base, <<accents::binary, cp::utf8>>, cp_class)
  end
end
567
568
568
# compose_one/1 maps an exact two-element sequence to its composition.
# One clause is generated per entry in `compositions` whose source is a
# two-element list; every other input yields nil.
for {composition, [_, _] = pair} <- compositions do
  sequence = IO.iodata_to_binary(pair)

  defp compose_one(unquote(sequence)), do: unquote(composition)
end

defp compose_one(_), do: nil
573
574
end
0 commit comments