@@ -13,21 +13,26 @@ let is_ascii s =
1313
1414let normalize = Uunf_string. normalize_utf_8 `NFKC
1515
16+ let fold_uchars f acc str =
17+ let len = String. length str in
18+ let rec loop pos acc =
19+ if pos == len then acc
20+ else
21+ let decode = String. get_utf_8_uchar str pos in
22+ let char_length = Uchar. utf_decode_length decode in
23+ let uchar = Uchar. utf_decode_uchar decode in
24+ let acc = f acc pos uchar in
25+ loop (pos + char_length) acc in
26+ loop 0 acc
27+
1628let iter_uchars s f =
17- let len = String. length s in
18- let out = Buffer. create len in
19- let pos = ref 0 in
20- (* move through code point by code point *)
21- while ! pos != len do
22- let decode = String. get_utf_8_uchar s ! pos in
23- let char_length = Uchar. utf_decode_length decode in
24- let uchar = Uchar. utf_decode_uchar decode in
25- Buffer. add_utf_8_uchar out uchar;
26- f ! pos uchar;
27- pos := ! pos + char_length
28- done ;
29+ let f' buf pos c =
30+ f pos c;
31+ Buffer. add_utf_8_uchar buf c;
32+ buf in
33+ let s_after =
34+ Buffer. contents @@ fold_uchars f' (Buffer. create (String. length s)) s in
2935 (* another sanity check *)
30- let s_after = Buffer. contents out in
3136 if not (String. equal s s_after) then
3237 Core. (
3338 ICE. internal_compiler_error
@@ -49,14 +54,14 @@ let iter_uchars s f =
4954let confusable x y =
5055 let skeleton x =
5156 let x = Uunf_string. normalize_utf_8 `NFD x in
52- let out = Buffer. create (String. length x) in
53- let f _ c =
57+ let f acc _ c =
5458 if Uucp.Gen. is_default_ignorable c then ()
5559 else
5660 (* TODO!! replace with prototype - need data? *)
57- Buffer. add_utf_8_uchar out c in
58- iter_uchars x f;
59- let x = Buffer. contents out in
61+ Buffer. add_utf_8_uchar acc c;
62+ acc in
63+ let buf = fold_uchars f (Buffer. create (String. length x)) x in
64+ let x = Buffer. contents buf in
6065 let x = Uunf_string. normalize_utf_8 `NFD x in
6166 x in
6267 String. compare (skeleton x) (skeleton y)
@@ -89,13 +94,11 @@ let extended s =
8994
9095(* Defined in https://www.unicode.org/reports/tr39/#Restriction_Level_Detection *)
9196let restriction_level x =
92- let soss = ref [] in
93- let f _ c =
97+ let f acc _ c =
9498 let scripts =
9599 Uucp.Script. script_extensions c |> ScriptSet. of_list |> extended in
96- soss := scripts :: ! soss;
97- () in
98- iter_uchars x f;
99- let resolved = List. fold_right ScriptSet. inter ! soss all in
100+ scripts :: acc in
101+ let soss = fold_uchars f [] x in
102+ let resolved = List. fold_right ScriptSet. inter soss all in
100103 if not @@ ScriptSet. is_empty resolved then `Single
101104 else `Unrestricted (* TODO implement levels 3-5 *)
0 commit comments