Skip to content

Commit eb8b2ff

Browse files
committed
unicode_util: expand range of 2
Make jump tables larger
1 parent 64c8ae9 commit eb8b2ff

File tree

1 file changed

+31
-20
lines changed

1 file changed

+31
-20
lines changed

lib/stdlib/uc_spec/gen_unicode_mod.escript

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -822,7 +822,7 @@ gen_gc(Fd, GBP) ->
822822
GenExtP = fun(Range) -> io:format(Fd, "gc_1~s gc_ext_pict(R1,[CP]);\n", [gen_clause(Range)]) end,
823823
ExtendedPictographic0 = merge_ranges(maps:get(extended_pictographic,GBP)),
824824
%% Pick codepoints below 256 (some data knowledge here)
825-
{ExtendedPictographicLow,ExtendedPictographicHigh} =
825+
{ExtendedPictographicLow,_ExtendedPictographicHigh} =
826826
lists:splitwith(fun({Start,undefined}) -> Start < 256 end,ExtendedPictographic0),
827827
io:put_chars(Fd,
828828
"\ngc_1([$\\r|R0] = R) ->\n"
@@ -879,21 +879,23 @@ gen_gc(Fd, GBP) ->
879879
%% GenEBG = fun(Range) -> io:format(Fd, "gc_1~s gc_e_cont(R1,[CP]);\n", [gen_clause(Range)]) end,
880880
%% [GenEBG(CP) || CP <- merge_ranges(maps:get(e_base_gaz,GBP))],
881881

882-
io:put_chars(Fd, "\n%% Handle extended_pictographic\n"),
883-
[GenExtP(CP) || CP <- merge_ranges(ExtendedPictographicHigh)],
882+
%% io:put_chars(Fd, "\n%% Handle extended_pictographic\n"),
883+
%% [GenExtP(CP) || CP <- merge_ranges(ExtendedPictographicHigh)],
884884

885885
io:put_chars(Fd, "\n%% default clauses\n"),
886-
io:put_chars(Fd,
887-
"""
888-
gc_1([CP|R]) ->
889-
case is_indic_consonant(CP) of
890-
true ->
891-
gc_indic(cp(R), R, false, [CP]);
892-
false ->
893-
gc_extend(cp(R), R, CP)
894-
end.
886+
io:put_chars(Fd, """
887+
gc_1([CP|R]) ->
888+
case is_ext_pict(CP) of
889+
true -> gc_ext_pict(R, [CP]);
890+
false ->
891+
case is_indic_consonant(CP) of
892+
true -> gc_indic(cp(R), R, false, [CP]);
893+
false -> gc_extend(cp(R), R, CP)
894+
end
895+
end.
895896
896-
"""),
897+
898+
"""),
897899

898900
io:put_chars(Fd, "%% Handle Prepend\n"),
899901
io:put_chars(Fd,
@@ -1341,7 +1343,6 @@ decompose([CP|CPs], Data) when is_integer(CP) ->
13411343
#cp{dec=Dec} -> decompose(Dec, Data) ++ decompose(CPs,Data)
13421344
end.
13431345

1344-
13451346
decompose_compat(Canon, [], Data) ->
13461347
case decompose_compat(Canon, Data) of
13471348
Canon -> [];
@@ -1405,7 +1406,7 @@ merge_ranges(List, Opt) ->
14051406
split ->
14061407
split_ranges(Res0,[]); % One clause per CP
14071408
true ->
1408-
Res = Res0,
1409+
Res = split_small_ranges(Res0, []),
14091410
OptRes = optimize_ranges(Res),
14101411
true = lists:sort(Res) =:= lists:sort(OptRes), %Assertion.
14111412
OptRes;
@@ -1423,11 +1424,6 @@ merge_ranges_1([{Next, Stop}|R], [{Start,Prev}|Acc]) when Prev+1 =:= Next ->
14231424
undefined -> merge_ranges_1(R, [{Start, Next}|Acc]);
14241425
_ -> merge_ranges_1(R, [{Start,Stop}|Acc])
14251426
end;
1426-
merge_ranges_1([{Next, Stop}|R], [{Start,undefined}|Acc]) when Start+1 =:= Next ->
1427-
case Stop of
1428-
undefined -> merge_ranges_1(R, [{Start, Next}|Acc]);
1429-
_ -> merge_ranges_1(R, [{Start,Stop}|Acc])
1430-
end;
14311427
merge_ranges_1([Next|R], Acc) ->
14321428
merge_ranges_1(R, [Next|Acc]);
14331429
merge_ranges_1([], Acc) ->
@@ -1442,6 +1438,21 @@ split_ranges([{L,L}|Rs], Acc) ->
14421438
split_ranges([], Acc) ->
14431439
lists:reverse(Acc).
14441440

%% split_small_ranges(Ranges, Acc) -> Ranges1
%%
%% Rewrites every range that covers only one or two codepoints as
%% single-codepoint entries of the form {CP, undefined}:
%%
%%   {CP, undefined}  is kept as it is,
%%   {CP, CP}         becomes {CP, undefined},
%%   {F, F+1}         becomes {F, undefined}, {F+1, undefined},
%%   {F, L}, L-F > 1  is kept as a range.
%%
%% The relative order of the input is preserved. Acc collects the
%% result in reverse order and is reversed once at the end; callers
%% pass [] as the initial accumulator.
%%
%% A range whose last element is smaller than its first matches no
%% clause, so the generator crashes instead of emitting bad clauses.
split_small_ranges([{_, undefined} = Single | Rs], Acc) ->
    split_small_ranges(Rs, [Single | Acc]);
split_small_ranges([{CP, CP} | Rs], Acc) ->
    split_small_ranges(Rs, [{CP, undefined} | Acc]);
split_small_ranges([{F, L} | Rs], Acc) when L - F =:= 1 ->
    %% Push L before F: Acc is reversed when the input is exhausted.
    split_small_ranges(Rs, [{L, undefined}, {F, undefined} | Acc]);
split_small_ranges([{F, L} = Range | Rs], Acc) when L - F > 1 ->
    split_small_ranges(Rs, [Range | Acc]);
split_small_ranges([], Acc) ->
    lists:reverse(Acc).
1454+
1455+
14451456
optimize_ranges(Rs0) ->
14461457
PF = fun({N,undefined}) when is_integer(N) -> true;
14471458
(_) -> false

0 commit comments

Comments
 (0)