Skip to content

Commit 883d887

Browse files
committed
stdlib: Sync re_SUITE 'testoutput4' with PCRE2 10.46
by parsing \N{U+hhh..} syntax in subject strings.
1 parent 6e1a4d3 commit 883d887

File tree

2 files changed

+41
-45
lines changed

2 files changed

+41
-45
lines changed

lib/stdlib/test/re_SUITE_data/testoutput4

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3848,10 +3848,10 @@ No match
38483848
\x{1234}
38493849
0: \x{1234}
38503850

3851-
#/(\x{1234}) \1/utf
3852-
# \N{U+1234} \o{11064}
3853-
# 0: \x{1234} \x{1234}
3854-
# 1: \x{1234}
3851+
/(\x{1234}) \1/utf
3852+
\N{U+1234} \o{11064}
3853+
0: \x{1234} \x{1234}
3854+
1: \x{1234}
38553855

38563856
# Test the full list of Unicode "Pattern White Space" characters that are to
38573857
# be ignored by /x. The pattern lines below may show up oddly in text editors

lib/stdlib/test/run_pcre_tests.erl

Lines changed: 37 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1036,49 +1036,31 @@ multi_esc(Bin, Unicode) ->
10361036
{_Cha, Tpl} = multi_hex_esc(Bin, Unicode),
10371037
Tpl.
10381038

1039-
multi_hex_esc(<<"x{",N,$},Rest/binary>>,Unicode) when ?is_hex_char(N) ->
1040-
Cha = trx(N),
1041-
case Unicode of
1042-
false ->
1043-
{Cha, {<<Cha:8>>,Rest}};
1044-
_ ->
1045-
{Cha, {int_to_utf8(Cha),Rest}}
1039+
1040+
multi_hex_esc(<<"x{", Rest0/binary>>, Unicode) ->
1041+
case hex_num(Rest0, 6) of
1042+
{Cha, <<$}, Rest1/binary>>} ->
1043+
case {Unicode, Cha < 256} of
1044+
{false, true} ->
1045+
{Cha, {<<Cha:8>>, Rest1}};
1046+
_ ->
1047+
{Cha, {int_to_utf8(Cha), Rest1}}
1048+
end;
1049+
_Error ->
1050+
{no, no}
10461051
end;
1047-
multi_hex_esc(<<"x{",N,O,$},Rest/binary>>,Unicode) when (?is_hex_char(N) and
1048-
?is_hex_char(O)) ->
1049-
Cha = (trx(N) bsl 4) bor trx(O),
1050-
case Unicode of
1051-
false ->
1052-
{Cha, {<<Cha:8>>,Rest}};
1053-
_ ->
1054-
{Cha, {int_to_utf8(Cha),Rest}}
1052+
multi_hex_esc(<<"N{U+", Rest0/binary>>, Unicode) ->
1053+
case hex_num(Rest0, 6) of
1054+
{Cha, <<$}, Rest1/binary>>} ->
1055+
case {Unicode, Cha < 256} of
1056+
{false, true} ->
1057+
{Cha, {<<Cha:8>>, Rest1}};
1058+
_ ->
1059+
{Cha, {int_to_utf8(Cha), Rest1}}
1060+
end;
1061+
_Error ->
1062+
{no, no}
10551063
end;
1056-
multi_hex_esc(<<"x{",N,O,P,$},Rest/binary>>,_) when (?is_hex_char(N) and
1057-
?is_hex_char(O) and
1058-
?is_hex_char(P)) ->
1059-
Cha = (trx(N) bsl 8) bor (trx(O) bsl 4) bor trx(P),
1060-
{Cha, {int_to_utf8(Cha),Rest}};
1061-
multi_hex_esc(<<"x{",N,O,P,Q,$},Rest/binary>>,_) when (?is_hex_char(N) and
1062-
?is_hex_char(O) and
1063-
?is_hex_char(P) and
1064-
?is_hex_char(Q)) ->
1065-
Cha = (trx(N) bsl 12) bor (trx(O) bsl 8) bor (trx(P) bsl 4) bor trx(Q),
1066-
{Cha, {int_to_utf8(Cha),Rest}};
1067-
multi_hex_esc(<<"x{",N,O,P,Q,R,$},Rest/binary>>,_) when (?is_hex_char(N) and
1068-
?is_hex_char(O) and
1069-
?is_hex_char(P) and
1070-
?is_hex_char(Q) and
1071-
?is_hex_char(R)) ->
1072-
Cha = (trx(N) bsl 16) bor (trx(O) bsl 12) bor (trx(P) bsl 8) bor (trx(Q) bsl 4) bor trx(R),
1073-
{Cha, {int_to_utf8(Cha),Rest}};
1074-
multi_hex_esc(<<"x{",N,O,P,Q,R,S,$},Rest/binary>>,_) when (?is_hex_char(N) and
1075-
?is_hex_char(O) and
1076-
?is_hex_char(P) and
1077-
?is_hex_char(Q) and
1078-
?is_hex_char(R) and
1079-
?is_hex_char(S)) ->
1080-
Cha = (trx(N) bsl 20) bor (trx(O) bsl 16) bor (trx(P) bsl 12) bor (trx(Q) bsl 8) bor (trx(R) bsl 4) bor trx(S),
1081-
{Cha, {int_to_utf8(Cha),Rest}};
10821064
multi_hex_esc(<<$x,N,O,Rest/binary>>,_) when (?is_hex_char(N) and
10831065
?is_hex_char(O)) ->
10841066
Cha = (trx(N) bsl 4) bor trx(O),
@@ -1089,6 +1071,20 @@ multi_hex_esc(<<$x,N,Rest/binary>>,_) when ?is_hex_char(N) ->
10891071
multi_hex_esc(_,_) ->
10901072
{no, no}.
10911073

1074+
%% hex_num(Bin, Maxlen)
%% Reads a hexadecimal number of at most Maxlen digits from the start of Bin.
%% Returns {Value, RestOfBin} once at least one digit has been consumed, or
%% {error, "Expected hex number", Bin} when Bin does not start with a hex
%% digit. The visible callers (the "x{" and "N{U+" clauses of multi_hex_esc)
%% pass Maxlen = 6 and treat anything other than {Value, <<$}, _/binary>>}
%% as a failed escape.
hex_num(Bin, Maxlen) ->
1075+
hex_num(Bin, Maxlen, 0, 0).
1076+
1077+
%% hex_num(Bin, Maxlen, Gotlen, Acc)
%% Gotlen is the number of digits consumed so far, Acc the value accumulated
%% from them.
%% Clause 1: the digit limit is reached; stop without looking at Bin. (Note
%% that a Maxlen of 0 therefore yields {0, Bin} straight away.)
hex_num(Bin, MaxLen, MaxLen, Acc) ->
1078+
{Acc, Bin};
1079+
%% Clause 2: the next byte passes ?is_hex_char; shift the accumulator one
%% nibble left and merge in trx(C) (presumably the digit's numeric value --
%% trx/1 is defined outside this view).
hex_num(<<C, Rest/binary>>, Maxlen, Gotlen, Acc) when ?is_hex_char(C) ->
1080+
hex_num(Rest, Maxlen, Gotlen+1, (Acc bsl 4) bor trx(C));
1081+
%% Clause 3: the next byte is not a hex digit (or Bin is empty) but at least
%% one digit was read; return the value and the unconsumed remainder.
hex_num(Bin, _Maxlen, Gotlen, Acc) when Gotlen > 0 ->
1082+
{Acc, Bin};
1083+
%% Clause 4: nothing was consumed, so there is no number here. The result
%% is a 3-tuple, deliberately a different shape from the 2-tuple success value.
hex_num(Bin, _, 0, 0) ->
1084+
{error, "Expected hex number", Bin}.
1085+
1086+
1087+
10921088
single_esc($") ->
10931089
$";
10941090
single_esc(?SPACE) ->

0 commit comments

Comments
 (0)