@@ -1026,46 +1026,44 @@ public static function buildRegex(array $strings, ?string $delim = null, bool $r
10261026 {
10271027 static $ regexes = [];
10281028
1029- $ encoding = mb_detect_encoding (implode (' ' , $ strings )) ?: mb_internal_encoding ();
1030-
1031- $ normalized_strings = [];
1029+ // Only strings are allowed.
1030+ $ strings = array_unique (array_map ('strval ' , array_filter ($ strings , 'is_scalar ' )));
10321031
1033- foreach ($ strings as $ str ) {
1034- if (\is_scalar ($ str )) {
1035- $ s = (string ) $ str ;
1036- $ normalized_strings [$ s ] = mb_strlen ($ s , $ encoding );
1037- }
1038- }
1039-
1040- if (empty ($ normalized_strings )) {
1041- return '' ;
1032+ // A regex to match nothing?
1033+ if ($ strings === [] || $ strings === ['' ]) {
1034+ return $ return_array ? ['' ] : '' ;
10421035 }
10431036
1037+ // Don't repeat unnecessarily.
10441038 $ regex_key = md5 (json_encode ([$ strings , $ delim , $ return_array ]));
10451039
10461040 if (isset ($ regexes [$ regex_key ])) {
10471041 return $ regexes [$ regex_key ];
10481042 }
10491043
1044+ // Which character encoding is being used?
1045+ $ encoding = mb_detect_encoding (implode (' ' , $ strings )) ?: mb_internal_encoding ();
1046+
10501047 // Optimizing is faster when we sort by length.
1051- asort ($ normalized_strings );
1052- $ strings = array_map ('strval ' , array_keys ($ normalized_strings ));
1048+ $ strings = array_combine ($ strings , array_map (fn ($ s ) => mb_strlen ($ s , $ encoding ), $ strings ));
1049+ asort ($ strings );
1050+ $ strings = array_map ('strval ' , array_keys ($ strings ));
10531051
10541052 // Can we trim common characters from the end?
10551053 $ trailing = '' ;
1056- unset( $ normalized_strings ) ;
1054+ $ i = - 1 ;
10571055
1058- while (mb_strlen ( $ strings [0 ], $ encoding ) > 1 ) {
1059- $ last_char = mb_substr ($ strings [0 ], - 1 , 1 , $ encoding );
1056+ while ($ strings [0 ] !== '' ) {
1057+ $ last_char = mb_substr ($ strings [0 ], $ i , null , $ encoding );
10601058
10611059 foreach ($ strings as $ string ) {
10621060 if (!str_ends_with ($ string , $ last_char )) {
10631061 break 2 ;
10641062 }
10651063 }
10661064
1067- $ strings = array_map ( fn ( $ string ) => mb_substr ( $ string , 0 , - 1 , $ encoding ), $ strings ) ;
1068- $ trailing = $ last_char . $ trailing ;
1065+ $ i -- ;
1066+ $ trailing = $ last_char ;
10691067 }
10701068
10711069 // Create the trie from the strings.
@@ -1107,9 +1105,8 @@ public static function buildRegex(array $strings, ?string $delim = null, bool $r
11071105 } else {
11081106 $ sub_regex = $ trie_to_regex ($ value , $ delim );
11091107
1110- if (\count (array_keys ($ value )) == 1 ) {
1111- $ new_key_array = explode ('(? ' . '> ' , $ sub_regex );
1112- $ new_key .= $ new_key_array [0 ];
1108+ if (\count ($ value ) == 1 ) {
1109+ $ new_key .= strtok ($ sub_regex , '(? ' . '> ' );
11131110 } else {
11141111 $ sub_regex = '(? ' . '> ' . $ sub_regex . ') ' ;
11151112 }
0 commit comments