@@ -152,20 +152,16 @@ static zend_string *browscap_convert_pattern(zend_string *pattern, int persisten
152152 size_t i , j = 0 ;
153153 char * t ;
154154 zend_string * res ;
155- char * lc_pattern ;
156- ALLOCA_FLAG (use_heap );
157155
158156 res = zend_string_alloc (browscap_compute_regex_len (pattern ), persistent );
159157 t = ZSTR_VAL (res );
160158
161- lc_pattern = do_alloca (ZSTR_LEN (pattern ) + 1 , use_heap );
162- zend_str_tolower_copy (lc_pattern , ZSTR_VAL (pattern ), ZSTR_LEN (pattern ));
163-
164159 t [j ++ ] = '~' ;
165160 t [j ++ ] = '^' ;
166161
167162 for (i = 0 ; i < ZSTR_LEN (pattern ); i ++ , j ++ ) {
168- switch (lc_pattern [i ]) {
163+ char c = ZSTR_VAL (pattern )[i ];
164+ switch (c ) {
169165 case '?' :
170166 t [j ] = '.' ;
171167 break ;
@@ -198,7 +194,7 @@ static zend_string *browscap_convert_pattern(zend_string *pattern, int persisten
198194 t [j ] = '+' ;
199195 break ;
200196 default :
201- t [j ] = lc_pattern [ i ] ;
197+ t [j ] = zend_tolower_ascii ( c ) ;
202198 break ;
203199 }
204200 }
@@ -208,7 +204,6 @@ static zend_string *browscap_convert_pattern(zend_string *pattern, int persisten
208204 t [j ]= 0 ;
209205
210206 ZSTR_LEN (res ) = j ;
211- free_alloca (lc_pattern , use_heap );
212207 return res ;
213208}
214209/* }}} */
@@ -272,27 +267,39 @@ static void browscap_add_kv(
272267 bdata -> kv_used ++ ;
273268}
274269
270+ static void browscap_entry_add_kv_to_existing_array (browser_data * bdata , browscap_entry * entry , HashTable * ht ) {
271+ for (uint32_t i = entry -> kv_start ; i < entry -> kv_end ; i ++ ) {
272+ zval tmp ;
273+ ZVAL_STR_COPY (& tmp , bdata -> kv [i ].value );
274+ zend_hash_add (ht , bdata -> kv [i ].key , & tmp );
275+ }
276+ }
277+
275278static HashTable * browscap_entry_to_array (browser_data * bdata , browscap_entry * entry ) {
276279 zval tmp ;
277- uint32_t i ;
278-
279- HashTable * ht = zend_new_array (8 );
280+ HashTable * ht = zend_new_array (2 + (entry -> parent ? 1 : 0 ) + (entry -> kv_end - entry -> kv_start ));
280281
281282 ZVAL_STR (& tmp , browscap_convert_pattern (entry -> pattern , 0 ));
282- zend_hash_str_add (ht , "browser_name_regex" , sizeof ("browser_name_regex" )- 1 , & tmp );
283+ zend_string * key = ZSTR_INIT_LITERAL ("browser_name_regex" , 0 );
284+ ZSTR_H (key ) = zend_inline_hash_func ("browser_name_regex" , sizeof ("browser_name_regex" )- 1 );
285+ zend_hash_add_new (ht , key , & tmp );
286+ zend_string_release_ex (key , false);
283287
284288 ZVAL_STR_COPY (& tmp , entry -> pattern );
285- zend_hash_str_add (ht , "browser_name_pattern" , sizeof ("browser_name_pattern" )- 1 , & tmp );
289+ key = ZSTR_INIT_LITERAL ("browser_name_pattern" , 0 );
290+ ZSTR_H (key ) = zend_inline_hash_func ("browser_name_pattern" , sizeof ("browser_name_pattern" )- 1 );
291+ zend_hash_add_new (ht , key , & tmp );
292+ zend_string_release_ex (key , false);
286293
287294 if (entry -> parent ) {
288295 ZVAL_STR_COPY (& tmp , entry -> parent );
289- zend_hash_str_add (ht , "parent" , sizeof ("parent" )- 1 , & tmp );
296+ key = ZSTR_INIT_LITERAL ("parent" , 0 );
297+ ZSTR_H (key ) = zend_inline_hash_func ("parent" , sizeof ("parent" )- 1 );
298+ zend_hash_add_new (ht , key , & tmp );
299+ zend_string_release_ex (key , false);
290300 }
291301
292- for (i = entry -> kv_start ; i < entry -> kv_end ; i ++ ) {
293- ZVAL_STR_COPY (& tmp , bdata -> kv [i ].value );
294- zend_hash_add (ht , bdata -> kv [i ].key , & tmp );
295- }
302+ browscap_entry_add_kv_to_existing_array (bdata , entry , ht );
296303
297304 return ht ;
298305}
@@ -542,31 +549,89 @@ static inline size_t browscap_get_minimum_length(browscap_entry *entry) {
542549 return len ;
543550}
544551
545- static int browser_reg_compare ( browscap_entry * entry , zend_string * agent_name , browscap_entry * * found_entry_ptr ) /* {{{ */
552+ static bool browscap_match_string_wildcard ( const char * s , const char * s_end , const char * pattern , const char * pattern_end )
546553{
547- browscap_entry * found_entry = * found_entry_ptr ;
548- ALLOCA_FLAG (use_heap )
549- zend_string * pattern_lc , * regex ;
550- const char * cur ;
551- int i ;
554+ const char * pattern_current = pattern ;
555+ const char * s_current = s ;
556+
557+ const char * wildcard_pattern_restore_pos = NULL ;
558+ const char * wildcard_s_restore_pos = NULL ;
559+
560+ while (s_current < s_end ) {
561+ char pattern_char = * pattern_current ;
562+ char s_char = * s_current ;
563+
564+ if (pattern_char == '*' ) {
565+ /* Collapse wildcards */
566+ pattern_current ++ ;
567+ while (pattern_current < pattern_end && * pattern_current == '*' ) {
568+ pattern_current ++ ;
569+ }
552570
553- pcre2_code * re ;
554- pcre2_match_data * match_data ;
555- uint32_t capture_count ;
556- int rc ;
571+ /* If we're at the end of the pattern, it means that the ending was just '*', so this is a trivial match */
572+ if (pattern_current == pattern_end ) {
573+ return true;
574+ }
575+
576+ /* Optimization: if there is a non-wildcard character X after a *, then we can immediately jump to the first
577+ * character X in s starting from s_current because it is the only way to match beyond the *. */
578+ if (* pattern_current != '?' ) {
579+ while (s_current < s_end && * s_current != * pattern_current ) {
580+ s_current ++ ;
581+ }
582+ }
583+
584+ /* We will first assume the skipped part by * is a 0-length string (or n-length if the optimization above skipped n characters).
585+ * When a mismatch happens we will backtrack and move s one position to assume * skipped a 1-length string.
586+ * Then 2, 3, 4, ... */
587+ wildcard_pattern_restore_pos = pattern_current ;
588+ wildcard_s_restore_pos = s_current ;
589+
590+ continue ;
591+ } else if (pattern_char == s_char || pattern_char == '?' ) {
592+ /* Match */
593+ pattern_current ++ ;
594+ s_current ++ ;
595+
596+ /* If this was the last character of the pattern, we either fully matched s, or we have a mismatch */
597+ if (pattern_current == pattern_end ) {
598+ if (s_current == s_end ) {
599+ return true;
600+ }
601+ /* Fallthrough to mismatch */
602+ } else {
603+ continue ;
604+ }
605+ }
557606
558- /* Agent name too short */
559- if (ZSTR_LEN (agent_name ) < browscap_get_minimum_length (entry )) {
560- return 0 ;
607+ /* Mismatch */
608+ if (wildcard_pattern_restore_pos ) {
609+ pattern_current = wildcard_pattern_restore_pos ;
610+ wildcard_s_restore_pos ++ ;
611+ s_current = wildcard_s_restore_pos ;
612+ } else {
613+ /* No wildcard is active, so it is impossible to match */
614+ return false;
615+ }
561616 }
562617
563- /* Quickly discard patterns where the prefix doesn't match. */
564- if (zend_binary_strcasecmp (
565- ZSTR_VAL (agent_name ), entry -> prefix_len ,
566- ZSTR_VAL (entry -> pattern ), entry -> prefix_len ) != 0 ) {
567- return 0 ;
618+ /* Skip remaining * wildcards, they match nothing here as we are at the end of s */
619+ while (pattern_current < pattern_end && * pattern_current == '*' ) {
620+ pattern_current ++ ;
568621 }
569622
623+ ZEND_ASSERT (s_current == s_end );
624+ return pattern_current == pattern_end ;
625+ }
626+
627+ static int browser_reg_compare (browscap_entry * entry , zend_string * agent_name , browscap_entry * * found_entry_ptr , size_t * cached_prev_len ) /* {{{ */
628+ {
629+ browscap_entry * found_entry = * found_entry_ptr ;
630+ ALLOCA_FLAG (use_heap )
631+ zend_string * pattern_lc ;
632+ const char * cur ;
633+ int i ;
634+
570635 /* Lowercase the pattern, the agent name is already lowercase */
571636 ZSTR_ALLOCA_ALLOC (pattern_lc , ZSTR_LEN (entry -> pattern ), use_heap );
572637 zend_str_tolower_copy (ZSTR_VAL (pattern_lc ), ZSTR_VAL (entry -> pattern ), ZSTR_LEN (entry -> pattern ));
@@ -590,91 +655,52 @@ static int browser_reg_compare(browscap_entry *entry, zend_string *agent_name, b
590655 /* See if we have an exact match, if so, we're done... */
591656 if (zend_string_equals (agent_name , pattern_lc )) {
592657 * found_entry_ptr = entry ;
658+ /* cached_prev_len doesn't matter here because we end the search when an exact match is found. */
593659 ZSTR_ALLOCA_FREE (pattern_lc , use_heap );
594660 return 1 ;
595661 }
596662
597- regex = browscap_convert_pattern (entry -> pattern , 0 );
598- re = pcre_get_compiled_regex (regex , & capture_count );
599- if (re == NULL ) {
600- ZSTR_ALLOCA_FREE (pattern_lc , use_heap );
601- zend_string_release (regex );
602- return 0 ;
603- }
604-
605- match_data = php_pcre_create_match_data (capture_count , re );
606- if (!match_data ) {
607- ZSTR_ALLOCA_FREE (pattern_lc , use_heap );
608- zend_string_release (regex );
609- return 0 ;
610- }
611- rc = pcre2_match (re , (PCRE2_SPTR )ZSTR_VAL (agent_name ), ZSTR_LEN (agent_name ), 0 , 0 , match_data , php_pcre_mctx ());
612- php_pcre_free_match_data (match_data );
613- if (rc >= 0 ) {
663+ if (browscap_match_string_wildcard (
664+ ZSTR_VAL (agent_name ) + entry -> prefix_len ,
665+ ZSTR_VAL (agent_name ) + ZSTR_LEN (agent_name ),
666+ ZSTR_VAL (pattern_lc ) + entry -> prefix_len ,
667+ ZSTR_VAL (pattern_lc ) + ZSTR_LEN (pattern_lc )
668+ )) {
614669 /* If we've found a possible browser, we need to do a comparison of the
615670 number of characters changed in the user agent being checked versus
616671 the previous match found and the current match. */
617- if (found_entry ) {
618- size_t i , prev_len = 0 , curr_len = 0 ;
619- zend_string * previous_match = found_entry -> pattern ;
620- zend_string * current_match = entry -> pattern ;
621-
622- for (i = 0 ; i < ZSTR_LEN (previous_match ); i ++ ) {
623- switch (ZSTR_VAL (previous_match )[i ]) {
624- case '?' :
625- case '*' :
626- /* do nothing, ignore these characters in the count */
627- break ;
628-
629- default :
630- ++ prev_len ;
631- }
632- }
633-
634- for (i = 0 ; i < ZSTR_LEN (current_match ); i ++ ) {
635- switch (ZSTR_VAL (current_match )[i ]) {
636- case '?' :
637- case '*' :
638- /* do nothing, ignore these characters in the count */
639- break ;
672+ size_t curr_len = entry -> prefix_len ; /* Start from the prefix because the prefix is free of wildcards */
673+ zend_string * current_match = entry -> pattern ;
674+ for (size_t i = curr_len ; i < ZSTR_LEN (current_match ); i ++ ) {
675+ switch (ZSTR_VAL (current_match )[i ]) {
676+ case '?' :
677+ case '*' :
678+ /* do nothing, ignore these characters in the count */
679+ break ;
640680
641- default :
642- ++ curr_len ;
643- }
681+ default :
682+ ++ curr_len ;
644683 }
684+ }
645685
686+ if (found_entry ) {
646687 /* Pick which browser pattern replaces the least amount of
647688 characters when compared to the original user agent string... */
648- if (prev_len < curr_len ) {
689+ if (* cached_prev_len < curr_len ) {
649690 * found_entry_ptr = entry ;
691+ * cached_prev_len = curr_len ;
650692 }
651693 } else {
652694 * found_entry_ptr = entry ;
695+ * cached_prev_len = curr_len ;
653696 }
654697 }
655698
656699 ZSTR_ALLOCA_FREE (pattern_lc , use_heap );
657- zend_string_release (regex );
658700 return 0 ;
659701}
660702/* }}} */
661703
662- static void browscap_zval_copy_ctor (zval * p ) /* {{{ */
663- {
664- if (Z_REFCOUNTED_P (p )) {
665- zend_string * str ;
666-
667- ZEND_ASSERT (Z_TYPE_P (p ) == IS_STRING );
668- str = Z_STR_P (p );
669- if (!(GC_FLAGS (str ) & GC_PERSISTENT )) {
670- GC_ADDREF (str );
671- } else {
672- ZVAL_NEW_STR (p , zend_string_init (ZSTR_VAL (str ), ZSTR_LEN (str ), 0 ));
673- }
674- }
675- }
676- /* }}} */
677-
678704/* {{{ Get information about the capabilities of a browser. If browser_name is omitted or null, HTTP_USER_AGENT is used. Returns an object by default; if return_array is true, returns an array. */
679705PHP_FUNCTION (get_browser )
680706{
@@ -724,9 +750,31 @@ PHP_FUNCTION(get_browser)
724750 found_entry = zend_hash_find_ptr (bdata -> htab , lookup_browser_name );
725751 if (found_entry == NULL ) {
726752 browscap_entry * entry ;
753+ size_t cached_prev_len = 0 ; /* silence compiler warning */
754+
755+ ZEND_HASH_MAP_FOREACH_PTR (bdata -> htab , entry ) {
756+ /* The following two early-skip checks are inside this loop instead of inside browser_reg_compare().
757+ * That's because we want to avoid the call frame overhead, especially as browser_reg_compare() is
758+ * a function that uses alloca(). */
759+
760+ /* Agent name too short */
761+ if (ZSTR_LEN (lookup_browser_name ) < browscap_get_minimum_length (entry )) {
762+ continue ;
763+ }
764+
765+ /* Quickly discard patterns where the prefix doesn't match. */
766+ bool prefix_matches = true;
767+ for (size_t i = 0 ; i < entry -> prefix_len ; i ++ ) {
768+ if (ZSTR_VAL (lookup_browser_name )[i ] != zend_tolower_ascii (ZSTR_VAL (entry -> pattern )[i ])) {
769+ prefix_matches = false;
770+ break ;
771+ }
772+ }
773+ if (!prefix_matches ) {
774+ continue ;
775+ }
727776
728- ZEND_HASH_FOREACH_PTR (bdata -> htab , entry ) {
729- if (browser_reg_compare (entry , lookup_browser_name , & found_entry )) {
777+ if (browser_reg_compare (entry , lookup_browser_name , & found_entry , & cached_prev_len )) {
730778 break ;
731779 }
732780 } ZEND_HASH_FOREACH_END ();
@@ -735,12 +783,14 @@ PHP_FUNCTION(get_browser)
735783 found_entry = zend_hash_str_find_ptr (bdata -> htab ,
736784 DEFAULT_SECTION_NAME , sizeof (DEFAULT_SECTION_NAME )- 1 );
737785 if (found_entry == NULL ) {
738- zend_string_release (lookup_browser_name );
786+ zend_string_release_ex (lookup_browser_name , false );
739787 RETURN_FALSE ;
740788 }
741789 }
742790 }
743791
792+ zend_string_release_ex (lookup_browser_name , false);
793+
744794 agent_ht = browscap_entry_to_array (bdata , found_entry );
745795
746796 if (return_array ) {
@@ -749,23 +799,15 @@ PHP_FUNCTION(get_browser)
749799 object_and_properties_init (return_value , zend_standard_class_def , agent_ht );
750800 }
751801
802+ HashTable * target_ht = return_array ? Z_ARRVAL_P (return_value ) : Z_OBJPROP_P (return_value );
803+
752804 while (found_entry -> parent ) {
753805 found_entry = zend_hash_find_ptr (bdata -> htab , found_entry -> parent );
754806 if (found_entry == NULL ) {
755807 break ;
756808 }
757809
758- agent_ht = browscap_entry_to_array (bdata , found_entry );
759- if (return_array ) {
760- zend_hash_merge (Z_ARRVAL_P (return_value ), agent_ht , (copy_ctor_func_t ) browscap_zval_copy_ctor , 0 );
761- } else {
762- zend_hash_merge (Z_OBJPROP_P (return_value ), agent_ht , (copy_ctor_func_t ) browscap_zval_copy_ctor , 0 );
763- }
764-
765- zend_hash_destroy (agent_ht );
766- efree (agent_ht );
810+ browscap_entry_add_kv_to_existing_array (bdata , found_entry , target_ht );
767811 }
768-
769- zend_string_release_ex (lookup_browser_name , 0 );
770812}
771813/* }}} */
0 commit comments