@@ -47,19 +47,24 @@ PHPAPI void php_url_free(php_url *theurl)
4747}
4848/* }}} */
4949
50- static void php_replace_controlchars ( char * str , size_t len )
50+ static void php_str_to_utf8 ( const char * str , size_t len ) /* Walks len bytes of str and writes a transcoded copy into a scratch zend_string; str is const and never written, and nothing is returned. NOTE(review): the removed php_replace_controlchars edited its argument in place, so for the existing callers this replacement appears to have no visible effect - confirm intent. */
5151{
52- unsigned char * s = (unsigned char * )str ;
53- unsigned char * e = (unsigned char * )str + len ;
54-
55- ZEND_ASSERT (str != NULL );
56-
57- while (s < e ) {
58- if (iscntrl (* s )) {
59- * s = '_' ;
60- }
61- s ++ ;
62- }
52+ zend_string * utf8 ; /* scratch output buffer, only referenced inside this function */
53+ utf8 = zend_string_alloc (len * 4 , 0 ); /* requests 4 bytes per input byte (the loop below emits at most 3 per byte); NOTE(review): len * 4 is not checked for size_t overflow */
54+ const unsigned char * s = (const unsigned char * )str ; /* read cursor */
55+ const unsigned char * e = s + len ; /* one past the last input byte */
56+ unsigned char * d = (unsigned char * )ZSTR_VAL (utf8 ); /* write cursor into the scratch buffer */
57+
58+ while (s < e ) {
59+ if (* s < 0x80 ) { // ASCII byte (below 0x80): copied through unchanged
60+ * d ++ = * s ++ ;
61+ } else {
62+ /* Byte >= 0x80: not decoded; it is consumed and replaced by a fixed three-byte sequence */
63+ unsigned int codepoint = * s ++ ; /* advances past the byte; NOTE(review): codepoint is never read afterwards */
64+ * d ++ = 0xEF ; * d ++ = 0xBF ; * d ++ = 0xBD ; /* EF BF BD is the UTF-8 encoding of U+FFFD (replacement character) */
65+ }
66+ }
67+ zend_string_release (utf8 ); /* NOTE(review): nothing in this function copies the converted bytes elsewhere or records their length before this release - confirm the result is meant to be discarded */
6368}
6469
6570PHPAPI php_url * php_url_parse (char const * str )
@@ -119,7 +124,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
119124
120125 if (e + 1 == ue ) { /* only scheme is available */
121126 ret -> scheme = zend_string_init (s , (e - s ), 0 );
122- php_replace_controlchars (ZSTR_VAL (ret -> scheme ), ZSTR_LEN (ret -> scheme ));
127+ php_str_to_utf8 (ZSTR_VAL (ret -> scheme ), ZSTR_LEN (ret -> scheme ));
123128 return ret ;
124129 }
125130
@@ -141,13 +146,13 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
141146 }
142147
143148 ret -> scheme = zend_string_init (s , (e - s ), 0 );
144- php_replace_controlchars (ZSTR_VAL (ret -> scheme ), ZSTR_LEN (ret -> scheme ));
149+ php_str_to_utf8 (ZSTR_VAL (ret -> scheme ), ZSTR_LEN (ret -> scheme ));
145150
146151 s = e + 1 ;
147152 goto just_path ;
148153 } else {
149154 ret -> scheme = zend_string_init (s , (e - s ), 0 );
150- php_replace_controlchars (ZSTR_VAL (ret -> scheme ), ZSTR_LEN (ret -> scheme ));
155+ php_str_to_utf8 (ZSTR_VAL (ret -> scheme ), ZSTR_LEN (ret -> scheme ));
151156
152157 if (e + 2 < ue && * (e + 2 ) == '/' ) {
153158 s = e + 3 ;
@@ -213,14 +218,14 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
213218 if ((p = zend_memrchr (s , '@' , (e - s )))) {
214219 if ((pp = memchr (s , ':' , (p - s )))) {
215220 ret -> user = zend_string_init (s , (pp - s ), 0 );
216- php_replace_controlchars (ZSTR_VAL (ret -> user ), ZSTR_LEN (ret -> user ));
221+ php_str_to_utf8 (ZSTR_VAL (ret -> user ), ZSTR_LEN (ret -> user ));
217222
218223 pp ++ ;
219224 ret -> pass = zend_string_init (pp , (p - pp ), 0 );
220- php_replace_controlchars (ZSTR_VAL (ret -> pass ), ZSTR_LEN (ret -> pass ));
225+ php_str_to_utf8 (ZSTR_VAL (ret -> pass ), ZSTR_LEN (ret -> pass ));
221226 } else {
222227 ret -> user = zend_string_init (s , (p - s ), 0 );
223- php_replace_controlchars (ZSTR_VAL (ret -> user ), ZSTR_LEN (ret -> user ));
228+ php_str_to_utf8 (ZSTR_VAL (ret -> user ), ZSTR_LEN (ret -> user ));
224229 }
225230
226231 s = p + 1 ;
@@ -269,7 +274,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
269274 }
270275
271276 ret -> host = zend_string_init (s , (p - s ), 0 );
272- php_replace_controlchars (ZSTR_VAL (ret -> host ), ZSTR_LEN (ret -> host ));
277+ php_str_to_utf8 (ZSTR_VAL (ret -> host ), ZSTR_LEN (ret -> host ));
273278
274279 if (e == ue ) {
275280 return ret ;
@@ -285,7 +290,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
285290 p ++ ;
286291 if (p < e ) {
287292 ret -> fragment = zend_string_init (p , (e - p ), 0 );
288- php_replace_controlchars (ZSTR_VAL (ret -> fragment ), ZSTR_LEN (ret -> fragment ));
293+ php_str_to_utf8 (ZSTR_VAL (ret -> fragment ), ZSTR_LEN (ret -> fragment ));
289294 } else {
290295 ret -> fragment = ZSTR_EMPTY_ALLOC ();
291296 }
@@ -297,7 +302,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
297302 p ++ ;
298303 if (p < e ) {
299304 ret -> query = zend_string_init (p , (e - p ), 0 );
300- php_replace_controlchars (ZSTR_VAL (ret -> query ), ZSTR_LEN (ret -> query ));
305+ php_str_to_utf8 (ZSTR_VAL (ret -> query ), ZSTR_LEN (ret -> query ));
301306 } else {
302307 ret -> query = ZSTR_EMPTY_ALLOC ();
303308 }
@@ -306,7 +311,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
306311
307312 if (s < e || s == ue ) {
308313 ret -> path = zend_string_init (s , (e - s ), 0 );
309- php_replace_controlchars (ZSTR_VAL (ret -> path ), ZSTR_LEN (ret -> path ));
314+ php_str_to_utf8 (ZSTR_VAL (ret -> path ), ZSTR_LEN (ret -> path ));
310315 }
311316
312317 return ret ;
0 commit comments