Skip to content

Commit 1114173

Browse files
authored
Fix URL parsing for Unicode on macOS
1 parent c98b173 commit 1114173

File tree

1 file changed

+27
-22
lines changed

1 file changed

+27
-22
lines changed

ext/standard/url.c

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -47,19 +47,24 @@ PHPAPI void php_url_free(php_url *theurl)
4747
}
4848
/* }}} */
4949

50-
/* {{{ php_str_to_utf8
 * Sanitizes one parsed URL component in place: every ASCII control byte
 * (0x00-0x1F and 0x7F) in str[0..len) is overwritten with '_'.
 *
 * Bytes >= 0x80 are deliberately left untouched so that multi-byte UTF-8
 * sequences survive unchanged. The range test is used instead of iscntrl()
 * because iscntrl() is locale-dependent.
 *
 * The buffer must be writable; the visible call sites pass ZSTR_VAL() of a
 * string they have just created. The function name is kept because those
 * call sites use it, although no transcoding takes place.
 *
 * str - component buffer to sanitize; NULL is tolerated and ignored
 * len - number of bytes in str to examine
 */
static void php_str_to_utf8(char *str, size_t len)
{
	unsigned char *cur = (unsigned char *)str;
	unsigned char *end;

	if (str == NULL) {
		return;
	}

	end = cur + len;
	for (; cur < end; cur++) {
		/* C0 controls and DEL only; high bytes belong to UTF-8 sequences */
		if (*cur < 0x20 || *cur == 0x7F) {
			*cur = '_';
		}
	}
}
/* }}} */
6469

6570
PHPAPI php_url *php_url_parse(char const *str)
@@ -119,7 +124,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
119124

120125
if (e + 1 == ue) { /* only scheme is available */
121126
ret->scheme = zend_string_init(s, (e - s), 0);
122-
php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
127+
php_str_to_utf8(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
123128
return ret;
124129
}
125130

@@ -141,13 +146,13 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
141146
}
142147

143148
ret->scheme = zend_string_init(s, (e-s), 0);
144-
php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
149+
php_str_to_utf8(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
145150

146151
s = e + 1;
147152
goto just_path;
148153
} else {
149154
ret->scheme = zend_string_init(s, (e-s), 0);
150-
php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
155+
php_str_to_utf8(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme));
151156

152157
if (e + 2 < ue && *(e + 2) == '/') {
153158
s = e + 3;
@@ -213,14 +218,14 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
213218
if ((p = zend_memrchr(s, '@', (e-s)))) {
214219
if ((pp = memchr(s, ':', (p-s)))) {
215220
ret->user = zend_string_init(s, (pp-s), 0);
216-
php_replace_controlchars(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user));
221+
php_str_to_utf8(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user));
217222

218223
pp++;
219224
ret->pass = zend_string_init(pp, (p-pp), 0);
220-
php_replace_controlchars(ZSTR_VAL(ret->pass), ZSTR_LEN(ret->pass));
225+
php_str_to_utf8(ZSTR_VAL(ret->pass), ZSTR_LEN(ret->pass));
221226
} else {
222227
ret->user = zend_string_init(s, (p-s), 0);
223-
php_replace_controlchars(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user));
228+
php_str_to_utf8(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user));
224229
}
225230

226231
s = p + 1;
@@ -269,7 +274,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
269274
}
270275

271276
ret->host = zend_string_init(s, (p-s), 0);
272-
php_replace_controlchars(ZSTR_VAL(ret->host), ZSTR_LEN(ret->host));
277+
php_str_to_utf8(ZSTR_VAL(ret->host), ZSTR_LEN(ret->host));
273278

274279
if (e == ue) {
275280
return ret;
@@ -285,7 +290,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
285290
p++;
286291
if (p < e) {
287292
ret->fragment = zend_string_init(p, (e - p), 0);
288-
php_replace_controlchars(ZSTR_VAL(ret->fragment), ZSTR_LEN(ret->fragment));
293+
php_str_to_utf8(ZSTR_VAL(ret->fragment), ZSTR_LEN(ret->fragment));
289294
} else {
290295
ret->fragment = ZSTR_EMPTY_ALLOC();
291296
}
@@ -297,7 +302,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
297302
p++;
298303
if (p < e) {
299304
ret->query = zend_string_init(p, (e - p), 0);
300-
php_replace_controlchars(ZSTR_VAL(ret->query), ZSTR_LEN(ret->query));
305+
php_str_to_utf8(ZSTR_VAL(ret->query), ZSTR_LEN(ret->query));
301306
} else {
302307
ret->query = ZSTR_EMPTY_ALLOC();
303308
}
@@ -306,7 +311,7 @@ PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port
306311

307312
if (s < e || s == ue) {
308313
ret->path = zend_string_init(s, (e - s), 0);
309-
php_replace_controlchars(ZSTR_VAL(ret->path), ZSTR_LEN(ret->path));
314+
php_str_to_utf8(ZSTR_VAL(ret->path), ZSTR_LEN(ret->path));
310315
}
311316

312317
return ret;

0 commit comments

Comments
 (0)