From 74c0d05a020b4a0e761ab2b65f769d15ee8c3ef2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Mon, 14 Jul 2025 23:21:07 +0200 Subject: [PATCH 1/2] Cherry pick some recent lexbor changes The following changes are cherry-picked: - https://github.com/lexbor/lexbor/commit/c60846689d3f516bddd3f6ff1f20cf721bd67b79 core/str.c: Fix undefined behavior in function lexbor_str_append - https://github.com/lexbor/lexbor/commit/92260fd6709247c0fc324b1d59efb53ba16db27e URL: fixed hostname setter if port is specified. --- ext/lexbor/lexbor/core/str.c | 4 ++++ ext/lexbor/lexbor/url/url.c | 2 +- ext/uri/tests/026.phpt | 38 ++++++++++++------------------------ 3 files changed, 18 insertions(+), 26 deletions(-) diff --git a/ext/lexbor/lexbor/core/str.c b/ext/lexbor/lexbor/core/str.c index 0f04286bdea56..3b3c574f1d9a9 100644 --- a/ext/lexbor/lexbor/core/str.c +++ b/ext/lexbor/lexbor/core/str.c @@ -133,6 +133,10 @@ lexbor_str_append(lexbor_str_t *str, lexbor_mraw_t *mraw, { lxb_char_t *data_begin; + if (length == 0) { + return str->data; + } + lexbor_str_check_size_arg_m(str, lexbor_str_size(str), mraw, (length + 1), NULL); diff --git a/ext/lexbor/lexbor/url/url.c b/ext/lexbor/lexbor/url/url.c index bbb3b5bbd3cb7..ed3732a5ccab2 100644 --- a/ext/lexbor/lexbor/url/url.c +++ b/ext/lexbor/lexbor/url/url.c @@ -1818,7 +1818,7 @@ lxb_url_parse_basic_h(lxb_url_parser_t *parser, lxb_url_t *url, } if (override_state == LXB_URL_STATE_HOSTNAME_STATE) { - lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK); + lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR); } status = lxb_url_host_parse(parser, begin, p, &url->host, diff --git a/ext/uri/tests/026.phpt b/ext/uri/tests/026.phpt index 4763ea9d4406c..47a8597fa2e7d 100644 --- a/ext/uri/tests/026.phpt +++ b/ext/uri/tests/026.phpt @@ -8,21 +8,27 @@ uri $url1 = Uri\WhatWg\Url::parse("https://example.com"); $url2 = $url1->withHost("test.com"); $url3 = $url2->withHost("t%65st.com"); // test.com -$url4 = $url3->withHost("test.com:8080"); +try { + $url3->withHost("test.com:8080"); +} catch (Uri\WhatWg\InvalidUrlException $e) { + echo $e->getMessage() . "\n"; +} var_dump($url1->getAsciiHost()); var_dump($url2->getAsciiHost()); var_dump($url3->getAsciiHost()); -var_dump($url4->getAsciiHost()); -var_dump($url4->getPort()); try { - $url4->withHost("t%3As%2Ft.com"); // t:s/t.com + $url3->withHost("t%3As%2Ft.com"); // t:s/t.com } catch (Uri\WhatWg\InvalidUrlException $e) { echo $e->getMessage() . "\n"; } -var_dump($url4->withHost("t:s/t.com")); +try { + $url3->withHost("t:s/t.com"); // t:s/t.com +} catch (Uri\WhatWg\InvalidUrlException $e) { + echo $e->getMessage() . "\n"; +} try { $url2->withHost(null); @@ -38,30 +44,12 @@ var_dump($url2->getAsciiHost()); ?> --EXPECTF-- +The specified host is malformed string(11) "example.com" string(8) "test.com" string(8) "test.com" -string(8) "test.com" -NULL The specified host is malformed (DomainInvalidCodePoint) -object(Uri\WhatWg\Url)#%d (%d) { - ["scheme"]=> - string(5) "https" - ["username"]=> - NULL - ["password"]=> - NULL - ["host"]=> - string(8) "test.com" - ["port"]=> - NULL - ["path"]=> - string(1) "/" - ["query"]=> - NULL - ["fragment"]=> - NULL -} +The specified host is malformed The specified host is malformed (HostMissing) string(7) "foo.com" string(8) "test.com" From aeb9cd4fa7a3e559b85ee3d60aff05a50a6b7c5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Tue, 15 Jul 2025 12:58:59 +0200 Subject: [PATCH 2/2] Fix parsing URIs with empty hosts Both RFC 3986 and WHATWG support empty hosts --- ext/uri/php_lexbor.c | 4 +++- ext/uri/php_uriparser.c | 2 +- ext/uri/tests/003.phpt | 40 +++++++++++++++++++++++++++++++++++++++- ext/uri/tests/012.phpt | 4 ++-- 4 files changed, 45 insertions(+), 5 deletions(-) diff --git a/ext/uri/php_lexbor.c b/ext/uri/php_lexbor.c index 39b0fb7d09ce3..5287bbcd9023d 100644 --- a/ext/uri/php_lexbor.c +++ b/ext/uri/php_lexbor.c @@ -372,7 +372,9 @@ static zend_result lexbor_read_host(const struct uri_internal_t *internal_uri, u smart_str_appendc(&host_str, ']'); ZVAL_NEW_STR(retval, smart_str_extract(&host_str)); - } else if (lexbor_uri->host.type != LXB_URL_HOST_TYPE_EMPTY && lexbor_uri->host.type != LXB_URL_HOST_TYPE__UNDEF) { + } else if (lexbor_uri->host.type == LXB_URL_HOST_TYPE_EMPTY) { + ZVAL_EMPTY_STRING(retval); + } else if (lexbor_uri->host.type != LXB_URL_HOST_TYPE__UNDEF) { switch (read_mode) { case URI_COMPONENT_READ_NORMALIZED_UNICODE: { smart_str host_str = {0}; diff --git a/ext/uri/php_uriparser.c b/ext/uri/php_uriparser.c index 875d72b85241a..ba6f1ce776cb6 100644 --- a/ext/uri/php_uriparser.c +++ b/ext/uri/php_uriparser.c @@ -174,7 +174,7 @@ ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_host(const uri_internal UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode); ZEND_ASSERT(uriparser_uri != NULL); - if (uriparser_uri->hostText.first != NULL && uriparser_uri->hostText.afterLast != NULL && get_text_range_length(&uriparser_uri->hostText) > 0) { + if (uriparser_uri->hostText.first != NULL && uriparser_uri->hostText.afterLast != NULL) { if (uriparser_uri->hostData.ip6 != NULL || uriparser_uri->hostData.ipFuture.first != NULL) { /* the textual representation of the host is always accessible in the .hostText field no matter what the host is */ smart_str host_str = {0}; diff --git a/ext/uri/tests/003.phpt b/ext/uri/tests/003.phpt index be607fd6cacef..a1918f7a838b2 100644 --- a/ext/uri/tests/003.phpt +++ b/ext/uri/tests/003.phpt @@ -1,5 +1,5 @@ --TEST-- -Parse URL exotic URLs +Parse special URIs --EXTENSIONS-- uri --FILE-- @@ -8,6 +8,8 @@ uri var_dump(Uri\Rfc3986\Uri::parse("http://username:password@héééostname:9090/gah/../path?arg=vaéue#anchor")); var_dump(Uri\WhatWg\Url::parse("http://username:password@héééostname:9090/gah/../path?arg=vaéue#anchor")); +var_dump(Uri\Rfc3986\Uri::parse("//host123/")); +var_dump(Uri\Rfc3986\Uri::parse("///foo/")); var_dump(Uri\Rfc3986\Uri::parse("/page:1")); var_dump(Uri\WhatWg\Url::parse("/page:1")); @@ -32,6 +34,42 @@ object(Uri\WhatWg\Url)#%d (%d) { ["fragment"]=> string(6) "anchor" } +object(Uri\Rfc3986\Uri)#%d (%d) { + ["scheme"]=> + NULL + ["username"]=> + NULL + ["password"]=> + NULL + ["host"]=> + string(7) "host123" + ["port"]=> + NULL + ["path"]=> + string(1) "/" + ["query"]=> + NULL + ["fragment"]=> + NULL +} +object(Uri\Rfc3986\Uri)#%d (%d) { + ["scheme"]=> + NULL + ["username"]=> + NULL + ["password"]=> + NULL + ["host"]=> + string(0) "" + ["port"]=> + NULL + ["path"]=> + string(5) "/foo/" + ["query"]=> + NULL + ["fragment"]=> + NULL +} object(Uri\Rfc3986\Uri)#%d (%d) { ["scheme"]=> NULL diff --git a/ext/uri/tests/012.phpt b/ext/uri/tests/012.phpt index 7c14014fb3519..3eb343af535cb 100644 --- a/ext/uri/tests/012.phpt +++ b/ext/uri/tests/012.phpt @@ -57,7 +57,7 @@ object(Uri\Rfc3986\Uri)#%d (%d) { ["password"]=> NULL ["host"]=> - NULL + string(0) "" ["port"]=> NULL ["path"]=> @@ -75,7 +75,7 @@ object(Uri\WhatWg\Url)#%d (%d) { ["password"]=> NULL ["host"]=> - NULL + string(0) "" ["port"]=> NULL ["path"]=>