From ad9e55785ada2eadf8ba2274056d2d07647b29a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20D=C3=BCsterhus?= Date: Tue, 26 Aug 2025 22:13:37 +0200 Subject: [PATCH] uri: Fix normalization memory management for uri_parser_php_parse_url.c There were two issues with the previous implementation of normalization: - `php_raw_url_decode_ex()` would be used to modify a string with RC >1. - The return value of `php_raw_url_decode_ex()` was not used, resulting in incorrect string lengths when percent-encoded characters are decoded. Additionally there was a bogus assertion that verified that strings returned from the read handlers are RC =2, which was not the case for the `parse_url`-based parser when repeatedly retrieving a component even without normalization happening. Remove that assertion, since its usefulness is questionable. Any obvious data type issues with read handlers should be detectable when testing during development. --- ext/uri/php_uri.c | 6 +-- ext/uri/tests/100.phpt | 52 +++++++++++++++++++++++ ext/uri/uri_parser_php_parse_url.c | 42 +++++++++---------- ext/zend_test/test.c | 67 ++++++++++++++++++++++++++++++ ext/zend_test/test.stub.php | 2 + ext/zend_test/test_arginfo.h | 9 +++- 6 files changed, 149 insertions(+), 29 deletions(-) create mode 100644 ext/uri/tests/100.phpt diff --git a/ext/uri/php_uri.c b/ext/uri/php_uri.c index bfbe34a9699f1..5af4fcb36ac11 100644 --- a/ext/uri/php_uri.c +++ b/ext/uri/php_uri.c @@ -137,11 +137,7 @@ ZEND_ATTRIBUTE_NONNULL static zend_result php_uri_get_property(const uri_interna return FAILURE; } - zend_result result = property_handler->read_func(internal_uri, read_mode, zv); - - ZEND_ASSERT(result == FAILURE || (Z_TYPE_P(zv) == IS_STRING && GC_REFCOUNT(Z_STR_P(zv)) == 2) || Z_TYPE_P(zv) == IS_NULL || Z_TYPE_P(zv) == IS_LONG); - - return result; + return property_handler->read_func(internal_uri, read_mode, zv); } ZEND_ATTRIBUTE_NONNULL PHPAPI zend_result php_uri_get_scheme(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *zv) diff --git a/ext/uri/tests/100.phpt b/ext/uri/tests/100.phpt new file mode 100644 index 0000000000000..bf3d34c29255a --- /dev/null +++ b/ext/uri/tests/100.phpt @@ -0,0 +1,52 @@ +--TEST-- +Test the parse_url-based URI parser +--EXTENSIONS-- +uri +zend_test +--FILE-- + +--EXPECT-- +array(2) { + ["normalized"]=> + array(8) { + ["scheme"]=> + string(5) "https" + ["username"]=> + string(7) "example" + ["password"]=> + string(7) "example" + ["host"]=> + string(11) "example.com" + ["port"]=> + int(123) + ["path"]=> + string(13) "/example.html" + ["query"]=> + string(15) "example=example" + ["fragment"]=> + string(7) "example" + } + ["raw"]=> + array(8) { + ["scheme"]=> + string(5) "https" + ["username"]=> + string(9) "%65xample" + ["password"]=> + string(9) "%65xample" + ["host"]=> + string(13) "%65xample.com" + ["port"]=> + int(123) + ["path"]=> + string(15) "/%65xample.html" + ["query"]=> + string(19) "%65xample=%65xample" + ["fragment"]=> + string(9) "%65xample" + } +} diff --git a/ext/uri/uri_parser_php_parse_url.c b/ext/uri/uri_parser_php_parse_url.c index ae950ed48885e..8a22f9fa4dd04 100644 --- a/ext/uri/uri_parser_php_parse_url.c +++ b/ext/uri/uri_parser_php_parse_url.c @@ -21,17 +21,21 @@ #include "Zend/zend_exceptions.h" #include "ext/standard/url.h" -static void decode_component(zval *zv, uri_component_read_mode_t read_mode) +static zend_string *decode_component(zend_string *in, uri_component_read_mode_t read_mode) { - if (Z_TYPE_P(zv) != IS_STRING) { - return; - } + switch (read_mode) { + case URI_COMPONENT_READ_RAW: + return zend_string_copy(in); + case URI_COMPONENT_READ_NORMALIZED_ASCII: + case URI_COMPONENT_READ_NORMALIZED_UNICODE: { + zend_string *out = zend_string_alloc(ZSTR_LEN(in), false); - if (read_mode == URI_COMPONENT_READ_RAW) { - return; - } + ZSTR_LEN(out) = php_raw_url_decode_ex(ZSTR_VAL(out), ZSTR_VAL(in), ZSTR_LEN(in)); - php_raw_url_decode(Z_STRVAL_P(zv), Z_STRLEN_P(zv)); + return out; + } + EMPTY_SWITCH_DEFAULT_CASE(); + } } static zend_result uri_parser_php_parse_url_scheme_read(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) @@ -39,8 +43,7 @@ static zend_result uri_parser_php_parse_url_scheme_read(const uri_internal_t *in const php_url *parse_url_uri = internal_uri->uri; if (parse_url_uri->scheme) { - ZVAL_STR_COPY(retval, parse_url_uri->scheme); - decode_component(retval, read_mode); + ZVAL_STR(retval, decode_component(parse_url_uri->scheme, read_mode)); } else { ZVAL_NULL(retval); } @@ -53,8 +56,7 @@ static zend_result uri_parser_php_parse_url_username_read(const uri_internal_t * const php_url *parse_url_uri = internal_uri->uri; if (parse_url_uri->user) { - ZVAL_STR_COPY(retval, parse_url_uri->user); - decode_component(retval, read_mode); + ZVAL_STR(retval, decode_component(parse_url_uri->user, read_mode)); } else { ZVAL_NULL(retval); } @@ -67,8 +69,7 @@ static zend_result uri_parser_php_parse_url_password_read(const uri_internal_t * const php_url *parse_url_uri = internal_uri->uri; if (parse_url_uri->pass) { - ZVAL_STR_COPY(retval, parse_url_uri->pass); - decode_component(retval, read_mode); + ZVAL_STR(retval, decode_component(parse_url_uri->pass, read_mode)); } else { ZVAL_NULL(retval); } @@ -81,8 +82,7 @@ static zend_result uri_parser_php_parse_url_host_read(const uri_internal_t *inte const php_url *parse_url_uri = internal_uri->uri; if (parse_url_uri->host) { - ZVAL_STR_COPY(retval, parse_url_uri->host); - decode_component(retval, read_mode); + ZVAL_STR(retval, decode_component(parse_url_uri->host, read_mode)); } else { ZVAL_NULL(retval); } @@ -96,7 +96,6 @@ static zend_result uri_parser_php_parse_url_port_read(const uri_internal_t *inte if (parse_url_uri->port) { ZVAL_LONG(retval, parse_url_uri->port); - decode_component(retval, read_mode); } else { ZVAL_NULL(retval); } @@ -109,8 +108,7 @@ static zend_result uri_parser_php_parse_url_path_read(const uri_internal_t *inte const php_url *parse_url_uri = internal_uri->uri; if (parse_url_uri->path) { - ZVAL_STR_COPY(retval, parse_url_uri->path); - decode_component(retval, read_mode); + ZVAL_STR(retval, decode_component(parse_url_uri->path, read_mode)); } else { ZVAL_NULL(retval); } @@ -123,8 +121,7 @@ static zend_result uri_parser_php_parse_url_query_read(const uri_internal_t *int const php_url *parse_url_uri = internal_uri->uri; if (parse_url_uri->query) { - ZVAL_STR_COPY(retval, parse_url_uri->query); - decode_component(retval, read_mode); + ZVAL_STR(retval, decode_component(parse_url_uri->query, read_mode)); } else { ZVAL_NULL(retval); } @@ -137,8 +134,7 @@ static zend_result uri_parser_php_parse_url_fragment_read(const uri_internal_t * const php_url *parse_url_uri = internal_uri->uri; if (parse_url_uri->fragment) { - ZVAL_STR_COPY(retval, parse_url_uri->fragment); - decode_component(retval, read_mode); + ZVAL_STR(retval, decode_component(parse_url_uri->fragment, read_mode)); } else { ZVAL_NULL(retval); } diff --git a/ext/zend_test/test.c b/ext/zend_test/test.c index c71beebdea8ab..2f5eae7b16c18 100644 --- a/ext/zend_test/test.c +++ b/ext/zend_test/test.c @@ -40,6 +40,7 @@ #include "zend_call_stack.h" #include "zend_exceptions.h" #include "zend_mm_custom_handlers.h" +#include "ext/uri/php_uri.h" #if defined(HAVE_LIBXML) && !defined(PHP_WIN32) # include @@ -724,6 +725,72 @@ static ZEND_FUNCTION(zend_test_crash) php_printf("%s", invalid); } +static ZEND_FUNCTION(zend_test_uri_parser) +{ + zend_string *uri_string; + zend_string *parser_name; + + ZEND_PARSE_PARAMETERS_START(2, 2) + Z_PARAM_STR(uri_string) + Z_PARAM_STR(parser_name) + ZEND_PARSE_PARAMETERS_END(); + + uri_parser_t *parser = php_uri_get_parser(parser_name); + if (parser == NULL) { + zend_argument_value_error(1, "Unknown parser"); + RETURN_THROWS(); + } + + uri_internal_t *uri = php_uri_parse(parser, ZSTR_VAL(uri_string), ZSTR_LEN(uri_string), false); + if (uri == NULL) { + RETURN_THROWS(); + } + + zval value; + + array_init(return_value); + zval normalized; + array_init(&normalized); + php_uri_get_scheme(uri, URI_COMPONENT_READ_NORMALIZED_ASCII, &value); + zend_hash_add(Z_ARR(normalized), ZSTR_KNOWN(ZEND_STR_SCHEME), &value); + php_uri_get_username(uri, URI_COMPONENT_READ_NORMALIZED_ASCII, &value); + zend_hash_add(Z_ARR(normalized), ZSTR_KNOWN(ZEND_STR_USERNAME), &value); + php_uri_get_password(uri, URI_COMPONENT_READ_NORMALIZED_ASCII, &value); + zend_hash_add(Z_ARR(normalized), ZSTR_KNOWN(ZEND_STR_PASSWORD), &value); + php_uri_get_host(uri, URI_COMPONENT_READ_NORMALIZED_ASCII, &value); + zend_hash_add(Z_ARR(normalized), ZSTR_KNOWN(ZEND_STR_HOST), &value); + php_uri_get_port(uri, URI_COMPONENT_READ_NORMALIZED_ASCII, &value); + zend_hash_add(Z_ARR(normalized), ZSTR_KNOWN(ZEND_STR_PORT), &value); + php_uri_get_path(uri, URI_COMPONENT_READ_NORMALIZED_ASCII, &value); + zend_hash_add(Z_ARR(normalized), ZSTR_KNOWN(ZEND_STR_PATH), &value); + php_uri_get_query(uri, URI_COMPONENT_READ_NORMALIZED_ASCII, &value); + zend_hash_add(Z_ARR(normalized), ZSTR_KNOWN(ZEND_STR_QUERY), &value); + php_uri_get_fragment(uri, URI_COMPONENT_READ_NORMALIZED_ASCII, &value); + zend_hash_add(Z_ARR(normalized), ZSTR_KNOWN(ZEND_STR_FRAGMENT), &value); + zend_hash_str_add(Z_ARR_P(return_value), "normalized", strlen("normalized"), &normalized); + zval raw; + array_init(&raw); + php_uri_get_scheme(uri, URI_COMPONENT_READ_RAW, &value); + zend_hash_add(Z_ARR(raw), ZSTR_KNOWN(ZEND_STR_SCHEME), &value); + php_uri_get_username(uri, URI_COMPONENT_READ_RAW, &value); + zend_hash_add(Z_ARR(raw), ZSTR_KNOWN(ZEND_STR_USERNAME), &value); + php_uri_get_password(uri, URI_COMPONENT_READ_RAW, &value); + zend_hash_add(Z_ARR(raw), ZSTR_KNOWN(ZEND_STR_PASSWORD), &value); + php_uri_get_host(uri, URI_COMPONENT_READ_RAW, &value); + zend_hash_add(Z_ARR(raw), ZSTR_KNOWN(ZEND_STR_HOST), &value); + php_uri_get_port(uri, URI_COMPONENT_READ_RAW, &value); + zend_hash_add(Z_ARR(raw), ZSTR_KNOWN(ZEND_STR_PORT), &value); + php_uri_get_path(uri, URI_COMPONENT_READ_RAW, &value); + zend_hash_add(Z_ARR(raw), ZSTR_KNOWN(ZEND_STR_PATH), &value); + php_uri_get_query(uri, URI_COMPONENT_READ_RAW, &value); + zend_hash_add(Z_ARR(raw), ZSTR_KNOWN(ZEND_STR_QUERY), &value); + php_uri_get_fragment(uri, URI_COMPONENT_READ_RAW, &value); + zend_hash_add(Z_ARR(raw), ZSTR_KNOWN(ZEND_STR_FRAGMENT), &value); + zend_hash_str_add(Z_ARR_P(return_value), "raw", strlen("raw"), &raw); + + php_uri_free(uri); +} + static bool has_opline(zend_execute_data *execute_data) { return execute_data diff --git a/ext/zend_test/test.stub.php b/ext/zend_test/test.stub.php index 17316230e8d3e..ab5abfa6ce7ef 100644 --- a/ext/zend_test/test.stub.php +++ b/ext/zend_test/test.stub.php @@ -342,6 +342,8 @@ function zend_test_compile_to_ast(string $str): string {} function zend_test_gh18756(): void {} function zend_test_opcache_preloading(): bool {} + + function zend_test_uri_parser(string $uri, string $parser): array { } } namespace ZendTestNS { diff --git a/ext/zend_test/test_arginfo.h b/ext/zend_test/test_arginfo.h index 33c02310ba4fb..0d95340e12289 100644 --- a/ext/zend_test/test_arginfo.h +++ b/ext/zend_test/test_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: b767745e4e7be7cb4ba294e238a1b0f63da8479e */ + * Stub hash: eb624df6b39083abc81b8636e965370cea9e093f */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_zend_trigger_bailout, 0, 0, IS_NEVER, 0) ZEND_END_ARG_INFO() @@ -190,6 +190,11 @@ ZEND_END_ARG_INFO() #define arginfo_zend_test_opcache_preloading arginfo_zend_test_is_pcre_bundled +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_zend_test_uri_parser, 0, 2, IS_ARRAY, 0) + ZEND_ARG_TYPE_INFO(0, uri, IS_STRING, 0) + ZEND_ARG_TYPE_INFO(0, parser, IS_STRING, 0) +ZEND_END_ARG_INFO() + #define arginfo_ZendTestNS2_namespaced_func arginfo_zend_test_is_pcre_bundled #define arginfo_ZendTestNS2_namespaced_deprecated_func arginfo_zend_test_void_return @@ -332,6 +337,7 @@ static ZEND_FUNCTION(zend_test_log_err_debug); static ZEND_FUNCTION(zend_test_compile_to_ast); static ZEND_FUNCTION(zend_test_gh18756); static ZEND_FUNCTION(zend_test_opcache_preloading); +static ZEND_FUNCTION(zend_test_uri_parser); static ZEND_FUNCTION(ZendTestNS2_namespaced_func); static ZEND_FUNCTION(ZendTestNS2_namespaced_deprecated_func); static ZEND_FUNCTION(ZendTestNS2_ZendSubNS_namespaced_func); @@ -461,6 +467,7 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(zend_test_compile_to_ast, arginfo_zend_test_compile_to_ast) ZEND_FE(zend_test_gh18756, arginfo_zend_test_gh18756) ZEND_FE(zend_test_opcache_preloading, arginfo_zend_test_opcache_preloading) + ZEND_FE(zend_test_uri_parser, arginfo_zend_test_uri_parser) #if (PHP_VERSION_ID >= 80400) ZEND_RAW_FENTRY(ZEND_NS_NAME("ZendTestNS2", "namespaced_func"), zif_ZendTestNS2_namespaced_func, arginfo_ZendTestNS2_namespaced_func, 0, NULL, NULL) #else