From 8e6faeeee5334d6efcd5c7bbe2ade6d47f302130 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 29 May 2025 21:53:36 +0200 Subject: [PATCH 1/2] Remove custom UTF-8 check function from ext/libxml This was originally introduced as a workaround for a libxml2 bug [1]. This bug has been fixed for more than a decade, and we can use the libxml2 API again. [1] 7e53511ec810c2ab257b9cb68c1fc315e057a37f --- UPGRADING.INTERNALS | 1 + ext/libxml/libxml.c | 26 -------------------------- ext/libxml/php_libxml.h | 1 - ext/soap/php_encoding.c | 2 +- 4 files changed, 2 insertions(+), 28 deletions(-) diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 2af2d4ce1c57d..82bd53db51872 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -62,6 +62,7 @@ PHP 8.5 INTERNALS UPGRADE NOTES - ext/libxml . The refcount APIs now return an `unsigned int` instead of an `int`. + . Removed php_libxml_xmlCheckUTF8(). Use xmlCheckUTF8() from libxml instead. - ext/pdo . Added `php_pdo_stmt_valid_db_obj_handle()` to check if the database object diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 5d5f3f383cd1f..03a89c7aad51d 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -1236,32 +1236,6 @@ PHP_FUNCTION(libxml_get_external_entity_loader) /* }}} */ /* {{{ Common functions shared by extensions */ -bool php_libxml_xmlCheckUTF8(const unsigned char *s) -{ - size_t i; - unsigned char c; - - for (i = 0; (c = s[i++]);) { - if ((c & 0x80) == 0) { - } else if ((c & 0xe0) == 0xc0) { - if ((s[i++] & 0xc0) != 0x80) { - return false; - } - } else if ((c & 0xf0) == 0xe0) { - if ((s[i++] & 0xc0) != 0x80 || (s[i++] & 0xc0) != 0x80) { - return false; - } - } else if ((c & 0xf8) == 0xf0) { - if ((s[i++] & 0xc0) != 0x80 || (s[i++] & 0xc0) != 0x80 || (s[i++] & 0xc0) != 0x80) { - return false; - } - } else { - return false; - } - } - return true; -} - zval *php_libxml_register_export(const zend_class_entry *ce, php_libxml_export_node export_function) { /* Initialize in case this module hasn't been loaded yet */ diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h index ea7961dc2f1a7..eb10e7e4cb93e 100644 --- a/ext/libxml/php_libxml.h +++ b/ext/libxml/php_libxml.h @@ -207,7 +207,6 @@ PHP_LIBXML_API void php_libxml_ctx_warning(void *ctx, const char *msg, ...); PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(const char *file, int line, int column, const char *msg,...); PHP_LIBXML_API void php_libxml_ctx_error(void *ctx, const char *msg, ...); PHP_LIBXML_API void php_libxml_error_handler_va(php_libxml_error_level error_type, void *ctx, const char *msg, va_list args); -PHP_LIBXML_API bool php_libxml_xmlCheckUTF8(const unsigned char *s); PHP_LIBXML_API void php_libxml_switch_context(const zval *context, zval *oldcontext); PHP_LIBXML_API void php_libxml_issue_error(int level, const char *msg); PHP_LIBXML_API bool php_libxml_disable_entity_loader(bool disable); diff --git a/ext/soap/php_encoding.c b/ext/soap/php_encoding.c index c4fa7702cf446..386d7e3f6ba87 100644 --- a/ext/soap/php_encoding.c +++ b/ext/soap/php_encoding.c @@ -878,7 +878,7 @@ static xmlNodePtr to_xml_string(encodeTypePtr type, zval *data, int style, xmlNo xmlBufferFree(in); } - if (!php_libxml_xmlCheckUTF8(BAD_CAST(str))) { + if (!xmlCheckUTF8(str)) { char *err = emalloc(new_len + 8); char c; int i; From a25e63f63147b31ce4a6c53f3f12267dbc322145 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 29 May 2025 22:15:50 +0200 Subject: [PATCH 2/2] fix build --- ext/soap/php_encoding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/soap/php_encoding.c b/ext/soap/php_encoding.c index 386d7e3f6ba87..5286791e021d0 100644 --- a/ext/soap/php_encoding.c +++ b/ext/soap/php_encoding.c @@ -878,7 +878,7 @@ static xmlNodePtr to_xml_string(encodeTypePtr type, zval *data, int style, xmlNo xmlBufferFree(in); } - if (!xmlCheckUTF8(str)) { + if (!xmlCheckUTF8(BAD_CAST str)) { char *err = emalloc(new_len + 8); char c; int i;