Skip to content

Commit 50eca61

Browse files
craigfrancis authored and nikic committed
Use ENT_QUOTES|ENT_SUBSTITUTE default for HTML encoding and decoding functions
htmlspecialchars() etc now use ENT_QUOTES | ENT_SUBSTITUTE rather than ENT_COMPAT by default. Closes GH-6583.
1 parent 496e474 commit 50eca61

14 files changed

+41
-35
lines changed

UPGRADING

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ PHP 8.1 UPGRADE NOTES
6969

7070
- Standard:
7171
. version_compare() no longer accepts undocumented operator abbreviations.
72+
. htmlspecialchars(), htmlentities(), htmlspecialchars_decode(),
73+
html_entity_decode() and get_html_translation_table() now use
74+
ENT_QUOTES | ENT_SUBSTITUTE rather than ENT_COMPAT by default. This means
75+
that ' is escaped to &#039; while previously it was left alone.
76+
Additionally, malformed UTF-8 will be replaced by a Unicode substitution
77+
character, instead of resulting in an empty string.
7278

7379
========================================
7480
2. New Features

ext/standard/basic_functions.stub.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -512,15 +512,15 @@ function headers_list(): array {}
512512

513513
/* {{{ html.c */
514514

515-
function htmlspecialchars(string $string, int $flags = ENT_COMPAT, ?string $encoding = null, bool $double_encode = true): string {}
515+
function htmlspecialchars(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, ?string $encoding = null, bool $double_encode = true): string {}
516516

517-
function htmlspecialchars_decode(string $string, int $flags = ENT_COMPAT): string {}
517+
function htmlspecialchars_decode(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE): string {}
518518

519-
function html_entity_decode(string $string, int $flags = ENT_COMPAT, ?string $encoding = null): string {}
519+
function html_entity_decode(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, ?string $encoding = null): string {}
520520

521-
function htmlentities(string $string, int $flags = ENT_COMPAT, ?string $encoding = null, bool $double_encode = true): string {}
521+
function htmlentities(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, ?string $encoding = null, bool $double_encode = true): string {}
522522

523-
function get_html_translation_table(int $table = HTML_SPECIALCHARS, int $flags = ENT_COMPAT, string $encoding = "UTF-8"): array {}
523+
function get_html_translation_table(int $table = HTML_SPECIALCHARS, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, string $encoding = "UTF-8"): array {}
524524

525525
/* }}} */
526526

ext/standard/basic_functions_arginfo.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -765,27 +765,27 @@ ZEND_END_ARG_INFO()
765765

766766
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_htmlspecialchars, 0, 1, IS_STRING, 0)
767767
ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0)
768-
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT")
768+
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE")
769769
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
770770
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, double_encode, _IS_BOOL, 0, "true")
771771
ZEND_END_ARG_INFO()
772772

773773
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_htmlspecialchars_decode, 0, 1, IS_STRING, 0)
774774
ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0)
775-
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT")
775+
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE")
776776
ZEND_END_ARG_INFO()
777777

778778
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_html_entity_decode, 0, 1, IS_STRING, 0)
779779
ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0)
780-
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT")
780+
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE")
781781
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
782782
ZEND_END_ARG_INFO()
783783

784784
#define arginfo_htmlentities arginfo_htmlspecialchars
785785

786786
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_get_html_translation_table, 0, 0, IS_ARRAY, 0)
787787
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, table, IS_LONG, 0, "HTML_SPECIALCHARS")
788-
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT")
788+
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE")
789789
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"UTF-8\"")
790790
ZEND_END_ARG_INFO()
791791

ext/standard/html.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,7 +1316,7 @@ PHPAPI zend_string *php_escape_html_entities_ex(const unsigned char *old, size_t
13161316
static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
13171317
{
13181318
zend_string *str, *hint_charset = NULL;
1319-
zend_long flags = ENT_COMPAT;
1319+
zend_long flags = ENT_QUOTES|ENT_SUBSTITUTE;
13201320
zend_string *replaced;
13211321
bool double_encode = 1;
13221322

@@ -1367,7 +1367,7 @@ PHP_FUNCTION(htmlspecialchars)
13671367
PHP_FUNCTION(htmlspecialchars_decode)
13681368
{
13691369
zend_string *str;
1370-
zend_long quote_style = ENT_COMPAT;
1370+
zend_long quote_style = ENT_QUOTES|ENT_SUBSTITUTE;
13711371
zend_string *replaced;
13721372

13731373
ZEND_PARSE_PARAMETERS_START(1, 2)
@@ -1385,7 +1385,7 @@ PHP_FUNCTION(htmlspecialchars_decode)
13851385
PHP_FUNCTION(html_entity_decode)
13861386
{
13871387
zend_string *str, *hint_charset = NULL;
1388-
zend_long quote_style = ENT_COMPAT;
1388+
zend_long quote_style = ENT_QUOTES|ENT_SUBSTITUTE;
13891389
zend_string *replaced;
13901390

13911391
ZEND_PARSE_PARAMETERS_START(1, 3)
@@ -1468,7 +1468,7 @@ static inline void write_s3row_data(
14681468
PHP_FUNCTION(get_html_translation_table)
14691469
{
14701470
zend_long all = HTML_SPECIALCHARS,
1471-
flags = ENT_COMPAT;
1471+
flags = ENT_QUOTES|ENT_SUBSTITUTE;
14721472
int doctype;
14731473
entity_table_opt entity_table;
14741474
const enc_to_uni *to_uni_table = NULL;

ext/standard/tests/strings/bug53021.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,4 @@ single quotes variations:
3838
'
3939
'
4040
'
41-
'
41+
'

ext/standard/tests/strings/bug61116.phpt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Function [ <internal:standard> function htmlspecialchars ] {
1010

1111
- Parameters [4] {
1212
Parameter #0 [ <required> string $string ]
13-
Parameter #1 [ <optional> int $flags = ENT_COMPAT ]
13+
Parameter #1 [ <optional> int $flags = ENT_QUOTES | ENT_SUBSTITUTE ]
1414
Parameter #2 [ <optional> ?string $encoding = null ]
1515
Parameter #3 [ <optional> bool $double_encode = true ]
1616
}
@@ -21,7 +21,7 @@ Function [ <internal:standard> function get_html_translation_table ] {
2121

2222
- Parameters [3] {
2323
Parameter #0 [ <optional> int $table = HTML_SPECIALCHARS ]
24-
Parameter #1 [ <optional> int $flags = ENT_COMPAT ]
24+
Parameter #1 [ <optional> int $flags = ENT_QUOTES | ENT_SUBSTITUTE ]
2525
Parameter #2 [ <optional> string $encoding = "UTF-8" ]
2626
}
2727
- Return [ array ]

ext/standard/tests/strings/html_entity_decode3.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ echo "\nDone.\n";
218218
&#x0E; NOT DECODED
219219
&#x1F; NOT DECODED
220220
&#x20; DECODED
221-
&#x27; NOT DECODED
221+
&#x27; DECODED
222222
&#x7F; NOT DECODED
223223
&#x80; NOT DECODED
224224
&#x9F; NOT DECODED

ext/standard/tests/strings/htmlentities24.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ string(198) "&sbquo;&dagger;&trade;&Yuml;&euro;&sbquo;&dagger;&bdquo;&euro;&perm
310310
string(42) "&lt;html&gt; This is a test! &lt;/html&gt;"
311311

312312
*** Testing htmlentites() on a quote ***
313-
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"
313+
string(46) "A &#039;quote&#039; is &lt;b&gt;bold&lt;/b&gt;"
314314
string(46) "A &#039;quote&#039; is &lt;b&gt;bold&lt;/b&gt;"
315315
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"
316316
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"

ext/standard/tests/strings/htmlspecialchars.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ string(187) "&lt;br&gt;Testing&lt;p&gt;New file.&lt;/p&gt;&lt;p&gt;&lt;br&gt;Fil
306306
string(46) "&lt;br&gt;Testing&lt;p&gt;New file.&lt;/p&gt; "
307307

308308
*** Testing htmlspecialchars() on a quote...
309-
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"
309+
string(46) "A &#039;quote&#039; is &lt;b&gt;bold&lt;/b&gt;"
310310
string(46) "A &#039;quote&#039; is &lt;b&gt;bold&lt;/b&gt;"
311311
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"
312312
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"

ext/standard/tests/strings/htmlspecialchars_basic.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ Basic tests
5656
Test 1: abc&lt;&gt;&quot;&amp;
5757
Test 2: &amp;&amp;abc&lt;&gt;&quot;&amp;
5858
Test 3: a&gt;,\&lt;bc&lt;&gt;&quot;&amp;
59-
Test 4: a\'\'&amp;bc&lt;&gt;&quot;&amp;
59+
Test 4: a\&#039;\&#039;&amp;bc&lt;&gt;&quot;&amp;
6060
Test 5: &amp;amp;&amp;lt;
6161
Test 6: abc&lt;&gt;"&amp;
6262
Test 7: &amp;&amp;abc&lt;&gt;"&amp;

0 commit comments

Comments
 (0)