Skip to content

Commit c431687

Browse files
authored
Merge pull request #4181 from ampproject/fix/4179-duplicate-meta-tag
Parse malformed [meta charset] tag
2 parents 951bd79 + 303b025 commit c431687

File tree

3 files changed

+8
-2
lines changed

3 files changed

+8
-2
lines changed

includes/class-amp-theme-support.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2221,7 +2221,7 @@ public static function prepare_response( $response, $args = [] ) {
2221 2221
* Note that the meta charset is supposed to appear within the first 1024 bytes.
2222 2222
* See <https://www.w3.org/International/questions/qa-html-encoding-declarations>.
2223 2223
*/
2224-
if ( ! preg_match( '#<meta[^>]+charset=#i', substr( $response, 0, 1024 ) ) ) {
2224+
if ( ! preg_match( '#<meta[^>]+charset\s*=#i', substr( $response, 0, 1024 ) ) ) {
2225 2225
$meta_charset = sprintf( '<meta charset="%s">', esc_attr( get_bloginfo( 'charset' ) ) );
2226 2226

2227 2227
$response = preg_replace(

src/Dom/Document.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ final class Document extends DOMDocument {
118 118

119 119
// Regex patterns used for finding tags or extracting attribute values in an HTML string.
120 120
const HTML_FIND_TAG_WITHOUT_ATTRIBUTE_PATTERN = '/<%1$s[^>]*?>[^<]*(?:<\/%1$s>)?/i';
121-
const HTML_FIND_TAG_WITH_ATTRIBUTE_PATTERN = '/<%1$s [^>]*?\s*%2$s=[^>]*?>[^<]*(?:<\/%1$s>)?/i';
121+
const HTML_FIND_TAG_WITH_ATTRIBUTE_PATTERN = '/<%1$s [^>]*?\s*%2$s\s*=[^>]*?>[^<]*(?:<\/%1$s>)?/i';
122 122
const HTML_EXTRACT_ATTRIBUTE_VALUE_PATTERN = '/%s=(?:([\'"])(?<full>.*)?\1|(?<partial>[^ \'";]+))/';
123 123

124 124
// Tags constants used throughout.

tests/php/test-tag-and-attribute-sanitizer.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2239,6 +2239,12 @@ public function get_html_data() {
2239 2239
[],
2240 2240
[ AMP_Tag_And_Attribute_Sanitizer::DISALLOWED_PROPERTY_IN_ATTR_VALUE, AMP_Tag_And_Attribute_Sanitizer::ATTR_REQUIRED_BUT_MISSING ],
2241 2241
],
2242+
'parse_malformed_meta_charset_tag' => [
2243+
'<html amp><head><meta charset = "utf-8"></head><body></body></html>',
2244+
'<html amp><head><meta charset="utf-8"></head><body></body></html>',
2245+
[],
2246+
[],
2247+
],
2242 2248
'edge_meta_ua_compatible' => [
2243 2249
'<html amp><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"></head><body></body></html>',
2244 2250
null, // No change.

0 commit comments

Comments
 (0)