Skip to content

Commit f8cc996

Browse files
authored
Merge pull request #5366 from ampproject/fix/5358-multiple-entries-in-img-srcset
Validate and sanitize image candidates in srcset attribute
2 parents 6166bba + c850122 commit f8cc996

8 files changed

+308
-11
lines changed

.phpcs.xml.dist

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
<element value="firstChild"/>
6666
<element value="lastChild"/>
6767
<element value="nodeValue"/>
68+
<element value="ownerElement"/>
6869
<element value="DEFAULT_ARGS"/>
6970
<element value="documentElement"/>
7071
<element value="removeChild"/>

includes/amp-helper-functions.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1517,6 +1517,7 @@ function amp_get_content_sanitizers( $post = null ) {
15171517
'force_svg_support' => [], // Always replace 'no-svg' class with 'svg' if it exists.
15181518
],
15191519
],
1520+
'AMP_Srcset_Sanitizer' => [],
15201521
'AMP_Img_Sanitizer' => [
15211522
'align_wide_support' => current_theme_supports( 'align-wide' ),
15221523
],
@@ -1639,8 +1640,8 @@ function amp_get_content_sanitizers( $post = null ) {
16391640
*/
16401641
$sanitizers['AMP_Style_Sanitizer']['allow_transient_caching'] = apply_filters( 'amp_parsed_css_transient_caching_allowed', true );
16411642

1642-
// Force style sanitizer, meta sanitizer, and validating sanitizer to be at end.
1643-
foreach ( [ 'AMP_Style_Sanitizer', 'AMP_Meta_Sanitizer', 'AMP_Tag_And_Attribute_Sanitizer' ] as $class_name ) {
1643+
// Force layout, style, meta, and validating sanitizers to be at the end.
1644+
foreach ( [ 'AMP_Layout_Sanitizer', 'AMP_Style_Sanitizer', 'AMP_Meta_Sanitizer', 'AMP_Tag_And_Attribute_Sanitizer' ] as $class_name ) {
16441645
if ( isset( $sanitizers[ $class_name ] ) ) {
16451646
$sanitizer = $sanitizers[ $class_name ];
16461647
unset( $sanitizers[ $class_name ] );
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
<?php
2+
/**
3+
* Class AMP_Srcset_Sanitizer.
4+
*
5+
* @package AMP
6+
*/
7+
8+
/**
9+
* Sanitizes the `srcset` attribute of elements.
10+
*
11+
* @internal
12+
* @since 2.0.2
13+
*/
14+
class AMP_Srcset_Sanitizer extends AMP_Base_Sanitizer {
15+
16+
/**
17+
* Pattern used used for finding image candidates defined within the `srcset` attribute.
18+
* The pattern is adapted from the JS validator. See https://github.com/ampproject/amphtml/blob/5fcb29a41d06867b25ed6aca69b4aeaf96456c8c/validator/js/engine/parse-srcset.js#L72-L81.
19+
*
20+
* @var string
21+
*/
22+
const SRCSET_REGEX_PATTERN = '/\s*(?:,\s*)?(?<url>[^,\s]\S*[^,\s])\s*(?<dimension>[1-9]\d*[wx]|[1-9]\d*\.\d+x|0.\d*[1-9]\d*x)?\s*(?<comma>,)?\s*/';
23+
24+
/**
25+
* Sanitize the HTML contained in the DOMDocument received by the constructor
26+
*/
27+
public function sanitize() {
28+
$attribute_query = $this->dom->xpath->query( '//*/@srcset' );
29+
30+
if ( 0 === $attribute_query->length ) {
31+
return;
32+
}
33+
34+
foreach ( $attribute_query as $attribute ) {
35+
/** @var DOMAttr $attribute */
36+
if ( ! empty( $attribute->value ) ) {
37+
$this->sanitize_srcset_attribute( $attribute );
38+
}
39+
}
40+
}
41+
42+
/**
43+
* Parses the `srcset` attribute and validates each image candidate defined.
44+
*
45+
* Validation errors will be raised if the attribute value is malformed or contains image candidates
46+
* with the same width or pixel density defined.
47+
*
48+
* @param DOMAttr $attribute Srcset attribute.
49+
*/
50+
private function sanitize_srcset_attribute( DOMAttr $attribute ) {
51+
$srcset = $attribute->value;
52+
53+
// Bail and raise a validation error if no image candidates were found or the last matched group does not
54+
// match the end of the `srcset`.
55+
if (
56+
! preg_match_all( self::SRCSET_REGEX_PATTERN, $srcset, $matches )
57+
||
58+
end( $matches[0] ) !== substr( $srcset, -strlen( end( $matches[0] ) ) )
59+
) {
60+
61+
$validation_error = [
62+
'code' => AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE,
63+
];
64+
$this->remove_invalid_attribute( $attribute->ownerElement, $attribute, $validation_error );
65+
return;
66+
}
67+
68+
$dimension_count = count( $matches['dimension'] );
69+
$commas_count = count( array_filter( $matches['comma'] ) );
70+
71+
// Bail and raise a validation error if the number of dimensions does not match the number of URLs, or there
72+
// are not enough commas to separate the image candidates.
73+
if ( count( $matches['url'] ) !== $dimension_count || ( $dimension_count - 1 ) !== $commas_count ) {
74+
$validation_error = [
75+
'code' => AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE,
76+
];
77+
$this->remove_invalid_attribute( $attribute->ownerElement, $attribute, $validation_error );
78+
return;
79+
}
80+
81+
// Bail if there are no duplicate image candidates.
82+
// Note: array_flip() is used as a faster alternative to array_unique(). See https://stackoverflow.com/a/8321709/93579.
83+
if ( count( array_flip( $matches['dimension'] ) ) === $dimension_count ) {
84+
return;
85+
}
86+
87+
$image_candidates = [];
88+
$duplicate_dimensions = [];
89+
90+
foreach ( $matches['dimension'] as $index => $dimension ) {
91+
if ( empty( trim( $dimension ) ) ) {
92+
$dimension = '1x';
93+
}
94+
95+
// Catch if there are duplicate dimensions that have different URLs. In such cases a validation error will be raised.
96+
if ( isset( $image_candidates[ $dimension ] ) && $matches['url'][ $index ] !== $image_candidates[ $dimension ] ) {
97+
$duplicate_dimensions[] = $dimension;
98+
continue;
99+
}
100+
101+
$image_candidates[ $dimension ] = $matches['url'][ $index ];
102+
}
103+
104+
// If there are duplicates, raise a validation error and stop short-circuit processing if the error is not removed.
105+
if ( ! empty( $duplicate_dimensions ) ) {
106+
$validation_error = [
107+
'code' => AMP_Tag_And_Attribute_Sanitizer::DUPLICATE_DIMENSIONS,
108+
'duplicate_dimensions' => $duplicate_dimensions,
109+
];
110+
if ( ! $this->should_sanitize_validation_error( $validation_error, [ 'node' => $attribute ] ) ) {
111+
return;
112+
}
113+
}
114+
115+
// Otherwise, output the normalized/validated srcset value.
116+
$attribute->value = implode(
117+
', ',
118+
array_map(
119+
static function ( $dimension ) use ( $image_candidates ) {
120+
return "{$image_candidates[ $dimension ]} {$dimension}";
121+
},
122+
array_keys( $image_candidates )
123+
)
124+
);
125+
}
126+
}

includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class AMP_Tag_And_Attribute_Sanitizer extends AMP_Base_Sanitizer {
4646
const DISALLOWED_RELATIVE_URL = 'DISALLOWED_RELATIVE_URL';
4747
const DISALLOWED_TAG = 'DISALLOWED_TAG';
4848
const DISALLOWED_TAG_ANCESTOR = 'DISALLOWED_TAG_ANCESTOR';
49+
const DUPLICATE_DIMENSIONS = 'DUPLICATE_DIMENSIONS';
4950
const DUPLICATE_ONEOF_ATTRS = 'DUPLICATE_ONEOF_ATTRS';
5051
const DUPLICATE_UNIQUE_TAG = 'DUPLICATE_UNIQUE_TAG';
5152
const IMPLIED_LAYOUT_INVALID = 'IMPLIED_LAYOUT_INVALID';
@@ -1942,8 +1943,8 @@ private function check_attr_spec_rule_allowed_protocol( DOMElement $node, $attr_
19421943
*/
19431944
private function extract_attribute_urls( DOMAttr $attribute_node, $spec_attr_name = null ) {
19441945
/*
1945-
* Handle the srcset special case where the attribute value can contain multiple parts, each in the format `URL [WIDTH] [PIXEL_DENSITY]`.
1946-
* So we split the srcset attribute value by commas and then return the first token of each item, omitting width descriptor and pixel density descriptor.
1946+
* Handle the srcset special case where the attribute value can contain multiple parts, each in the format `URL [WIDTH OR PIXEL_DENSITY]`.
1947+
* So we split the srcset attribute value by commas and then return the first token of each item, omitting width or pixel density descriptor.
19471948
* This splitting cannot be done for other URLs because it a comma can appear in a URL itself generally, but the syntax can break in srcset,
19481949
* unless the commas are URL-encoded.
19491950
*/

includes/validation/class-amp-validation-error-taxonomy.php

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3166,10 +3166,9 @@ static function ( $attribute_name ) {
31663166
case AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE_REGEX_CASEI:
31673167
case AMP_Tag_And_Attribute_Sanitizer::INVALID_DISALLOWED_VALUE_REGEX:
31683168
return sprintf(
3169-
/* translators: %1$s is the attribute name, %2$s is the invalid attribute value */
3170-
esc_html__( 'The attribute %1$s is set to the invalid value %2$s', 'amp' ),
3171-
'<code>' . esc_html( $validation_error['node_name'] ) . '</code>',
3172-
'<code>' . esc_html( $validation_error['element_attributes'][ $validation_error['node_name'] ] ) . '</code>'
3169+
/* translators: %s is the attribute name */
3170+
esc_html__( 'The attribute %s is set to an invalid value', 'amp' ),
3171+
'<code>' . esc_html( $validation_error['node_name'] ) . '</code>'
31733172
);
31743173

31753174
case AMP_Tag_And_Attribute_Sanitizer::INVALID_URL_PROTOCOL:
@@ -3206,6 +3205,14 @@ static function ( $attribute_name ) {
32063205
'<code>' . esc_html( $validation_error['node_name'] ) . '</code>'
32073206
);
32083207

3208+
case AMP_Tag_And_Attribute_Sanitizer::DUPLICATE_DIMENSIONS:
3209+
return sprintf(
3210+
/* translators: %1$s is the attribute name, %2$s is the tag name */
3211+
esc_html__( 'Multiple image candidates with the same width or pixel density found in attribute %1$s in tag %2$s', 'amp' ),
3212+
'<code>' . esc_html( $validation_error['node_name'] ) . '</code>',
3213+
'<code>' . esc_html( $validation_error['parent_name'] ) . '</code>'
3214+
);
3215+
32093216
case AMP_Tag_And_Attribute_Sanitizer::INVALID_LAYOUT_WIDTH:
32103217
case AMP_Tag_And_Attribute_Sanitizer::INVALID_LAYOUT_HEIGHT:
32113218
case AMP_Tag_And_Attribute_Sanitizer::INVALID_LAYOUT_AUTO_HEIGHT:
@@ -3339,6 +3346,8 @@ public static function get_source_key_label( $key, $validation_error ) {
33393346
return __( 'URL', 'amp' );
33403347
case 'message':
33413348
return __( 'Message', 'amp' );
3349+
case 'duplicate_dimensions':
3350+
return __( 'Duplicate dimensions', 'amp' );
33423351
default:
33433352
return $key;
33443353
}

tests/php/test-amp-helper-functions.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1315,7 +1315,8 @@ static function( $classes ) {
13151315
}
13161316
);
13171317
$ordered_sanitizers = array_keys( amp_get_content_sanitizers() );
1318-
$this->assertEquals( 'Even_After_Validating_Sanitizer', $ordered_sanitizers[ count( $ordered_sanitizers ) - 4 ] );
1318+
$this->assertEquals( 'Even_After_Validating_Sanitizer', $ordered_sanitizers[ count( $ordered_sanitizers ) - 5 ] );
1319+
$this->assertEquals( 'AMP_Layout_Sanitizer', $ordered_sanitizers[ count( $ordered_sanitizers ) - 4 ] );
13191320
$this->assertEquals( 'AMP_Style_Sanitizer', $ordered_sanitizers[ count( $ordered_sanitizers ) - 3 ] );
13201321
$this->assertEquals( 'AMP_Meta_Sanitizer', $ordered_sanitizers[ count( $ordered_sanitizers ) - 2 ] );
13211322
$this->assertEquals( 'AMP_Tag_And_Attribute_Sanitizer', $ordered_sanitizers[ count( $ordered_sanitizers ) - 1 ] );
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
<?php
2+
/**
3+
* Class AMP_Srcset_Sanitizer_Test.
4+
*
5+
* @package AMP
6+
*/
7+
8+
use AmpProject\AmpWP\Tests\Helpers\MarkupComparison;
9+
10+
/**
11+
* Class AMP_Srcset_Sanitizer_Test
12+
*
13+
* @coversDefaultClass AMP_Srcset_Sanitizer
14+
*/
15+
class AMP_Srcset_Sanitizer_Test extends WP_UnitTestCase {
16+
17+
use MarkupComparison;
18+
19+
/**
20+
* Provide the data to test the sanitize() method.
21+
*
22+
* @return array[] Test data.
23+
*/
24+
public function data_sanitize() {
25+
return [
26+
'img_with_valid_srcset' => [
27+
'<img src="https://example.com/image.jpg" srcset="https://example.com/image.jpg, https://example.com/image-1.jpg 512w, https://example.com/image-2.jpg 1024w , https://example.com/image-3.jpg 300w, https://example.com/image-4.jpg 768w" width="350" height="150">',
28+
null,
29+
],
30+
31+
'img_with_duplicate_img_candidate_but_same_url' => [
32+
'<img src="https://example.com/image.jpg" srcset="https://example.com/image.jpg, https://example.com/image-1.jpg 1024w, https://example.com/image-1.jpg 1024w , https://example.com/image-2.jpg 300w, https://example.com/image-3.jpg 768w" width="350" height="150">',
33+
'<img src="https://example.com/image.jpg" srcset="https://example.com/image.jpg 1x, https://example.com/image-1.jpg 1024w, https://example.com/image-2.jpg 300w, https://example.com/image-3.jpg 768w" width="350" height="150">',
34+
],
35+
36+
'img_with_duplicate_img_candidate_but_different_url' => [
37+
'<img src="https://example.com/image.jpg" srcset="https://example.com/image.jpg, https://example.com/image-1.jpg 1024w, https://example.com/image-2.jpg 1024w , https://example.com/image-2.jpg 300w, https://example.com/image-3.jpg 768w" width="350" height="150">',
38+
'<img src="https://example.com/image.jpg" srcset="https://example.com/image.jpg 1x, https://example.com/image-1.jpg 1024w, https://example.com/image-2.jpg 300w, https://example.com/image-3.jpg 768w" width="350" height="150">',
39+
[ AMP_Tag_And_Attribute_Sanitizer::DUPLICATE_DIMENSIONS ],
40+
],
41+
42+
'amp_img_srcset_missing_comma' => [
43+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="https://example.com/image-1.jpg 1024w https://example.com/image-2.jpg 1024w">',
44+
'<img src="https://example.com/image.jpg" height="100" width="200">',
45+
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE ],
46+
],
47+
48+
'amp_img_srcset_empty' => [
49+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="">',
50+
null,
51+
],
52+
53+
'amp_img_srcset_whitespace' => [
54+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset=" ">',
55+
'<img src="https://example.com/image.jpg" height="100" width="200">',
56+
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE ],
57+
],
58+
59+
'amp_img_srcset_invalid_bare_number' => [
60+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="1">',
61+
'<img src="https://example.com/image.jpg" height="100" width="200">',
62+
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE ],
63+
],
64+
65+
'amp_img_srcset_invalid_dimension_unit' => [
66+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="https://example.com/image.jpg 500px">',
67+
'<img src="https://example.com/image.jpg" height="100" width="200">',
68+
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE ],
69+
],
70+
71+
'amp_img_srcset_invalid_space_in_dimension' => [
72+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="https://example.com/image.jpg 2 x">',
73+
'<img src="https://example.com/image.jpg" height="100" width="200">',
74+
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE ],
75+
],
76+
77+
'amp_img_srcset_invalid_dimension_type' => [
78+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="https://example.com/image.jpg 5kw">',
79+
'<img src="https://example.com/image.jpg" height="100" width="200">',
80+
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE ],
81+
],
82+
83+
'amp_img_srcset_invalid_width_descriptor' => [
84+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="https://example.com/image.jpg 5.2w">',
85+
'<img src="https://example.com/image.jpg" height="100" width="200">',
86+
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE ],
87+
],
88+
89+
'amp_img_srcset_invalid_zero_width' => [
90+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="https://example.com/image.jpg 0w">',
91+
'<img src="https://example.com/image.jpg" height="100" width="200">',
92+
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE ],
93+
],
94+
95+
'amp_img_srcset_invalid_zero_pixel_density' => [
96+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="https://example.com/image.jpg 0.0x">',
97+
'<img src="https://example.com/image.jpg" height="100" width="200">',
98+
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE ],
99+
],
100+
101+
'amp_img_srcset_valid_pixel_density' => [
102+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="https://example.com/image.jpg 5x">',
103+
null,
104+
],
105+
106+
'amp_img_srcset_valid_float_pixel_density' => [
107+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="https://example.com/image.jpg 5.2x">',
108+
null,
109+
],
110+
111+
'amp_img_srcset_valid_float_pixel_density_with_leading_zero' => [
112+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="https://example.com/image.jpg 0.002x">',
113+
null,
114+
],
115+
116+
'amp_img_srcset_invalid_tokens' => [
117+
'<img src="https://example.com/image.jpg" height="100" width="200" srcset="bad bad">',
118+
'<img src="https://example.com/image.jpg" height="100" width="200">',
119+
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_ATTR_VALUE ],
120+
],
121+
];
122+
}
123+
124+
/**
125+
* Test the sanitize() method.
126+
*
127+
* @covers ::sanitize()
128+
* @dataProvider data_sanitize()
129+
*
130+
* @param string $source Source.
131+
* @param string|null $expected Expected. If null, then no change from source.
132+
* @param array $expected_error_codes Expected error codes.
133+
*/
134+
public function test_sanitize( $source, $expected = null, $expected_error_codes = [] ) {
135+
$error_codes = [];
136+
if ( null === $expected ) {
137+
$expected = $source;
138+
}
139+
140+
$args = [
141+
'use_document_element' => true,
142+
'validation_error_callback' => static function( $error ) use ( &$error_codes ) {
143+
$error_codes[] = $error['code'];
144+
},
145+
];
146+
147+
$dom = AMP_DOM_Utils::get_dom_from_content( $source );
148+
149+
$sanitizer = new AMP_Srcset_Sanitizer( $dom, $args );
150+
$sanitizer->sanitize();
151+
152+
$this->assertEqualSets( $error_codes, $expected_error_codes );
153+
154+
$content = AMP_DOM_Utils::get_content_from_dom( $dom );
155+
156+
$this->assertEqualMarkup( $expected, $content );
157+
}
158+
}

tests/php/test-tag-and-attribute-sanitizer.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2281,8 +2281,8 @@ function getRemoteData() { /*...*/ }
22812281
[ AMP_Tag_And_Attribute_Sanitizer::MISSING_URL, AMP_Tag_And_Attribute_Sanitizer::ATTR_REQUIRED_BUT_MISSING ],
22822282
],
22832283

2284-
'amp_img_missing_url' => [
2285-
'<amp-img src="" height="100" width="200"></amp-img>',
2284+
'amp_img_missing_srcset' => [
2285+
'<amp-img srcset="" height="100" width="200"></amp-img>',
22862286
'',
22872287
[],
22882288
[ AMP_Tag_And_Attribute_Sanitizer::MISSING_URL, AMP_Tag_And_Attribute_Sanitizer::ATTR_REQUIRED_BUT_MISSING ],

0 commit comments

Comments
 (0)