Skip to content

Commit b4c723a

Browse files
authored
Merge pull request #443 from ampproject/fix/441-svg-regex
2 parents 8af1f29 + a3a47c4 commit b4c723a

File tree

13 files changed

+147
-51
lines changed

13 files changed

+147
-51
lines changed

src/Dom/Document/Filter/AmpBindAttributes.php

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,7 @@ public function beforeLoad($html)
116116
* @return string Replacement.
117117
*/
118118
$replaceCallback = function ($tagMatches) {
119-
120-
// Strip the self-closing slash as long as it is not an attribute value, like for the href attribute.
121-
$oldAttrs = rtrim(preg_replace('#(?<!=)/$#', '', $tagMatches['attrs']));
122-
119+
$oldAttrs = $this->maybeStripSelfClosingSlash($tagMatches['attrs']);
123120
$newAttrs = '';
124121
$offset = 0;
125122
while (
@@ -151,22 +148,17 @@ public function beforeLoad($html)
151148
return '<' . $tagMatches['name'] . $newAttrs . '>';
152149
};
153150

154-
$converted = preg_replace_callback(
151+
$result = preg_replace_callback(
155152
self::AMP_BIND_SQUARE_START_PATTERN,
156153
$replaceCallback,
157154
$html
158155
);
159156

160-
/*
161-
* If the regex engine incurred an error during processing, for example exceeding the backtrack
162-
* limit, $converted will be null. In this case we return the originally passed document to allow
163-
* DOMDocument to attempt to load it. If the AMP HTML doesn't make use of amp-bind or similar
164-
* attributes, then everything should still work.
165-
*
166-
* See https://github.com/ampproject/amp-wp/issues/993 for additional context on this issue.
167-
* See http://php.net/manual/en/pcre.constants.php for additional info on PCRE errors.
168-
*/
169-
return (null !== $converted) ? $converted : $html;
157+
if (! is_string($result)) {
158+
return $html;
159+
}
160+
161+
return $result;
170162
}
171163

172164
/**
@@ -205,10 +197,7 @@ public function afterSave($html)
205197
* @return string Replacement.
206198
*/
207199
$replaceCallback = function ($tagMatches) {
208-
209-
// Strip the self-closing slash as long as it is not an attribute value, like for the href attribute.
210-
$oldAttrs = rtrim(preg_replace('#(?<!=)/$#', '', $tagMatches['attrs']));
211-
200+
$oldAttrs = $this->maybeStripSelfClosingSlash($tagMatches['attrs']);
212201
$newAttrs = '';
213202
$offset = 0;
214203
while (
@@ -241,21 +230,33 @@ public function afterSave($html)
241230
return '<' . $tagMatches['name'] . $newAttrs . '>';
242231
};
243232

244-
$converted = preg_replace_callback(
233+
$result = preg_replace_callback(
245234
self::AMP_BIND_DATA_START_PATTERN,
246235
$replaceCallback,
247236
$html
248237
);
249238

250-
/*
251-
* If the regex engine incurred an error during processing, for example exceeding the backtrack
252-
* limit, $converted will be null. In this case we return the originally passed document to allow
253-
* DOMDocument to attempt to load it. If the AMP HTML doesn't make use of amp-bind or similar
254-
* attributes, then everything should still work.
255-
*
256-
* See https://github.com/ampproject/amp-wp/issues/993 for additional context on this issue.
257-
* See http://php.net/manual/en/pcre.constants.php for additional info on PCRE errors.
258-
*/
259-
return (null !== $converted) ? $converted : $html;
239+
if (! is_string($result)) {
240+
return $html;
241+
}
242+
243+
return $result;
244+
}
245+
246+
/**
247+
* Strip the self-closing slash as long as it is not an attribute value, like for the href attribute.
248+
*
249+
* @param string $attributes Attributes to strip the self-closing slash of.
250+
* @return string Adapted attributes.
251+
*/
252+
private function maybeStripSelfClosingSlash($attributes)
253+
{
254+
$result = preg_replace('#(?<!=)/$#', '', $attributes);
255+
256+
if (! is_string($result)) {
257+
return rtrim($attributes);
258+
}
259+
260+
return rtrim($result);
260261
}
261262
}

src/Dom/Document/Filter/AmpEmojiAttribute.php

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public function beforeLoad($html)
4848
{
4949
$this->usedAmpEmoji = '';
5050

51-
return preg_replace_callback(
51+
$result = preg_replace_callback(
5252
self::AMP_EMOJI_ATTRIBUTE_PATTERN,
5353
function ($matches) {
5454
// Split into individual attributes.
@@ -85,6 +85,12 @@ function ($matches) {
8585
$html,
8686
1
8787
);
88+
89+
if (! is_string($result)) {
90+
return $html;
91+
}
92+
93+
return $result;
8894
}
8995

9096
/**
@@ -99,11 +105,17 @@ public function afterSave($html)
99105
return $html;
100106
}
101107

102-
return preg_replace(
108+
$result = preg_replace(
103109
'/(<html\s[^>]*?)' . preg_quote(Document::EMOJI_AMP_ATTRIBUTE_PLACEHOLDER, '/') . '="([^"]*)"/i',
104110
'\1' . $this->usedAmpEmoji . '\2',
105111
$html,
106112
1
107113
);
114+
115+
if (! is_string($result)) {
116+
return $html;
117+
}
118+
119+
return $result;
108120
}
109121
}

src/Dom/Document/Filter/DoctypeNode.php

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,19 @@ final class DoctypeNode implements BeforeLoadFilter, AfterSaveFilter
6161
*/
6262
public function beforeLoad($html)
6363
{
64-
return preg_replace(
64+
$result = preg_replace(
6565
self::HTML_SECURE_DOCTYPE_IF_NOT_FIRST_PATTERN,
6666
self::HTML_SECURE_DOCTYPE_REPLACEMENT_TEMPLATE,
6767
$html,
6868
1,
6969
$this->securedDoctype
7070
);
71+
72+
if (! is_string($result)) {
73+
return $html;
74+
}
75+
76+
return $result;
7177
}
7278

7379
/**
@@ -82,11 +88,17 @@ public function afterSave($html)
8288
return $html;
8389
}
8490

85-
return preg_replace(
91+
$result = preg_replace(
8692
self::HTML_RESTORE_DOCTYPE_PATTERN,
8793
self::HTML_RESTORE_DOCTYPE_REPLACEMENT_TEMPLATE,
8894
$html,
8995
1
9096
);
97+
98+
if (! is_string($result)) {
99+
return $html;
100+
}
101+
102+
return $result;
91103
}
92104
}

src/Dom/Document/Filter/HttpEquivCharset.php

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,23 +102,31 @@ public function beforeLoad($html)
102102
$count = 0;
103103

104104
// We try first to detect an existing <head> node.
105-
$html = preg_replace(
105+
$result = preg_replace(
106106
self::HTML_GET_HEAD_OPENING_TAG_PATTERN,
107107
self::HTML_GET_HEAD_OPENING_TAG_REPLACEMENT,
108108
$html,
109109
1,
110110
$count
111111
);
112112

113+
if (is_string($result)) {
114+
$html = $result;
115+
}
116+
113117
// If no <head> was found, we look for the <html> tag instead.
114118
if ($count < 1) {
115-
$html = preg_replace(
119+
$result = preg_replace(
116120
self::HTML_GET_HTML_OPENING_TAG_PATTERN,
117121
self::HTML_GET_HTML_OPENING_TAG_REPLACEMENT,
118122
$html,
119123
1,
120124
$count
121125
);
126+
127+
if (is_string($result)) {
128+
$html = $result;
129+
}
122130
}
123131

124132
// Finally, we just prepend the head with the required http-equiv charset.
@@ -182,6 +190,12 @@ public function afterSave($html)
182190
$this->temporaryCharset->parentNode->removeChild($this->temporaryCharset);
183191
}
184192

185-
return preg_replace(self::HTML_GET_HTTP_EQUIV_TAG_PATTERN, '', $html, 1);
193+
$result = preg_replace(self::HTML_GET_HTTP_EQUIV_TAG_PATTERN, '', $html, 1);
194+
195+
if (! is_string($result)) {
196+
return $html;
197+
}
198+
199+
return $result;
186200
}
187201
}

src/Dom/Document/Filter/MustacheScriptTemplates.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,17 @@ final class MustacheScriptTemplates implements BeforeLoadFilter, AfterLoadFilter
5555
*/
5656
public function beforeLoad($html)
5757
{
58-
return preg_replace(
58+
$result = preg_replace(
5959
'#<script(\s[^>]*?template=(["\']?)amp-mustache\2[^>]*)>(.*?)</script\s*?>#is',
6060
'<tmp-script$1>$3</tmp-script>',
6161
$html
6262
);
63+
64+
if (! is_string($result)) {
65+
return $html;
66+
}
67+
68+
return $result;
6369
}
6470

6571
/**

src/Dom/Document/Filter/NoscriptElements.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public function __construct(UniqueIdManager $uniqueIdManager)
5858
public function beforeLoad($html)
5959
{
6060
if (version_compare(LIBXML_DOTTED_VERSION, '2.8', '<')) {
61-
$html = preg_replace_callback(
61+
$result = preg_replace_callback(
6262
'#^.+?(?=<body)#is',
6363
function ($headMatches) {
6464
return preg_replace_callback(
@@ -73,6 +73,10 @@ function ($noscriptMatches) {
7373
},
7474
$html
7575
);
76+
77+
if (is_string($result)) {
78+
$html = $result;
79+
}
7680
}
7781

7882
return $html;

src/Dom/Document/Filter/ProtectEsiTags.php

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,13 @@ public function beforeLoad($html)
4444
'$1esi-',
4545
];
4646

47-
return preg_replace($patterns, $replacements, $html);
47+
$result = preg_replace($patterns, $replacements, $html);
48+
49+
if (! is_string($result)) {
50+
return $html;
51+
}
52+
53+
return $result;
4854
}
4955

5056
/**
@@ -67,6 +73,12 @@ public function afterSave($html)
6773
'/>',
6874
];
6975

70-
return preg_replace($patterns, $replacements, $html);
76+
$result = preg_replace($patterns, $replacements, $html);
77+
78+
if (! is_string($result)) {
79+
return $html;
80+
}
81+
82+
return $result;
7183
}
7284
}

src/Dom/Document/Filter/SelfClosingSVGElements.php

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,16 @@ public function beforeLoad($html)
3535
static $regexPattern = null;
3636

3737
if (null === $regexPattern) {
38-
$regexPattern = '#<(' . implode('|', self::SELF_CLOSING_TAGS) . ')([^>]*?)(?>\s*(?<!\\\\)/)?>(?!.*</\1>)#';
38+
$regexPattern = '#<(' . implode('|', self::SELF_CLOSING_TAGS) . ')((?>\s+[^/>]*))/?>(?!.*</\1>)#i';
3939
}
4040

41-
return preg_replace($regexPattern, '<$1$2></$1>', $html);
41+
$result = preg_replace($regexPattern, '<$1$2></$1>', $html);
42+
43+
if (! is_string($result)) {
44+
return $html;
45+
}
46+
47+
return $result;
4248
}
4349

4450
/**
@@ -52,9 +58,15 @@ public function afterSave($html)
5258
static $regexPattern = null;
5359

5460
if (null === $regexPattern) {
55-
$regexPattern = '#<(' . implode('|', self::SELF_CLOSING_TAGS) . ')([^>]*?)(?>\s*(?<!\\\\)\/)?>(<\/\1>)#i';
61+
$regexPattern = '#<(' . implode('|', self::SELF_CLOSING_TAGS) . ')((?>\s+[^>]*))>(?><\/\1>)#i';
62+
}
63+
64+
$result = preg_replace($regexPattern, '<$1$2 />', $html);
65+
66+
if (! is_string($result)) {
67+
return $html;
5668
}
5769

58-
return preg_replace($regexPattern, '<$1$2 />', $html);
70+
return $result;
5971
}
6072
}

src/Dom/Document/Filter/SelfClosingTags.php

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,13 @@ public function beforeLoad($html)
4040

4141
$this->selfClosingTagsTransformed = true;
4242

43-
return preg_replace($regexPattern, '<$1$2></$1>', $html);
43+
$result = preg_replace($regexPattern, '<$1$2></$1>', $html);
44+
45+
if (! is_string($result)) {
46+
return $html;
47+
}
48+
49+
return $result;
4450
}
4551

4652
/**
@@ -63,6 +69,12 @@ public function afterSave($html)
6369

6470
$this->selfClosingTagsTransformed = false;
6571

66-
return preg_replace($regexPattern, '', $html);
72+
$result = preg_replace($regexPattern, '', $html);
73+
74+
if (! is_string($result)) {
75+
return $html;
76+
}
77+
78+
return $result;
6779
}
6880
}

src/Dom/Document/Filter/SvgSourceAttributeEncoding.php

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,13 @@ final class SvgSourceAttributeEncoding implements AfterSaveFilter
3535
*/
3636
public function afterSave($html)
3737
{
38-
return preg_replace_callback(self::I_AMPHTML_SIZER_REGEX_PATTERN, [$this, 'adaptSizer'], $html);
38+
$result = preg_replace_callback(self::I_AMPHTML_SIZER_REGEX_PATTERN, [$this, 'adaptSizer'], $html);
39+
40+
if (! is_string($result)) {
41+
return $html;
42+
}
43+
44+
return $result;
3945
}
4046

4147
/**
@@ -50,6 +56,11 @@ private function adaptSizer($matches)
5056
$src = htmlspecialchars_decode($src, ENT_NOQUOTES);
5157
$src = preg_replace_callback(self::SRC_SVG_REGEX_PATTERN, [$this, 'adaptSvg'], $src);
5258

59+
if (! is_string($src)) {
60+
// The regex replace failed, so revert to the initial src.
61+
$src = $matches['src'];
62+
}
63+
5364
return $matches['before_src'] . $src . $matches['after_src'];
5465
}
5566

0 commit comments

Comments
 (0)