Skip to content

Commit 6066fcc

Browse files
Strip inline SVG data URIs prior to content parsing
Inline SVG data URIs can cause issues for our parser when they are too long. Since we are not interested in them, it is simpler to strip them out before parsing.
1 parent 0a69bf6 commit 6066fcc

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

php/class-utils.php

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,9 @@ public static function is_admin() {
620620
* @return array
621621
*/
622622
public static function extract_urls( $content ) {
623+
// Remove inline SVG data URIs, as they can cause parsing issues when extracting URLs.
624+
$content = self::strip_inline_svg_data_uris( $content );
625+
623626
preg_match_all(
624627
"#([\"']?)("
625628
. '(?:[\w-]+:)?//?'
@@ -642,6 +645,25 @@ public static function extract_urls( $content ) {
642645
return array_values( $post_links );
643646
}
644647

648+
/**
 * Strip inline base64-encoded SVG data URIs from content.
 *
 * Used ahead of URL extraction, where very long inline SVG payloads can cause
 * parsing issues. Only the base64 form is removed; the surrounding markup
 * (quotes, `url(...)` wrappers, attributes) is left in place.
 *
 * Matches `data:image/svg+xml`, followed by any number of optional media type
 * parameters (e.g. `;charset=utf-8`), followed by `;base64,` and the payload.
 * Matching is case-insensitive.
 *
 * @param string $content The content to process.
 *
 * @return string The content with SVG data URIs removed, or the original
 *                content if the regular expression engine reported an error.
 */
public static function strip_inline_svg_data_uris( $content ) {
	// `(?:;[a-z0-9_.=-]+)*` accepts optional parameters such as `;charset=utf-8`
	// that may legally appear between the media type and the `;base64` marker.
	// The payload uses a possessive quantifier (`++`) so that a very long
	// base64 run is consumed in one pass without keeping backtracking state.
	$svg_data_uri_pattern = '#data:image/svg\+xml(?:;[a-z0-9_.=-]+)*;base64,[a-z0-9/+=]++#i';

	// Remove all occurrences of SVG data URIs from the content.
	$cleaned_content = preg_replace( $svg_data_uri_pattern, '', $content );

	// preg_replace() returns null on failure; fall back to the original
	// content in that case to avoid data loss.
	return null === $cleaned_content ? $content : $cleaned_content;
}
665+
666+
645667
/**
646668
* Is saving metadata.
647669
*

0 commit comments

Comments
 (0)