diff --git a/src/SitemapParser.php b/src/SitemapParser.php index 27ac19c3..2812a67b 100644 --- a/src/SitemapParser.php +++ b/src/SitemapParser.php @@ -173,8 +173,13 @@ public function getQueue() { * @throws WP2StaticException */ public function parse( $url, $url_content = null ) { + $check_url = $url; + $clean_url = preg_replace( '/(?clean(); - $this->current_url = $this->urlEncode( $url ); + if ( $clean_url ) { + $check_url = $clean_url; + } + $this->current_url = $this->urlEncode( $check_url ); if ( ! $this->urlValidate( $this->current_url ) ) { throw new WP2StaticException( 'Invalid URL' ); } @@ -242,6 +247,16 @@ protected function getContent() { } } + /** + * callable trim function + * + * @param string $string + * @return string + */ + protected function trim( $string ) { + return trim( $string ); + } + /** * Search for sitemaps in the robots.txt content * @@ -252,7 +267,7 @@ protected function parseRobotstxt( $robotstxt ) { // Split lines into array $lines = array_filter( array_map( - 'trim', + [ $this, 'trim' ], (array) preg_split( '/\r\n|\n|\r/', $robotstxt ) ) ); @@ -269,7 +284,7 @@ protected function parseRobotstxt( $robotstxt ) { $line = $line[0]; // Split by directive and rule - $pair = array_map( 'trim', (array) preg_split( '/:/', $line, 2 ) ); + $pair = array_map( [ $this, 'trim' ], (array) preg_split( '/:/', $line, 2 ) ); // Check if the line contains a sitemap if ( strtolower( $pair[0] ) !== self::XML_TAG_SITEMAP || @@ -374,7 +389,12 @@ protected function parseString( $string ) { // Strings are not part of any documented sitemap standard return false; } - $array = array_filter( array_map( 'trim', (array) preg_split( '/\r\n|\n|\r/', $string ) ) ); + $array = array_filter( + array_map( + [ $this, 'trim' ], + (array) preg_split( '/\r\n|\n|\r/', $string ) + ) + ); foreach ( $array as $line ) { if ( $this->isSitemapURL( $line ) ) { $this->addArray( self::XML_TAG_SITEMAP, [ 'loc' => $line ] );