@@ -123,9 +123,8 @@ public function parseRecursive( $url ) {
123
123
$ sitemaps = $ this ->sitemaps ;
124
124
$ urls = $ this ->urls ;
125
125
try {
126
- $ this ->parse ( strval ( $ todo [0 ] ) );
126
+ $ this ->parse ( $ todo [0 ] );
127
127
} catch ( WP2StaticException $ e ) {
128
- WsLog::w ( $ e ->getMessage () );
129
128
// Keep crawling
130
129
continue ;
131
130
}
@@ -141,7 +140,7 @@ public function parseRecursive( $url ) {
141
140
*/
142
141
public function addToQueue ( array $ url_array ) : void {
143
142
foreach ( $ url_array as $ url ) {
144
- $ url = $ this ->urlEncode ( strval ( $ url ) );
143
+ $ url = $ this ->urlEncode ( $ url );
145
144
if ( $ this ->urlValidate ( $ url ) ) {
146
145
$ this ->queue [] = $ url ;
147
146
}
@@ -181,11 +180,6 @@ public function parse( $url, $url_content = null ) {
181
180
}
182
181
$ this ->history [] = $ this ->current_url ;
183
182
$ response = is_string ( $ url_content ) ? $ url_content : $ this ->getContent ();
184
-
185
- if ( ! $ response ) {
186
- return ;
187
- }
188
-
189
183
if ( parse_url ( $ this ->current_url , PHP_URL_PATH ) === self ::ROBOTSTXT_PATH ) {
190
184
$ this ->parseRobotstxt ( $ response );
191
185
return ;
@@ -223,7 +217,7 @@ protected function clean() {
223
217
/**
224
218
* Request the body content of an URL
225
219
*
226
- * @return ? string Raw body content
220
+ * @return string Raw body content
227
221
* @throws WP2StaticException
228
222
*/
229
223
protected function getContent () {
@@ -238,22 +232,9 @@ protected function getContent() {
238
232
if ( ! isset ( $ this ->config ['guzzle ' ]['headers ' ]['User-Agent ' ] ) ) {
239
233
$ this ->config ['guzzle ' ]['headers ' ]['User-Agent ' ] = $ this ->user_agent ;
240
234
}
241
- $ client = new WP2StaticGuzzleHttp \Client ( [ 'verify ' => false ] );
242
-
243
- if ( ! is_array ( $ this ->config ['guzzle ' ] ) ) {
244
- WsLog::w ( 'Guzzle config is not in expected array format ' );
245
- return null ;
246
- }
235
+ $ client = new WP2StaticGuzzleHttp \Client ();
247
236
$ res = $ client ->request ( 'GET ' , $ this ->current_url , $ this ->config ['guzzle ' ] );
248
- if ( $ res ->getStatusCode () === 200 ) {
249
- return $ res ->getBody ()->getContents ();
250
- } else {
251
- WsLog::w (
252
- 'Got ' . $ res ->getStatusCode () .
253
- ' for sitemap url " ' . $ this ->current_url . '", skipping. '
254
- );
255
- return null ;
256
- }
237
+ return $ res ->getBody ()->getContents ();
257
238
} catch ( WP2StaticGuzzleHttp \Exception \TransferException $ e ) {
258
239
throw new WP2StaticException ( 'Unable to fetch URL contents ' , 0 , $ e );
259
240
} catch ( WP2StaticGuzzleHttp \Exception \GuzzleException $ e ) {
@@ -271,7 +252,7 @@ protected function parseRobotstxt( $robotstxt ) {
271
252
// Split lines into array
272
253
$ lines = array_filter (
273
254
array_map (
274
- fn ( $ line ) => trim ( ( string ) $ line ) ,
255
+ ' trim ' ,
275
256
(array ) preg_split ( '/\r\n|\n|\r/ ' , $ robotstxt )
276
257
)
277
258
);
@@ -288,10 +269,7 @@ protected function parseRobotstxt( $robotstxt ) {
288
269
$ line = $ line [0 ];
289
270
290
271
// Split by directive and rule
291
- $ pair = array_map (
292
- fn ( $ line ) => trim ( (string ) $ line ),
293
- (array ) preg_split ( '/:/ ' , $ line , 2 )
294
- );
272
+ $ pair = array_map ( 'trim ' , (array ) preg_split ( '/:/ ' , $ line , 2 ) );
295
273
// Check if the line contains a sitemap
296
274
if (
297
275
strtolower ( $ pair [0 ] ) !== self ::XML_TAG_SITEMAP ||
@@ -324,7 +302,7 @@ protected function addArray( $type, array $array ) {
324
302
if ( ! isset ( $ array ['loc ' ] ) ) {
325
303
return false ;
326
304
}
327
- $ array ['loc ' ] = $ this ->urlEncode ( trim ( strval ( $ array ['loc ' ] ) ) );
305
+ $ array ['loc ' ] = $ this ->urlEncode ( trim ( $ array ['loc ' ] ) );
328
306
if ( $ this ->urlValidate ( $ array ['loc ' ] ) ) {
329
307
switch ( $ type ) {
330
308
case self ::XML_TAG_SITEMAP :
@@ -396,12 +374,7 @@ protected function parseString( $string ) {
396
374
// Strings are not part of any documented sitemap standard
397
375
return false ;
398
376
}
399
- $ array = array_filter (
400
- array_map (
401
- fn ( $ line ) => trim ( (string ) $ line ),
402
- (array ) preg_split ( '/\r\n|\n|\r/ ' , $ string )
403
- )
404
- );
377
+ $ array = array_filter ( array_map ( 'trim ' , (array ) preg_split ( '/\r\n|\n|\r/ ' , $ string ) ) );
405
378
foreach ( $ array as $ line ) {
406
379
if ( $ this ->isSitemapURL ( $ line ) ) {
407
380
$ this ->addArray ( self ::XML_TAG_SITEMAP , [ 'loc ' => $ line ] );
0 commit comments