Skip to content

Commit 07c7251

Browse files
authored
Merge pull request #30 from flownative/task/crawler-error-handling
Sitemap crawler error handling
2 parents (298f040 + 3351fdb), commit 07c7251

File tree

1 file changed: +17 -4 lines changed

1 file changed: +17 -4 lines changed

root-files/opt/flownative/sitemap-crawler/sitemap-crawler.php

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,12 @@
99

1010
if (PHP_MAJOR_VERSION >= 9) {
1111
echo "This script is not compatible with PHP 9 or higher yet\n";
12-
exit (1);
12+
exit(1);
1313
}
1414

1515
if (getenv('FLOWNATIVE_LOG_PATH') === false) {
1616
echo "Missing environment variable FLOWNATIVE_LOG_PATH\n";
17-
exit (1);
17+
exit(1);
1818
}
1919

2020
$internalBaseUrl = getenv('SITEMAP_CRAWLER_INTERNAL_BASE_URL');
@@ -53,7 +53,7 @@ public function __construct(string $sitemapUrl, string $internalBaseUrl)
5353
$this->parseSitemap($sitemapUrl);
5454
} catch (\Throwable $throwable) {
5555
$this->log($throwable->getMessage());
56-
exit (1);
56+
exit(1);
5757
}
5858
}
5959

@@ -63,7 +63,16 @@ public function __construct(string $sitemapUrl, string $internalBaseUrl)
6363
public function crawl(): void
6464
{
6565
$firstUrl = reset($this->urls);
66+
if ($firstUrl === false) {
67+
$this->log('No first URL to parse.');
68+
exit(0);
69+
}
70+
6671
$parsedFirstUrl = parse_url($firstUrl);
72+
if ($parsedFirstUrl === false) {
73+
$this->log('Could not parse first URL: ' . $firstUrl);
74+
exit(1);
75+
}
6776
$internalFirstUrl = $this->internalBaseUrl . $parsedFirstUrl['path'] . (isset($parsedFirstUrl['query']) ? '?' . $parsedFirstUrl['query'] : '');
6877

6978
$this->log(sprintf('Checking connectivity by retrieving %s, simulating host %s', $internalFirstUrl, $parsedFirstUrl['host']));
@@ -110,6 +119,10 @@ public function crawl(): void
110119
$curlHandles = [];
111120
foreach ($chunk as $i => $url) {
112121
$parsedUrl = parse_url($url);
122+
if ($parsedUrl === false) {
123+
$this->log('Could not parse URL: ' . $url);
124+
continue;
125+
}
113126
$url = $this->internalBaseUrl . $parsedUrl['path'] . (isset($parsedUrl['query']) ? '?' . $parsedUrl['query'] : '');
114127

115128
$curlHandles[$i] = curl_init($url);
@@ -141,7 +154,7 @@ public function crawl(): void
141154
}
142155
} catch (\Throwable $throwable) {
143156
$this->log($throwable->getMessage());
144-
exit (1);
157+
exit(1);
145158
}
146159
}
147160

0 commit comments

Comments (0)