Skip to content

Commit 3e1f268

Browse files
author
Tom
committed
Fix infinite loop when coming across an unrecognised line
1 parent 9199385 commit 3e1f268

File tree

4 files changed

+43
-4
lines changed

4 files changed

+43
-4
lines changed

src/Robot/RobotsFile.php

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,33 @@ class RobotsFile
1616
*/
1717
private $lines;
1818

19+
const USER_AGENT = 'user-agent';
20+
21+
const DISALLOW = 'disallow';
22+
23+
const ALLOW = 'allow';
24+
1925
/**
2026
* Construct this Robots file by parsing raw robots.txt content
* into the list of directive lines stored in $this->lines.
2127
* @param string $content Raw text of the robots.txt file
2228
*/
2329
public function __construct($content)
2430
{
2531
// Lower-case the whole input first, then remove everything from a '#'
// up to the end of that line ('.' does not match a newline here).
$withoutComments = preg_replace( '/#.*/', '', strtolower($content));
32+
2633
// Process the remaining text one "\n"-separated line at a time.
foreach(explode("\n", $withoutComments) as $line) {
2734
// Split the line on ':' and trim each part. array_filter() without a
// callback then drops empty parts while keeping the original keys.
// NOTE(review): a part that is exactly "0" is also dropped, and a value
// containing ':' produces more than two parts - confirm this is intended.
$lineParts = array_filter(array_map('trim', explode(':', $line)));
28-
if (count($lineParts) == 2) {
35+
// Only lines accepted by lineIsValid() are appended to $this->lines.
if ($this->lineIsValid($lineParts)) {
2936
$this->lines[] = $lineParts;
3037
}
3138
}
3239
}
3340

41+
/**
 * Decide whether a parsed line is a directive this parser understands.
 *
 * A line is valid when it consists of exactly two parts, stored at
 * indexes 0 and 1, and the first part is one of the known directive
 * names (user-agent, disallow or allow).
 *
 * @param array $line Parts of a single line, as passed in by the constructor
 * @return bool True when the line should be kept
 */
private function lineIsValid($line) {
    $validDirectives = [self::USER_AGENT, self::DISALLOW, self::ALLOW];

    // The constructor filters out empty parts without re-indexing, so two
    // remaining parts may sit at keys such as 1 and 2, or 0 and 2. Make sure
    // both the directive (index 0) and its argument (index 1) really exist
    // before reading them.
    if (count($line) !== 2 || !isset($line[0], $line[1])) {
        return false;
    }

    // Strict comparison: a directive name must match as a string.
    return in_array($line[0], $validDirectives, true);
}
45+
3446
/**
3547
* Get the first directive in the file
3648
*/

src/Robot/RobotsTxt.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ private function parseFile(RobotsFile $robotFile)
3131
{
3232
while($robotFile->hasLines()) {
3333
$currentUserAgents = [];
34-
while ($robotFile->firstDirectiveIs('user-agent')) {
34+
while ($robotFile->firstDirectiveIs(RobotsFile::USER_AGENT)) {
3535
$currentUserAgents[] = $robotFile->shiftArgument();
3636
}
37-
while ($robotFile->firstDirectiveIs('allow', 'disallow')) {
38-
$isAllowed = $robotFile->firstDirective() == 'allow';
37+
while ($robotFile->firstDirectiveIs(RobotsFile::ALLOW, RobotsFile::DISALLOW)) {
38+
$isAllowed = $robotFile->firstDirective() == RobotsFile::ALLOW;
3939
$urlParts = array_filter(explode('/', $robotFile->shiftArgument()));
4040
$this->tree->getNode($urlParts)->addRule($currentUserAgents, $isAllowed);
4141
}

tests/RobotTest.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,9 @@ public function testMultipleNonConsecutiveUserAgents()
105105
$this->assertFalse(self::getRobotsTxt('multiUserAgent')->isAllowed('robot2', '/some/other'));
106106
$this->assertTrue(self::getRobotsTxt('multiUserAgent')->isAllowed('Googlebot', '/some/other'));
107107
}
108+
109+
/**
 * Loads the 'corrupted' fixture and checks that Googlebot is
 * allowed to access /some/other.
 */
public function testFileWithInvalidLines()
{
    $robotsTxt = self::getRobotsTxt('corrupted');
    $googlebotAllowed = $robotsTxt->isAllowed('Googlebot', '/some/other');

    $this->assertTrue($googlebotAllowed);
}
108113
}

tests/files/corrupted.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
User-agent: *
2+
User-agent: UA
3+
User-agent: Googlebot
4+
5+
6+
7+
Allow: /
8+
Disallow: /private/
9+
Disallow: /secret/page.html
10+
User-agent: robot2
11+
Something: 10
12+
Allow: /
13+
Whatever: 50
14+
15+
16+
17+
18+
Disallow: /some/other/
19+
20+
21+
22+

0 commit comments

Comments
 (0)