Skip to content

Commit 4ae4bc7

Browse files
committed
Merge pull request #12 from tomverran/fix-exact-match-precedence
Fix exact user-agent matches not always being chosen above partial matches
2 parents c308749 + 9830329 commit 4ae4bc7

File tree

5 files changed

+38
-1
lines changed

5 files changed

+38
-1
lines changed

src/Robot/Record.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,9 @@ public function isAllowed($userAgent, $url)
3838
{
3939
return !$this->ua->getMatches($userAgent) || $this->ar->isAllowed($url);
4040
}
41+
42+
/**
 * Tell whether this record names the given user-agent exactly, ignoring case.
 *
 * The user-agent is lower-cased and looked up among the entries that the
 * record's user-agent matcher returns for that same string.
 * NOTE(review): assumes getMatches() yields lower-cased strings - confirm
 * against the UserAgent class.
 *
 * @param string $userAgent The user-agent string to test.
 * @return bool True when the lower-cased user-agent is one of the matched entries.
 */
public function matchesExactly($userAgent)
{
    // Strict comparison is required: a loose in_array() compares
    // numeric-looking strings by value ("1e3" == "1000", "007" == "7"),
    // which would report an "exact" match for different user-agents.
    return in_array(strtolower($userAgent), $this->ua->getMatches($userAgent), true);
}
4146
}

src/Robot/RobotsTxt.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,16 @@ public function isAllowed($userAgent, $path)
8282
return true;
8383
}
8484

85+
/** @var Record[] $exactMatches */
86+
$exactMatches = array_filter($this->records, function(Record $r) use ($userAgent) {
87+
return $r->matchesExactly($userAgent);
88+
});
89+
90+
if (!empty($exactMatches)) {
91+
$firstMatch = array_shift($exactMatches);
92+
return $firstMatch->isAllowed($userAgent, $path);
93+
}
94+
8595
$matching = array_filter($this->records, function(Record $r) use ($userAgent) {
8696
return $r->matches($userAgent);
8797
});

tests/RecordTest.php

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,17 @@ public function givenMatch_returnLengthOfMatchedUAAsMatchStrength()
5454
$googleOnly = new Record(new UserAgent(['Googlebot']), new AccessRules([]));
5555
$this->assertTrue($googleOnly->getMatchStrength('G') == 9, 'Length of the matched UA is the strength');
5656
}
57+
58+
/**
 * A record is flagged as an exact match only when the whole user-agent
 * equals the record's own user-agent, regardless of letter case; shorter
 * or longer user-agents are not exact.
 *
 * @test
 */
public function givenExactMatch_flagAsBeingExact()
{
    $record = new Record(new UserAgent(['Googlebot']), new AccessRules([]));

    // Same name in either case counts as exact.
    foreach (['Googlebot', 'googlebot'] as $exactAgent) {
        $this->assertTrue($record->matchesExactly($exactAgent));
    }

    // A prefix of the name, or the name with a suffix, does not.
    foreach (['google', 'googlebot-news'] as $inexactAgent) {
        $this->assertFalse($record->matchesExactly($inexactAgent));
    }
}
5770
}

tests/RobotTest.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,11 @@ public function testRfcExample()
141141
$this->assertRfcExample('/org/plans.html', [$webcrawler, $excite], [$unhipBot, $other]);
142142
$this->assertRfcExample('/%7Ejim/jim.html', [$webcrawler, $excite], [$unhipBot, $other]);
143143
$this->assertRfcExample('/%7Emak/mak.html', [$webcrawler, $excite, $other], [$unhipBot]);
144+
}
144145

145-
146+
/**
 * With the "match" fixture (Googlebot disallowed from "/", Googlebot-News
 * allowed on "/"), each agent must be judged by its own exactly-named record.
 */
public function testExactMatchesBeatPartial()
{
    // Googlebot-News has its own record allowing everything.
    $newsIsAllowed = self::getRobotsTxt('match')->isAllowed('Googlebot-News', '/');
    $this->assertTrue($newsIsAllowed);

    // Plain Googlebot has a record disallowing everything.
    $plainIsBlocked = self::getRobotsTxt('match')->isDisallowed('Googlebot', '/');
    $this->assertTrue($plainIsBlocked);
}
147151
}

tests/files/match.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
User-Agent: Googlebot
2+
Disallow: /
3+
4+
User-Agent: Googlebot-News
5+
Allow: /

0 commit comments

Comments
 (0)