File tree Expand file tree Collapse file tree 4 files changed +43
-4
lines changed
Expand file tree Collapse file tree 4 files changed +43
-4
lines changed Original file line number Diff line number Diff line change @@ -16,21 +16,33 @@ class RobotsFile
1616 */
1717 private $ lines ;
1818
/**
 * Directive names recognised by this class, in lower case.
 * The constructor lowercases and trims each line before comparing, so
 * these values must not carry any surrounding whitespace.
 */
const USER_AGENT = 'user-agent';

const DISALLOW = 'disallow';

const ALLOW = 'allow';
24+
/**
 * Construct this Robots file.
 *
 * The content is lowercased, everything from a '#' to the end of its line
 * is removed, and each remaining line is split into a directive and an
 * argument. Only lines accepted by lineIsValid() are kept.
 *
 * @param String $content Raw text of the robots file
 */
public function __construct($content)
{
    // Start from an empty list so a file without any valid line still
    // leaves $lines as an array rather than null.
    $this->lines = [];

    $withoutComments = preg_replace('/#.*/', '', strtolower($content));

    foreach (explode("\n", $withoutComments) as $line) {
        // Split on the first ':' only: the argument may itself contain
        // colons (e.g. an absolute URL) and must stay in one piece.
        // array_filter drops an empty directive or an empty argument.
        $lineParts = array_filter(array_map('trim', explode(':', $line, 2)));
        if ($this->lineIsValid($lineParts)) {
            $this->lines[] = $lineParts;
        }
    }
}
3340
/**
 * Tell whether a split line is a usable directive/argument pair.
 *
 * @param array $line Parts of one line; index 0 is the directive name,
 *                    index 1 its argument
 * @return bool True when both parts are present and the directive is one
 *              of USER_AGENT, DISALLOW or ALLOW
 */
private function lineIsValid($line)
{
    // The parts may come from array_filter, which preserves keys, so a
    // count of 2 alone does not guarantee that indexes 0 and 1 exist.
    if (!is_array($line) || count($line) != 2 || !isset($line[0], $line[1])) {
        return false;
    }

    $validDirectives = [self::USER_AGENT, self::DISALLOW, self::ALLOW];

    return in_array($line[0], $validDirectives, true);
}
45+
3446 /**
3547 * Get the first directive in the file
3648 */
Original file line number Diff line number Diff line change @@ -31,11 +31,11 @@ private function parseFile(RobotsFile $robotFile)
3131 {
3232 while ($ robotFile ->hasLines ()) {
3333 $ currentUserAgents = [];
34- while ($ robotFile ->firstDirectiveIs (' user-agent ' )) {
34+ while ($ robotFile ->firstDirectiveIs (RobotsFile:: USER_AGENT )) {
3535 $ currentUserAgents [] = $ robotFile ->shiftArgument ();
3636 }
37- while ($ robotFile ->firstDirectiveIs (' allow ' , ' disallow ' )) {
38- $ isAllowed = $ robotFile ->firstDirective () == ' allow ' ;
37+ while ($ robotFile ->firstDirectiveIs (RobotsFile:: ALLOW , RobotsFile:: DISALLOW )) {
38+ $ isAllowed = $ robotFile ->firstDirective () == RobotsFile:: ALLOW ;
3939 $ urlParts = array_filter (explode ('/ ' , $ robotFile ->shiftArgument ()));
4040 $ this ->tree ->getNode ($ urlParts )->addRule ($ currentUserAgents , $ isAllowed );
4141 }
Original file line number Diff line number Diff line change @@ -105,4 +105,9 @@ public function testMultipleNonConsecutiveUserAgents()
105105 $ this ->assertFalse (self ::getRobotsTxt ('multiUserAgent ' )->isAllowed ('robot2 ' , '/some/other ' ));
106106 $ this ->assertTrue (self ::getRobotsTxt ('multiUserAgent ' )->isAllowed ('Googlebot ' , '/some/other ' ));
107107 }
108+
/**
 * Loads the 'corrupted' fixture and checks that Googlebot is still
 * allowed to fetch /some/other.
 */
public function testFileWithInvalidLines()
{
    $robotsTxt = self::getRobotsTxt('corrupted');

    $this->assertTrue($robotsTxt->isAllowed('Googlebot', '/some/other'));
}
108113}
Original file line number Diff line number Diff line change 1+ User-agent: *
2+ User-agent: UA
3+ User-agent: Googlebot
4+
5+
6+
7+ Allow: /
8+ Disallow: /private/
9+ Disallow: /secret/page.html
10+ User-agent: robot2
11+ Something: 10
12+ Allow: /
13+ Whatever: 50
14+
15+
16+
17+
18+ Disallow: /some/other/
19+
20+
21+
22+
You can’t perform that action at this time.
0 commit comments