@@ -19,43 +19,25 @@ class RobotsTxt
public function __construct($contents)
{
    // Start from an empty rule tree; parseFile() populates it.
    $this->tree = new Leaf();

    // parseFile() requires a RobotsFile, so wrap $contents before handing it over.
    $this->parseFile(new RobotsFile($contents));
}
2424
/**
 * Parse a robots file into the rule tree.
 *
 * A run of consecutive user-agent lines forms one group. Every allow/disallow
 * line read afterwards is added to the tree for all agents of that group,
 * until the next user-agent run starts a new group.
 *
 * @param RobotsFile $robotFile file to consume; lines are shifted off it as they are read
 */
private function parseFile(RobotsFile $robotFile)
{
    $currentUserAgents = [];

    while ($robotFile->hasLines()) {
        if ($robotFile->firstDirectiveIs('user-agent')) {
            // A new run of user-agent lines replaces the previous group.
            $currentUserAgents = [];
            while ($robotFile->firstDirectiveIs('user-agent')) {
                $currentUserAgents[] = $robotFile->shiftArgument();
            }
            continue;
        }

        if ($robotFile->firstDirectiveIs('allow', 'disallow')) {
            // Read the directive before shiftArgument() removes the line.
            $isAllowed = $robotFile->firstDirective() === 'allow';
            $urlParts = array_filter(explode('/', $robotFile->shiftArgument()));
            $this->tree->getNode($urlParts)->addRule($currentUserAgents, $isAllowed);
            continue;
        }

        // Neither branch matched, so nothing above consumed the head line.
        // Discard it; otherwise hasLines() stays true and the loop never ends.
        // NOTE(review): assumes shiftArgument() is safe to call on a line with
        // any directive - confirm against RobotsFile.
        $robotFile->shiftArgument();
    }
}
@@ -69,14 +51,6 @@ private function parseFile($robotFile)
public function isAllowed($userAgent, $path)
{
    // Split the path into its non-empty segments for the tree lookup.
    // NOTE(review): array_filter() without a callback also drops a "0" segment,
    // so "/0/x" is looked up as "/x". parseFile() splits rule paths the same way;
    // change both together if this is ever fixed.
    $urlParts = array_filter(explode('/', $path));

    // The user agent is lower-cased before the lookup. Only an explicit false
    // from the tree denies access; any other result counts as allowed.
    return $this->tree->allowed(strtolower($userAgent), $urlParts) !== false;
}
8256}
0 commit comments