|
| 1 | +<?php |
| 2 | + |
use GuzzleHttp\Client;
use GuzzleHttp\Handler\MockHandler;
use GuzzleHttp\HandlerStack;
use GuzzleHttp\Middleware;
use GuzzleHttp\Psr7\Response;
use Spatie\Crawler\Crawler;
| 8 | + |
beforeEach(function () {
    // Queue two canned responses: the crawler fetches robots.txt first,
    // then the home page. Each test starts with a fresh queue.
    $this->mockHandler = new MockHandler([
        new Response(200, [], "User-agent: *\nDisallow: /admin"),
        new Response(200, [], '<html><body>Home</body></html>'),
    ]);

    // History middleware records every request/response pair into
    // $this->crawledUrls so tests can inspect outgoing request headers.
    $this->crawledUrls = [];
    $this->history = Middleware::history($this->crawledUrls);

    $this->handlerStack = HandlerStack::create($this->mockHandler);
    $this->handlerStack->push($this->history);
});
| 21 | + |
it('should send the correct user agent header when fetching robots.txt', function () {
    $client = new Client(['handler' => $this->handlerStack]);
    $crawler = new Crawler($client);
    $crawler->respectRobots()->startCrawling('http://example.com');

    // robots.txt is requested first, then the home page.
    expect($this->crawledUrls)->toHaveCount(2);
    expect((string) $this->crawledUrls[0]['request']->getUri())->toBe('http://example.com/robots.txt');
    // Compare against Guzzle's own default user agent rather than the
    // hard-coded 'GuzzleHttp/7', so the assertion survives a Guzzle
    // major-version bump without edits.
    expect($this->crawledUrls[0]['request']->getHeader('User-Agent'))->toBe([\GuzzleHttp\Utils::defaultUserAgent()]);
});
| 31 | + |
it('should send the custom user agent header when fetching robots.txt', function () {
    $client = new Client(['handler' => $this->handlerStack]);

    // Configure a custom UA before crawling; the robots.txt request
    // must carry it as well, not just the page requests.
    (new Crawler($client))
        ->respectRobots()
        ->setUserAgent('CustomBot/2.0')
        ->startCrawling('http://example.com');

    $robotsRequest = $this->crawledUrls[0]['request'];

    expect($this->crawledUrls)->toHaveCount(2);
    expect((string) $robotsRequest->getUri())->toBe('http://example.com/robots.txt');
    expect($robotsRequest->getHeader('User-Agent'))->toBe(['CustomBot/2.0']);
});