diff --git a/src/UsesUrls.php b/src/UsesUrls.php
index 25f0334..71431af 100644
--- a/src/UsesUrls.php
+++ b/src/UsesUrls.php
@@ -41,7 +41,13 @@ public function currentHost(): string
      */
     public function currentBaseHost(): string
     {
-        $uri = Uri::createFromString($this->baseHref() ?? $this->currentUrl());
+        // A missing or relative base href has no scheme/host of its own: fall back to the current URL.
+        $currentBase = $this->baseHref();
+        if ($currentBase === null || !preg_match('/^https?:\/\//i', $currentBase)) {
+            $currentBase = $this->currentUrl();
+        }
+
+        $uri = Uri::createFromString($currentBase);
 
         return $uri->getScheme() . '://' . $uri->getHost();
     }
@@ -61,7 +67,7 @@ public function makeUrlAbsolute(?string $url = null, string $baseUrl = null): ?s
         // Resolve the Url using one of the provided/set base href.
         return (string) UriResolver::resolve(
             Http::createFromString($url),
-            Http::createFromString($baseUrl ?? $this->baseHref() ?? $this->currentBaseHost()),
+            Http::createFromString($baseUrl ?? $this->currentBaseHost()),
         );
     }
 }
diff --git a/tests/BaseHrefTest.php b/tests/BaseHrefTest.php
index 63782f9..b4be71a 100644
--- a/tests/BaseHrefTest.php
+++ b/tests/BaseHrefTest.php
@@ -41,4 +41,18 @@ public function testBaseHref()
             $web->baseHref
         );
     }
+
+    public function testBaseHrefContainRelativePath()
+    {
+        $web = new \Spekulatius\PHPScraper\PHPScraper(['disable_ssl' => true]);
+
+        // Navigate to the test page.
+        // Contains: <base href="/links/invalid-base-href.html"> (relative path)
+        $web->go('https://test-pages.phpscraper.de/links/invalid-base-href.html');
+        // The raw (still relative) base href must be returned unchanged.
+        $this->assertSame(
+            '/links/invalid-base-href.html',
+            $web->baseHref
+        );
+    }
 }
diff --git a/tests/UrlTest.php b/tests/UrlTest.php
index 822bb41..f695edf 100644
--- a/tests/UrlTest.php
+++ b/tests/UrlTest.php
@@ -67,6 +67,24 @@ public function testCurrentBaseHostWithBase()
         );
     }
 
+    /**
+     * @test
+     */
+    public function testCurrentBaseHostWithBaseIsRelativeUri()
+    {
+        $web = new \Spekulatius\PHPScraper\PHPScraper;
+
+        // Navigate to the test page.
+        // Contains: <base href="/links/invalid-base-href.html">
+        $web->go('https://test-pages.phpscraper.de/links/invalid-base-href.html');
+
+        // A relative base href cannot supply a host, so the host of the current URL is expected.
+        $this->assertSame(
+            'https://test-pages.phpscraper.de',
+            $web->currentBaseHost
+        );
+    }
+
     /**
      * Basic processing of the URLs.
      *
@@ -167,6 +185,36 @@ public function testMakeUrlAbsoluteConsiderBaseHref()
         );
     }
 
+    /**
+     * Special case where the base href is a relative URL, so the current base host must be used instead.
+     *
+     * @test
+     */
+    public function testMakeUrlAbsoluteConsiderBaseHrefIsRelativeUrl()
+    {
+        $web = new \Spekulatius\PHPScraper\PHPScraper;
+
+        /**
+         * Navigate to test page: This sets the base URL.
+         *
+         * It contains:
+         *
+         * ```html
+         * <base href="/links/invalid-base-href.html">
+         * ```
+         *
+         * While it's located on `test-pages.phpscraper.de`.
+         *
+         * This page isn't actually used. It's purely to set the context.
+         */
+        $web->go('https://test-pages.phpscraper.de/links/invalid-base-href.html');
+
+        $this->assertSame(
+            'https://test-pages.phpscraper.de/test/index.html',
+            $web->makeUrlAbsolute('test/index.html'),
+        );
+    }
+
     /**
      * Test if passed in hosts are considered. It trumps any base-href and current url.
      *