diff --git a/src/UsesUrls.php b/src/UsesUrls.php
index 25f0334..71431af 100644
--- a/src/UsesUrls.php
+++ b/src/UsesUrls.php
@@ -41,7 +41,13 @@ public function currentHost(): string
*/
public function currentBaseHost(): string
{
- $uri = Uri::createFromString($this->baseHref() ?? $this->currentUrl());
+ // Only trust the base href when it is an absolute http(s) URL; schemes are case-insensitive (RFC 3986, section 3.1).
+ $currentBase = $this->baseHref();
+ if ($currentBase === null || !preg_match('#^https?://#i', $currentBase)) {
+ $currentBase = $this->currentUrl(); // Missing or relative base href: fall back to the current URL.
+ }
+
+ $uri = Uri::createFromString($currentBase);
return $uri->getScheme() . '://' . $uri->getHost();
}
@@ -61,7 +67,7 @@ public function makeUrlAbsolute(?string $url = null, string $baseUrl = null): ?s
// Resolve the Url using one of the provided/set base href.
return (string) UriResolver::resolve(
Http::createFromString($url),
- Http::createFromString($baseUrl ?? $this->baseHref() ?? $this->currentBaseHost()),
+ Http::createFromString($baseUrl ?? $this->currentBaseHost()),
);
}
}
diff --git a/tests/BaseHrefTest.php b/tests/BaseHrefTest.php
index 63782f9..b4be71a 100644
--- a/tests/BaseHrefTest.php
+++ b/tests/BaseHrefTest.php
@@ -41,4 +41,18 @@ public function testBaseHref()
$web->baseHref
);
}
+
+ public function testBaseHrefContainRelativePath()
+ {
+ $web = new \Spekulatius\PHPScraper\PHPScraper(['disable_ssl' => true]);
+
+ // Navigate to the test page.
+ // Contains: <base href="/links/invalid-base-href.html"> (relative path)
+ $web->go('https://test-pages.phpscraper.de/links/invalid-base-href.html');
+ // The relative base href is expected exactly as written, without a scheme or host.
+ $this->assertSame(
+ '/links/invalid-base-href.html',
+ $web->baseHref
+ );
+ }
}
diff --git a/tests/UrlTest.php b/tests/UrlTest.php
index 822bb41..f695edf 100644
--- a/tests/UrlTest.php
+++ b/tests/UrlTest.php
@@ -67,6 +67,24 @@ public function testCurrentBaseHostWithBase()
);
}
+ /**
+ * @test
+ */
+ public function testCurrentBaseHostWithBaseIsRelativeUri()
+ {
+ $web = new \Spekulatius\PHPScraper\PHPScraper;
+
+ // Navigate to the test page.
+ // Contains: <base href="/links/invalid-base-href.html"> (relative path)
+ $web->go('https://test-pages.phpscraper.de/links/invalid-base-href.html');
+
+ // A relative base href carries no host, so the scheme and host of the current URL are expected.
+ $this->assertSame(
+ 'https://test-pages.phpscraper.de',
+ $web->currentBaseHost
+ );
+ }
+
/**
* Basic processing of the URLs.
*
@@ -167,6 +185,36 @@ public function testMakeUrlAbsoluteConsiderBaseHref()
);
}
+ /**
+ * Special case: the base href is a relative URL, so the current base host is used instead.
+ *
+ * @test
+ */
+ public function testMakeUrlAbsoluteConsiderBaseHrefIsRelativeUrl()
+ {
+ $web = new \Spekulatius\PHPScraper\PHPScraper;
+
+ /**
+ * Navigate to test page: This sets the base URL.
+ *
+ * It contains:
+ *
+ * ```html
+ * <base href="/links/invalid-base-href.html">
+ * ```
+ *
+ * While it's located on `test-pages.phpscraper.de`.
+ *
+ * This page isn't actually used. It's purely to set the context.
+ */
+ $web->go('https://test-pages.phpscraper.de/links/invalid-base-href.html');
+
+ $this->assertSame(
+ 'https://test-pages.phpscraper.de/test/index.html',
+ $web->makeUrlAbsolute('test/index.html'),
+ );
+ }
+
/**
* Test if passed in hosts are considered. It trumps any base-href and current url.
*