diff --git a/CHANGELOG.md b/CHANGELOG.md index d08253c2..6d50c5a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Add logic to set the node's link from the element when isPermaLink="true" and no link is present. (#8) - ### Fixed - +- Fix: Analysis of relative links for the Atom feed (#10) diff --git a/src/FeedIo/Feed/Node.php b/src/FeedIo/Feed/Node.php index 9e347d32..10ffd904 100644 --- a/src/FeedIo/Feed/Node.php +++ b/src/FeedIo/Feed/Node.php @@ -30,6 +30,8 @@ class Node implements NodeInterface, ElementsAwareInterface, ArrayableInterface protected ?string $host = null; + protected ?string $linkForAnalysis = null; + public function __construct() { $this->initElements(); @@ -135,10 +137,23 @@ public function getLink(): ?string return $this->link; } + public function getLinkForAnalysis(): ?string + { + return $this->linkForAnalysis; + } + public function setLink(string $link = null): NodeInterface { $this->link = $link; $this->setHost($link); + $this->setLinkForAnalysis($link); + + return $this; + } + + public function setLinkForAnalysis(string $link = null): NodeInterface + { + $this->linkForAnalysis = $link; return $this; } @@ -168,7 +183,8 @@ protected function setHostInContent(string $host = null): void return; } - $itemLink = implode("/", array_slice(explode("/", $itemFullLink), 0, -1))."/"; + $itemFullLink = $this->getLinkForAnalysis(); + $itemLink = implode("/", array_slice(explode("/", $itemFullLink ?? ''), 0, -1))."/"; // Replaced links like href="#aaa/bbb.xxx" $pattern = '(<\s*[^>]*)(href=|src=)(.?)(#)(?!(.(?!)'; @@ -190,10 +206,10 @@ public function pregReplaceInProperty(string $property, string $pattern, string public function getHostFromLink(): ?string { - if (is_null($this->getLink())) { + if (is_null($this->getLinkForAnalysis())) { return null; } - $partsUrl = parse_url($this->getLink()); + $partsUrl = parse_url($this->getLinkForAnalysis()); return $partsUrl['scheme']."://".$partsUrl['host']; } diff --git a/src/FeedIo/Parser/XmlParser.php b/src/FeedIo/Parser/XmlParser.php index 3156bc37..eb3696f3 100644 --- a/src/FeedIo/Parser/XmlParser.php +++ b/src/FeedIo/Parser/XmlParser.php @@ -77,7 +77,8 @@ protected function handleNode(NodeInterface $item, DOMElement $node, RuleSet $ru { if ($this->isItem($node->tagName) && $item instanceof FeedInterface) { $linkItem = $item->getLink(); - $newItem = $this->parseNode($item->newItem()->setLink($linkItem), $node, $this->getItemRuleSet()); + $newItem = $this->parseNode($item->newItem()->setLinkForAnalysis($linkItem), $node, $this->getItemRuleSet()); + $this->addValidItem($item, $newItem); } else { $rule = $ruleSet->get($node->tagName); diff --git a/src/FeedIo/Rule/Atom/Link.php b/src/FeedIo/Rule/Atom/Link.php index 57ac43a6..60dd7c64 100644 --- a/src/FeedIo/Rule/Atom/Link.php +++ b/src/FeedIo/Rule/Atom/Link.php @@ -28,7 +28,18 @@ protected function selectAlternateLink(NodeInterface $node, \DOMElement $element ($element->hasAttribute('rel') && $element->getAttribute('rel') == 'alternate') || is_null($node->getLink()) ) { - $node->setLink($element->getAttribute('href')); + $href = $element->getAttribute('href'); + if (parse_url($href, PHP_URL_HOST) == null) { + $baseUrl = $node->getHostFromLink(); + if ($baseUrl !== null) { + // Add slash if href doesn't start with one + if (!str_starts_with($href, '/')) { + $href = '/' . $href; + } + $href = $baseUrl . $href; + } + } + $node->setLink($href); } } diff --git a/tests/FeedIo/Rule/Atom/LinkTest.php b/tests/FeedIo/Rule/Atom/LinkTest.php index 35f57db6..becf6c48 100644 --- a/tests/FeedIo/Rule/Atom/LinkTest.php +++ b/tests/FeedIo/Rule/Atom/LinkTest.php @@ -60,4 +60,157 @@ public function testCreateElement() $document->saveXML() ); } + + public function testRelativeHrefWithoutLeadingSlash() + { + $item = new Item(); + $item->setLink('https://example.com/some/path.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', 'page.html'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/page.html', $item->getLink()); + } + + public function testRelativeHrefWithLeadingSlash() + { + $item = new Item(); + $item->setLink('https://example.com/some/path.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '/absolute/path.html'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/absolute/path.html', $item->getLink()); + } + + public function testAbsoluteHrefIsNotModified() + { + $item = new Item(); + $item->setLink('https://example.com/some/path.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', 'https://other.com/page.html'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://other.com/page.html', $item->getLink()); + } + + public function testNonAlternateLinkIsIgnored() + { + $item = new Item(); + $item->setLink('https://example.com/original.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '/new/path.html'); + $link->setAttribute('rel', 'stylesheet'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/original.html', $item->getLink()); + } + + public function testLinkWithoutRelAttributeWhenNodeLinkIsNull() + { + $item = new Item(); + $item->setLink(null); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '/path.html'); + $this->object->setProperty($item, $link); + + $this->assertEquals('/path.html', $item->getLink()); + } + + public function testLinkWithNullBaseUrl() + { + $item = new Item(); + $item->setLink(null); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', 'relative.html'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('relative.html', $item->getLink()); + } + + public function testProtocolRelativeUrl() + { + $item = new Item(); + $item->setLink('https://example.com/path.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '//cdn.example.com/resource.css'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('//cdn.example.com/resource.css', $item->getLink()); + } + + public function testFragmentUrl() + { + $item = new Item(); + $item->setLink('https://example.com/page.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '#section1'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/#section1', $item->getLink()); + } + + public function testQueryParameterUrl() + { + $item = new Item(); + $item->setLink('https://example.com/page.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '?param=value'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/?param=value', $item->getLink()); + } + + public function testHttpScheme() + { + $item = new Item(); + $item->setLink('http://example.com/path.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '/secure/path.html'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('http://example.com/secure/path.html', $item->getLink()); + } + + public function testEmptyHref() + { + $item = new Item(); + $item->setLink('https://example.com/original.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', ''); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/', $item->getLink()); + } }