From 65842dcac767d6f78171249bbedd5f89cf442c84 Mon Sep 17 00:00:00 2001 From: IgorA100 Date: Mon, 23 Dec 2024 10:10:45 +0300 Subject: [PATCH 1/3] Fix: Analysis of relative links (Node.php) 1 Cannot use the "Link" object to analyze relative links Because the "Link" for the Atom feed will be corrupted. Use the additionally created "LinkForAnalysis" object 2. - Fix: Error preg_replace(), add all other possible replacements for relative links. - Replaced links like href="#aaa/bbb.xxx" - Replaced links like href="aaa/bbb.xxx" Similar to #427 Use the new method "setLinkForAnalysis" instead of "setLink" (XmlParser.php) Added analysis of relative links for the Atom feed (Link.php) --- src/FeedIo/Feed/Node.php | 61 ++++++++++++++++++++++++--------- src/FeedIo/Parser/XmlParser.php | 3 +- src/FeedIo/Rule/Atom/Link.php | 6 +++- 3 files changed, 52 insertions(+), 18 deletions(-) diff --git a/src/FeedIo/Feed/Node.php b/src/FeedIo/Feed/Node.php index e29ab316..234057f4 100644 --- a/src/FeedIo/Feed/Node.php +++ b/src/FeedIo/Feed/Node.php @@ -30,6 +30,8 @@ class Node implements NodeInterface, ElementsAwareInterface, ArrayableInterface protected ?string $host = null; + protected ?string $linkLinkForAnalysis = null; + public function __construct() { $this->initElements(); @@ -135,10 +137,23 @@ public function getLink(): ?string return $this->link; } + public function getLinkForAnalysis(): ?string + { + return $this->linkForAnalysis; + } + public function setLink(string $link = null): NodeInterface { $this->link = $link; $this->setHost($link); + $this->setLinkForAnalysis($link); + + return $this; + } + + public function setLinkForAnalysis(string $link = null): NodeInterface + { + $this->linkForAnalysis = $link; return $this; } @@ -152,29 +167,43 @@ protected function setHost(string $link = null): void protected function setHostInContent(string $host = null): void { - if (property_exists($this, 'content')){ - if (!is_null($host) && !is_null($this->content)) { - $this->content = preg_replace('!(<*\s*[^>]*)(href=)(.?)(\/[^\/])!','\1 href=\3'.$host.'\4', $this->content ); - $this->content = preg_replace('!(<*\s*[^>]*)(src=)(.?)(\/[^\/])!','\1 src=\3'.$host.'\4', $this->content ); - } + if (is_null($host)) { + return; } - if (property_exists($this, 'description')){ - if (!is_null($host) && !is_null($this->description)) { - $this->description = preg_replace('!(<*\s*[^>]*)(href=)(.?)(\/[^\/])!','\1 href=\3'.$host.'\4', $this->description ); - $this->description = preg_replace('!(<*\s*[^>]*)(src=)(.?)(\/[^\/])!','\1 src=\3'.$host.'\4', $this->description ); - } + // Replaced links like href="/aaa/bbb.xxx" + $pattern = '(<\s*[^>]*)(href=|src=)(.?)(\/[^\/])(?!(.(?!)'; + $this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$host.'\4'); + $this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$host.'\4'); + + $itemFullLink = $this->getLinkForAnalysis(); + $itemLink = implode("/", array_slice(explode("/", $itemFullLink), 0, -1))."/"; + + // Replaced links like href="#aaa/bbb.xxx" + $pattern = '(<\s*[^>]*)(href=|src=)(.?)(#)(?!(.(?!)'; + $this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$itemFullLink.'\4'); + $this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$itemFullLink.'\4'); + + // Replaced links like href="aaa/bbb.xxx" + $pattern = '(<\s*[^>]*)(href=|src=)(.?)(\w+\b)(?![:])(?!(.(?!)'; + $this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$itemLink.'\4'); + $this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$itemLink.'\4'); + } + + public function pregReplaceInProperty(string $property, string $pattern, string $replacement): void + { + if (property_exists($this, $property) && !is_null($this->{$property})) { + $this->{$property} = preg_replace('~'.$pattern.'~', $replacement, $this->{$property}) ?? $this->{$property}; } } public function getHostFromLink(): ?string { - if (!is_null($this->getLink())) { - $partsUrl = parse_url($this->getLink()); - $result = $partsUrl['scheme']."://".$partsUrl['host']; - } else - $result = null; + if (is_null($this->getLinkForAnalysis())) { + return null; + } + $partsUrl = parse_url($this->getLinkForAnalysis()); - return $result; + return $partsUrl['scheme']."://".$partsUrl['host']; } public function getValue(string $name): ?string diff --git a/src/FeedIo/Parser/XmlParser.php b/src/FeedIo/Parser/XmlParser.php index 3156bc37..eb3696f3 100644 --- a/src/FeedIo/Parser/XmlParser.php +++ b/src/FeedIo/Parser/XmlParser.php @@ -77,7 +77,8 @@ protected function handleNode(NodeInterface $item, DOMElement $node, RuleSet $ru { if ($this->isItem($node->tagName) && $item instanceof FeedInterface) { $linkItem = $item->getLink(); - $newItem = $this->parseNode($item->newItem()->setLink($linkItem), $node, $this->getItemRuleSet()); + $newItem = $this->parseNode($item->newItem()->setLinkForAnalysis($linkItem), $node, $this->getItemRuleSet()); + $this->addValidItem($item, $newItem); } else { $rule = $ruleSet->get($node->tagName); diff --git a/src/FeedIo/Rule/Atom/Link.php b/src/FeedIo/Rule/Atom/Link.php index 57ac43a6..85cf9e50 100644 --- a/src/FeedIo/Rule/Atom/Link.php +++ b/src/FeedIo/Rule/Atom/Link.php @@ -28,7 +28,11 @@ protected function selectAlternateLink(NodeInterface $node, \DOMElement $element ($element->hasAttribute('rel') && $element->getAttribute('rel') == 'alternate') || is_null($node->getLink()) ) { - $node->setLink($element->getAttribute('href')); + $href = $element->getAttribute('href'); + if (parse_url($href, PHP_URL_HOST) == null) { + $href = $node->getHostFromLink(). $href; + } + $node->setLink($href); } } From 29601f38d770e580160a8805ba17fe9b6618f54f Mon Sep 17 00:00:00 2001 From: Benjamin Brahmer Date: Sat, 5 Jul 2025 10:51:34 +0200 Subject: [PATCH 2/3] add changelog entry --- CHANGELOG.md | 3 +-- src/FeedIo/Feed/Node.php | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cacaeb28..fe0c03a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Add logic to set the node's link from the element when isPermaLink="true" and no link is present. (#12) - ### Fixed - +- Fix: Analysis of relative links for the Atom feed (#10) diff --git a/src/FeedIo/Feed/Node.php b/src/FeedIo/Feed/Node.php index 234057f4..1726c5b0 100644 --- a/src/FeedIo/Feed/Node.php +++ b/src/FeedIo/Feed/Node.php @@ -30,7 +30,7 @@ class Node implements NodeInterface, ElementsAwareInterface, ArrayableInterface protected ?string $host = null; - protected ?string $linkLinkForAnalysis = null; + protected ?string $linkForAnalysis = null; public function __construct() { From 33ba99f7434dc3d16ba95050ee93d789b8a72b6d Mon Sep 17 00:00:00 2001 From: Benjamin Brahmer Date: Sat, 5 Jul 2025 11:52:36 +0200 Subject: [PATCH 3/3] make sure href starts with / Co-authored-by: Sean Molenaar --- CHANGELOG.md | 2 +- src/FeedIo/Feed/Node.php | 2 +- src/FeedIo/Rule/Atom/Link.php | 9 +- tests/FeedIo/Rule/Atom/LinkTest.php | 153 ++++++++++++++++++++++++++++ 4 files changed, 163 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe0c03a5..c5f91ae2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,4 +11,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add logic to set the node's link from the element when isPermaLink="true" and no link is present. (#12) ### Fixed -- Fix: Analysis of relative links for the Atom feed (#10) +- Fix: Analysis of relative links for the Atom feed (#13) diff --git a/src/FeedIo/Feed/Node.php b/src/FeedIo/Feed/Node.php index 1726c5b0..b5d28fba 100644 --- a/src/FeedIo/Feed/Node.php +++ b/src/FeedIo/Feed/Node.php @@ -176,7 +176,7 @@ protected function setHostInContent(string $host = null): void $this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$host.'\4'); $itemFullLink = $this->getLinkForAnalysis(); - $itemLink = implode("/", array_slice(explode("/", $itemFullLink), 0, -1))."/"; + $itemLink = implode("/", array_slice(explode("/", $itemFullLink ?? ''), 0, -1))."/"; // Replaced links like href="#aaa/bbb.xxx" $pattern = '(<\s*[^>]*)(href=|src=)(.?)(#)(?!(.(?!)'; diff --git a/src/FeedIo/Rule/Atom/Link.php b/src/FeedIo/Rule/Atom/Link.php index 85cf9e50..60dd7c64 100644 --- a/src/FeedIo/Rule/Atom/Link.php +++ b/src/FeedIo/Rule/Atom/Link.php @@ -30,7 +30,14 @@ protected function selectAlternateLink(NodeInterface $node, \DOMElement $element ) { $href = $element->getAttribute('href'); if (parse_url($href, PHP_URL_HOST) == null) { - $href = $node->getHostFromLink(). $href; + $baseUrl = $node->getHostFromLink(); + if ($baseUrl !== null) { + // Add slash if href doesn't start with one + if (!str_starts_with($href, '/')) { + $href = '/' . $href; + } + $href = $baseUrl . $href; + } } $node->setLink($href); } diff --git a/tests/FeedIo/Rule/Atom/LinkTest.php b/tests/FeedIo/Rule/Atom/LinkTest.php index 35f57db6..becf6c48 100644 --- a/tests/FeedIo/Rule/Atom/LinkTest.php +++ b/tests/FeedIo/Rule/Atom/LinkTest.php @@ -60,4 +60,157 @@ public function testCreateElement() $document->saveXML() ); } + + public function testRelativeHrefWithoutLeadingSlash() + { + $item = new Item(); + $item->setLink('https://example.com/some/path.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', 'page.html'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/page.html', $item->getLink()); + } + + public function testRelativeHrefWithLeadingSlash() + { + $item = new Item(); + $item->setLink('https://example.com/some/path.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '/absolute/path.html'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/absolute/path.html', $item->getLink()); + } + + public function testAbsoluteHrefIsNotModified() + { + $item = new Item(); + $item->setLink('https://example.com/some/path.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', 'https://other.com/page.html'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://other.com/page.html', $item->getLink()); + } + + public function testNonAlternateLinkIsIgnored() + { + $item = new Item(); + $item->setLink('https://example.com/original.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '/new/path.html'); + $link->setAttribute('rel', 'stylesheet'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/original.html', $item->getLink()); + } + + public function testLinkWithoutRelAttributeWhenNodeLinkIsNull() + { + $item = new Item(); + $item->setLink(null); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '/path.html'); + $this->object->setProperty($item, $link); + + $this->assertEquals('/path.html', $item->getLink()); + } + + public function testLinkWithNullBaseUrl() + { + $item = new Item(); + $item->setLink(null); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', 'relative.html'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('relative.html', $item->getLink()); + } + + public function testProtocolRelativeUrl() + { + $item = new Item(); + $item->setLink('https://example.com/path.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '//cdn.example.com/resource.css'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('//cdn.example.com/resource.css', $item->getLink()); + } + + public function testFragmentUrl() + { + $item = new Item(); + $item->setLink('https://example.com/page.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '#section1'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/#section1', $item->getLink()); + } + + public function testQueryParameterUrl() + { + $item = new Item(); + $item->setLink('https://example.com/page.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '?param=value'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/?param=value', $item->getLink()); + } + + public function testHttpScheme() + { + $item = new Item(); + $item->setLink('http://example.com/path.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', '/secure/path.html'); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('http://example.com/secure/path.html', $item->getLink()); + } + + public function testEmptyHref() + { + $item = new Item(); + $item->setLink('https://example.com/original.html'); + $document = new \DOMDocument(); + + $link = $document->createElement('link'); + $link->setAttribute('href', ''); + $link->setAttribute('rel', 'alternate'); + $this->object->setProperty($item, $link); + + $this->assertEquals('https://example.com/', $item->getLink()); + } }