Skip to content

Commit 65842dc

Browse files
IgorA100 authored and Grotax committed
Fix: Analysis of relative links (Node.php)
1. The "Link" property cannot be used to analyze relative links, because doing so would corrupt the "Link" of an Atom feed. Use the newly added "LinkForAnalysis" property instead. 2. Fix: preg_replace() error; add the other possible replacements for relative links: - Replace links like href="#aaa/bbb.xxx" - Replace links like href="aaa/bbb.xxx" Similar to alexdebril#427. Use the new method "setLinkForAnalysis" instead of "setLink" (XmlParser.php). Add analysis of relative links for the Atom feed (Link.php).
1 parent 4b3bdfb commit 65842dc

File tree

3 files changed

+52
-18
lines changed

3 files changed

+52
-18
lines changed

src/FeedIo/Feed/Node.php

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ class Node implements NodeInterface, ElementsAwareInterface, ArrayableInterface
3030

3131
protected ?string $host = null;
3232

33+
protected ?string $linkLinkForAnalysis = null;
34+
3335
public function __construct()
3436
{
3537
$this->initElements();
@@ -135,10 +137,23 @@ public function getLink(): ?string
135137
return $this->link;
136138
}
137139

140+
public function getLinkForAnalysis(): ?string
141+
{
142+
return $this->linkForAnalysis;
143+
}
144+
138145
public function setLink(string $link = null): NodeInterface
139146
{
140147
$this->link = $link;
141148
$this->setHost($link);
149+
$this->setLinkForAnalysis($link);
150+
151+
return $this;
152+
}
153+
154+
public function setLinkForAnalysis(string $link = null): NodeInterface
155+
{
156+
$this->linkForAnalysis = $link;
142157

143158
return $this;
144159
}
@@ -152,29 +167,43 @@ protected function setHost(string $link = null): void
152167

153168
protected function setHostInContent(string $host = null): void
154169
{
155-
if (property_exists($this, 'content')){
156-
if (!is_null($host) && !is_null($this->content)) {
157-
$this->content = preg_replace('!(<*\s*[^>]*)(href=)(.?)(\/[^\/])!','\1 href=\3'.$host.'\4', $this->content );
158-
$this->content = preg_replace('!(<*\s*[^>]*)(src=)(.?)(\/[^\/])!','\1 src=\3'.$host.'\4', $this->content );
159-
}
170+
if (is_null($host)) {
171+
return;
160172
}
161-
if (property_exists($this, 'description')){
162-
if (!is_null($host) && !is_null($this->description)) {
163-
$this->description = preg_replace('!(<*\s*[^>]*)(href=)(.?)(\/[^\/])!','\1 href=\3'.$host.'\4', $this->description );
164-
$this->description = preg_replace('!(<*\s*[^>]*)(src=)(.?)(\/[^\/])!','\1 src=\3'.$host.'\4', $this->description );
165-
}
173+
// Replaced links like href="/aaa/bbb.xxx"
174+
$pattern = '(<\s*[^>]*)(href=|src=)(.?)(\/[^\/])(?!(.(?!<code))*<\/code>)';
175+
$this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$host.'\4');
176+
$this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$host.'\4');
177+
178+
$itemFullLink = $this->getLinkForAnalysis();
179+
$itemLink = implode("/", array_slice(explode("/", $itemFullLink), 0, -1))."/";
180+
181+
// Replaced links like href="#aaa/bbb.xxx"
182+
$pattern = '(<\s*[^>]*)(href=|src=)(.?)(#)(?!(.(?!<code))*<\/code>)';
183+
$this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$itemFullLink.'\4');
184+
$this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$itemFullLink.'\4');
185+
186+
// Replaced links like href="aaa/bbb.xxx"
187+
$pattern = '(<\s*[^>]*)(href=|src=)(.?)(\w+\b)(?![:])(?!(.(?!<code))*<\/code>)';
188+
$this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$itemLink.'\4');
189+
$this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$itemLink.'\4');
190+
}
191+
192+
public function pregReplaceInProperty(string $property, string $pattern, string $replacement): void
193+
{
194+
if (property_exists($this, $property) && !is_null($this->{$property})) {
195+
$this->{$property} = preg_replace('~'.$pattern.'~', $replacement, $this->{$property}) ?? $this->{$property};
166196
}
167197
}
168198

169199
public function getHostFromLink(): ?string
170200
{
171-
if (!is_null($this->getLink())) {
172-
$partsUrl = parse_url($this->getLink());
173-
$result = $partsUrl['scheme']."://".$partsUrl['host'];
174-
} else
175-
$result = null;
201+
if (is_null($this->getLinkForAnalysis())) {
202+
return null;
203+
}
204+
$partsUrl = parse_url($this->getLinkForAnalysis());
176205

177-
return $result;
206+
return $partsUrl['scheme']."://".$partsUrl['host'];
178207
}
179208

180209
public function getValue(string $name): ?string

src/FeedIo/Parser/XmlParser.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ protected function handleNode(NodeInterface $item, DOMElement $node, RuleSet $ru
7777
{
7878
if ($this->isItem($node->tagName) && $item instanceof FeedInterface) {
7979
$linkItem = $item->getLink();
80-
$newItem = $this->parseNode($item->newItem()->setLink($linkItem), $node, $this->getItemRuleSet());
80+
$newItem = $this->parseNode($item->newItem()->setLinkForAnalysis($linkItem), $node, $this->getItemRuleSet());
81+
8182
$this->addValidItem($item, $newItem);
8283
} else {
8384
$rule = $ruleSet->get($node->tagName);

src/FeedIo/Rule/Atom/Link.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ protected function selectAlternateLink(NodeInterface $node, \DOMElement $element
2828
($element->hasAttribute('rel') && $element->getAttribute('rel') == 'alternate')
2929
|| is_null($node->getLink())
3030
) {
31-
$node->setLink($element->getAttribute('href'));
31+
$href = $element->getAttribute('href');
32+
if (parse_url($href, PHP_URL_HOST) == null) {
33+
$href = $node->getHostFromLink(). $href;
34+
}
35+
$node->setLink($href);
3236
}
3337
}
3438

0 commit comments

Comments
 (0)