Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 12 additions & 16 deletions src/FeedIo/Feed/Node.php
Original file line number Diff line number Diff line change
Expand Up @@ -152,29 +152,25 @@ protected function setHost(string $link = null): void

protected function setHostInContent(string $host = null): void
{
if (property_exists($this, 'content')){
if (!is_null($host) && !is_null($this->content)) {
$this->content = preg_replace('!(<*\s*[^>]*)(href=)(.?)(\/[^\/])!','\1 href=\3'.$host.'\4', $this->content );
$this->content = preg_replace('!(<*\s*[^>]*)(src=)(.?)(\/[^\/])!','\1 src=\3'.$host.'\4', $this->content );
}
if (is_null($host)) {
return;
}
if (property_exists($this, 'description')){
if (!is_null($host) && !is_null($this->description)) {
$this->description = preg_replace('!(<*\s*[^>]*)(href=)(.?)(\/[^\/])!','\1 href=\3'.$host.'\4', $this->description );
$this->description = preg_replace('!(<*\s*[^>]*)(src=)(.?)(\/[^\/])!','\1 src=\3'.$host.'\4', $this->description );
}
if (property_exists($this, 'content') && !is_null($this->content)){
$this->content = preg_replace('!(<\s*[^>]*)(href=|src=)(.?)(\/[^\/])!','\1\2\3'.$host.'\4', $this->content) ?? $this->content;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have a lot of nested tags.
Change to:
$this->content = preg_replace('!(href=|src=)(.?)(\/[^\/])!','\1\2'.$host.'\3', $this->content) ?? $this->content;
Similarly for "$this->description"

This will fix error for preg_replace

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll write the most correct regular expression a little later.
Wait a bit. I have a lot to do now.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I already fixed the regex in my PR. The cycles went down from over 5.8 million to 10.000 by removing the * after the first <

Copy link
Contributor

@IgorA100 IgorA100 Apr 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is also necessary to exclude the processing of "href" in tags <code>

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point, maybe the regex should only search inside <a> and <img> tags or something?

Copy link
Contributor

@IgorA100 IgorA100 Apr 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is most correct regular expression:
preg_replace('~(<\s*[^>]*)(href=|src=)(.?)(\/[^\/])(?!(.(?!<code))*<\/code>)~','\1\2\3'.$host.'\4', $this->content)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe the regex should only search inside <a> and <img> tags

I'm not 100% sure about this, but I'll think about it. In addition, it is still necessary to replace relative links like href="xxx..."

Copy link
Contributor

@IgorA100 IgorA100 Apr 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even if analyze only <a> and <img>, don’t need to change them in <code>.

Copy link
Contributor

@IgorA100 IgorA100 Apr 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I modified the function to include all the other possible replacements for relative links.
Can you test my changes and make them in this PR?
All replacements can be moved to a separate function since there are more of them.

All links in https://go.dev/blog/feed.atom are replaced correctly.

    protected function setHostInContent(string $host = null): void
    {
        if (is_null($host)) {
            return;
        }
        // Replaced links like href="/aaa/bbb.xxx"
        if (property_exists($this, 'content') && !is_null($this->content)) {
            $this->content = preg_replace('~(<\s*[^>]*)(href=|src=)(.?)(\/[^\/])(?!(.(?!<code))*<\/code>)~','\1\2\3'.$host.'\4', $this->content) ?? $this->content;
        }
        if (property_exists($this, 'description') && !is_null($this->description)) {
            $this->description = preg_replace('~(<\s*[^>]*)(href=|src=)(.?)(\/[^\/])(?!(.(?!<code))*<\/code>)~','\1\2\3'.$host.'\4', $this->description) ?? $this->description;
        }

        $itemFullLink = $this->getLink();
        if (property_exists($this, 'link') && !is_null($itemFullLink)) {
            $itemLink = implode("/", array_slice(explode("/", $itemFullLink), 0, -1))."/";
            if (property_exists($this, 'content') && !is_null($this->content)){
                // Replaced links like href="#aaa/bbb.xxx"
                $this->content = preg_replace('~(<\s*[^>]*)(href=|src=)(.?)(#)(?!(.(?!<code))*<\/code>)~','\1\2\3'.$itemFullLink.'\4', $this->content) ?? $this->content;

                // Replaced links like href="aaa/bbb.xxx"
                $this->content = preg_replace('~(<\s*[^>]*)(href=|src=)(.?)(\w+\b)(?![:])(?!(.(?!<code))*<\/code>)~','\1\2\3'.$itemLink.'\4', $this->content) ?? $this->content;
            }
            if (property_exists($this, 'description') && !is_null($this->description)) {
                // Replaced links like href="#aaa/bbb.xxx"
                $this->description = preg_replace('~(<\s*[^>]*)(href=|src=)(.?)(#)(?!(.(?!<code))*<\/code>)~','\1\2\3'.$itemFullLink.'\4', $this->description) ?? $this->description;

                // Replaced links like href="aaa/bbb.xxx"
                $this->description = preg_replace('~(<\s*[^>]*)(href=|src=)(.?)(\w+\b)(?![:])(?!(.(?!<code))*<\/code>)~','\1\2\3'.$itemLink.'\4', $this->description) ?? $this->description;
            }
        }
    }


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cleanest code:

    protected function setHostInContent(string $host = null): void
    {
        if (is_null($host)) {
            return;
        }
        // Replaced links like href="/aaa/bbb.xxx"
        $pattern = '(<\s*[^>]*)(href=|src=)(.?)(\/[^\/])(?!(.(?!<code))*<\/code>)';
        $this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$host.'\4');
        $this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$host.'\4');

        $itemFullLink = $this->getLink();
        $itemLink = implode("/", array_slice(explode("/", $itemFullLink), 0, -1))."/";

        // Replaced links like href="#aaa/bbb.xxx"
        $pattern = '(<\s*[^>]*)(href=|src=)(.?)(#)(?!(.(?!<code))*<\/code>)';
        $this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$itemFullLink.'\4');
        $this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$itemFullLink.'\4');

        // Replaced links like href="aaa/bbb.xxx"
        $pattern = '(<\s*[^>]*)(href=|src=)(.?)(\w+\b)(?![:])(?!(.(?!<code))*<\/code>)';
        $this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$itemLink.'\4');
        $this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$itemLink.'\4');
    }

    public function pregReplaceInProperty(string $property, string $pattern, string $replacement): void
    {
        if (property_exists($this, $property) && !is_null($this->{$property})) {
            $this->{$property} = preg_replace('~'.$pattern.'~', $replacement, $this->{$property}) ?? $this->{$property};
        }
    }

}
if (property_exists($this, 'description') && !is_null($this->description)) {
$this->description = preg_replace('!(<\s*[^>]*)(href=|src=)(.?)(\/[^\/])!','\1\2\3'.$host.'\4', $this->description) ?? $this->description;
}
}

public function getHostFromLink(): ?string
{
if (!is_null($this->getLink())) {
$partsUrl = parse_url($this->getLink());
$result = $partsUrl['scheme']."://".$partsUrl['host'];
} else
$result = null;
if (is_null($this->getLink())) {
return null;
}
$partsUrl = parse_url($this->getLink());

return $result;
return $partsUrl['scheme']."://".$partsUrl['host'];
}

public function getValue(string $name): ?string
Expand Down