Skip to content

Commit 98661a0

Browse files
committed
New configuration setting LINKBACKS_IGNORED_URLS_PATTERN
1 parent 40fdc97 commit 98661a0

File tree

4 files changed

+31
-18
lines changed

4 files changed

+31
-18
lines changed

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[MESSAGES CONTROL]
2-
disable = broad-except, missing-docstring, multiple-imports, too-few-public-methods, too-many-arguments, too-many-locals, too-many-positional-arguments
2+
disable = broad-except, missing-docstring, multiple-imports, too-few-public-methods, too-many-arguments, too-many-branches, too-many-locals, too-many-positional-arguments, wrong-import-order
33

44
[FORMAT]
55
max-line-length = 180

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1111

1212
### Added
1313

14+
- new configuration setting `LINKBACKS_IGNORED_URLS_PATTERN` to define some URLs that should never be considered for linkbacks (_e.g._ `youtube.com`)
1415
- manual execution mode: `python linkbacks.py $pelican_generated_html_file`
1516

1617
### Changed

pelican/plugins/linkbacks/linkbacks.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import json
1010
import logging
1111
import os
12+
import re
1213
import sys
1314
from os import makedirs
1415
from os.path import basename, splitext
@@ -32,6 +33,8 @@
3233
DEFAULT_USER_AGENT = 'pelican-plugin-linkbacks'
3334
DEFAULT_CERT_VERIFY = True
3435
DEFAULT_TIMEOUT = 3
36+
DEFAULT_IGNORED_URLS_PATTERN = 'artstation.com|deviantart.com|github.com|github.io|itch.io|readthedocs.io|youtube.com|wikipedia.org'
37+
IMAGE_EXTENSIONS = ('.gif', '.jpg', '.pdf', '.png', '.svg')
3538
WEBMENTION_POSS_REL = ('webmention', 'http://webmention.org', 'http://webmention.org/', 'https://webmention.org', 'https://webmention.org/')
3639

3740
LOGGER = logging.getLogger(__name__)
@@ -79,9 +82,12 @@ def process_all_links_of_an_article(config, cache, url, slug, content):
7982
if config.siteurl and link_url.startswith(config.siteurl):
8083
LOGGER.debug("Link url %s skipped because is starts with %s", link_url, config.siteurl)
8184
continue
82-
if splitext(link_url)[1] in ('.gif', '.jpg', '.pdf', '.png', '.svg'):
85+
if splitext(link_url)[1] in IMAGE_EXTENSIONS:
8386
LOGGER.debug("Link url %s skipped because it appears to be an image or PDF file", link_url)
8487
continue
88+
if config.ignored_urls_pattern.search(link_url):
89+
LOGGER.debug("Link url %s skipped because it matches the ignored URLs pattern", link_url)
90+
continue
8591
cache_status = cache.get_status(slug, link_url)
8692
if cache_status:
8793
LOGGER.debug("Link url %s skipped because it is present in cache with status: %s", link_url, cache_status)
@@ -104,9 +110,9 @@ def process_all_links_of_an_article(config, cache, url, slug, content):
104110
continue
105111
response = notifier.send()
106112
LOGGER.info("%s notification sent for URL %s, endpoint response: %s", notifier.kind, link_url, response)
107-
cache.add_success(slug, link_url, notifier.kind, notifier.server_uri)
113+
cache.add_success(slug, link_url, notifier.kind, notifier.server_uri, response)
108114
successful_notifs_count += 1
109-
except (ConnectionError, HTTPError, RequestException, SSLError, xmlrpc.client.ProtocolError) as error:
115+
except (ConnectionError, HTTPError, NotifierError, RequestException, SSLError, xmlrpc.client.ProtocolError) as error:
110116
LOGGER.error("Failed to send %s for link url %s: [%s] %s", notifier.kind, link_url, error.__class__.__name__, error)
111117
cache.add_failure(slug, link_url, error, notifier.kind, notifier.server_uri)
112118
except Exception as error: # unexpected exception => we display the stacktrace:
@@ -128,6 +134,9 @@ def __init__(self, settings=None):
128134
self.cert_verify = settings.get('LINKBACKS_CERT_VERIFY', DEFAULT_CERT_VERIFY)
129135
self.timeout = settings.get('LINKBACKS_REQUEST_TIMEOUT', DEFAULT_TIMEOUT)
130136
self.user_agent = settings.get('LINKBACKS_USERAGENT', DEFAULT_USER_AGENT)
137+
self.ignored_urls_pattern = settings.get('LINKBACKS_IGNORED_URLS_PATTERN', DEFAULT_IGNORED_URLS_PATTERN)
138+
if self.ignored_urls_pattern and isinstance(self.ignored_urls_pattern, str):
139+
self.ignored_urls_pattern = re.compile(self.ignored_urls_pattern)
131140

132141
class Cache:
133142
def __init__(self, config, data):
@@ -137,12 +146,14 @@ def __init__(self, config, data):
137146
# $article_slug: {
138147
# $link_url: {
139148
# "pingback": {
149+
# "error": // string or null if successful
150+
# "response": // string or null if failed
140151
# "server_uri": "http...", // optional string
141-
# "error": // string or null if successfull
142152
# },
143153
# "webmention": {
154+
# "error": // string or null if successful
155+
# "response": // string or null if failed
144156
# "server_uri": "http...", // optional string
145-
# "error": // string or null if successfull
146157
# }
147158
# },
148159
# ...
@@ -151,13 +162,14 @@ def __init__(self, config, data):
151162
# }
152163
self.data = defaultdict(dict)
153164
self.data.update(data)
154-
def add_success(self, article_slug, link_url, kind, server_uri):
165+
def add_success(self, article_slug, link_url, kind, server_uri, response):
155166
article_links = self.data[article_slug]
156167
link_status = article_links.get(link_url)
157168
if link_status is None:
158169
link_status = {}
159170
article_links[link_url] = link_status
160171
link_status[kind] = {
172+
"response": response,
161173
"server_uri": server_uri
162174
}
163175
def add_failure(self, article_slug, link_url, error, notifier_kind=None, server_uri=None):
@@ -186,11 +198,9 @@ def get_status(self, article_slug, link_url):
186198
return None # defensive, should never happen
187199
# For now we never retry sending pingbacks & webmentions if there is already an entry in the cache.
188200
# Later on, we could for example consider retrying on HTTP 5XX errors.
189-
pingback_error = pingback_status.get("error")
190-
webmention_error = webmention_status.get("error")
191-
if pingback_error is None or webmention_error is None:
201+
if pingback_status.get("response") or webmention_status.get("response"):
192202
return "ALREADY SUBMITTED"
193-
return pingback_error or webmention_error
203+
return pingback_status.get("error") or webmention_status.get("error")
194204
def links_count(self):
195205
return sum(len(url_statuses) for url_statuses in self.data.values())
196206
@classmethod
@@ -227,6 +237,9 @@ def discover_server_uri(self):
227237
def send(self):
228238
"Sends the actual notification."
229239

240+
class NotifierError(RuntimeError):
241+
pass
242+
230243
class PingbackNotifier(Notifier):
231244
def __init__(self, source_url, target_url, config=LinkbackConfig()):
232245
self.kind = "pingback"
@@ -253,8 +266,8 @@ def send(self):
253266
return xml_rpc_client.pingback.ping(self.source_url, self.target_url)
254267
except xmlrpc.client.Fault as fault:
255268
if fault.faultCode == 48: # pingback already registered
256-
raise RuntimeError(f"Pingback already registered for URL {self.target_url}, XML-RPC response: code={fault.faultCode} - {fault.faultString}") from fault
257-
raise RuntimeError(f"Pingback XML-RPC request failed for URL {self.target_url}: code={fault.faultCode} - {fault.faultString}") from fault
269+
raise NotifierError(f"Pingback already registered for URL {self.target_url}, XML-RPC response: code={fault.faultCode} - {fault.faultString}") from fault
270+
raise NotifierError(f"Pingback XML-RPC request failed for URL {self.target_url}: code={fault.faultCode} - {fault.faultString}") from fault
258271

259272
class WebmentionNotifier(Notifier):
260273
def __init__(self, source_url, target_url, config=LinkbackConfig()):

pelican/plugins/linkbacks/test_linkbacks.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
import json, logging, os
22

33
import httpretty
4+
from pelican.generators import ArticlesGenerator
5+
from pelican.tests.support import get_settings
6+
import pytest
7+
48
from linkbacks import (
59
process_all_articles_linkbacks,
610
Cache,
711
LinkbackConfig,
812
CACHE_FILENAME,
913
MAX_RESPONSE_LENGTH,
1014
)
11-
import pytest
12-
13-
from pelican.generators import ArticlesGenerator
14-
from pelican.tests.support import get_settings
15-
1615

1716
CUR_DIR = os.path.dirname(__file__)
1817
TEST_CONTENT_DIR = os.path.join(CUR_DIR, 'test_content')

0 commit comments

Comments
 (0)