Skip to content

Commit 09ad3ab

Browse files
committed
New configuration setting LINKBACKS_IGNORED_URLS_PATTERN
1 parent 40fdc97 commit 09ad3ab

File tree

3 files changed

+21
-10
lines changed

3 files changed

+21
-10
lines changed

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[MESSAGES CONTROL]
2-
disable = broad-except, missing-docstring, multiple-imports, too-few-public-methods, too-many-arguments, too-many-locals, too-many-positional-arguments
2+
disable = broad-except, missing-docstring, multiple-imports, too-few-public-methods, too-many-arguments, too-many-branches, too-many-locals, too-many-positional-arguments
33

44
[FORMAT]
55
max-line-length = 180

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1111

1212
### Added
1313

14+
- new configuration setting `LINKBACKS_IGNORED_URLS_PATTERN`: a regular expression searched against each link URL; matching URLs are never considered for linkbacks (_e.g._ `youtube.com`)
1415
- manual execution mode: `python linkbacks.py $pelican_generated_html_file`
1516

1617
### Changed

pelican/plugins/linkbacks/linkbacks.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import json
1010
import logging
1111
import os
12+
import re
1213
import sys
1314
from os import makedirs
1415
from os.path import basename, splitext
@@ -32,6 +33,8 @@
3233
DEFAULT_USER_AGENT = 'pelican-plugin-linkbacks'
3334
DEFAULT_CERT_VERIFY = True
3435
DEFAULT_TIMEOUT = 3
36+
DEFAULT_IGNORED_URLS_PATTERN = 'artstation.com|deviantart.com|github.com|github.io|itch.io|readthedocs.io|youtube.com|wikipedia.org'
37+
IMAGE_EXTENSIONS = ('.gif', '.jpg', '.pdf', '.png', '.svg')
3538
WEBMENTION_POSS_REL = ('webmention', 'http://webmention.org', 'http://webmention.org/', 'https://webmention.org', 'https://webmention.org/')
3639

3740
LOGGER = logging.getLogger(__name__)
@@ -79,9 +82,12 @@ def process_all_links_of_an_article(config, cache, url, slug, content):
7982
if config.siteurl and link_url.startswith(config.siteurl):
8083
LOGGER.debug("Link url %s skipped because is starts with %s", link_url, config.siteurl)
8184
continue
82-
if splitext(link_url)[1] in ('.gif', '.jpg', '.pdf', '.png', '.svg'):
85+
if splitext(link_url)[1] in IMAGE_EXTENSIONS:
8386
LOGGER.debug("Link url %s skipped because it appears to be an image or PDF file", link_url)
8487
continue
88+
if config.ignored_urls_pattern.search(link_url):
89+
LOGGER.debug("Link url %s skipped because it matches the ignored URLs pattern", link_url)
90+
continue
8591
cache_status = cache.get_status(slug, link_url)
8692
if cache_status:
8793
LOGGER.debug("Link url %s skipped because it is present in cache with status: %s", link_url, cache_status)
@@ -104,7 +110,7 @@ def process_all_links_of_an_article(config, cache, url, slug, content):
104110
continue
105111
response = notifier.send()
106112
LOGGER.info("%s notification sent for URL %s, endpoint response: %s", notifier.kind, link_url, response)
107-
cache.add_success(slug, link_url, notifier.kind, notifier.server_uri)
113+
cache.add_success(slug, link_url, notifier.kind, notifier.server_uri, response)
108114
successful_notifs_count += 1
109115
except (ConnectionError, HTTPError, RequestException, SSLError, xmlrpc.client.ProtocolError) as error:
110116
LOGGER.error("Failed to send %s for link url %s: [%s] %s", notifier.kind, link_url, error.__class__.__name__, error)
@@ -128,6 +134,9 @@ def __init__(self, settings=None):
128134
self.cert_verify = settings.get('LINKBACKS_CERT_VERIFY', DEFAULT_CERT_VERIFY)
129135
self.timeout = settings.get('LINKBACKS_REQUEST_TIMEOUT', DEFAULT_TIMEOUT)
130136
self.user_agent = settings.get('LINKBACKS_USERAGENT', DEFAULT_USER_AGENT)
137+
self.ignored_urls_pattern = settings.get('LINKBACKS_IGNORED_URLS_PATTERN', DEFAULT_IGNORED_URLS_PATTERN)
138+
if self.ignored_urls_pattern and isinstance(self.ignored_urls_pattern, str):
139+
self.ignored_urls_pattern = re.compile(self.ignored_urls_pattern)
131140

132141
class Cache:
133142
def __init__(self, config, data):
@@ -137,12 +146,14 @@ def __init__(self, config, data):
137146
# $article_slug: {
138147
# $link_url: {
139148
# "pingback": {
149+
# "error": // string or null if successful
150+
# "response": // string or null if failed
140151
# "server_uri": "http...", // optional string
141-
# "error": // string or null if successfull
142152
# },
143153
# "webmention": {
154+
# "error": // string or null if successful
155+
# "response": // string or null if failed
144156
# "server_uri": "http...", // optional string
145-
# "error": // string or null if successfull
146157
# }
147158
# },
148159
# ...
@@ -151,13 +162,14 @@ def __init__(self, config, data):
151162
# }
152163
self.data = defaultdict(dict)
153164
self.data.update(data)
154-
def add_success(self, article_slug, link_url, kind, server_uri):
165+
def add_success(self, article_slug, link_url, kind, server_uri, response):
155166
article_links = self.data[article_slug]
156167
link_status = article_links.get(link_url)
157168
if link_status is None:
158169
link_status = {}
159170
article_links[link_url] = link_status
160171
link_status[kind] = {
172+
"response": response,
161173
"server_uri": server_uri
162174
}
163175
def add_failure(self, article_slug, link_url, error, notifier_kind=None, server_uri=None):
@@ -186,11 +198,9 @@ def get_status(self, article_slug, link_url):
186198
return None # defensive, should never happen
187199
# For now we never retry sending pingbacks & webmentions if there is already an entry in the cache.
188200
# Later on, we could for example consider retrying on HTTP 5XX errors.
189-
pingback_error = pingback_status.get("error")
190-
webmention_error = webmention_status.get("error")
191-
if pingback_error is None or webmention_error is None:
201+
if pingback_status.get("response") or webmention_status.get("response"):
192202
return "ALREADY SUBMITTED"
193-
return pingback_error or webmention_error
203+
return pingback_status.get("error") or webmention_status.get("error")
194204
def links_count(self):
195205
return sum(len(url_statuses) for url_statuses in self.data.values())
196206
@classmethod

0 commit comments

Comments
 (0)