99import json
1010import logging
1111import os
12+ import re
1213import sys
1314from os import makedirs
1415from os .path import basename , splitext
# Fallback values used when the corresponding LINKBACKS_* setting is not provided.
DEFAULT_USER_AGENT = 'pelican-plugin-linkbacks'
DEFAULT_CERT_VERIFY = True
DEFAULT_TIMEOUT = 3
# Regular expression searched in every link URL: links that match are skipped.
# The dots are escaped so they only match a literal "." (an unescaped "." is a
# wildcard, which made e.g. "itchyio" or "githubXcom" match by accident).
DEFAULT_IGNORED_URLS_PATTERN = r'artstation\.com|deviantart\.com|github\.com|github\.io|itch\.io|readthedocs\.io|youtube\.com|wikipedia\.org'
# Extensions of link targets that are skipped; despite the name, PDF is included.
IMAGE_EXTENSIONS = ('.gif', '.jpg', '.pdf', '.png', '.svg')
WEBMENTION_POSS_REL = ('webmention', 'http://webmention.org', 'http://webmention.org/', 'https://webmention.org', 'https://webmention.org/')

LOGGER = logging.getLogger(__name__)
@@ -79,9 +82,12 @@ def process_all_links_of_an_article(config, cache, url, slug, content):
7982 if config .siteurl and link_url .startswith (config .siteurl ):
8083 LOGGER .debug ("Link url %s skipped because is starts with %s" , link_url , config .siteurl )
8184 continue
82- if splitext (link_url )[1 ] in ( '.gif' , '.jpg' , '.pdf' , '.png' , '.svg' ) :
85+ if splitext (link_url )[1 ] in IMAGE_EXTENSIONS :
8386 LOGGER .debug ("Link url %s skipped because it appears to be an image or PDF file" , link_url )
8487 continue
88+ if config .ignored_urls_pattern .search (link_url ):
89+ LOGGER .debug ("Link url %s skipped because it matches the ignored URLs pattern" , link_url )
90+ continue
8591 cache_status = cache .get_status (slug , link_url )
8692 if cache_status :
8793 LOGGER .debug ("Link url %s skipped because it is present in cache with status: %s" , link_url , cache_status )
@@ -104,7 +110,7 @@ def process_all_links_of_an_article(config, cache, url, slug, content):
104110 continue
105111 response = notifier .send ()
106112 LOGGER .info ("%s notification sent for URL %s, endpoint response: %s" , notifier .kind , link_url , response )
107- cache .add_success (slug , link_url , notifier .kind , notifier .server_uri )
113+ cache .add_success (slug , link_url , notifier .kind , notifier .server_uri , response )
108114 successful_notifs_count += 1
109115 except (ConnectionError , HTTPError , RequestException , SSLError , xmlrpc .client .ProtocolError ) as error :
110116 LOGGER .error ("Failed to send %s for link url %s: [%s] %s" , notifier .kind , link_url , error .__class__ .__name__ , error )
@@ -128,6 +134,9 @@ def __init__(self, settings=None):
128134 self .cert_verify = settings .get ('LINKBACKS_CERT_VERIFY' , DEFAULT_CERT_VERIFY )
129135 self .timeout = settings .get ('LINKBACKS_REQUEST_TIMEOUT' , DEFAULT_TIMEOUT )
130136 self .user_agent = settings .get ('LINKBACKS_USERAGENT' , DEFAULT_USER_AGENT )
137+ self .ignored_urls_pattern = settings .get ('LINKBACKS_IGNORED_URLS_PATTERN' , DEFAULT_IGNORED_URLS_PATTERN )
138+ if self .ignored_urls_pattern and isinstance (self .ignored_urls_pattern , str ):
139+ self .ignored_urls_pattern = re .compile (self .ignored_urls_pattern )
131140
132141class Cache :
133142 def __init__ (self , config , data ):
@@ -137,12 +146,14 @@ def __init__(self, config, data):
137146 # $article_slug: {
138147 # $link_url: {
139148 # "pingback": {
149+ # "error": // string or null if successful
150+ # "response": // string or null if failed
140151 # "server_uri": "http...", // optional string
141- # "error": // string or null if successfull
142152 # },
143153 # "webmention": {
154+ # "error": // string or null if successful
155+ # "response": // string or null if failed
144156 # "server_uri": "http...", // optional string
145- # "error": // string or null if successfull
146157 # }
147158 # },
148159 # ...
@@ -151,13 +162,14 @@ def __init__(self, config, data):
151162 # }
152163 self .data = defaultdict (dict )
153164 self .data .update (data )
def add_success(self, article_slug, link_url, kind, server_uri, response):
    """Record a notification of the given kind as sent for a link of an article.

    The entry is stored under self.data[article_slug][link_url][kind] and holds
    the endpoint response together with the server URI. An existing entry of
    the same kind for that link is overwritten.
    """
    links_of_article = self.data[article_slug]
    # Create the per-link mapping on first use (or when it was stored as None).
    if links_of_article.get(link_url) is None:
        links_of_article[link_url] = {}
    links_of_article[link_url][kind] = {"response": response, "server_uri": server_uri}
163175 def add_failure (self , article_slug , link_url , error , notifier_kind = None , server_uri = None ):
@@ -186,11 +198,9 @@ def get_status(self, article_slug, link_url):
186198 return None # defensive, should never happen
187199 # For now we never retry sending pingbacks & webmentions if there is already an entry in the cache.
188200 # Later on, we could for example consider retrying on HTTP 5XX errors.
189- pingback_error = pingback_status .get ("error" )
190- webmention_error = webmention_status .get ("error" )
191- if pingback_error is None or webmention_error is None :
201+ if pingback_status .get ("response" ) or webmention_status .get ("response" ):
192202 return "ALREADY SUBMITTED"
193- return pingback_error or webmention_error
203+ return pingback_status . get ( "error" ) or webmention_status . get ( "error" )
def links_count(self):
    """Return the total number of link URLs stored across all article slugs."""
    total = 0
    for statuses_by_url in self.data.values():
        total += len(statuses_by_url)
    return total
196206 @classmethod
0 commit comments