Skip to content

Commit a05665b

Browse files
committed
Changelog, add spider to some log messages
1 parent aa31b37 commit a05665b

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

docs/news.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@
33
Changes
44
=======
55

6+
v1.5.1 (2019-05-21)
7+
-------------------
8+
9+
- Remove username and password from settings since it's removed from crawlera.
10+
- Include affected spider in logs.
11+
- Handle situations when crawlera is restarted and replies with 407s for a few minutes
12+
by retrying the requests with an exponential backoff system.
13+
614
v1.5.0 (2019-01-23)
715
-------------------
816

scrapy_crawlera/middleware.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,13 @@ def process_response(self, request, response, spider):
179179
# we must retry
180180
retries = response.meta.get('crawlera_auth_retry_times', 0)
181181
if retries < self.max_auth_retry_times:
182-
return self._retry_auth(response, request)
182+
return self._retry_auth(response, request, spider)
183183
else:
184-
logging.warning("Max retries for authentication issues reached, please check"
185-
"auth information settings")
184+
logging.warning(
185+
"Max retries for authentication issues reached, please check auth"
186+
" information settings",
187+
extra={'spider': self.spider},
188+
)
186189

187190
if self._is_banned(response):
188191
self._bans[key] += 1
@@ -213,8 +216,11 @@ def process_exception(self, request, exception, spider):
213216
self._clear_dns_cache()
214217
self._set_custom_delay(request, self.connection_refused_delay)
215218

216-
def _retry_auth(self, response, request):
217-
logging.warning("Retrying crawlera request for authentication issue")
219+
def _retry_auth(self, response, request, spider):
220+
logging.warning(
221+
"Retrying crawlera request for authentication issue",
222+
extra={'spider': self.spider},
223+
)
218224
retries = response.meta.get('crawlera_auth_retry_times', 0) + 1
219225
retryreq = request.copy()
220226
retryreq.meta['crawlera_auth_retry_times'] = retries

0 commit comments

Comments
 (0)