Skip to content

Commit 0ab40e1

Browse files
authored
fix: Update estela-entrypoint to handle scrapy >2.10 versions (#34)
fix: Update estela-entrypoint to handle scrapy >2.10 versions
1 parent fbedd51 commit 0ab40e1

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

estela_scrapy/extensions.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import json
22
import os
3-
from datetime import datetime
3+
from datetime import datetime, timezone
44

55
import redis
66
from scrapy import signals
@@ -32,7 +32,7 @@ class ItemStorageExtension(BaseExtension):
3232
def __init__(self, stats):
3333
super().__init__(stats)
3434
exporter_kwargs = {"binary": False}
35-
self.exporter = PythonItemExporter(**exporter_kwargs)
35+
self.exporter = PythonItemExporter(**exporter_kwargs, dont_fail=True)
3636

3737
@classmethod
3838
def from_crawler(cls, crawler):
@@ -109,8 +109,12 @@ def spider_closed(self, spider, reason):
109109

110110
def store_stats(self, spider):
111111
stats = self.stats.get_stats()
112-
elapsed_time = int((datetime.now() - stats.get("start_time")).total_seconds())
113-
stats.update({"elapsed_time_seconds": elapsed_time})
114-
112+
start_time = stats.get("start_time")
113+
114+
if start_time is not None:
115+
now = datetime.now(timezone.utc) if start_time.tzinfo else datetime.now()
116+
elapsed_time = int((now - start_time).total_seconds())
117+
stats.update({"elapsed_time_seconds": elapsed_time})
118+
115119
parsed_stats = json.dumps(stats, default=json_serializer)
116120
self.redis_conn.hmset(self.stats_key, json.loads(parsed_stats))

estela_scrapy/middlewares.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
from scrapy.exceptions import NotConfigured
66
from scrapy.utils.python import to_bytes
7-
from scrapy.utils.request import request_fingerprint
7+
from scrapy.utils.request import fingerprint as request_fingerprint
88
from twisted.web import http
99

1010
from estela_scrapy.utils import parse_time, producer

0 commit comments

Comments
 (0)