|
1 | 1 | import json |
2 | 2 | import os |
3 | | -from datetime import datetime |
| 3 | +from datetime import datetime, timezone |
4 | 4 |
|
5 | 5 | import redis |
6 | 6 | from scrapy import signals |
@@ -32,7 +32,7 @@ class ItemStorageExtension(BaseExtension): |
def __init__(self, stats):
    """Initialize the extension and build the item exporter it writes through.

    NOTE(review): reconstructed from a diff view — the original lines carried
    per-line diff markers; behavior matches the post-diff version exactly.
    """
    super().__init__(stats)
    # binary=False exports items as plain (text) Python dicts; dont_fail=True
    # presumably makes the exporter tolerant of unserializable field values —
    # confirm against the Scrapy PythonItemExporter docs.
    self.exporter = PythonItemExporter(binary=False, dont_fail=True)
36 | 36 |
|
37 | 37 | @classmethod |
38 | 38 | def from_crawler(cls, crawler): |
@@ -109,8 +109,12 @@ def spider_closed(self, spider, reason): |
109 | 109 |
|
def store_stats(self, spider):
    """Persist the crawler stats for *spider* into a Redis hash at ``self.stats_key``.

    If the stats contain a ``start_time``, an ``elapsed_time_seconds`` entry is
    added first. The stats dict is round-tripped through JSON (using the
    module-level ``json_serializer``) so non-JSON values such as datetimes are
    converted before being written to Redis.
    """
    stats = self.stats.get_stats()
    start_time = stats.get("start_time")

    if start_time is not None:
        # Match the awareness of start_time so the subtraction never raises
        # "can't subtract offset-naive and offset-aware datetimes".
        now = datetime.now(timezone.utc) if start_time.tzinfo else datetime.now()
        stats["elapsed_time_seconds"] = int((now - start_time).total_seconds())

    # Serialize then parse back so every value is JSON-compatible; values that
    # json can't handle natively go through json_serializer.
    parsed_stats = json.dumps(stats, default=json_serializer)
    # hmset() was deprecated in redis-py 3.5.0 and removed in 4.x; hset() with
    # a mapping is the supported equivalent and issues the same HSET command.
    self.redis_conn.hset(self.stats_key, mapping=json.loads(parsed_stats))
0 commit comments