|
14 | 14 |
|
15 | 15 | RUNNING_STATUS = "RUNNING" |
16 | 16 | COMPLETED_STATUS = "COMPLETED" |
| 17 | +ERROR_STATUS = "ERROR" |
17 | 18 |
|
18 | 19 |
|
19 | 20 | class BaseExtension: |
@@ -86,26 +87,38 @@ def spider_closed(self, spider, reason): |
86 | 87 | pass |
87 | 88 |
|
88 | 89 | stats = self.stats.get_stats() |
89 | | - update_job( |
90 | | - self.job_url, |
91 | | - self.auth_token, |
92 | | - status=COMPLETED_STATUS, |
93 | | - lifespan=int(stats.get("elapsed_time_seconds", 0)), |
94 | | - total_bytes=stats.get("downloader/response_bytes", 0), |
95 | | - item_count=stats.get("item_scraped_count", 0), |
96 | | - request_count=stats.get("downloader/request_count", 0), |
97 | | - proxy_usage_data={ |
98 | | - "proxy_name": stats.get("downloader/proxy_name", ""), |
99 | | - "bytes": stats.get("downloader/proxies/response_bytes", 0), |
100 | | - }, |
101 | | - ) |
102 | | - |
103 | | - parsed_stats = json.dumps(stats, default=json_serializer) |
104 | | - data = { |
105 | | - "jid": os.getenv("ESTELA_SPIDER_JOB"), |
106 | | - "payload": json.loads(parsed_stats), |
107 | | - } |
108 | | - producer.send("job_stats", data) |
| 90 | + job_status = COMPLETED_STATUS |
| 91 | + |
| 92 | + try: |
| 93 | +            # Serialize the final stats and publish them to the "job_stats" topic |
| 94 | + parsed_stats = json.dumps(stats, default=json_serializer) |
| 95 | + data = { |
| 96 | + "jid": os.getenv("ESTELA_SPIDER_JOB"), |
| 97 | + "payload": json.loads(parsed_stats), |
| 98 | + } |
| 99 | + producer.send("job_stats", data) |
| 100 | + |
| 101 | + except Exception as e: |
| 102 | + print(f"Error during spider_closed: {e}") |
| 103 | + job_status = ERROR_STATUS |
| 104 | + |
| 105 | + finally: |
| 106 | + try: |
| 107 | + update_job( |
| 108 | + self.job_url, |
| 109 | + self.auth_token, |
| 110 | + status=job_status, |
| 111 | + lifespan=int(stats.get("elapsed_time_seconds", 0)), |
| 112 | + total_bytes=stats.get("downloader/response_bytes", 0), |
| 113 | + item_count=stats.get("item_scraped_count", 0), |
| 114 | + request_count=stats.get("downloader/request_count", 0), |
| 115 | + proxy_usage_data={ |
| 116 | + "proxy_name": stats.get("downloader/proxy_name", ""), |
| 117 | + "bytes": stats.get("downloader/proxies/response_bytes", 0), |
| 118 | + }, |
| 119 | + ) |
| 120 | + except Exception as e: |
| 121 | + print(f"CRITICAL ERROR: Could not update job status: {e}") |
109 | 122 |
|
110 | 123 | def store_stats(self, spider): |
111 | 124 | stats = self.stats.get_stats() |
|
0 commit comments