Skip to content

Commit 84c0715

Browse files
committed
Fix job status not updating when spider_closed fails
1 parent: 0ab40e1 · commit: 84c0715

File tree

1 file changed

+33
-20
lines changed

1 file changed

+33
-20
lines changed

estela_scrapy/extensions.py

Lines changed: 33 additions & 20 deletions
Original file line number · Diff line number · Diff line change
@@ -14,6 +14,7 @@
1414

1515
RUNNING_STATUS = "RUNNING"
1616
COMPLETED_STATUS = "COMPLETED"
17+
ERROR_STATUS = "ERROR"
1718

1819

1920
class BaseExtension:
@@ -86,26 +87,38 @@ def spider_closed(self, spider, reason):
8687
pass
8788

8889
stats = self.stats.get_stats()
89-
update_job(
90-
self.job_url,
91-
self.auth_token,
92-
status=COMPLETED_STATUS,
93-
lifespan=int(stats.get("elapsed_time_seconds", 0)),
94-
total_bytes=stats.get("downloader/response_bytes", 0),
95-
item_count=stats.get("item_scraped_count", 0),
96-
request_count=stats.get("downloader/request_count", 0),
97-
proxy_usage_data={
98-
"proxy_name": stats.get("downloader/proxy_name", ""),
99-
"bytes": stats.get("downloader/proxies/response_bytes", 0),
100-
},
101-
)
102-
103-
parsed_stats = json.dumps(stats, default=json_serializer)
104-
data = {
105-
"jid": os.getenv("ESTELA_SPIDER_JOB"),
106-
"payload": json.loads(parsed_stats),
107-
}
108-
producer.send("job_stats", data)
90+
job_status = COMPLETED_STATUS
91+
92+
try:
93+
# Put all new code here
94+
parsed_stats = json.dumps(stats, default=json_serializer)
95+
data = {
96+
"jid": os.getenv("ESTELA_SPIDER_JOB"),
97+
"payload": json.loads(parsed_stats),
98+
}
99+
producer.send("job_stats", data)
100+
101+
except Exception as e:
102+
print(f"Error during spider_closed: {e}")
103+
job_status = ERROR_STATUS
104+
105+
finally:
106+
try:
107+
update_job(
108+
self.job_url,
109+
self.auth_token,
110+
status=job_status,
111+
lifespan=int(stats.get("elapsed_time_seconds", 0)),
112+
total_bytes=stats.get("downloader/response_bytes", 0),
113+
item_count=stats.get("item_scraped_count", 0),
114+
request_count=stats.get("downloader/request_count", 0),
115+
proxy_usage_data={
116+
"proxy_name": stats.get("downloader/proxy_name", ""),
117+
"bytes": stats.get("downloader/proxies/response_bytes", 0),
118+
},
119+
)
120+
except Exception as e:
121+
print(f"CRITICAL ERROR: Could not update job status: {e}")
109122

110123
def store_stats(self, spider):
111124
stats = self.stats.get_stats()

0 commit comments

Comments
 (0)