File tree Expand file tree Collapse file tree 4 files changed +9
-2
lines changed Expand file tree Collapse file tree 4 files changed +9
-2
lines changed Original file line number Diff line number Diff line change 77- Fill in check throughout the process to send previous steps infos in case of late crash [ #293 ] ( https://github.com/datagouv/hydra/pull/293 )
88- Fix purge csv tables CLI by using the csv db connection [ #294 ] ( https://github.com/datagouv/hydra/pull/294 )
99- Better gz files extraction function name [ #295 ] ( https://github.com/datagouv/hydra/pull/295 )
10+ - Add more detailed statuses [ #297 ] ( https://github.com/datagouv/hydra/pull/297 )
1011
1112## 2.3.0 (2025-07-15)
1213
Original file line number Diff line number Diff line change @@ -119,6 +119,7 @@ async def analyse_csv(
119119 try :
120120 previous_analysis : dict | None = await get_previous_analysis (resource_id = resource_id )
121121 if previous_analysis :
122+ await Resource .update (resource_id , {"status" : "VALIDATING_CSV" })
122123 csv_inspection , df = validate_then_detect (
123124 file_path = tmp_file .name ,
124125 previous_analysis = previous_analysis ,
Original file line number Diff line number Diff line change @@ -65,7 +65,7 @@ async def analyse_resource(
6565
6666 # Update resource status to ANALYSING_RESOURCE_HEAD
6767 resource : Record | None = await Resource .update (
68- resource_id , data = {"status" : "ANALYSING_RESOURCE " }
68+ resource_id , data = {"status" : "ANALYSING_RESOURCE_HEAD " }
6969 )
7070
7171 # let's see if we can infer a modification date on early hints based on harvest infos and headers
@@ -83,10 +83,12 @@ async def analyse_resource(
8383 tmp_file = None
8484 if change_status != Change .HAS_NOT_CHANGED or force_analysis :
8585 try :
86+ await Resource .update (resource_id , data = {"status" : "DOWNLOADING_RESOURCE" })
8687 tmp_file = await download_resource (url , headers , max_size_allowed )
8788 except IOException :
8889 dl_analysis ["analysis:error" ] = "File too large to download"
8990 else :
91+ await Resource .update (resource_id , data = {"status" : "ANALYSING_DOWNLOADED_RESOURCE" })
9092 # Get file size
9193 dl_analysis ["analysis:content-length" ] = os .path .getsize (tmp_file .name )
9294 # Get checksum
Original file line number Diff line number Diff line change @@ -13,9 +13,12 @@ class Resource:
1313 "BACKOFF" : "backoff period for this domain, will be checked later" ,
1414 "CRAWLING_URL" : "resource URL currently being crawled" ,
1515 "TO_ANALYSE_RESOURCE" : "resource to be processed for change, type and size analysis" ,
16- "ANALYSING_RESOURCE" : "currently being processed for change, type and size analysis" ,
16+ "ANALYSING_RESOURCE_HEAD" : "currently checking for change, type and size from headers" ,
17+ "DOWNLOADING_RESOURCE" : "currently being downloaded" ,
18+ "ANALYSING_DOWNLOADED_RESOURCE" : "currently checking for change, type and size from downloaded file" ,
1719 "TO_ANALYSE_CSV" : "resource content to be analysed by CSV detective" ,
1820 "ANALYSING_CSV" : "resource content currently being analysed by CSV detective" ,
21+ "VALIDATING_CSV" : "resource content being validated using the previous analysis" ,
1922 "INSERTING_IN_DB" : "currently being inserted in DB" ,
2023 "CONVERTING_TO_PARQUET" : "currently being converted to Parquet" ,
2124 "TO_ANALYSE_GEOJSON" : "geojson resource content to be analysed" ,
You can’t perform that action at this time.
0 commit comments