11from google .cloud import bigquery
22import google .api_core .exceptions
3- from aircan .dependencies .utils import AirflowCKANException , aircan_status_update
3+ from aircan .dependencies .utils import AirflowCKANException , aircan_status_update_nhs as aircan_status_update
44import json
55import logging
66
@@ -13,34 +13,51 @@ def bq_import_csv(table_id, gcs_path, table_schema, ckan_conf):
1313 try :
1414 client = bigquery .Client ()
1515
16- job_config = bigquery .LoadJobConfig ()
16+ try :
17+ job_config = bigquery .LoadJobConfig ()
1718
18- schema = bq_schema_from_table_schema (table_schema )
19- job_config .schema = schema
19+ schema = bq_schema_from_table_schema (table_schema )
20+ job_config .schema = schema
2021
21- job_config .skip_leading_rows = 1
22- job_config .source_format = bigquery .SourceFormat .CSV
23- # overwrite a Table
24- job_config .write_disposition = bigquery .WriteDisposition .WRITE_TRUNCATE
25- # set 'True' for schema autodetect but turning it off since we define schema in explicitly when publishing data using datapub
26- # job_config.autodetect = True
27- load_job = client .load_table_from_uri (
28- gcs_path , table_id , job_config = job_config
29- )
22+ job_config .skip_leading_rows = 1
23+ job_config .source_format = bigquery .SourceFormat .CSV
24+ # overwrite a Table
25+ job_config .write_disposition = bigquery .WriteDisposition .WRITE_TRUNCATE
26+ # set 'True' for schema autodetect but turning it off since we define schema in explicitly when publishing data using datapub
27+ # job_config.autodetect = True
28+ load_job = client .load_table_from_uri (
29+ gcs_path , table_id , job_config = job_config
30+ )
3031
31- load_job .result () # Waits for table load to complete.
32- destination_table = client .get_table (table_id )
32+ load_job .result () # Waits for table load to complete.
33+ destination_table = client .get_table (table_id )
34+ except Exception as e :
35+ job_config = bigquery .LoadJobConfig ()
36+
37+ job_config .skip_leading_rows = 1
38+ job_config .source_format = bigquery .SourceFormat .CSV
39+ # overwrite a Table
40+ job_config .write_disposition = bigquery .WriteDisposition .WRITE_TRUNCATE
41+ # set 'True' for schema autodetect but turning it off since we define schema in explicitly when publishing data using datapub
42+ # job_config.autodetect = True
43+ load_job = client .load_table_from_uri (
44+ gcs_path , table_id , job_config = job_config
45+ )
46+ load_job .result () # Waits for table load to complete.
47+ destination_table = client .get_table (table_id )
3348 status_dict = {
3449 'res_id' : ckan_conf .get ('resource_id' ),
3550 'state' : 'progress' ,
36- 'message' : 'Data ingestion is in progress.'
51+ 'message' : 'Data ingestion is in progress.' ,
52+ 'dag_run_id' : ckan_conf .get ('dag_run_id' )
3753 }
3854 aircan_status_update (ckan_conf .get ('site_url' ), ckan_conf .get ('api_key' ), status_dict )
3955 if destination_table :
4056 status_dict = {
4157 'res_id' : ckan_conf .get ('resource_id' ),
4258 'state' : 'complete' ,
43- 'message' : "Ingession Completed"
59+ 'message' : "Ingession Completed" ,
60+ 'dag_run_id' : ckan_conf .get ('dag_run_id' )
4461 }
4562 aircan_status_update (ckan_conf .get ('site_url' ), ckan_conf .get ('api_key' ), status_dict )
4663 return {'success' : True , 'message' : 'BigQuery Table created successfully.' }
@@ -60,6 +77,7 @@ def bq_import_csv(table_id, gcs_path, table_schema, ckan_conf):
6077 logging .info (e )
6178 status_dict = {
6279 'res_id' : ckan_conf .get ('resource_id' ),
80+ 'dag_run_id' : ckan_conf .get ('dag_run_id' ),
6381 'state' : 'failed' ,
6482 'message' : str (e )
6583 }
0 commit comments