@@ -24,11 +24,19 @@ def _get_bq_client(credentials_file):
2424
2525 LOGGER .info ('Loading BigQuery credentials from BIGQUERY_CREDENTIALS envvar' )
2626
27- service_account_info = json .loads (credentials_contents )
28- credentials = service_account .Credentials .from_service_account_info (
29- service_account_info ,
30- scopes = ['https://www.googleapis.com/auth/cloud-platform' ],
31- )
27+ if os .path .exists (credentials_contents ):
28+ LOGGER .info ('Loading BigQuery credentials from service account file' )
29+ credentials = service_account .Credentials .from_service_account_file (
30+ credentials_contents ,
31+ scopes = ['https://www.googleapis.com/auth/cloud-platform' ],
32+ )
33+ else :
34+ LOGGER .info ('Loading BigQuery credentials from service account info' )
35+ service_account_info = json .loads (credentials_contents )
36+ credentials = service_account .Credentials .from_service_account_info (
37+ service_account_info ,
38+ scopes = ['https://www.googleapis.com/auth/cloud-platform' ],
39+ )
3240
3341 return bigquery .Client (
3442 credentials = credentials ,
@@ -44,7 +52,14 @@ def run_query(query, dry_run=False, credentials_file=None):
4452
4553 job_config = bigquery .QueryJobConfig (dry_run = True , use_query_cache = False )
4654 dry_run_job = client .query (query , job_config = job_config )
47- LOGGER .info ('Estimated processed GBs: %.2f' , dry_run_job .total_bytes_processed / 1024 ** 3 )
55+ data_processed_gbs = dry_run_job .total_bytes_processed / 1024 ** 3
56+ LOGGER .info ('Estimated data processed in query (GBs): %.2f' , data_processed_gbs )
57+ # https://cloud.google.com/bigquery/pricing#on_demand_pricing
58+ # assuming we have already hit 1 terabyte processed in the month
59+ cost_per_terabyte = 6.15
60+ bytes = dry_run_job .total_bytes_processed
61+ cost = cost_per_terabyte * bytes_to_terabytes (bytes )
62+ LOGGER .info ('Estimated cost for query: $%.2f' , cost )
4863
4964 if dry_run :
5065 return None
@@ -53,5 +68,21 @@ def run_query(query, dry_run=False, credentials_file=None):
5368 data = query_job .to_dataframe ()
5469 LOGGER .info ('Total processed GBs: %.2f' , query_job .total_bytes_processed / 1024 ** 3 )
5570 LOGGER .info ('Total billed GBs: %.2f' , query_job .total_bytes_billed / 1024 ** 3 )
56-
71+ cost = cost_per_terabyte * bytes_to_terabytes (query_job .total_bytes_billed )
72+ LOGGER .info ('Total cost for query: $%.2f' , cost )
5773 return data
74+
75+
def bytes_to_megabytes(bytes):
    """Return ``bytes`` expressed in megabytes (1 MB = 1024 * 1024 bytes)."""
    # Scale down one binary unit at a time: bytes -> kilobytes -> megabytes.
    kilobytes = bytes / 1024
    return kilobytes / 1024
79+
80+
def bytes_to_gigabytes(bytes):
    """Return ``bytes`` expressed in gigabytes (1 GB = 1024 megabytes)."""
    # Reuse the megabyte conversion, then step down one more binary unit.
    megabytes = bytes_to_megabytes(bytes)
    return megabytes / 1024
84+
85+
def bytes_to_terabytes(bytes):
    """Return ``bytes`` expressed in terabytes (1 TB = 1024 gigabytes)."""
    # Reuse the gigabyte conversion, then step down one more binary unit.
    gigabytes = bytes_to_gigabytes(bytes)
    return gigabytes / 1024
0 commit comments