66import logging
77import os
88import pathlib
9+ import pandas as pd
910
1011from google .cloud import bigquery
1112from google .oauth2 import service_account
@@ -24,11 +25,17 @@ def _get_bq_client(credentials_file):
2425
2526 LOGGER .info ('Loading BigQuery credentials from BIGQUERY_CREDENTIALS envvar' )
2627
27- service_account_info = json .loads (credentials_contents )
28- credentials = service_account .Credentials .from_service_account_info (
29- service_account_info ,
30- scopes = ['https://www.googleapis.com/auth/cloud-platform' ],
31- )
28+ if os .path .exists (credentials_contents ):
29+ credentials = service_account .Credentials .from_service_account_file (
30+ credentials_contents ,
31+ scopes = ['https://www.googleapis.com/auth/cloud-platform' ],
32+ )
33+ else :
34+ service_account_info = json .loads (credentials_contents )
35+ credentials = service_account .Credentials .from_service_account_info (
36+ service_account_info ,
37+ scopes = ['https://www.googleapis.com/auth/cloud-platform' ],
38+ )
3239
3340 return bigquery .Client (
3441 credentials = credentials ,
@@ -44,7 +51,13 @@ def run_query(query, dry_run=False, credentials_file=None):
4451
4552 job_config = bigquery .QueryJobConfig (dry_run = True , use_query_cache = False )
4653 dry_run_job = client .query (query , job_config = job_config )
47- LOGGER .info ('Estimated processed GBs: %.2f' , dry_run_job .total_bytes_processed / 1024 ** 3 )
54+ LOGGER .info ('Estimated data processed in query (GBs): %.2f' , dry_run_job .total_bytes_processed / 1024 ** 3 )
55+ # https://cloud.google.com/bigquery/pricing#on_demand_pricing
56+ # assuming have hit 1 terabyte processed in month
57+ cost_per_terabyte = 6.15
58+ bytes = dry_run_job .total_bytes_processed
59+ cost = cost_per_terabyte * bytes_to_terabytes (bytes )
60+ LOGGER .info ('Estimated cost for query: $%.2f' , cost )
4861
4962 if dry_run :
5063 return None
@@ -53,5 +66,15 @@ def run_query(query, dry_run=False, credentials_file=None):
5366 data = query_job .to_dataframe ()
5467 LOGGER .info ('Total processed GBs: %.2f' , query_job .total_bytes_processed / 1024 ** 3 )
5568 LOGGER .info ('Total billed GBs: %.2f' , query_job .total_bytes_billed / 1024 ** 3 )
56-
69+ cost = cost_per_terabyte * bytes_to_terabytes (query_job .total_bytes_billed )
70+ LOGGER .info ('Total cost for query: $%.2f' , cost )
5771 return data
72+
def bytes_to_megabytes(bytes):
    """Convert a byte count to megabytes using binary units.

    One megabyte is taken to be 1024 * 1024 bytes. The parameter name
    shadows the ``bytes`` builtin; it is kept so that keyword callers
    continue to work.
    """
    kilobytes = bytes / 1024
    return kilobytes / 1024
75+
def bytes_to_gigabytes(bytes):
    """Convert a byte count to gigabytes using binary units.

    Delegates to ``bytes_to_megabytes`` and divides the result by 1024.
    """
    megabytes = bytes_to_megabytes(bytes)
    return megabytes / 1024
78+
def bytes_to_terabytes(bytes):
    """Convert a byte count to terabytes using binary units.

    Delegates to ``bytes_to_gigabytes`` and divides the result by 1024.
    """
    gigabytes = bytes_to_gigabytes(bytes)
    return gigabytes / 1024
0 commit comments