Skip to content

Commit dcc63ad

Browse files
committed
script to import data from bigquery
1 parent 2bb31da commit dcc63ad

File tree

2 files changed

+106
-0
lines changed

2 files changed

+106
-0
lines changed

scripts/script.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import json
2+
import uuid
3+
from google.cloud import firestore
4+
from google.cloud import storage
5+
6+
# Clients for reading export files from GCS and writing documents to Firestore.
storage_client = storage.Client()
db = firestore.Client()

# Firestore batched writes are limited to 500 operations per commit.
BATCH_SIZE = 500

# Note: Client.list_blobs requires at least package version 1.17.0.
blobs = storage_client.list_blobs("reports-table-exports")

# Download and parse every newline-delimited-JSON export file.
# Note: the API call returns a response only when the iterator is consumed.
data = []
for blob in blobs:
    print(blob.path)
    blob.download_to_filename(blob.name)
    # 'with' guarantees the file handle is closed; the original left it open,
    # and (as written) parsed only the last downloaded blob.
    with open(blob.name, 'r') as export_file:
        for line in export_file:
            data.append(json.loads(line))

# Insert each row into the 'reports' collection using batched writes.
# Each document gets a random hex id so re-runs never collide.
doc_ref = db.collection('reports')
batch = db.batch()
idx = 0
for row in data:
    record_ref = doc_ref.document(uuid.uuid4().hex)
    batch.set(record_ref, row)
    idx += 1

    # Commit once the batch reaches the Firestore limit (the original
    # committed at 499, one short of the documented "every 500th record"),
    # then start a new batch for the next iteration.
    if idx == BATCH_SIZE:
        print('Committing..')
        batch.commit()
        batch = db.batch()
        idx = 0

# Commit whatever is left; skip the call entirely when the last batch is empty.
if idx:
    print('Committing..')
    batch.commit()

scripts/script_technologies.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import sys
2+
from google.cloud import bigquery
3+
from google.cloud import firestore
4+
5+
def convert_to_float(value):
    """Coerce *value* to a float, mapping missing (None) values to 0.0.

    BigQuery returns NULL numeric columns as None; Firestore documents
    should hold a real number instead.
    """
    # The original round-tripped through str() and compared against the
    # literal 'None' (which would also swallow the string "None"), and it
    # returned int 0 rather than a float. Test for None explicitly.
    if value is None:
        return 0.0
    return float(value)
11+
12+
def execute_query_and_insert_result(start_date, end_date):
    """Copy recent rows from the httparchive technologies table into Firestore.

    Args:
        start_date: optional ISO date string, inclusive lower bound, or None.
        end_date: optional ISO date string, inclusive upper bound, or None.
        Both must be provided for the date filter to apply.
    """
    # Set up BigQuery and Firestore clients.
    bq_client = bigquery.Client()
    firestore_client = firestore.Client()

    query = """
        SELECT
            *
        FROM
            `httparchive.core_web_vitals.technologies`
        WHERE
            1=1
    """

    # Use BigQuery query parameters instead of f-string interpolation: the
    # dates come straight from the command line, so building SQL by string
    # concatenation is an injection risk.
    query_parameters = []
    if start_date and end_date:
        query += " AND date >= @start_date AND date <= @end_date"
        query_parameters = [
            bigquery.ScalarQueryParameter('start_date', 'DATE', start_date),
            bigquery.ScalarQueryParameter('end_date', 'DATE', end_date),
        ]

    query += " ORDER BY date DESC LIMIT 10"

    # Execute the query; result() blocks until the job completes.
    job_config = bigquery.QueryJobConfig(query_parameters=query_parameters)
    results = bq_client.query(query, job_config=job_config).result()

    # Create a new Firestore document for each result row in 'technologies'.
    collection_ref = firestore_client.collection('technologies')
    for row in results:
        item = dict(row.items())
        # Firestore cannot store a BigQuery DATE value directly; stringify it.
        item['date'] = str(row['date'])
        # NULL medians come back as None; normalise every score to a float.
        for column in ('median_lighthouse_score_accessibility',
                       'median_lighthouse_score_performance',
                       'median_lighthouse_score_pwa',
                       'median_lighthouse_score_seo'):
            item[column] = convert_to_float(row[column])

        print(item)
        collection_ref.document().set(item)

    print("Data inserted into Firestore successfully.")
57+
58+
# Optional command-line arguments: start and end date (ISO strings).
cli_args = sys.argv[1:]
start_date = cli_args[0] if cli_args else None
end_date = cli_args[1] if len(cli_args) > 1 else None

# Run the import with whatever date range (if any) was supplied.
execute_query_and_insert_result(start_date, end_date)

0 commit comments

Comments
 (0)