Skip to content

Commit edd3dba

Browse files
committed
fixes on migration scripts
1 parent eea6013 commit edd3dba

File tree

5 files changed

+32
-16
lines changed

5 files changed

+32
-16
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,5 @@ terraform.rc
3232
__pycache__
3333
.pytest_cache
3434

35-
utils.txt
35+
utils.txt
36+
logs

scripts/script_adoption.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ def execute_query_and_insert_result(start_date, end_date):
4242
))) AS adoption
4343
FROM
4444
`httparchive.core_web_vitals.technologies`
45-
WHERE
45+
4646
"""
4747

4848
# Construct the WHERE clause based on the provided parameters
4949
if start_date and end_date:
50-
query += f" date >= '{start_date}' AND date <= '{end_date}'"
50+
query += f"WHERE date >= '{start_date}' AND date <= '{end_date}'"
5151

5252
query += " GROUP BY date, app, rank, geo"
5353

scripts/script_categories.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,13 @@ def execute_query_and_insert_result(start_date, end_date):
7777

7878
# Create a new Firestore document for each result and insert it into the "categories" collection
7979
collection_ref = firestore_client.collection('categories')
80-
print(results)
80+
81+
print("Data inserted started.")
8182
for row in results:
8283

8384
item = dict(row.items())
8485

85-
print(item)
86+
#print(item)
8687

8788
doc_ref = collection_ref.document()
8889
doc_ref.set(item)

scripts/script_core_web_vitals.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,12 @@ def execute_query_and_insert_result(start_date, end_date):
9595
))) AS vitals
9696
FROM
9797
`httparchive.core_web_vitals.technologies`
98-
WHERE
98+
9999
"""
100100

101101
# Construct the WHERE clause based on the provided parameters
102102
if start_date and end_date:
103-
query += f" date >= '{start_date}' AND date <= '{end_date}'"
103+
query += f"WHERE date >= '{start_date}' AND date <= '{end_date}'"
104104

105105
query += " GROUP BY date, app, rank, geo"
106106

@@ -110,13 +110,15 @@ def execute_query_and_insert_result(start_date, end_date):
110110

111111
# Create a new Firestore document for each result and insert it into the "core_web_vitals" collection
112112
collection_ref = firestore_client.collection('core_web_vitals')
113-
print(results)
113+
#print(results)
114+
115+
print("Data inserted started.")
114116
for row in results:
115117

116118
item = dict(row.items())
117119
item['date'] = str(row['date'])
118120

119-
print(item)
121+
#print(item)
120122

121123
doc_ref = collection_ref.document()
122124
doc_ref.set(item)

scripts/script_lighthouse.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import sys
2+
import uuid
23
from google.cloud import bigquery
34
from google.cloud import firestore
45
from decimal import Decimal
@@ -97,21 +98,32 @@ def execute_query_and_insert_result(start_date, end_date):
9798
query_job = bq_client.query(query)
9899
results = query_job.result()
99100

100-
# Create a new Firestore document for each result and insert it into the "technologies" collection
101101
collection_ref = firestore_client.collection('lighthouse')
102-
print(results)
103-
for row in results:
104102

103+
idx = 0
104+
105+
print("Data insert process started.")
106+
107+
batch = collection_ref.batch()
108+
for row in results:
109+
# Convert date
110+
#
105111
item = dict(row.items())
106112
item['date'] = str(row['date'])
107-
108113
item = convert_decimal_to_float(item)
109114

110-
print(item)
115+
record_ref = collection_ref.document(uuid.uuid4().hex)
116+
batch.set(record_ref, row)
117+
idx += 1
111118

112-
doc_ref = collection_ref.document()
113-
doc_ref.set(item)
119+
# Commit the batch once 499 records have been queued (idx is incremented before this check).
120+
if idx == 499:
121+
batch.commit()
122+
# Start a new batch for the next iteration.
123+
batch = collection_ref.batch()
124+
idx = 0
114125

126+
batch.commit()
115127
print("Data inserted into Firestore successfully.")
116128

117129
# Get command-line arguments

0 commit comments

Comments
 (0)