1
+ ################################################################################
2
+ # Description: This script queries the BigQuery table "technologies"
3
+ # and inserts the result into
4
+ # the Firestore collection "technologies".
5
+ #
6
+ # Parameters: start_date (optional), end_date (optional)
7
+ # Usage example: python scripts/script_technologies.py 2020-01-01 2020-12-31
8
+ ################################################################################
9
+
1
10
import sys
2
11
from google .cloud import bigquery
3
12
from google .cloud import firestore
@@ -16,16 +25,6 @@ def execute_query_and_insert_result(start_date, end_date):
16
25
# Set up Firestore client
17
26
firestore_client = firestore .Client ()
18
27
19
- # Define the BigQuery query with optional parameters
20
- # query = """
21
- # SELECT
22
- # *
23
- # FROM
24
- # `httparchive.core_web_vitals.technologies`
25
- # WHERE
26
- # 1=1
27
- # """
28
-
29
28
query = """
30
29
SELECT
31
30
date,
@@ -44,7 +43,6 @@ def execute_query_and_insert_result(start_date, end_date):
44
43
geo = 'ALL' AND
45
44
rank = 'ALL'
46
45
"""
47
-
48
46
# Construct the WHERE clause based on the provided parameters
49
47
if start_date and end_date :
50
48
query += f" AND date >= '{ start_date } ' AND date <= '{ end_date } '"
@@ -56,12 +54,26 @@ def execute_query_and_insert_result(start_date, end_date):
56
54
results = query_job .result ()
57
55
58
56
# Create a new Firestore document for each result and insert it into the "technologies" collection
59
- collection_ref = firestore_client .collection ('technologies' )
60
- print (results )
57
+ collection_ref = firestore_client .collection (u'technologies' )
58
+
59
+ tech_collection_ref = firestore_client .collection (u'technologies-list' )
60
+
61
61
for row in results :
62
62
63
63
item = dict (row .items ())
64
+
65
+ # Query the technologies-list collection for the description
66
+ #
67
+ tech_query = tech_collection_ref .where ('name' , '==' , row ['technology' ])
68
+ tech_query = tech_query .limit (1 )
69
+ tech_results = tech_query .stream ()
70
+ technology = {}
71
+ for tech in tech_results :
72
+ technology = tech .to_dict ()
73
+
74
+ # Override/add fields on top of the BigQuery row: date as a string, description from technologies-list
64
75
item ['date' ] = str (row ['date' ])
76
+ item ['description' ] = technology .get ('description' ,'' )
65
77
66
78
print (item )
67
79
0 commit comments