Skip to content

Commit ddb70b1

Browse files
committed
new scripts for data transformation added
1 parent 262dad5 commit ddb70b1

File tree

5 files changed

+451
-8
lines changed

5 files changed

+451
-8
lines changed

scripts/script_adoption.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import sys
2+
from google.cloud import bigquery
3+
from google.cloud import firestore
4+
5+
def convert_to_float(value):
    """Convert *value* to a float, treating a missing value as zero.

    Args:
        value: Anything whose string form ``float()`` can parse, or ``None``.

    Returns:
        ``float(str(value))``, or ``0.0`` when the string form of *value* is
        ``'None'`` (which covers ``None`` itself).

    Raises:
        ValueError: If the string form of *value* is not a valid float.
    """
    text = str(value)
    if text == 'None':
        # Return a float here as well so callers always get the same type.
        return 0.0
    return float(text)
11+
12+
def execute_query_and_insert_result(start_date, end_date):
    """Export technology adoption counts from BigQuery into Firestore.

    Runs an aggregation over ``httparchive.core_web_vitals.technologies``
    grouped by (date, app, rank, geo) and writes one auto-ID document per
    result row to the ``adoption`` Firestore collection.

    Args:
        start_date: Inclusive lower bound for ``date`` (e.g. ``'2023-01-01'``).
            ``None`` or an empty string means no lower bound.
        end_date: Inclusive upper bound for ``date``. ``None`` or an empty
            string means no upper bound.
    """
    bq_client = bigquery.Client()
    firestore_client = firestore.Client()

    query = """
        CREATE TEMPORARY FUNCTION GET_ADOPTION(
          records ARRAY<STRUCT<
            client STRING,
            origins INT64
          >>
        ) RETURNS STRUCT<
          desktop INT64,
          mobile INT64
        > LANGUAGE js AS '''
        return Object.fromEntries(records.map(({client, origins}) => {
          return [client, origins];
        }));
        ''';

        SELECT
          date,
          app AS technology,
          rank,
          geo,
          GET_ADOPTION(ARRAY_AGG(STRUCT(
            client,
            origins
          ))) AS adoption
        FROM
          `httparchive.core_web_vitals.technologies`
    """

    # Bind the dates as named query parameters instead of interpolating them
    # into the SQL text, so command-line input can never alter the statement.
    # NOTE(review): the parameters are typed DATE, which assumes the `date`
    # column is a DATE -- confirm against the table schema.
    conditions = []
    query_parameters = []
    if start_date:
        conditions.append("date >= @start_date")
        query_parameters.append(
            bigquery.ScalarQueryParameter("start_date", "DATE", start_date)
        )
    if end_date:
        conditions.append("date <= @end_date")
        query_parameters.append(
            bigquery.ScalarQueryParameter("end_date", "DATE", end_date)
        )

    # Only emit WHERE when there is something to filter on; an unconditional
    # WHERE left a dangling "WHERE GROUP BY" when no dates were supplied.
    if conditions:
        query += " WHERE " + " AND ".join(conditions)

    query += " GROUP BY date, app, rank, geo"

    job_config = bigquery.QueryJobConfig(query_parameters=query_parameters)
    results = bq_client.query(query, job_config=job_config).result()

    # Create a new Firestore document for each result row in the "adoption"
    # collection.
    collection_ref = firestore_client.collection('adoption')
    for row in results:
        item = dict(row.items())
        # Store the date as a string rather than the value BigQuery returned.
        item['date'] = str(row['date'])

        print(item)

        # document() with no argument creates a reference with an auto ID.
        collection_ref.document().set(item)

    print("Data inserted into Firestore successfully.")
72+
73+
# Run the export only when executed as a script, so importing this module
# does not trigger a BigQuery query and Firestore writes.
if __name__ == "__main__":
    # Optional positional command-line arguments: start date, then end date.
    start_date = sys.argv[1] if len(sys.argv) > 1 else None
    end_date = sys.argv[2] if len(sys.argv) > 2 else None

    # Execute the query and insert the result into Firestore.
    execute_query_and_insert_result(start_date, end_date)

scripts/script_categories.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import sys
2+
from google.cloud import bigquery
3+
from google.cloud import firestore
4+
5+
def convert_to_float(value):
    """Convert *value* to a float, treating a missing value as zero.

    Args:
        value: Anything whose string form ``float()`` can parse, or ``None``.

    Returns:
        ``float(str(value))``, or ``0.0`` when the string form of *value* is
        ``'None'`` (which covers ``None`` itself).

    Raises:
        ValueError: If the string form of *value* is not a valid float.
    """
    text = str(value)
    if text == 'None':
        # Return a float here as well so callers always get the same type.
        return 0.0
    return float(text)
11+
12+
def execute_query_and_insert_result(start_date, end_date):
    """Export per-category origin counts and technologies into Firestore.

    Queries ``httparchive.all.pages`` (mobile client only) for the number of
    distinct root pages per category, together with the technologies in each
    category ordered by their own distinct-root-page count (descending), and
    writes one auto-ID document per result row to the ``categories``
    Firestore collection.

    Args:
        start_date: Inclusive lower bound for ``date`` (e.g. ``'2023-01-01'``).
            ``None`` or an empty string means no lower bound.
        end_date: Inclusive upper bound for ``date``. ``None`` or an empty
            string means no upper bound.
    """
    bq_client = bigquery.Client()
    firestore_client = firestore.Client()

    # Bind the dates as named query parameters instead of interpolating them
    # into the SQL text, so command-line input can never alter the statement.
    # Each bound is applied independently, so a lone start or end date is
    # honoured rather than silently ignored.
    # NOTE(review): the parameters are typed DATE, which assumes the `date`
    # column is a DATE -- confirm against the table schema.
    date_conditions = []
    query_parameters = []
    if start_date:
        date_conditions.append("date >= @start_date")
        query_parameters.append(
            bigquery.ScalarQueryParameter("start_date", "DATE", start_date)
        )
    if end_date:
        date_conditions.append("date <= @end_date")
        query_parameters.append(
            bigquery.ScalarQueryParameter("end_date", "DATE", end_date)
        )

    # The same filter is appended to the WHERE clause of both CTEs below.
    date_filter = "".join(f" AND {condition}" for condition in date_conditions)

    query = f"""
        WITH categories AS (
          SELECT
            category,
            COUNT(DISTINCT root_page) AS origins
          FROM
            `httparchive.all.pages`,
            UNNEST(technologies) AS t,
            UNNEST(t.categories) AS category
          WHERE
            client = 'mobile'{date_filter}
          GROUP BY category
        ),
        technologies AS (
          SELECT
            category,
            technology,
            COUNT(DISTINCT root_page) AS origins
          FROM
            `httparchive.all.pages`,
            UNNEST(technologies) AS t,
            UNNEST(t.categories) AS category
          WHERE
            client = 'mobile'{date_filter}
          GROUP BY category, technology
        )
        SELECT
          category,
          categories.origins,
          ARRAY_AGG(technology ORDER BY technologies.origins DESC) AS technologies
        FROM
          categories
        JOIN
          technologies
        USING
          (category)
        GROUP BY
          category,
          categories.origins
        ORDER BY
          categories.origins DESC
    """

    job_config = bigquery.QueryJobConfig(query_parameters=query_parameters)
    results = bq_client.query(query, job_config=job_config).result()

    # Create a new Firestore document for each result row in the "categories"
    # collection.
    collection_ref = firestore_client.collection('categories')
    for row in results:
        item = dict(row.items())

        print(item)

        # document() with no argument creates a reference with an auto ID.
        collection_ref.document().set(item)

    print("Data inserted into Firestore successfully.")
91+
92+
# Run the export only when executed as a script, so importing this module
# does not trigger a BigQuery query and Firestore writes.
if __name__ == "__main__":
    # Optional positional command-line arguments: start date, then end date.
    start_date = sys.argv[1] if len(sys.argv) > 1 else None
    end_date = sys.argv[2] if len(sys.argv) > 2 else None

    # Execute the query and insert the result into Firestore.
    execute_query_and_insert_result(start_date, end_date)

scripts/script_core_web_vitals.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import sys
2+
from google.cloud import bigquery
3+
from google.cloud import firestore
4+
5+
def convert_to_float(value):
    """Convert *value* to a float, treating a missing value as zero.

    Args:
        value: Anything whose string form ``float()`` can parse, or ``None``.

    Returns:
        ``float(str(value))``, or ``0.0`` when the string form of *value* is
        ``'None'`` (which covers ``None`` itself).

    Raises:
        ValueError: If the string form of *value* is not a valid float.
    """
    text = str(value)
    if text == 'None':
        # Return a float here as well so callers always get the same type.
        return 0.0
    return float(text)
11+
12+
def execute_query_and_insert_result(start_date, end_date):
    """Export Core Web Vitals aggregates from BigQuery into Firestore.

    Runs an aggregation over ``httparchive.core_web_vitals.technologies``
    grouped by (date, app, rank, geo). A JavaScript UDF reshapes the
    per-client rows into one entry per metric (overall, LCP, CLS, FID, FCP,
    TTFB, INP), each holding ``good_number`` and ``tested`` counts keyed by
    client. One auto-ID document per result row is written to the
    ``core_web_vitals`` Firestore collection.

    Args:
        start_date: Inclusive lower bound for ``date`` (e.g. ``'2023-01-01'``).
            ``None`` or an empty string means no lower bound.
        end_date: Inclusive upper bound for ``date``. ``None`` or an empty
            string means no upper bound.
    """
    bq_client = bigquery.Client()
    firestore_client = firestore.Client()

    query = """
        CREATE TEMPORARY FUNCTION GET_VITALS(
          records ARRAY<STRUCT<
            client STRING,
            origins_with_good_fid INT64,
            origins_with_good_cls INT64,
            origins_with_good_lcp INT64,
            origins_with_good_fcp INT64,
            origins_with_good_ttfb INT64,
            origins_with_good_inp INT64,
            origins_with_any_fid INT64,
            origins_with_any_cls INT64,
            origins_with_any_lcp INT64,
            origins_with_any_fcp INT64,
            origins_with_any_ttfb INT64,
            origins_with_any_inp INT64,
            origins_with_good_cwv INT64,
            origins_eligible_for_cwv INT64
          >>
        ) RETURNS ARRAY<STRUCT<
          name STRING,
          desktop STRUCT<
            good_number INT64,
            tested INT64
          >,
          mobile STRUCT<
            good_number INT64,
            tested INT64
          >
        >> LANGUAGE js AS '''
        const METRIC_MAP = {
          overall: ['origins_with_good_cwv', 'origins_eligible_for_cwv'],
          LCP: ['origins_with_good_lcp', 'origins_with_any_lcp'],
          CLS: ['origins_with_good_cls', 'origins_with_any_cls'],
          FID: ['origins_with_good_fid', 'origins_with_any_fid'],
          FCP: ['origins_with_good_fcp', 'origins_with_any_fcp'],
          TTFB: ['origins_with_good_ttfb', 'origins_with_any_ttfb'],
          INP: ['origins_with_good_inp', 'origins_with_any_inp']
        };

        // Initialize the vitals map.
        const vitals = Object.fromEntries(Object.keys(METRIC_MAP).map(metricName => {
          return [metricName, {name: metricName}];
        }));

        // Populate each client record.
        records.forEach(record => {
          Object.entries(METRIC_MAP).forEach(([metricName, [good_number, tested]]) => {
            vitals[metricName][record.client] = {good_number: record[good_number], tested: record[tested]};
          });
        });

        return Object.values(vitals);
        ''';

        SELECT
          date,
          app AS technology,
          rank,
          geo,
          GET_VITALS(ARRAY_AGG(STRUCT(
            client,
            origins_with_good_fid,
            origins_with_good_cls,
            origins_with_good_lcp,
            origins_with_good_fcp,
            origins_with_good_ttfb,
            origins_with_good_inp,
            origins_with_any_fid,
            origins_with_any_cls,
            origins_with_any_lcp,
            origins_with_any_fcp,
            origins_with_any_ttfb,
            origins_with_any_inp,
            origins_with_good_cwv,
            origins_eligible_for_cwv
          ))) AS vitals
        FROM
          `httparchive.core_web_vitals.technologies`
    """

    # Bind the dates as named query parameters instead of interpolating them
    # into the SQL text, so command-line input can never alter the statement.
    # NOTE(review): the parameters are typed DATE, which assumes the `date`
    # column is a DATE -- confirm against the table schema.
    conditions = []
    query_parameters = []
    if start_date:
        conditions.append("date >= @start_date")
        query_parameters.append(
            bigquery.ScalarQueryParameter("start_date", "DATE", start_date)
        )
    if end_date:
        conditions.append("date <= @end_date")
        query_parameters.append(
            bigquery.ScalarQueryParameter("end_date", "DATE", end_date)
        )

    # Only emit WHERE when there is something to filter on; an unconditional
    # WHERE left a dangling "WHERE GROUP BY" when no dates were supplied.
    if conditions:
        query += " WHERE " + " AND ".join(conditions)

    query += " GROUP BY date, app, rank, geo"

    job_config = bigquery.QueryJobConfig(query_parameters=query_parameters)
    results = bq_client.query(query, job_config=job_config).result()

    # Create a new Firestore document for each result row in the
    # "core_web_vitals" collection.
    collection_ref = firestore_client.collection('core_web_vitals')
    for row in results:
        item = dict(row.items())
        # Store the date as a string rather than the value BigQuery returned.
        item['date'] = str(row['date'])

        print(item)

        # document() with no argument creates a reference with an auto ID.
        collection_ref.document().set(item)

    print("Data inserted into Firestore successfully.")
125+
126+
# Run the export only when executed as a script, so importing this module
# does not trigger a BigQuery query and Firestore writes.
if __name__ == "__main__":
    # Optional positional command-line arguments: start date, then end date.
    start_date = sys.argv[1] if len(sys.argv) > 1 else None
    end_date = sys.argv[2] if len(sys.argv) > 2 else None

    # Execute the query and insert the result into Firestore.
    execute_query_and_insert_result(start_date, end_date)

0 commit comments

Comments
 (0)