Skip to content

Commit aea4218

Browse files
Fixes to CrUX pipeline (#36)
* skip null technologies
* ignore null technologies
* sql review
* updated data fixed
1 parent 2e81f77 commit aea4218

File tree

4 files changed

22 additions & 23 deletions

4 files changed

22 additions & 23 deletions

definitions/output/core_web_vitals/technologies.js

Lines changed: 19 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ crux AS (
6161
WHEN 10000 THEN 'Top 10k'
6262
WHEN 1000 THEN 'Top 1k'
6363
END AS rank,
64-
CONCAT(origin, '/') AS root_page_url,
64+
CONCAT(origin, '/') AS root_page,
6565
IF(device = 'desktop', 'desktop', 'mobile') AS client,
6666
6767
# CWV
@@ -94,9 +94,9 @@ crux AS (
9494
9595
technologies AS (
9696
SELECT
97-
technology.technology AS app,
97+
technology.technology,
9898
client,
99-
page AS url
99+
page
100100
FROM ${ctx.ref('crawl', 'pages')},
101101
UNNEST(technologies) AS technology
102102
WHERE
@@ -106,9 +106,9 @@ technologies AS (
106106
technology.technology != ''
107107
UNION ALL
108108
SELECT
109-
'ALL' AS app,
109+
'ALL' AS technology,
110110
client,
111-
page AS url
111+
page
112112
FROM ${ctx.ref('crawl', 'pages')}
113113
WHERE
114114
date = '${pastMonth}'
@@ -117,18 +117,18 @@ UNION ALL
117117
118118
categories AS (
119119
SELECT
120-
technology.technology AS app,
120+
technology.technology,
121121
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
122122
FROM ${ctx.ref('crawl', 'pages')},
123123
UNNEST(technologies) AS technology,
124124
UNNEST(technology.categories) AS category
125125
WHERE
126126
date = '${pastMonth}'
127127
${constants.devRankFilter}
128-
GROUP BY app
128+
GROUP BY technology
129129
UNION ALL
130130
SELECT
131-
'ALL' AS app,
131+
'ALL' AS technology,
132132
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
133133
FROM ${ctx.ref('crawl', 'pages')},
134134
UNNEST(technologies) AS technology,
@@ -142,8 +142,8 @@ UNION ALL
142142
summary_stats AS (
143143
SELECT
144144
client,
145-
page AS url,
146-
root_page AS root_page_url,
145+
page,
146+
root_page AS root_page,
147147
SAFE.INT64(summary.bytesTotal) AS bytesTotal,
148148
SAFE.INT64(summary.bytesJS) AS bytesJS,
149149
SAFE.INT64(summary.bytesImg) AS bytesImg,
@@ -161,8 +161,8 @@ summary_stats AS (
161161
lab_data AS (
162162
SELECT
163163
client,
164-
root_page_url,
165-
app,
164+
root_page,
165+
technology,
166166
ANY_VALUE(category) AS category,
167167
AVG(bytesTotal) AS bytesTotal,
168168
AVG(bytesJS) AS bytesJS,
@@ -174,21 +174,21 @@ lab_data AS (
174174
AVG(seo) AS seo
175175
FROM summary_stats
176176
JOIN technologies
177-
USING (client, url)
177+
USING (client, page)
178178
JOIN categories
179-
USING (app)
179+
USING (technology)
180180
GROUP BY
181181
client,
182-
root_page_url,
183-
app
182+
root_page,
183+
technology
184184
)
185185
186186
SELECT
187187
DATE('${pastMonth}') AS date,
188188
geo,
189189
rank,
190190
ANY_VALUE(category) AS category,
191-
app,
191+
technology AS app,
192192
client,
193193
COUNT(0) AS origins,
194194
@@ -226,9 +226,8 @@ SELECT
226226
SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS median_bytes_image
227227
228228
FROM lab_data
229-
JOIN crux
230-
USING
231-
(client, root_page_url)
229+
INNER JOIN crux
230+
USING (client, root_page)
232231
GROUP BY
233232
app,
234233
geo,

definitions/output/reports/cwv_tech_categories.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ technologies AS (
4040
SELECT
4141
category,
4242
categories.origins,
43-
ARRAY_AGG(technology ORDER BY technologies.origins DESC) AS technologies
43+
ARRAY_AGG(technology IGNORE NULLS ORDER BY technologies.origins DESC) AS technologies
4444
FROM categories
4545
JOIN technologies
4646
USING (category)

infra/dataform-trigger/index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ DECLARE previousMonth_YYYYMM STRING DEFAULT SUBSTR(previousMonth, 1, 6);
1212
WITH crux AS (
1313
SELECT
1414
LOGICAL_AND(total_rows > 0) AS rows_available,
15-
LOGICAL_AND(TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), last_modified_time, HOUR) < 7) AS recent_last_modified
15+
LOGICAL_OR(TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), last_modified_time, HOUR) < 8) AS recent_last_modified
1616
FROM chrome-ux-report.materialized.INFORMATION_SCHEMA.PARTITIONS
1717
WHERE table_name IN ('device_summary', 'country_summary')
1818
AND partition_id IN (previousMonth, previousMonth_YYYYMM)

infra/tf/function_dataform_trigger.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ resource "google_cloud_scheduler_job" "bq-poller-crux-ready" {
105105
paused = false
106106
project = local.project
107107
region = local.region
108-
schedule = "0 */7 8-14 * *"
108+
schedule = "0 */8 8-14 * *"
109109
time_zone = "Etc/UTC"
110110
http_target {
111111
body = base64encode(local.crux_ready_scheduler_body)

0 commit comments

Comments
 (0)