Skip to content

Commit 68fce03

Browse files
Tech Report: Category total origins by client (#50)
* by client
* clients in a record
* fix aggregation
* Update definitions/output/reports/cwv_tech_categories.js

  Co-authored-by: Barry Pollard <[email protected]>
* Update definitions/output/reports/cwv_tech_categories.js

  Co-authored-by: Barry Pollard <[email protected]>
* consistent naming
* category sort
* cross-platform origins
* distinct technologies

---------

Co-authored-by: Barry Pollard <[email protected]>
1 parent 295a45d commit 68fce03

File tree

1 file changed

+38
-22
lines changed

1 file changed

+38
-22
lines changed

definitions/output/reports/cwv_tech_categories.js

Lines changed: 38 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -7,53 +7,69 @@ publish('cwv_tech_categories', {
77
}).query(ctx => `
88
/* {"dataform_trigger": "report_cwv_tech_complete", "name": "categories", "type": "dict"} */
99
WITH pages AS (
10-
SELECT
10+
SELECT DISTINCT
11+
client,
1112
root_page,
1213
technologies
1314
FROM ${ctx.ref('crawl', 'pages')}
1415
WHERE
15-
date = '${pastMonth}' AND
16-
client = 'mobile'
16+
date = '${pastMonth}'
1717
${constants.devRankFilter}
18-
), categories AS (
18+
),
19+
20+
category_descriptions AS (
1921
SELECT
2022
name AS category,
2123
description
2224
FROM ${ctx.ref('wappalyzer', 'categories')}
23-
), category_stats AS (
25+
),
26+
27+
category_stats AS (
2428
SELECT
2529
category,
26-
COUNT(DISTINCT root_page) AS origins
27-
FROM pages,
28-
UNNEST(technologies) AS t,
29-
UNNEST(t.categories) AS category
30+
STRUCT(
31+
COALESCE(MAX(IF(client = 'desktop', origins, 0))) AS desktop,
32+
COALESCE(MAX(IF(client = 'mobile', origins, 0))) AS mobile
33+
) AS origins
34+
FROM (
35+
SELECT
36+
client,
37+
category,
38+
COUNT(DISTINCT root_page) AS origins
39+
FROM pages
40+
LEFT JOIN pages.technologies AS tech
41+
LEFT JOIN tech.categories AS category
42+
GROUP BY
43+
client,
44+
category
45+
)
3046
GROUP BY category
31-
), technology_stats AS (
47+
),
48+
49+
technology_stats AS (
3250
SELECT
33-
category,
3451
technology,
35-
COUNT(DISTINCT root_page) AS origins
36-
FROM pages,
37-
UNNEST(technologies) AS t,
38-
UNNEST(t.categories) AS category
52+
category_obj AS categories,
53+
SUM(origins) AS total_origins
54+
FROM ${ctx.ref('reports', 'cwv_tech_technologies')}
3955
GROUP BY
40-
category,
41-
technology
56+
technology,
57+
categories
4258
)
4359
4460
SELECT
4561
category,
4662
description,
47-
category_stats.origins,
48-
ARRAY_AGG(technology IGNORE NULLS ORDER BY technology_stats.origins DESC) AS technologies
63+
origins,
64+
ARRAY_AGG(technology IGNORE NULLS ORDER BY technology_stats.total_origins DESC) AS technologies
4965
FROM category_stats
5066
INNER JOIN technology_stats
51-
USING (category)
52-
LEFT JOIN categories
67+
ON category_stats.category IN UNNEST(technology_stats.categories)
68+
INNER JOIN category_descriptions
5369
USING (category)
5470
GROUP BY
5571
category,
5672
description,
5773
origins
58-
ORDER BY origins DESC
74+
ORDER BY category ASC
5975
`)

0 commit comments

Comments
 (0)