Skip to content

Commit 5f0c2ed

Browse files
Tech Detections dashboard (#12)
* meta reports * format * page level changes * lint * include all_dev * ref pages * blink_features tables * features from partitions meta * files renamed * lint * cleanup * dependencies as option * migrate to crawl * update after migration * table updated * table renamed * wappalyzer joined
1 parent 12e95b1 commit 5f0c2ed

File tree

2 files changed

+128
-2
lines changed

2 files changed

+128
-2
lines changed
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// Month-over-month comparison of technology detections.
// Compares the crawl dated constants.currentMonth with the one dated pastMonth
// and reports, per technology, how many pages kept, gained, or lost it.
const pastMonth = constants.fnPastMonth(constants.currentMonth)

publish('tech_detections', {
  type: 'table',
  description: 'Used in dashboard: https://lookerstudio.google.com/u/7/reporting/1jh_ScPlCIbSYTf2r2Y6EftqmX9SQy4Gn/page/p_an38lbzywc/edit',
  schema: 'wappalyzer',
  tags: ['crawl_results']
}).query(ctx => `
WITH source AS (
  -- One row per (date, page, technology). Pages with an empty technologies
  -- array are dropped by the UNNEST join and therefore count as "not present".
  SELECT DISTINCT
    date,
    root_page AS page,
    tech.technology
  FROM ${ctx.ref('crawl', 'pages')},
    UNNEST(technologies) AS tech
  WHERE date >= "${pastMonth}" ${constants.devRankFilter}
),

-- Detections in the previous month
tech_before AS (
  SELECT
    page,
    technology
  FROM source
  WHERE date = "${pastMonth}"
),

-- Detections in the current month
tech_current AS (
  SELECT
    page,
    technology
  FROM source
  WHERE date = "${constants.currentMonth}"
),

-- Distinct pages seen in each month. Page-existence checks join against these
-- one-row-per-page sets so that a page with many technologies does not fan out
-- into (technologies before x technologies current) rows.
pages_before AS (
  SELECT DISTINCT page
  FROM tech_before
),

pages_current AS (
  SELECT DISTINCT page
  FROM tech_current
),

-- Number of pages per technology in the previous month
tech_before_summary AS (
  SELECT
    technology,
    COUNT(DISTINCT page) AS total_pages_before
  FROM tech_before
  GROUP BY technology
),

-- Pages that existed last month but introduced the technology in the current month.
-- Sample pages are an arbitrary pick of up to 5 (no ordering is applied).
tech_introduced_existing_pages AS (
  SELECT
    tech_current.technology,
    COUNT(DISTINCT tech_current.page) AS total_pages_introduced_existing,
    STRING_AGG(DISTINCT tech_current.page LIMIT 5) AS sample_pages_introduced_existing
  FROM tech_current
  INNER JOIN pages_before
    ON tech_current.page = pages_before.page
  LEFT JOIN tech_before
    ON tech_current.page = tech_before.page
    AND tech_current.technology = tech_before.technology
  WHERE tech_before.page IS NULL -- Technology was not detected last month
  GROUP BY tech_current.technology
),

-- Pages that were not in the dataset last month but appeared this month with the technology
tech_introduced_new_pages AS (
  SELECT
    tech_current.technology,
    COUNT(DISTINCT tech_current.page) AS total_pages_introduced_new,
    STRING_AGG(DISTINCT tech_current.page LIMIT 5) AS sample_pages_introduced_new
  FROM tech_current
  LEFT JOIN pages_before
    ON tech_current.page = pages_before.page
  WHERE pages_before.page IS NULL -- Page was not present last month
  GROUP BY tech_current.technology
),

-- Pages that still exist this month but no longer have the technology
tech_deprecated_existing_pages AS (
  SELECT
    tech_before.technology,
    COUNT(DISTINCT tech_before.page) AS total_pages_deprecated_existing,
    STRING_AGG(DISTINCT tech_before.page LIMIT 5) AS sample_pages_deprecated_existing
  FROM tech_before
  INNER JOIN pages_current
    ON tech_before.page = pages_current.page
  LEFT JOIN tech_current
    ON tech_before.page = tech_current.page
    AND tech_before.technology = tech_current.technology
  WHERE tech_current.page IS NULL -- Technology is not detected in the current month
  GROUP BY tech_before.technology
),

-- Pages that had the technology last month and no longer exist in the current dataset
tech_deprecated_gone_pages AS (
  SELECT
    tech_before.technology,
    COUNT(DISTINCT tech_before.page) AS total_pages_deprecated_gone,
    STRING_AGG(DISTINCT tech_before.page LIMIT 5) AS sample_pages_deprecated_gone
  FROM tech_before
  LEFT JOIN pages_current
    ON tech_before.page = pages_current.page
  WHERE pages_current.page IS NULL -- Page no longer exists in current dataset
  GROUP BY tech_before.technology
),

-- Every technology name that must appear in the output, exactly once.
-- Chaining FULL OUTER JOINs on tech_before_summary.technology alone emitted
-- separate rows for a technology that was absent last month (NULL join key),
-- so the metric CTEs are attached to this spine instead.
-- Deprecated technologies are always present in tech_before_summary.
-- NOTE(review): wappalyzer.apps is referenced by name rather than via ctx.ref,
-- so it is not declared as a dependency here - confirm this is intended.
all_technologies AS (
  SELECT technology FROM tech_before_summary
  UNION DISTINCT
  SELECT technology FROM tech_introduced_existing_pages
  UNION DISTINCT
  SELECT technology FROM tech_introduced_new_pages
  UNION DISTINCT
  SELECT name AS technology FROM wappalyzer.apps
)

-- Final aggregation and comparison of technology adoption/deprecation metrics
SELECT
  all_technologies.technology,

  -- Pages summary (losses are reported as negative numbers)
  0 - COALESCE(tech_deprecated_existing_pages.total_pages_deprecated_existing, 0) AS total_pages_deprecated_existing,
  0 - COALESCE(tech_deprecated_gone_pages.total_pages_deprecated_gone, 0) AS total_pages_deprecated_gone,

  COALESCE(tech_before_summary.total_pages_before, 0)
    - COALESCE(tech_deprecated_existing_pages.total_pages_deprecated_existing, 0)
    - COALESCE(tech_deprecated_gone_pages.total_pages_deprecated_gone, 0) AS total_pages_persisted,

  COALESCE(tech_introduced_existing_pages.total_pages_introduced_existing, 0) AS total_pages_introduced_existing,
  COALESCE(tech_introduced_new_pages.total_pages_introduced_new, 0) AS total_pages_introduced_new,

  -- Sample pages
  COALESCE(tech_deprecated_existing_pages.sample_pages_deprecated_existing, "") AS sample_pages_deprecated_existing,
  COALESCE(tech_deprecated_gone_pages.sample_pages_deprecated_gone, "") AS sample_pages_deprecated_gone,

  COALESCE(tech_introduced_existing_pages.sample_pages_introduced_existing, "") AS sample_pages_introduced_existing,
  COALESCE(tech_introduced_new_pages.sample_pages_introduced_new, "") AS sample_pages_introduced_new

FROM all_technologies
LEFT JOIN tech_before_summary
  ON all_technologies.technology = tech_before_summary.technology
LEFT JOIN tech_introduced_existing_pages
  ON all_technologies.technology = tech_introduced_existing_pages.technology
LEFT JOIN tech_introduced_new_pages
  ON all_technologies.technology = tech_introduced_new_pages.technology
LEFT JOIN tech_deprecated_existing_pages
  ON all_technologies.technology = tech_deprecated_existing_pages.technology
LEFT JOIN tech_deprecated_gone_pages
  ON all_technologies.technology = tech_deprecated_gone_pages.technology
ORDER BY total_pages_persisted DESC
`)

infra/tf/dataform.tf

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ locals {
1313
"core_web_vitals",
1414
"crawl",
1515
"sample_data",
16+
"wappalyzer",
1617
// Legacy
1718
"all",
1819
"lighthouse",
@@ -22,8 +23,6 @@ locals {
2223
"summary_pages",
2324
"summary_requests",
2425
"technologies",
25-
// Temporary
26-
"scratchspace"
2726
]
2827

2928
dataform_service_account_roles = [

0 commit comments

Comments
 (0)