|
// Snapshot date used as the "before" side of the month-over-month comparison.
// NOTE(review): fnPastMonth is defined outside this file — presumably it
// returns the crawl date one month before its argument, in a format that is
// comparable with the `date` column; confirm against the constants include.
const pastMonth = constants.fnPastMonth(constants.currentMonth)

// Publishes wappalyzer.tech_detections: one row per technology with the number
// of pages that dropped, kept or introduced it between `pastMonth` and
// `constants.currentMonth`, plus up to five sample pages for each movement.
publish('tech_detections', {
  type: 'table',
  description: 'Used in dashboard: https://lookerstudio.google.com/u/7/reporting/1jh_ScPlCIbSYTf2r2Y6EftqmX9SQy4Gn/page/p_an38lbzywc/edit',
  schema: 'wappalyzer',
  tags: ['crawl_results']
}).query(ctx => `
WITH source AS (
  SELECT DISTINCT
    date,
    root_page AS page,
    tech.technology
  FROM ${ctx.ref('crawl', 'pages')},
    UNNEST(technologies) AS tech
  WHERE date >= "${pastMonth}" ${constants.devRankFilter}
),
-- Technologies detected per page in the previous month
tech_before AS (
  SELECT
    page,
    technology
  FROM source
  WHERE date = "${pastMonth}"
),
-- Technologies detected per page in the current month
tech_current AS (
  SELECT
    page,
    technology
  FROM source
  WHERE date = "${constants.currentMonth}"
),
-- Number of pages per technology in the previous month
tech_before_summary AS (
  SELECT
    technology,
    COUNT(DISTINCT page) AS total_pages_before
  FROM tech_before
  GROUP BY technology
),
-- Pages that existed last month but introduced the technology in the current month
tech_introduced_existing_pages AS (
  SELECT
    tech_current.technology,
    COUNT(DISTINCT tech_current.page) AS total_pages_introduced_existing,
    STRING_AGG(DISTINCT tech_current.page LIMIT 5) AS sample_pages_introduced_existing
  FROM tech_current
  JOIN tech_before
    USING (page)
  LEFT JOIN tech_before AS tb
    ON tech_current.page = tb.page AND tech_current.technology = tb.technology
  WHERE tb.page IS NULL -- Technology was not detected last month
  GROUP BY tech_current.technology
),
-- Pages that were not in the dataset last month but appeared this month with the technology
tech_introduced_new_pages AS (
  SELECT
    tech_current.technology,
    COUNT(DISTINCT tech_current.page) AS total_pages_introduced_new,
    STRING_AGG(DISTINCT tech_current.page LIMIT 5) AS sample_pages_introduced_new
  FROM tech_current
  LEFT JOIN tech_before
    USING (page)
  WHERE tech_before.page IS NULL -- Page was not present last month
  GROUP BY tech_current.technology
),
-- Pages present in both months that no longer have the technology
tech_deprecated_existing_pages AS (
  SELECT
    tech_before.technology,
    COUNT(DISTINCT tech_before.page) AS total_pages_deprecated_existing,
    STRING_AGG(DISTINCT tech_before.page LIMIT 5) AS sample_pages_deprecated_existing
  FROM tech_before
  JOIN tech_current
    USING (page)
  LEFT JOIN tech_current AS tc
    ON tech_before.page = tc.page AND tech_before.technology = tc.technology
  WHERE tc.page IS NULL -- Technology is not detected in the current month
  GROUP BY tech_before.technology
),
-- Pages that had the technology last month and no longer exist in the current dataset
tech_deprecated_gone_pages AS (
  SELECT
    tech_before.technology,
    COUNT(DISTINCT tech_before.page) AS total_pages_deprecated_gone,
    STRING_AGG(DISTINCT tech_before.page LIMIT 5) AS sample_pages_deprecated_gone
  FROM tech_before
  LEFT JOIN tech_current
    USING (page)
  WHERE tech_current.page IS NULL -- Page no longer exists in current dataset
  GROUP BY tech_before.technology
),
-- Every technology that must appear in the output exactly once.
-- Joining the summaries onto this key set (instead of chaining FULL OUTER JOINs
-- on tech_before_summary.technology) keeps a technology that is new this month
-- on a single row: with the chained joins its key was NULL on the left side,
-- so the "introduced existing", "introduced new" and catalog rows never matched
-- each other and the technology was emitted several times.
all_technologies AS (
  SELECT technology FROM tech_before_summary
  UNION DISTINCT
  SELECT technology FROM tech_introduced_existing_pages
  UNION DISTINCT
  SELECT technology FROM tech_introduced_new_pages
  UNION DISTINCT
  SELECT name AS technology FROM wappalyzer.apps
)

-- Final aggregation and comparison of technology adoption/deprecation metrics
SELECT
  all_technologies.technology,

  -- Pages summary (deprecations are reported as negative numbers)
  0 - COALESCE(total_pages_deprecated_existing, 0) AS total_pages_deprecated_existing,
  0 - COALESCE(total_pages_deprecated_gone, 0) AS total_pages_deprecated_gone,

  COALESCE(total_pages_before, 0) - COALESCE(total_pages_deprecated_existing, 0) - COALESCE(total_pages_deprecated_gone, 0) AS total_pages_persisted,

  COALESCE(total_pages_introduced_existing, 0) AS total_pages_introduced_existing,
  COALESCE(total_pages_introduced_new, 0) AS total_pages_introduced_new,

  -- Sample pages
  COALESCE(sample_pages_deprecated_existing, "") AS sample_pages_deprecated_existing,
  COALESCE(sample_pages_deprecated_gone, "") AS sample_pages_deprecated_gone,

  COALESCE(sample_pages_introduced_existing, "") AS sample_pages_introduced_existing,
  COALESCE(sample_pages_introduced_new, "") AS sample_pages_introduced_new

FROM all_technologies
LEFT JOIN tech_before_summary
  ON all_technologies.technology = tech_before_summary.technology
LEFT JOIN tech_introduced_existing_pages
  ON all_technologies.technology = tech_introduced_existing_pages.technology
LEFT JOIN tech_introduced_new_pages
  ON all_technologies.technology = tech_introduced_new_pages.technology
LEFT JOIN tech_deprecated_existing_pages
  ON all_technologies.technology = tech_deprecated_existing_pages.technology
LEFT JOIN tech_deprecated_gone_pages
  ON all_technologies.technology = tech_deprecated_gone_pages.technology
ORDER BY total_pages_persisted DESC
`)
0 commit comments