From 9d4d1a41bf896ff36475e7a904f58b6f2b7c1ac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Mon, 21 Jul 2025 09:48:46 +0200 Subject: [PATCH 01/16] Migrate 2024 sustainability queries to 2025 crawl dataset --- .../sustainability/cache_header_usage.sql | 50 ++++++ sql/2025/sustainability/cdn_adoption.sql | 31 ++++ .../sustainability/cms_bytes_per_type.sql | 162 +++++++++++++++++ .../cms_bytes_per_type_2022.sql | 163 +++++++++++++++++ .../sustainability/content-visibility.sql | 65 +++++++ .../ecommerce_bytes_per_type.sql | 167 +++++++++++++++++ .../ecommerce_bytes_per_type_2022.sql | 168 +++++++++++++++++ sql/2025/sustainability/favicons.sql | 71 ++++++++ .../global_emissions_per_page.sql | 79 ++++++++ .../global_emissions_per_page_2022.sql | 80 +++++++++ .../green_third_party_requests.sql | 135 ++++++++++++++ sql/2025/sustainability/green_web_hosting.sql | 59 ++++++ .../sustainability/page_byte_pre_type.sql | 79 ++++++++ sql/2025/sustainability/query_run_size.sql | 11 ++ sql/2025/sustainability/responsive_images.sql | 46 +++++ sql/2025/sustainability/script_count.sql | 40 +++++ .../sustainability/ssg_bytes_per_type.sql | 169 +++++++++++++++++ .../ssg_bytes_per_type_2022.sql | 170 ++++++++++++++++++ sql/2025/sustainability/stylesheet_count.sql | 42 +++++ sql/2025/sustainability/text_compression.sql | 53 ++++++ .../sustainability/unminified_css_bytes.sql | 18 ++ .../sustainability/unminified_js_bytes.sql | 18 ++ sql/2025/sustainability/unused_css_bytes.sql | 18 ++ sql/2025/sustainability/unused_js_bytes.sql | 18 ++ .../use_of_prefers_dark_mode_usage.sql | 37 ++++ .../sustainability/video_autoplay_values.sql | 29 +++ .../sustainability/video_preload_values.sql | 31 ++++ 27 files changed, 2009 insertions(+) create mode 100644 sql/2025/sustainability/cache_header_usage.sql create mode 100644 sql/2025/sustainability/cdn_adoption.sql create mode 100644 sql/2025/sustainability/cms_bytes_per_type.sql create mode 100644 
sql/2025/sustainability/cms_bytes_per_type_2022.sql
 create mode 100644 sql/2025/sustainability/content-visibility.sql
 create mode 100644 sql/2025/sustainability/ecommerce_bytes_per_type.sql
 create mode 100644 sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql
 create mode 100644 sql/2025/sustainability/favicons.sql
 create mode 100644 sql/2025/sustainability/global_emissions_per_page.sql
 create mode 100644 sql/2025/sustainability/global_emissions_per_page_2022.sql
 create mode 100644 sql/2025/sustainability/green_third_party_requests.sql
 create mode 100644 sql/2025/sustainability/green_web_hosting.sql
 create mode 100644 sql/2025/sustainability/page_byte_pre_type.sql
 create mode 100644 sql/2025/sustainability/query_run_size.sql
 create mode 100644 sql/2025/sustainability/responsive_images.sql
 create mode 100644 sql/2025/sustainability/script_count.sql
 create mode 100644 sql/2025/sustainability/ssg_bytes_per_type.sql
 create mode 100644 sql/2025/sustainability/ssg_bytes_per_type_2022.sql
 create mode 100644 sql/2025/sustainability/stylesheet_count.sql
 create mode 100644 sql/2025/sustainability/text_compression.sql
 create mode 100644 sql/2025/sustainability/unminified_css_bytes.sql
 create mode 100644 sql/2025/sustainability/unminified_js_bytes.sql
 create mode 100644 sql/2025/sustainability/unused_css_bytes.sql
 create mode 100644 sql/2025/sustainability/unused_js_bytes.sql
 create mode 100644 sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql
 create mode 100644 sql/2025/sustainability/video_autoplay_values.sql
 create mode 100644 sql/2025/sustainability/video_preload_values.sql

diff --git a/sql/2025/sustainability/cache_header_usage.sql b/sql/2025/sustainability/cache_header_usage.sql
new file mode 100644
index 00000000000..5bc07eccc3c
--- /dev/null
+++ b/sql/2025/sustainability/cache_header_usage.sql
@@ -0,0 +1,50 @@
+#standardSQL
+# Adoption of HTTP caching headers (Cache-Control, max-age, Expires) across all crawled requests, by client.
+
+SELECT
+  client,
+  COUNT(0) AS total_requests,
+
+  COUNTIF(uses_cache_control) AS total_using_cache_control,
+  COUNTIF(uses_max_age) AS total_using_max_age,
+  COUNTIF(uses_expires) AS total_using_expires,
+  COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires,
+  COUNTIF(uses_cache_control AND uses_expires) AS total_using_both_cc_and_expires,
+  COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither_cc_and_expires,
+  COUNTIF(uses_cache_control AND NOT uses_expires) AS total_using_only_cache_control,
+  COUNTIF(NOT uses_cache_control AND uses_expires) AS total_using_only_expires,
+
+  COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control,
+  COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age,
+  COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires,
+  COUNTIF(uses_max_age AND uses_expires) / COUNT(0) AS pct_using_max_age_and_expires,
+  COUNTIF(uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_both_cc_and_expires,
+  COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_neither_cc_nor_expires,
+  COUNTIF(uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_only_cache_control,
+  COUNTIF(NOT uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_only_expires
+
+FROM (
+  SELECT
+    client,
+
+    JSON_EXTRACT_SCALAR(summary, '$.resp_expires') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_expires')) != '' AS uses_expires,
+    JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control')) != '' AS uses_cache_control,
+    REGEXP_CONTAINS(JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control'), r'(?i)max-age\s*=\s*[0-9]+') AS uses_max_age,
+
+    JSON_EXTRACT_SCALAR(summary, '$.resp_etag') IS NULL OR TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')) = '' AS uses_no_etag,
+    JSON_EXTRACT_SCALAR(summary, '$.resp_etag') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')) != '' AS uses_etag,
+    JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified')) != '' AS uses_last_modified,
+
+    REGEXP_CONTAINS(TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^W/".*"') AS uses_weak_etag,
+    REGEXP_CONTAINS(TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^".*"') AS uses_strong_etag
+
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = '2025-06-01'
+)
+
+GROUP BY
+  client
+ORDER BY
+  client;
diff --git a/sql/2025/sustainability/cdn_adoption.sql b/sql/2025/sustainability/cdn_adoption.sql
new file mode 100644
index 00000000000..7123f708bbd
--- /dev/null
+++ b/sql/2025/sustainability/cdn_adoption.sql
@@ -0,0 +1,31 @@
+#standardSQL
+# Share of root pages served through each CDN ('No CDN' when the summary's cdn value is empty), by client.
+
+SELECT
+  client,
+  IF(cdn = '', 'No CDN', cdn) AS cdn,
+  COUNT(0) AS freq,
+  total,
+  COUNT(0) / total AS pct
+FROM (
+  SELECT
+    client,
+    COUNT(0) AS total,
+    ARRAY_CONCAT_AGG(SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ')) AS cdn_list
+  FROM
+    `httparchive.crawl.pages`
+  WHERE
+    date = '2025-06-01' AND
+    is_root_page = TRUE
+  GROUP BY
+    client
+),
+  UNNEST(cdn_list) AS cdn
+GROUP BY
+  client,
+  cdn,
+  total
+ORDER BY
+  pct DESC,
+  client,
+  cdn;
diff --git a/sql/2025/sustainability/cms_bytes_per_type.sql b/sql/2025/sustainability/cms_bytes_per_type.sql
new file mode 100644
index 00000000000..abd91177e58
--- /dev/null
+++ b/sql/2025/sustainability/cms_bytes_per_type.sql
@@ -0,0 +1,162 @@
+#standardSQL
+# Median resource weights by CMS
+
+# Declare the per-gigabyte intensity factors and the grid carbon intensity used to estimate the carbon emissions of a page
+# Source: https://sustainablewebdesign.org/calculating-digital-emissions/
+# The implementation below does not make the assumptions about returning visitors or caching that are present in the Sustainable Web Design model.
+
+# Units, as described by the Sustainable Web Design source above (confirm when the model is updated):
+# grid_intensity is gCO2e per kWh; every *_emissions_* factor is an energy intensity in kWh per GB.
+DECLARE grid_intensity NUMERIC DEFAULT 494;
+DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012;
+DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013;
+DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081;
+DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055;
+DECLARE operational_emissions_network NUMERIC DEFAULT 0.059;
+DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080;
+
+WITH page_bytes AS (
+  -- One row per root page and detected CMS. The summary JSON is parsed once here
+  -- so the formulas in cms_data work on plain columns instead of repeated casts.
+  SELECT
+    client,
+    page,
+    tech.technology AS cms,
+    CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytes_total,
+    CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytes_html,
+    CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytes_js,
+    CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) AS bytes_css,
+    CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytes_img,
+    CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytes_font,
+
+    -- Page weight in GB (1024-based), the unit the intensity factors are applied to
+    CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024 AS total_gb,
+
+    -- Emissions of one transferred GB summed over every segment (operational + embodied)
+    (
+      operational_emissions_data_centers * grid_intensity +
+      operational_emissions_network * grid_intensity +
+      operational_emissions_user_devices * grid_intensity +
+      embodied_emissions_data_centers * grid_intensity +
+      embodied_emissions_network * grid_intensity +
+      embodied_emissions_user_devices * grid_intensity
+    ) AS emissions_per_gb
+  FROM
+    `httparchive.crawl.pages`,
+    UNNEST(technologies) AS tech
+  WHERE
+    date = '2025-06-01' AND
+    is_root_page = TRUE AND
+    'CMS' IN UNNEST(tech.categories)
+),
+
+cms_data AS (
+  SELECT
+    client,
+    page,
+    cms,
+    bytes_total / 1024 AS total_kb,
+
+    -- Operational emissions calculations
+    total_gb * operational_emissions_data_centers * grid_intensity AS op_emissions_dc,
+    total_gb * operational_emissions_network * grid_intensity AS op_emissions_networks,
+    total_gb * operational_emissions_user_devices * grid_intensity AS op_emissions_devices,
+
+    -- Embodied emissions calculations
+    total_gb * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc,
+    total_gb * embodied_emissions_network * grid_intensity AS em_emissions_networks,
+    total_gb * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices,
+
+    -- Total emissions (operational, embodied, and both combined)
+    (
+      total_gb * operational_emissions_data_centers * grid_intensity +
+      total_gb * operational_emissions_network * grid_intensity +
+      total_gb * operational_emissions_user_devices * grid_intensity
+    ) AS total_operational_emissions,
+
+    (
+      total_gb * embodied_emissions_data_centers * grid_intensity +
+      total_gb * embodied_emissions_network * grid_intensity +
+      total_gb * embodied_emissions_user_devices * grid_intensity
+    ) AS total_embodied_emissions,
+
+    (
+      total_gb * operational_emissions_data_centers * grid_intensity +
+      total_gb * operational_emissions_network * grid_intensity +
+      total_gb * operational_emissions_user_devices * grid_intensity +
+      total_gb * embodied_emissions_data_centers * grid_intensity +
+      total_gb * embodied_emissions_network * grid_intensity +
+      total_gb * embodied_emissions_user_devices * grid_intensity
+    ) AS total_emissions,
+
+    -- Proportions of each resource type relative to total bytes.
+    -- SAFE_DIVIDE yields NULL instead of failing the query when bytesTotal is 0.
+    SAFE_DIVIDE(bytes_html, bytes_total) AS html_proportion,
+    SAFE_DIVIDE(bytes_js, bytes_total) AS js_proportion,
+    SAFE_DIVIDE(bytes_css, bytes_total) AS css_proportion,
+    SAFE_DIVIDE(bytes_img, bytes_total) AS img_proportion,
+    SAFE_DIVIDE(bytes_font, bytes_total) AS font_proportion,
+
+    -- Resource-specific emissions: the resource's share of the page's total emissions
+    SAFE_DIVIDE(bytes_html, bytes_total) * (total_gb * emissions_per_gb) AS total_html_emissions,
+    SAFE_DIVIDE(bytes_js, bytes_total) * (total_gb * emissions_per_gb) AS total_js_emissions,
+    SAFE_DIVIDE(bytes_css, bytes_total) * (total_gb * emissions_per_gb) AS total_css_emissions,
+    SAFE_DIVIDE(bytes_img, bytes_total) * (total_gb * emissions_per_gb) AS total_img_emissions,
+    SAFE_DIVIDE(bytes_font, bytes_total) * (total_gb * emissions_per_gb) AS total_font_emissions,
+
+    -- Resource-specific size in KB
+    bytes_html / 1024 AS html_kb,
+    bytes_js / 1024 AS js_kb,
+    bytes_css / 1024 AS css_kb,
+    bytes_img / 1024 AS img_kb,
+    bytes_font / 1024 AS font_kb
+  FROM
+    page_bytes
+)
+
+SELECT
+  client,
+  cms,
+  COUNT(0) AS pages,
+  -- Median resource weights and emissions
+  APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb,
+  APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions,
+  APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions,
+  APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions,
+
+  -- Resource-specific medians
+  APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb,
+  APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions,
+  APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb,
+  APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions,
+  APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb,
+  APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions,
+  APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb,
+  APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions,
+  APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb,
+  APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions
+FROM
+  cms_data
+GROUP BY
+  client,
+  cms
+ORDER BY
+  pages DESC,
+  cms,
+  client;
diff --git a/sql/2025/sustainability/cms_bytes_per_type_2022.sql b/sql/2025/sustainability/cms_bytes_per_type_2022.sql
new file mode 100644
index 00000000000..ddcb70ba716
--- /dev/null
+++ b/sql/2025/sustainability/cms_bytes_per_type_2022.sql
@@ -0,0 +1,163 @@
+#standardSQL
+# Copied from cms_bytes_per_type.sql
+# Median resource weights by CMS
+
+# Declare the per-gigabyte intensity factors and the grid carbon intensity used to estimate the carbon emissions of a page
+# Source: https://sustainablewebdesign.org/calculating-digital-emissions/
+# The implementation below does not make the assumptions about returning visitors or caching that are present in the Sustainable Web Design model.
+
+# Units, as described by the Sustainable Web Design source above (confirm when the model is updated):
+# grid_intensity is gCO2e per kWh; every *_emissions_* factor is an energy intensity in kWh per GB.
+DECLARE grid_intensity NUMERIC DEFAULT 494;
+DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012;
+DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013;
+DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081;
+DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055;
+DECLARE operational_emissions_network NUMERIC DEFAULT 0.059;
+DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080;
+
+WITH page_bytes AS (
+  -- One row per root page and detected CMS in the 2022 crawl. The summary JSON is parsed
+  -- once here so the formulas in cms_data work on plain columns instead of repeated casts.
+  SELECT
+    client,
+    page,
+    tech.technology AS cms,
+    CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytes_total,
+    CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytes_html,
+    CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytes_js,
+    CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) AS bytes_css,
+    CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytes_img,
+    CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytes_font,
+
+    -- Page weight in GB (1024-based), the unit the intensity factors are applied to
+    CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024 AS total_gb,
+
+    -- Emissions of one transferred GB summed over every segment (operational + embodied)
+    (
+      operational_emissions_data_centers * grid_intensity +
+      operational_emissions_network * grid_intensity +
+      operational_emissions_user_devices * grid_intensity +
+      embodied_emissions_data_centers * grid_intensity +
+      embodied_emissions_network * grid_intensity +
+      embodied_emissions_user_devices * grid_intensity
+    ) AS emissions_per_gb
+  FROM
+    `httparchive.crawl.pages`,
+    UNNEST(technologies) AS tech
+  WHERE
+    date = '2022-06-01' AND
+    is_root_page = TRUE AND
+    'CMS' IN UNNEST(tech.categories)
+),
+
+cms_data AS (
+  SELECT
+    client,
+    page,
+    cms,
+    bytes_total / 1024 AS total_kb,
+
+    -- Operational emissions calculations
+    total_gb * operational_emissions_data_centers * grid_intensity AS op_emissions_dc,
+    total_gb * operational_emissions_network * grid_intensity AS op_emissions_networks,
+    total_gb * operational_emissions_user_devices * grid_intensity AS op_emissions_devices,
+
+    -- Embodied emissions calculations
+    total_gb * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc,
+    total_gb * embodied_emissions_network * grid_intensity AS em_emissions_networks,
+    total_gb * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices,
+
+    -- Total emissions (operational, embodied, and both combined)
+    (
+      total_gb * operational_emissions_data_centers * grid_intensity +
+      total_gb * operational_emissions_network * grid_intensity +
+      total_gb * operational_emissions_user_devices * grid_intensity
+    ) AS total_operational_emissions,
+
+    (
+      total_gb * embodied_emissions_data_centers * grid_intensity +
+      total_gb * embodied_emissions_network * grid_intensity +
+      total_gb * embodied_emissions_user_devices * grid_intensity
+    ) AS total_embodied_emissions,
+
+    (
+      total_gb * operational_emissions_data_centers * grid_intensity +
+      total_gb * operational_emissions_network * grid_intensity +
+      total_gb * operational_emissions_user_devices * grid_intensity +
+      total_gb * embodied_emissions_data_centers * grid_intensity +
+      total_gb * embodied_emissions_network * grid_intensity +
+      total_gb * embodied_emissions_user_devices * grid_intensity
+    ) AS total_emissions,
+
+    -- Proportions of each resource type relative to total bytes.
+    -- SAFE_DIVIDE yields NULL instead of failing the query when bytesTotal is 0.
+    SAFE_DIVIDE(bytes_html, bytes_total) AS html_proportion,
+    SAFE_DIVIDE(bytes_js, bytes_total) AS js_proportion,
+    SAFE_DIVIDE(bytes_css, bytes_total) AS css_proportion,
+    SAFE_DIVIDE(bytes_img, bytes_total) AS img_proportion,
+    SAFE_DIVIDE(bytes_font, bytes_total) AS font_proportion,
+
+    -- Resource-specific emissions: the resource's share of the page's total emissions
+    SAFE_DIVIDE(bytes_html, bytes_total) * (total_gb * emissions_per_gb) AS total_html_emissions,
+    SAFE_DIVIDE(bytes_js, bytes_total) * (total_gb * emissions_per_gb) AS total_js_emissions,
+    SAFE_DIVIDE(bytes_css, bytes_total) * (total_gb * emissions_per_gb) AS total_css_emissions,
+    SAFE_DIVIDE(bytes_img, bytes_total) * (total_gb * emissions_per_gb) AS total_img_emissions,
+    SAFE_DIVIDE(bytes_font, bytes_total) * (total_gb * emissions_per_gb) AS total_font_emissions,
+
+    -- Resource-specific size in KB
+    bytes_html / 1024 AS html_kb,
+    bytes_js / 1024 AS js_kb,
+    bytes_css / 1024 AS css_kb,
+    bytes_img / 1024 AS img_kb,
+    bytes_font / 1024 AS font_kb
+  FROM
+    page_bytes
+)
+
+SELECT
+  client,
+  cms,
+  COUNT(0) AS pages,
+  -- Median resource weights and emissions
+  APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb,
+  APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions,
+  APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions,
+  APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions,
+
+  -- Resource-specific medians
+  APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb,
+  APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions,
+  APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb,
+  APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions,
+  APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb,
+  APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions,
+  APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb,
+  APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions,
+  APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb,
+  APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions
+FROM
+  cms_data
+GROUP BY
+  client,
+  cms
+ORDER BY
+  pages DESC,
+  cms,
+  client;
diff --git a/sql/2025/sustainability/content-visibility.sql b/sql/2025/sustainability/content-visibility.sql
new file mode 100644
index 00000000000..65c69831f58
--- /dev/null
+++ b/sql/2025/sustainability/content-visibility.sql
@@ -0,0 +1,65 @@
+#standardSQL
+CREATE TEMPORARY FUNCTION hasContentVisibility(css STRING)
+RETURNS ARRAY<STRUCT<property STRING, freq INT64>>
+LANGUAGE js
+OPTIONS (library = "gs://httparchive/lib/css-utils.js")
+AS '''
+try {
+  var ast = JSON.parse(css);
+
+  let ret = {};
+
+  walkDeclarations(ast, ({property}) => {
+    // Lowercase first (so Content-Visibility still matches), then strip hacks like *property, _property etc
+    property = property.toLowerCase().replace(/[^a-z-]/g, "");
+
+    if (matches(property, 'content-visibility')) {
+      incrementByKey(ret, property);
+    }
+  });
+
+  return Object.entries(ret).map(([property, freq]) => {
+    return {property, freq};
+  });
+} catch (e) {
+  return [];
+}
+''';
+
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(DISTINCT root_page) AS total_pages
+  FROM
+    `httparchive.crawl.parsed_css`
+  WHERE
+    date = '2025-06-01' AND
+    is_root_page
+  GROUP BY
+    client
+),
+content_visibility_pages AS (
+  SELECT
+    client,
+    COUNT(DISTINCT root_page) AS pages_with_content_visibility
+  FROM
+    `httparchive.crawl.parsed_css`,
+    UNNEST (hasContentVisibility(css))
+  WHERE
+    date = '2025-06-01' AND
+    is_root_page
+  GROUP BY
+    client
+)
+SELECT
+  totals.client,
+  IFNULL(content_visibility_pages.pages_with_content_visibility, 0) AS pages_with_content_visibility,
+  totals.total_pages,
+  ROUND(IFNULL(content_visibility_pages.pages_with_content_visibility, 0) * 100.0 / totals.total_pages, 2) AS pct_pages
+FROM
+  totals
+LEFT JOIN
+  content_visibility_pages
+USING (client)
+ORDER BY
+  totals.client
diff --git a/sql/2025/sustainability/ecommerce_bytes_per_type.sql b/sql/2025/sustainability/ecommerce_bytes_per_type.sql
new file mode 100644
index 00000000000..152ee7db82b
--- /dev/null
+++ b/sql/2025/sustainability/ecommerce_bytes_per_type.sql
@@ -0,0 +1,167 @@
+#standardSQL
+# Median resource weights by ecommerce platform with detailed CO2e breakdown
+# Source: https://sustainablewebdesign.org/calculating-digital-emissions/
+# Declare the per-gigabyte energy intensities (kWh/GB) and the grid carbon intensity used to estimate emissions
+
+# NOTE(review): grid_intensity is presumably gCO2e per kWh, as in the source above - confirm when the model is updated.
+DECLARE grid_intensity NUMERIC DEFAULT 494;
+DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012;
+DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013;
+DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081;
+DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055;
+DECLARE operational_emissions_network NUMERIC DEFAULT 0.059;
+DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080;
+
+WITH page_bytes AS (
+  -- One row per root page and detected ecommerce platform. The summary JSON is parsed once
+  -- here so the formulas in ecommerce_data work on plain columns instead of repeated casts.
+  SELECT
+    client,
+    page,
+    tech.technology AS ecommerce,
+    CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytes_total,
+    CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytes_html,
+    CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytes_js,
+    CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) AS bytes_css,
+    CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytes_img,
+    CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytes_font,
+
+    -- Page weight in GB (1024-based), the unit the intensity factors are applied to
+    CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024 AS total_gb,
+
+    -- Emissions of one transferred GB summed over every segment (operational + embodied)
+    (
+      operational_emissions_data_centers * grid_intensity +
+      operational_emissions_network * grid_intensity +
+      operational_emissions_user_devices * grid_intensity +
+      embodied_emissions_data_centers * grid_intensity +
+      embodied_emissions_network * grid_intensity +
+      embodied_emissions_user_devices * grid_intensity
+    ) AS emissions_per_gb
+  FROM
+    `httparchive.crawl.pages`,
+    UNNEST(technologies) AS tech
+  WHERE
+    date = '2025-06-01' AND
+    is_root_page = TRUE AND
+    -- Keep Ecommerce technologies, minus the two entries excluded by name below
+    'Ecommerce' IN UNNEST(tech.categories) AND
+    tech.technology NOT IN ('Cart Functionality', 'Google Analytics Enhanced eCommerce')
+),
+
+ecommerce_data AS (
+  SELECT
+    client,
+    page,
+    ecommerce,
+    bytes_total / 1024 AS total_kb,
+
+    -- Operational emissions calculations
+    total_gb * operational_emissions_data_centers * grid_intensity AS op_emissions_dc,
+    total_gb * operational_emissions_network * grid_intensity AS op_emissions_networks,
+    total_gb * operational_emissions_user_devices * grid_intensity AS op_emissions_devices,
+
+    -- Embodied emissions calculations
+    total_gb * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc,
+    total_gb * embodied_emissions_network * grid_intensity AS em_emissions_networks,
+    total_gb * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices,
+
+    -- Total emissions (operational, embodied, and both combined)
+    (
+      total_gb * operational_emissions_data_centers * grid_intensity +
+      total_gb * operational_emissions_network * grid_intensity +
+      total_gb * operational_emissions_user_devices * grid_intensity
+    ) AS total_operational_emissions,
+
+    (
+      total_gb * embodied_emissions_data_centers * grid_intensity +
+      total_gb * embodied_emissions_network * grid_intensity +
+      total_gb * embodied_emissions_user_devices * grid_intensity
+    ) AS total_embodied_emissions,
+
+    (
+      total_gb * operational_emissions_data_centers * grid_intensity +
+      total_gb * operational_emissions_network * grid_intensity +
+      total_gb * operational_emissions_user_devices * grid_intensity +
+      total_gb * embodied_emissions_data_centers * grid_intensity +
+      total_gb * embodied_emissions_network * grid_intensity +
+      total_gb * embodied_emissions_user_devices * grid_intensity
+    ) AS total_emissions,
+
+    -- Proportions of each resource type relative to total bytes.
+    -- SAFE_DIVIDE yields NULL instead of failing the query when bytesTotal is 0.
+    SAFE_DIVIDE(bytes_html, bytes_total) AS html_proportion,
+    SAFE_DIVIDE(bytes_js, bytes_total) AS js_proportion,
+    SAFE_DIVIDE(bytes_css, bytes_total) AS css_proportion,
+    SAFE_DIVIDE(bytes_img, bytes_total) AS img_proportion,
+    SAFE_DIVIDE(bytes_font, bytes_total) AS font_proportion,
+
+    -- Resource-specific emissions: the resource's share of the page's total emissions
+    SAFE_DIVIDE(bytes_html, bytes_total) * (total_gb * emissions_per_gb) AS total_html_emissions,
+    SAFE_DIVIDE(bytes_js, bytes_total) * (total_gb * emissions_per_gb) AS total_js_emissions,
+    SAFE_DIVIDE(bytes_css, bytes_total) * (total_gb * emissions_per_gb) AS total_css_emissions,
+    SAFE_DIVIDE(bytes_img, bytes_total) * (total_gb * emissions_per_gb) AS total_img_emissions,
+    SAFE_DIVIDE(bytes_font, bytes_total) * (total_gb * emissions_per_gb) AS total_font_emissions,
+
+    -- Resource-specific size in KB
+    bytes_html / 1024 AS html_kb,
+    bytes_js / 1024 AS js_kb,
+    bytes_css / 1024 AS css_kb,
+    bytes_img / 1024 AS img_kb,
+    bytes_font / 1024 AS font_kb
+  FROM
+    page_bytes
+)
+
+SELECT
+  client,
+  ecommerce,
+  COUNT(0) AS pages,
+
+  -- Median resource weights and emissions
+  APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb,
+  APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions,
+  APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions,
+  APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions,
+
+  -- Resource-specific medians
+  APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb,
+  APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions,
+  APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb,
+  APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions,
+  APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb,
+  APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions,
+  APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb,
+  APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions,
+  APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb,
+  APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions
+FROM
+  ecommerce_data
+GROUP BY
+  client,
+  ecommerce
+ORDER BY
+  pages DESC,
+  ecommerce,
+  client;
diff --git a/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql b/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql
new file mode 100644
index 00000000000..8e73857c90d
--- /dev/null
+++ 
b/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql @@ -0,0 +1,168 @@ +#standardSQL +# Copied from ecommerce_bytes_per_type.sql +# Median resource weights by ecommerce platform with detailed CO2e breakdown +# Source: https://sustainablewebdesign.org/calculating-digital-emissions/ +# Declare variables to calculate the carbon emissions per gigabyte (kWh/GB) + +DECLARE grid_intensity NUMERIC DEFAULT 494; +DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012; +DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013; +DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081; +DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055; +DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; +DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; + +WITH ecommerce_data AS ( + SELECT + client, + page, + tech.technology AS ecommerce, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + + -- Operational emissions calculations + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices, + + -- Embodied emissions calculations + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, + + -- Total emissions (operational + embodied) + ( + 
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, + + ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, + + ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, + + -- Proportions of each resource type relative to total bytes (SAFE_DIVIDE returns NULL instead of failing the query when bytesTotal is 0) + SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) AS html_proportion, + SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) AS js_proportion, + SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS
INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) AS css_proportion, + SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) AS img_proportion, + SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) AS font_proportion, + + -- Resource-specific emissions calculations + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) +
)) AS total_css_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, + + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2022-06-01' AND + is_root_page = TRUE AND + EXISTS ( + SELECT 1 + FROM UNNEST(tech.categories) AS category + WHERE category = 'Ecommerce' AND + tech.technology NOT IN ('Cart Functionality', 'Google Analytics Enhanced eCommerce') + ) +) + +SELECT + client, + ecommerce, + COUNT(0) AS pages, + + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS 
median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, + + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions +FROM + ecommerce_data +GROUP BY + client, + ecommerce +ORDER BY + pages DESC, + ecommerce, + client; \ No newline at end of file diff --git a/sql/2025/sustainability/favicons.sql b/sql/2025/sustainability/favicons.sql new file mode 100644 index 00000000000..f0455aa4182 --- /dev/null +++ b/sql/2025/sustainability/favicons.sql @@ -0,0 +1,71 @@ +#standardSQL +# Temporary function to extract favicon image extensions from the JSON payload +CREATE TEMPORARY FUNCTION getFaviconImage(payload STRING) +RETURNS STRING LANGUAGE js AS ''' +var result = 'NO_DATA'; +try { + var almanac = JSON.parse(payload); + + if (Array.isArray(almanac) || typeof almanac != 'object') return result; + + if (almanac["link-nodes"] && almanac["link-nodes"].nodes && almanac["link-nodes"].nodes.find) { + var faviconNode = almanac["link-nodes"].nodes.find(n => n.rel && n.rel.split(' ').find(r => r.trim().toLowerCase() == 
'icon')); + + if (faviconNode) { + if (faviconNode.href) { + var temp = faviconNode.href; + + if (temp.includes('?')) { + temp = temp.substring(0, temp.indexOf('?')); + } + + if (temp.includes('.')) { + temp = temp.substring(temp.lastIndexOf('.')+1); + + result = temp.toLowerCase().trim(); + } + else { + result = "NO_EXTENSION"; + } + + } else { + result = "NO_HREF"; + } + } else { + result = "NO_ICON"; + } + } + else { + result = "NO_DATA"; + } + +} catch (e) {} +return result; +'''; + +# Main query to analyze favicon image extensions with sampling +WITH favicons AS ( + SELECT + client, + getFaviconImage(JSON_EXTRACT_SCALAR(payload, '$._almanac')) AS image_type_extension, + COUNT(0) AS freq, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS percentage_of_total + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' + GROUP BY + client, + image_type_extension +) + +SELECT + *, + percentage_of_total AS pct +FROM + favicons +ORDER BY + pct DESC +LIMIT + 1000; \ No newline at end of file diff --git a/sql/2025/sustainability/global_emissions_per_page.sql b/sql/2025/sustainability/global_emissions_per_page.sql new file mode 100644 index 00000000000..07171b4ae0a --- /dev/null +++ b/sql/2025/sustainability/global_emissions_per_page.sql @@ -0,0 +1,79 @@ +#standardSQL +# The distribution of page weight by resource type and client, with updated SWDM v4 methodology including both operational and embodied emissions + +-- Energy consumption factors from SWDM v4 (in kWh/GB) +DECLARE energy_per_GB_datacenter NUMERIC DEFAULT CAST(0.055 + 0.012 AS NUMERIC); -- Operational + Embodied +DECLARE energy_per_GB_network NUMERIC DEFAULT CAST(0.059 + 0.013 AS NUMERIC); -- Operational + Embodied +DECLARE energy_per_GB_device NUMERIC DEFAULT CAST(0.080 + 0.081 AS NUMERIC); -- Operational + Embodied + +-- Total energy consumption per GB, calculated by summing the above factors +DECLARE kw_per_GB NUMERIC DEFAULT 
CAST(energy_per_GB_datacenter + energy_per_GB_network + energy_per_GB_device AS NUMERIC); -- Sum of all operational and embodied energies + +-- Global average carbon intensity of electricity generation (gCO2/kWh) +DECLARE global_grid_intensity NUMERIC DEFAULT 494; + +-- Function to calculate emissions in gCO2 +CREATE TEMP FUNCTION calculate_emissions( + bytes FLOAT64, + kw_per_GB FLOAT64, + grid_intensity FLOAT64 +) RETURNS FLOAT64 AS ( + (bytes / 1024 / 1024 / 1024) * -- Convert bytes to GB + (kw_per_GB) * + grid_intensity +); + +WITH page_data AS ( + SELECT + client, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytesTotal, + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytesHtml, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytesJS, + CAST(COALESCE(JSON_VALUE(summary, '$.bytesCss'), JSON_VALUE(summary, '$.bytesStyle')) AS INT64) AS bytesCSS, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytesImg, + CAST(JSON_VALUE(summary, '$.bytesOther') AS INT64) AS bytesOther, + CAST(JSON_VALUE(summary, '$.bytesHtmlDoc') AS INT64) AS bytesHtmlDoc, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytesFont + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' AND is_root_page +) + +SELECT + percentile, + client, + -- For each resource type, calculate the size in KB and the associated emissions + -- Total resources + APPROX_QUANTILES(bytesTotal / 1024, 1000)[OFFSET(percentile * 10)] AS total_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS total_emissions, + -- HTML resources + APPROX_QUANTILES(bytesHtml / 1024, 1000)[OFFSET(percentile * 10)] AS html_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_emissions, + -- JavaScript resources + APPROX_QUANTILES(bytesJS / 1024, 1000)[OFFSET(percentile * 10)] AS js_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesJS, 
kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS js_emissions, + -- CSS resources + APPROX_QUANTILES(bytesCSS / 1024, 1000)[OFFSET(percentile * 10)] AS css_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS css_emissions, + -- Image resources + APPROX_QUANTILES(bytesImg / 1024, 1000)[OFFSET(percentile * 10)] AS img_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS img_emissions, + -- Other resources + APPROX_QUANTILES(bytesOther / 1024, 1000)[OFFSET(percentile * 10)] AS other_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS other_emissions, + -- HTML document + APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000)[OFFSET(percentile * 10)] AS html_doc_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_doc_emissions, + -- Font resources + APPROX_QUANTILES(bytesFont / 1024, 1000)[OFFSET(percentile * 10)] AS font_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS font_emissions +FROM + page_data, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile +GROUP BY + percentile, + client +ORDER BY + client, + percentile \ No newline at end of file diff --git a/sql/2025/sustainability/global_emissions_per_page_2022.sql b/sql/2025/sustainability/global_emissions_per_page_2022.sql new file mode 100644 index 00000000000..24822fe6542 --- /dev/null +++ b/sql/2025/sustainability/global_emissions_per_page_2022.sql @@ -0,0 +1,80 @@ +#standardSQL +# Copied global_emissions_per_page.sql +# The distribution of page weight by resource type and client, with updated SWDM v4 methodology including both operational and embodied emissions + +-- Energy consumption factors from SWDM v4 (in kWh/GB) +DECLARE 
energy_per_GB_datacenter NUMERIC DEFAULT CAST(0.055 + 0.012 AS NUMERIC); -- Operational + Embodied +DECLARE energy_per_GB_network NUMERIC DEFAULT CAST(0.059 + 0.013 AS NUMERIC); -- Operational + Embodied +DECLARE energy_per_GB_device NUMERIC DEFAULT CAST(0.080 + 0.081 AS NUMERIC); -- Operational + Embodied + +-- Total energy consumption per GB, calculated by summing the above factors +DECLARE kw_per_GB NUMERIC DEFAULT CAST(energy_per_GB_datacenter + energy_per_GB_network + energy_per_GB_device AS NUMERIC); -- Sum of all operational and embodied energies + +-- Global average carbon intensity of electricity generation (gCO2/kWh) +DECLARE global_grid_intensity NUMERIC DEFAULT 494; + +-- Function to calculate emissions in gCO2 +CREATE TEMP FUNCTION calculate_emissions( + bytes FLOAT64, + kw_per_GB FLOAT64, + grid_intensity FLOAT64 +) RETURNS FLOAT64 AS ( + (bytes / 1024 / 1024 / 1024) * -- Convert bytes to GB + (kw_per_GB) * + grid_intensity +); + +WITH page_data AS ( + SELECT + client, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytesTotal, + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytesHtml, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytesJS, + CAST(COALESCE(JSON_VALUE(summary, '$.bytesCss'), JSON_VALUE(summary, '$.bytesStyle')) AS INT64) AS bytesCSS, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytesImg, + CAST(JSON_VALUE(summary, '$.bytesOther') AS INT64) AS bytesOther, + CAST(JSON_VALUE(summary, '$.bytesHtmlDoc') AS INT64) AS bytesHtmlDoc, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytesFont + FROM + `httparchive.crawl.pages` + WHERE + date = '2022-06-01' AND is_root_page +) + +SELECT + percentile, + client, + -- For each resource type, calculate the size in KB and the associated emissions + -- Total resources + APPROX_QUANTILES(bytesTotal / 1024, 1000)[OFFSET(percentile * 10)] AS total_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] 
AS total_emissions, + -- HTML resources + APPROX_QUANTILES(bytesHtml / 1024, 1000)[OFFSET(percentile * 10)] AS html_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_emissions, + -- JavaScript resources + APPROX_QUANTILES(bytesJS / 1024, 1000)[OFFSET(percentile * 10)] AS js_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS js_emissions, + -- CSS resources + APPROX_QUANTILES(bytesCSS / 1024, 1000)[OFFSET(percentile * 10)] AS css_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS css_emissions, + -- Image resources + APPROX_QUANTILES(bytesImg / 1024, 1000)[OFFSET(percentile * 10)] AS img_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS img_emissions, + -- Other resources + APPROX_QUANTILES(bytesOther / 1024, 1000)[OFFSET(percentile * 10)] AS other_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS other_emissions, + -- HTML document + APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000)[OFFSET(percentile * 10)] AS html_doc_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_doc_emissions, + -- Font resources + APPROX_QUANTILES(bytesFont / 1024, 1000)[OFFSET(percentile * 10)] AS font_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS font_emissions +FROM + page_data, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile +GROUP BY + percentile, + client +ORDER BY + client, + percentile \ No newline at end of file diff --git a/sql/2025/sustainability/green_third_party_requests.sql b/sql/2025/sustainability/green_third_party_requests.sql new file 
mode 100644 index 00000000000..e20aed40183 --- /dev/null +++ b/sql/2025/sustainability/green_third_party_requests.sql @@ -0,0 +1,135 @@ +#standardSQL +# Median number of third-parties & green third-party requests per websites by rank + +WITH requests AS ( + SELECT + client, + CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page, + url + FROM + `httparchive.crawl.requests` + WHERE + date = '2025-06-01' +), + +green AS ( + SELECT + NET.HOST(url) AS host, + TRUE AS is_green + FROM + `httparchive.almanac.green_web_foundation` + WHERE + date = '2025-09-01' +), + +pages AS ( + SELECT + client, + CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page, + rank + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' +), + +third_party AS ( + SELECT + domain, + COUNT(DISTINCT page) AS page_usage + FROM + `httparchive.almanac.third_parties` tp + JOIN + requests r + ON NET.HOST(r.url) = NET.HOST(tp.domain) + WHERE + date = '2025-06-01' AND + category NOT IN ('hosting') + GROUP BY + domain + HAVING + page_usage >= 50 +), + +green_tp AS ( + SELECT + domain + FROM + `httparchive.almanac.third_parties` tp + JOIN + green g + ON NET.HOST(g.host) = NET.HOST(tp.domain) + WHERE + date = '2025-06-01' AND + category NOT IN ('hosting') + GROUP BY + domain +), + +base AS ( + SELECT + client, + page, + rank, + COUNT(domain) AS third_parties_per_page + FROM + requests + LEFT JOIN + third_party + ON + NET.HOST(requests.url) = NET.HOST(third_party.domain) + INNER JOIN + pages + USING (client, page) + GROUP BY + client, + page, + rank +), + +base_green AS ( + SELECT + client, + page, + rank, + COUNT(domain) AS green_third_parties_per_page + FROM + requests + LEFT JOIN + green_tp + ON + NET.HOST(requests.url) = NET.HOST(green_tp.domain) + INNER JOIN + pages + USING (client, page) + GROUP BY + client, + page, + rank +) + +SELECT + client, + rank_grouping, + CASE + WHEN rank_grouping = 0 THEN '' + WHEN rank_grouping = 100000000 THEN 'all' + ELSE FORMAT("%'d", rank_grouping) + END AS 
ranking, + APPROX_QUANTILES(third_parties_per_page, 1000)[OFFSET(500)] AS p50_third_parties_per_page, + APPROX_QUANTILES(green_third_parties_per_page, 1000)[OFFSET(500)] AS p50_green_third_parties_per_page, + APPROX_QUANTILES(SAFE_DIVIDE(green_third_parties_per_page, third_parties_per_page), 1000)[OFFSET(500)] AS pct_green +FROM + base, + UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping +JOIN + base_green +USING (client, page, rank) +WHERE + rank <= rank_grouping +GROUP BY + client, + rank_grouping +ORDER BY + client, + rank_grouping \ No newline at end of file diff --git a/sql/2025/sustainability/green_web_hosting.sql b/sql/2025/sustainability/green_web_hosting.sql new file mode 100644 index 00000000000..41d8355c6fa --- /dev/null +++ b/sql/2025/sustainability/green_web_hosting.sql @@ -0,0 +1,59 @@ +# standardSQL +# What percentage of URLs are hosted on a known green web hosting provider? + +WITH green AS ( + SELECT + NET.HOST(url) AS host, + TRUE AS is_green + FROM + `httparchive.almanac.green_web_foundation` + WHERE + date = '2025-09-01' +), + +pages AS ( + SELECT + client, + NET.HOST(root_page) AS host, + rank + FROM + `httparchive.crawl.pages` + WHERE + is_root_page = TRUE AND + date = '2025-06-01' +) + +-- Apply rank grouping +SELECT + client, + rank_grouping, + CASE + WHEN rank_grouping = 0 THEN '' + WHEN rank_grouping = 100000000 THEN 'all' + ELSE FORMAT("%'d", rank_grouping) + END AS ranking, + COUNTIF(is_green) AS total_green, + COUNT(0) AS total_sites, + SAFE_DIVIDE(COUNTIF(is_green), COUNT(0)) AS pct_green +FROM ( + -- Left join green hosting information + SELECT + p.client, + p.host, + p.rank, + g.is_green + FROM + pages p + LEFT JOIN + green g + USING (host) +), + UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping +WHERE + rank <= rank_grouping +GROUP BY + client, + rank_grouping +ORDER BY + client, + rank_grouping; \ No newline at end of file diff --git 
a/sql/2025/sustainability/page_byte_pre_type.sql b/sql/2025/sustainability/page_byte_pre_type.sql new file mode 100644 index 00000000000..00eed369428 --- /dev/null +++ b/sql/2025/sustainability/page_byte_pre_type.sql @@ -0,0 +1,79 @@ +#standardSQL +# The distribution of page weight by resource type and client, with updated SWDM v4 methodology + +-- Energy consumption factors from SWDM v4 (in kWh/GB; 1 TWh/ZB = 0.001 kWh/GB). Only kw_per_GB below is passed to calculate_emissions. +DECLARE energy_per_GB_datacenter NUMERIC DEFAULT 0.05482041587901701; # 290 TWh / 5.29 ZB +DECLARE energy_per_GB_network NUMERIC DEFAULT 0.05859598853868195; # 310 TWh / 5.29 ZB +DECLARE energy_per_GB_device NUMERIC DEFAULT 0.07956802188162324; # 421 TWh / 5.29 ZB + +-- Total energy consumption per GB, calculated by summing the above factors and converting to kWh/GB +DECLARE kw_per_GB NUMERIC DEFAULT 0.19300566251415094; # (290 + 310 + 421) TWh / 5.29 ZB * 1000000000 kWh/TWh / 1000000000000 GB/ZB + +-- Global average carbon intensity of electricity generation (gCO2/kWh) +DECLARE global_grid_intensity NUMERIC DEFAULT 494; + +-- Function to calculate emissions in gCO2 +CREATE TEMP FUNCTION calculate_emissions( + bytes FLOAT64, + kw_per_GB FLOAT64, + grid_intensity FLOAT64 +) RETURNS FLOAT64 AS ( + (bytes / 1024 / 1024 / 1024) * -- Convert bytes to GB + (kw_per_GB) * + grid_intensity +); + +WITH page_data AS ( + SELECT + client, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytesTotal, + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytesHtml, + CAST(COALESCE(JSON_VALUE(summary, '$.bytesCss'), JSON_VALUE(summary, '$.bytesStyle')) AS INT64) AS bytesCSS, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytesJS, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytesImg, + CAST(JSON_VALUE(summary, '$.bytesOther') AS INT64) AS bytesOther, + CAST(JSON_VALUE(summary, '$.bytesHtmlDoc') AS INT64) AS bytesHtmlDoc, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytesFont + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' AND is_root_page
+) + +SELECT + percentile, + client, + -- For each resource type, calculate the size in KB and the associated emissions + -- Total resources + APPROX_QUANTILES(bytesTotal / 1024, 1000)[OFFSET(percentile * 10)] AS total_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS total_emissions, + -- HTML resources + APPROX_QUANTILES(bytesHtml / 1024, 1000)[OFFSET(percentile * 10)] AS html_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_emissions, + -- JavaScript resources + APPROX_QUANTILES(bytesJS / 1024, 1000)[OFFSET(percentile * 10)] AS js_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS js_emissions, + -- CSS resources + APPROX_QUANTILES(bytesCSS / 1024, 1000)[OFFSET(percentile * 10)] AS css_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS css_emissions, + -- Image resources + APPROX_QUANTILES(bytesImg / 1024, 1000)[OFFSET(percentile * 10)] AS img_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS img_emissions, + -- Other resources + APPROX_QUANTILES(bytesOther / 1024, 1000)[OFFSET(percentile * 10)] AS other_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS other_emissions, + -- HTML document + APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000)[OFFSET(percentile * 10)] AS html_doc_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_doc_emissions, + -- Font resources + APPROX_QUANTILES(bytesFont / 1024, 1000)[OFFSET(percentile * 10)] AS font_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 
1000)[OFFSET(percentile * 10)] AS font_emissions +FROM + page_data, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile +GROUP BY + percentile, + client +ORDER BY + client, + percentile \ No newline at end of file diff --git a/sql/2025/sustainability/query_run_size.sql b/sql/2025/sustainability/query_run_size.sql new file mode 100644 index 00000000000..4a0a5a1f8e5 --- /dev/null +++ b/sql/2025/sustainability/query_run_size.sql @@ -0,0 +1,11 @@ +# standardSQL +# Monthly query run size average (MB) and total (TB) +# (0.012+0.013+0.081+0.055+0.059+0.080) x 494 x [Total TB] x 1024 = Total g CO2e (divide by 1000 for kg CO2e) + +SELECT + AVG(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) / 1048576 AS avg_size_MB, + SUM(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) / 1099511627776 AS total_size_TB +FROM + `httparchive.crawl.pages` +WHERE + date = '2025-06-01' \ No newline at end of file diff --git a/sql/2025/sustainability/responsive_images.sql b/sql/2025/sustainability/responsive_images.sql new file mode 100644 index 00000000000..1fbec57408e --- /dev/null +++ b/sql/2025/sustainability/responsive_images.sql @@ -0,0 +1,46 @@ +#standardSQL +# percent of sites using images with srcset w/wo sizes, or picture element +CREATE TEMPORARY FUNCTION get_media_info(media_string STRING) +RETURNS STRUCT< + num_srcset_all INT64, + num_srcset_sizes INT64, + num_picture_img INT64 +> LANGUAGE js AS ''' +var result = { + num_srcset_all: 0, + num_srcset_sizes: 0, + num_picture_img: 0 +}; +try { + var media = JSON.parse(media_string); + if (Array.isArray(media) || typeof media != 'object') return result; + result.num_srcset_all = media.num_srcset_all || 0; + result.num_srcset_sizes = media.num_srcset_sizes || 0; + result.num_picture_img = media.num_picture_img || 0; +} catch (e) {} +return result; +'''; + +WITH page_data AS ( + SELECT + client, + get_media_info(JSON_EXTRACT_SCALAR(payload, '$._media')) AS media_info + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' AND is_root_page +) + +SELECT +
client, + ROUND(SAFE_DIVIDE(COUNTIF(media_info.num_srcset_all > 0), COUNT(0)) * 100, 2) AS pages_with_srcset_pct, + ROUND(SAFE_DIVIDE(COUNTIF(media_info.num_srcset_sizes > 0), COUNT(0)) * 100, 2) AS pages_with_srcset_sizes_pct, + ROUND(SAFE_DIVIDE((COUNTIF(media_info.num_srcset_all > 0) - COUNTIF(media_info.num_srcset_sizes > 0)), COUNT(0)) * 100, 2) AS pages_with_srcset_wo_sizes_pct, + ROUND(SAFE_DIVIDE(SUM(media_info.num_srcset_sizes), SUM(media_info.num_srcset_all)) * 100, 2) AS instances_of_srcset_sizes_pct, + ROUND(SAFE_DIVIDE((SUM(media_info.num_srcset_all) - SUM(media_info.num_srcset_sizes)), SUM(media_info.num_srcset_all)) * 100, 2) AS instances_of_srcset_wo_sizes_pct, + ROUND(SAFE_DIVIDE(COUNTIF(media_info.num_picture_img > 0), COUNT(0)) * 100, 2) AS pages_with_picture_pct +FROM page_data +GROUP BY + client +ORDER BY + client \ No newline at end of file diff --git a/sql/2025/sustainability/script_count.sql b/sql/2025/sustainability/script_count.sql new file mode 100644 index 00000000000..d4f5260821e --- /dev/null +++ b/sql/2025/sustainability/script_count.sql @@ -0,0 +1,40 @@ +#standardSQL +# Breakdown of inline vs external scripts +WITH script_data AS ( + SELECT + client, + page, + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.total') AS INT64) AS total_scripts, + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.inline') AS INT64) AS inline_scripts, + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.src') AS INT64) AS external_scripts, + SAFE_DIVIDE( + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.inline') AS INT64), + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.total') AS INT64) + ) AS pct_inline_script, + SAFE_DIVIDE( + 
CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.src') AS INT64), + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.total') AS INT64) + ) AS pct_external_script + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' AND + JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.total') IS NOT NULL +) + +SELECT + client, + COUNT(DISTINCT page) AS pages_analyzed, + SUM(total_scripts) AS total_scripts, + SUM(inline_scripts) AS inline_scripts, + SUM(external_scripts) AS external_scripts, + SAFE_DIVIDE(SUM(external_scripts), SUM(total_scripts)) AS pct_external_script, + SAFE_DIVIDE(SUM(inline_scripts), SUM(total_scripts)) AS pct_inline_script, + APPROX_QUANTILES(SAFE_DIVIDE(external_scripts, total_scripts), 1000)[OFFSET(500)] AS median_external, + APPROX_QUANTILES(SAFE_DIVIDE(inline_scripts, total_scripts), 1000)[OFFSET(500)] AS median_inline +FROM + script_data +GROUP BY + client +ORDER BY + client; \ No newline at end of file diff --git a/sql/2025/sustainability/ssg_bytes_per_type.sql b/sql/2025/sustainability/ssg_bytes_per_type.sql new file mode 100644 index 00000000000..d83c8549a63 --- /dev/null +++ b/sql/2025/sustainability/ssg_bytes_per_type.sql @@ -0,0 +1,169 @@ +#standardSQL + +# Median resource weights by static site generator with detailed CO2e breakdown +# Source: https://sustainablewebdesign.org/calculating-digital-emissions/ +# Declare variables to calculate the carbon emissions per gigabyte (kWh/GB) + +DECLARE grid_intensity NUMERIC DEFAULT 494; +DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012; +DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013; +DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081; +DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055; +DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; +DECLARE 
operational_emissions_user_devices NUMERIC DEFAULT 0.080; + +WITH ssg_data AS ( + SELECT + client, + page, + tech.technology AS ssg, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + + -- Operational emissions calculations + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices, + + -- Embodied emissions calculations + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, + + -- Total emissions (operational + embodied) + ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, + + ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 
1024) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, + + ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, + + -- Proportions of each resource type relative to total bytes + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, + + -- Resource-specific emissions calculations + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + 
embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, 
'$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, + + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2025-06-01' AND + is_root_page = TRUE AND + EXISTS ( + SELECT 1 + FROM UNNEST(tech.categories) AS category + WHERE LOWER(category) = 'static site generator' OR + tech.technology IN ('Next.js', 'Nuxt.js') + ) +) + +SELECT + client, + ssg, + COUNT(0) AS pages, + + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, + + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS 
median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions + +FROM + ssg_data +GROUP BY + client, + ssg +ORDER BY + pages DESC, + ssg, + client; \ No newline at end of file diff --git a/sql/2025/sustainability/ssg_bytes_per_type_2022.sql b/sql/2025/sustainability/ssg_bytes_per_type_2022.sql new file mode 100644 index 00000000000..99ff0f16838 --- /dev/null +++ b/sql/2025/sustainability/ssg_bytes_per_type_2022.sql @@ -0,0 +1,170 @@ +#standardSQL + +# Copied from ssg_bytes_per_type.sql +# Median resource weights by static site generator with detailed CO2e breakdown +# Source: https://sustainablewebdesign.org/calculating-digital-emissions/ +# Declare variables to calculate the carbon emissions per gigabyte (kWh/GB) + +DECLARE grid_intensity NUMERIC DEFAULT 494; +DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012; +DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013; +DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081; +DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055; +DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; +DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; + +WITH ssg_data AS ( + SELECT + client, + page, + tech.technology AS ssg, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + + -- Operational emissions calculations + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * 
operational_emissions_user_devices * grid_intensity AS op_emissions_devices, + + -- Embodied emissions calculations + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, + + -- Total emissions (operational + embodied) + ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, + + ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, + + ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + 
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, + + -- Proportions of each resource type relative to total bytes + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, + + -- Resource-specific emissions calculations + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + 
embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, + + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + CAST(JSON_VALUE(summary, '$.bytesCss') 
AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2022-06-01' AND + is_root_page = TRUE AND + EXISTS ( + SELECT 1 + FROM UNNEST(tech.categories) AS category + WHERE LOWER(category) = 'static site generator' OR + tech.technology IN ('Next.js', 'Nuxt.js') + ) +) + +SELECT + client, + ssg, + COUNT(0) AS pages, + + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, + + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions + +FROM + ssg_data +GROUP BY + client, + ssg +ORDER BY + pages DESC, + ssg, + client; \ No newline at end of file diff --git a/sql/2025/sustainability/stylesheet_count.sql b/sql/2025/sustainability/stylesheet_count.sql new file mode 100644 index 00000000000..dde471da2cd --- /dev/null +++ 
b/sql/2025/sustainability/stylesheet_count.sql @@ -0,0 +1,42 @@ +#standardSQL +# Breakdown of inline vs external stylesheets +WITH stylesheet_data AS ( + SELECT + client, + page, + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) AS external_stylesheets, + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) AS inline_stylesheets, + SAFE_DIVIDE( + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64), + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) + + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) + ) AS pct_inline_stylesheets, + SAFE_DIVIDE( + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64), + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) + + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) + ) AS pct_external_stylesheets + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' + AND + is_root_page = TRUE AND + JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') IS NOT NULL +) + +SELECT + client, + COUNT(DISTINCT page) AS pages_analyzed, + SUM(external_stylesheets) AS external_stylesheets, + SUM(inline_stylesheets) AS inline_stylesheets, + SAFE_DIVIDE(SUM(inline_stylesheets), SUM(inline_stylesheets + external_stylesheets)) AS pct_inline_stylesheets, + SAFE_DIVIDE(SUM(external_stylesheets), SUM(inline_stylesheets + external_stylesheets)) AS pct_external_stylesheets, +
APPROX_QUANTILES(SAFE_DIVIDE(inline_stylesheets, inline_stylesheets + external_stylesheets), 1000)[OFFSET(500)] AS median_inline_stylesheets, + APPROX_QUANTILES(SAFE_DIVIDE(external_stylesheets, inline_stylesheets + external_stylesheets), 1000)[OFFSET(500)] AS median_external_stylesheets +FROM + stylesheet_data +GROUP BY + client +ORDER BY + client; \ No newline at end of file diff --git a/sql/2025/sustainability/text_compression.sql b/sql/2025/sustainability/text_compression.sql new file mode 100644 index 00000000000..f4559675463 --- /dev/null +++ b/sql/2025/sustainability/text_compression.sql @@ -0,0 +1,53 @@ +CREATE TEMP FUNCTION getContentEncoding(headers STRING) +RETURNS STRING +LANGUAGE js AS """ + try { + const parsedHeaders = JSON.parse(headers); + for (let i = 0; i < parsedHeaders.length; i++) { + if (parsedHeaders[i].name.toLowerCase() === 'content-encoding') { + return parsedHeaders[i].value.toLowerCase(); + } + } + } catch (e) {} + return null; +"""; + +WITH request_data AS ( + SELECT + client, + getContentEncoding(JSON_EXTRACT(payload, '$.response.headers')) AS resp_content_encoding + FROM + `httparchive.crawl.requests` + WHERE + date = '2025-06-01' +), + +compression_data AS ( + SELECT + client, + CASE + WHEN resp_content_encoding = 'gzip' THEN 'Gzip' + WHEN resp_content_encoding = 'br' THEN 'Brotli' + WHEN resp_content_encoding IS NULL THEN 'no text compression' + ELSE 'other' + END AS compression_type, + COUNT(0) AS num_requests, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total, + ROUND(COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) * 100, 2) AS pct + FROM + request_data + GROUP BY + client, + compression_type +) + +SELECT + client, + compression_type, + num_requests, + total, + pct +FROM compression_data +ORDER BY + client, + num_requests DESC \ No newline at end of file diff --git a/sql/2025/sustainability/unminified_css_bytes.sql b/sql/2025/sustainability/unminified_css_bytes.sql new file mode 100644 index 00000000000..e662388ad82 --- 
/dev/null +++ b/sql/2025/sustainability/unminified_css_bytes.sql @@ -0,0 +1,18 @@ +#standardSQL +# Distribution of unminified CSS request bytes per page + +SELECT + client, + percentile, + APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.minify-css.details.overallSavingsBytes') AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS css_kilobytes +FROM + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile +WHERE + date = '2025-06-01' +GROUP BY + client, + percentile +ORDER BY + client, + percentile \ No newline at end of file diff --git a/sql/2025/sustainability/unminified_js_bytes.sql b/sql/2025/sustainability/unminified_js_bytes.sql new file mode 100644 index 00000000000..03d7a31540a --- /dev/null +++ b/sql/2025/sustainability/unminified_js_bytes.sql @@ -0,0 +1,18 @@ +#standardSQL +# Distribution of unminified JS request bytes per page + +SELECT + client, + percentile, + APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.minify-javascript.details.overallSavingsBytes') AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS js_kilobytes +FROM + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile +WHERE + date = '2025-06-01' +GROUP BY + client, + percentile +ORDER BY + client, + percentile \ No newline at end of file diff --git a/sql/2025/sustainability/unused_css_bytes.sql b/sql/2025/sustainability/unused_css_bytes.sql new file mode 100644 index 00000000000..cf2b27ed407 --- /dev/null +++ b/sql/2025/sustainability/unused_css_bytes.sql @@ -0,0 +1,18 @@ +#standardSQL +# Distribution of unused CSS request bytes per page + +SELECT + client, + percentile, + APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.unused-css-rules.details.overallSavingsBytes') AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS css_kilobytes +FROM + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile +WHERE + date = '2025-06-01' +GROUP BY + client, + percentile +ORDER BY + client, + percentile \ No newline at end 
of file diff --git a/sql/2025/sustainability/unused_js_bytes.sql b/sql/2025/sustainability/unused_js_bytes.sql new file mode 100644 index 00000000000..a46a72d9a32 --- /dev/null +++ b/sql/2025/sustainability/unused_js_bytes.sql @@ -0,0 +1,18 @@ +#standardSQL +# Distribution of unused JS request bytes per page + +SELECT + client, + percentile, + APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.unused-javascript.details.overallSavingsBytes') AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS js_kilobytes +FROM + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile +WHERE + date = '2025-06-01' +GROUP BY + client, + percentile +ORDER BY + client, + percentile \ No newline at end of file diff --git a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql new file mode 100644 index 00000000000..53b9727d5b7 --- /dev/null +++ b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql @@ -0,0 +1,37 @@ +#standardSQL +# The distribution of websites by client that use the prefers-color-scheme:dark media query. 
+
+WITH combined_data AS (
+  SELECT
+    client,
+    page,
+    -- TRUE when at least one stylesheet row of the page has a dark-scheme media rule
+    LOGICAL_OR(
+      EXISTS (
+        SELECT 1
+        FROM UNNEST(JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules')) AS rule
+        WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND
+          REGEXP_CONTAINS(JSON_EXTRACT_SCALAR(rule, '$.media'), r'(?i)prefers-color-scheme\s*:\s*dark')
+      )
+    ) AS is_dark_mode_page
+  FROM
+    `httparchive.crawl.parsed_css`
+  WHERE
+    date = '2025-06-01'
+  -- Collapse to one row per page so a page with several matching rows is counted once
+  GROUP BY
+    client,
+    page
+)
+
+SELECT
+  client,
+  COUNT(0) AS total_pages,
+  COUNTIF(is_dark_mode_page) AS pages_using_dark_mode,
+  COUNTIF(is_dark_mode_page) / COUNT(0) * 100 AS percentage_of_pages
+FROM
+  combined_data
+GROUP BY
+  client
+ORDER BY
+  percentage_of_pages DESC, client;
\ No newline at end of file
diff --git a/sql/2025/sustainability/video_autoplay_values.sql b/sql/2025/sustainability/video_autoplay_values.sql
new file mode 100644
index 00000000000..680a22e4039
--- /dev/null
+++ b/sql/2025/sustainability/video_autoplay_values.sql
@@ -0,0 +1,28 @@
+WITH video_data AS (
+  SELECT
+    client,
+    LOWER(IFNULL(JSON_EXTRACT_SCALAR(video_nodes, '$.autoplay'), '(autoplay not used)')) AS autoplay_value
+  FROM
+    `httparchive.crawl.pages`,
+    UNNEST(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.videos.nodes')) AS video_nodes
+  WHERE
+    date = '2025-06-01' AND -- Updated date
+    is_root_page
+)
+
+SELECT
+  client,
+  IF(autoplay_value = '', '(empty)', autoplay_value) AS autoplay_value,
+  COUNT(0) AS autoplay_value_count,
+  SUM(COUNT(0)) OVER (PARTITION BY client) AS total_videos,
+  ROUND(SAFE_DIVIDE(COUNT(0), SUM(COUNT(0)) OVER (PARTITION BY client)) * 100, 2) AS autoplay_value_pct
+FROM
+  video_data
+GROUP BY
+  client,
+  autoplay_value
+QUALIFY
+  autoplay_value_count > 10
+ORDER BY
+  client,
+  autoplay_value_count DESC
\ No newline at end of file
diff --git
a/sql/2025/sustainability/video_preload_values.sql b/sql/2025/sustainability/video_preload_values.sql new file mode 100644 index 00000000000..1191a7cddfb --- /dev/null +++ b/sql/2025/sustainability/video_preload_values.sql @@ -0,0 +1,31 @@ +WITH video_data AS ( + SELECT + date, + client, + LOWER(IFNULL(JSON_EXTRACT_SCALAR(video_nodes, '$.preload'), '(preload not used)')) AS preload_value + FROM + `httparchive.crawl.pages`, + UNNEST(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.videos.nodes')) AS video_nodes + WHERE + date IN ('2025-06-01', '2024-07-01') AND -- Updated dates + is_root_page +) + +SELECT + date, + client, + IF(preload_value = '', '(empty)', preload_value) AS preload_value, + COUNT(0) AS preload_value_count, + SAFE_DIVIDE(COUNT(0), SUM(COUNT(0)) OVER (PARTITION BY date, client)) AS preload_value_pct +FROM + video_data +GROUP BY + date, + client, + preload_value +QUALIFY + preload_value_count > 10 +ORDER BY + date, + client, + preload_value_count DESC \ No newline at end of file From e0dd613900403463ee23bbb9ff98acc70ad8e5b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Mon, 21 Jul 2025 22:01:28 +0200 Subject: [PATCH 02/16] Fix linter errors --- .../sustainability/cms_bytes_per_type.sql | 30 ++++++++-------- .../cms_bytes_per_type_2022.sql | 30 ++++++++-------- .../ecommerce_bytes_per_type.sql | 30 ++++++++-------- .../ecommerce_bytes_per_type_2022.sql | 30 ++++++++-------- .../global_emissions_per_page.sql | 34 +++++++++---------- .../global_emissions_per_page_2022.sql | 34 +++++++++---------- .../green_third_party_requests.sql | 8 ++--- .../sustainability/page_byte_pre_type.sql | 34 +++++++++---------- sql/2025/sustainability/script_count.sql | 6 ++-- .../sustainability/ssg_bytes_per_type.sql | 30 ++++++++-------- .../ssg_bytes_per_type_2022.sql | 30 ++++++++-------- sql/2025/sustainability/stylesheet_count.sql | 6 ++-- .../sustainability/unminified_css_bytes.sql | 4 +-- 
.../sustainability/unminified_js_bytes.sql | 4 +-- sql/2025/sustainability/unused_css_bytes.sql | 4 +-- sql/2025/sustainability/unused_js_bytes.sql | 4 +-- 16 files changed, 159 insertions(+), 159 deletions(-) diff --git a/sql/2025/sustainability/cms_bytes_per_type.sql b/sql/2025/sustainability/cms_bytes_per_type.sql index abd91177e58..ba698179877 100644 --- a/sql/2025/sustainability/cms_bytes_per_type.sql +++ b/sql/2025/sustainability/cms_bytes_per_type.sql @@ -135,22 +135,22 @@ SELECT cms, COUNT(0) AS pages, -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, - 
APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions FROM cms_data GROUP BY @@ -159,4 +159,4 @@ GROUP BY ORDER BY pages DESC, cms, - client; \ No newline at end of file + client; diff --git a/sql/2025/sustainability/cms_bytes_per_type_2022.sql b/sql/2025/sustainability/cms_bytes_per_type_2022.sql index ddcb70ba716..488a738893a 100644 --- a/sql/2025/sustainability/cms_bytes_per_type_2022.sql +++ b/sql/2025/sustainability/cms_bytes_per_type_2022.sql @@ -136,22 +136,22 @@ SELECT cms, COUNT(0) AS pages, -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, + 
APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions FROM cms_data GROUP BY @@ -160,4 +160,4 @@ GROUP BY ORDER BY pages DESC, cms, - client; \ No newline at end of file + client; diff --git a/sql/2025/sustainability/ecommerce_bytes_per_type.sql b/sql/2025/sustainability/ecommerce_bytes_per_type.sql index 152ee7db82b..03bdaa059cb 100644 --- 
a/sql/2025/sustainability/ecommerce_bytes_per_type.sql +++ b/sql/2025/sustainability/ecommerce_bytes_per_type.sql @@ -140,22 +140,22 @@ SELECT COUNT(0) AS pages, -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS 
median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions FROM ecommerce_data GROUP BY @@ -164,4 +164,4 @@ GROUP BY ORDER BY pages DESC, ecommerce, - client; \ No newline at end of file + client; diff --git a/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql b/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql index 8e73857c90d..eb0cdac3b97 100644 --- a/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql +++ b/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql @@ -141,22 +141,22 @@ SELECT COUNT(0) AS pages, -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, - 
APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions FROM ecommerce_data GROUP BY @@ -165,4 +165,4 @@ GROUP BY ORDER BY pages DESC, ecommerce, - client; \ No newline at end of file + client; diff --git a/sql/2025/sustainability/global_emissions_per_page.sql b/sql/2025/sustainability/global_emissions_per_page.sql index 07171b4ae0a..afbcd5c3966 100644 --- a/sql/2025/sustainability/global_emissions_per_page.sql +++ b/sql/2025/sustainability/global_emissions_per_page.sql @@ -45,29 +45,29 @@ SELECT client, -- For each resource type, calculate the size in KB and the associated emissions -- Total resources - APPROX_QUANTILES(bytesTotal / 1024, 1000)[OFFSET(percentile * 10)] AS total_kbytes, - 
APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS total_emissions, + APPROX_QUANTILES(bytesTotal / 1024, 1000) [OFFSET(percentile * 10)] AS total_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS total_emissions, -- HTML resources - APPROX_QUANTILES(bytesHtml / 1024, 1000)[OFFSET(percentile * 10)] AS html_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_emissions, + APPROX_QUANTILES(bytesHtml / 1024, 1000) [OFFSET(percentile * 10)] AS html_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_emissions, -- JavaScript resources - APPROX_QUANTILES(bytesJS / 1024, 1000)[OFFSET(percentile * 10)] AS js_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS js_emissions, + APPROX_QUANTILES(bytesJS / 1024, 1000) [OFFSET(percentile * 10)] AS js_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS js_emissions, -- CSS resources - APPROX_QUANTILES(bytesCSS / 1024, 1000)[OFFSET(percentile * 10)] AS css_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS css_emissions, + APPROX_QUANTILES(bytesCSS / 1024, 1000) [OFFSET(percentile * 10)] AS css_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS css_emissions, -- Image resources - APPROX_QUANTILES(bytesImg / 1024, 1000)[OFFSET(percentile * 10)] AS img_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS img_emissions, + APPROX_QUANTILES(bytesImg / 1024, 1000) 
[OFFSET(percentile * 10)] AS img_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS img_emissions, -- Other resources - APPROX_QUANTILES(bytesOther / 1024, 1000)[OFFSET(percentile * 10)] AS other_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS other_emissions, + APPROX_QUANTILES(bytesOther / 1024, 1000) [OFFSET(percentile * 10)] AS other_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS other_emissions, -- HTML document - APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000)[OFFSET(percentile * 10)] AS html_doc_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_doc_emissions, + APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000) [OFFSET(percentile * 10)] AS html_doc_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_doc_emissions, -- Font resources - APPROX_QUANTILES(bytesFont / 1024, 1000)[OFFSET(percentile * 10)] AS font_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS font_emissions + APPROX_QUANTILES(bytesFont / 1024, 1000) [OFFSET(percentile * 10)] AS font_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS font_emissions FROM page_data, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile @@ -76,4 +76,4 @@ GROUP BY client ORDER BY client, - percentile \ No newline at end of file + percentile diff --git a/sql/2025/sustainability/global_emissions_per_page_2022.sql b/sql/2025/sustainability/global_emissions_per_page_2022.sql index 24822fe6542..2a2b3f1c7a9 100644 --- a/sql/2025/sustainability/global_emissions_per_page_2022.sql +++ 
b/sql/2025/sustainability/global_emissions_per_page_2022.sql @@ -46,29 +46,29 @@ SELECT client, -- For each resource type, calculate the size in KB and the associated emissions -- Total resources - APPROX_QUANTILES(bytesTotal / 1024, 1000)[OFFSET(percentile * 10)] AS total_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS total_emissions, + APPROX_QUANTILES(bytesTotal / 1024, 1000) [OFFSET(percentile * 10)] AS total_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS total_emissions, -- HTML resources - APPROX_QUANTILES(bytesHtml / 1024, 1000)[OFFSET(percentile * 10)] AS html_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_emissions, + APPROX_QUANTILES(bytesHtml / 1024, 1000) [OFFSET(percentile * 10)] AS html_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_emissions, -- JavaScript resources - APPROX_QUANTILES(bytesJS / 1024, 1000)[OFFSET(percentile * 10)] AS js_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS js_emissions, + APPROX_QUANTILES(bytesJS / 1024, 1000) [OFFSET(percentile * 10)] AS js_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS js_emissions, -- CSS resources - APPROX_QUANTILES(bytesCSS / 1024, 1000)[OFFSET(percentile * 10)] AS css_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS css_emissions, + APPROX_QUANTILES(bytesCSS / 1024, 1000) [OFFSET(percentile * 10)] AS css_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS css_emissions, 
-- Image resources - APPROX_QUANTILES(bytesImg / 1024, 1000)[OFFSET(percentile * 10)] AS img_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS img_emissions, + APPROX_QUANTILES(bytesImg / 1024, 1000) [OFFSET(percentile * 10)] AS img_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS img_emissions, -- Other resources - APPROX_QUANTILES(bytesOther / 1024, 1000)[OFFSET(percentile * 10)] AS other_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS other_emissions, + APPROX_QUANTILES(bytesOther / 1024, 1000) [OFFSET(percentile * 10)] AS other_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS other_emissions, -- HTML document - APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000)[OFFSET(percentile * 10)] AS html_doc_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_doc_emissions, + APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000) [OFFSET(percentile * 10)] AS html_doc_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_doc_emissions, -- Font resources - APPROX_QUANTILES(bytesFont / 1024, 1000)[OFFSET(percentile * 10)] AS font_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS font_emissions + APPROX_QUANTILES(bytesFont / 1024, 1000) [OFFSET(percentile * 10)] AS font_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS font_emissions FROM page_data, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile @@ -77,4 +77,4 @@ GROUP BY client ORDER BY client, - percentile \ No newline 
at end of file + percentile diff --git a/sql/2025/sustainability/green_third_party_requests.sql b/sql/2025/sustainability/green_third_party_requests.sql index e20aed40183..b177acba29f 100644 --- a/sql/2025/sustainability/green_third_party_requests.sql +++ b/sql/2025/sustainability/green_third_party_requests.sql @@ -116,9 +116,9 @@ SELECT WHEN rank_grouping = 100000000 THEN 'all' ELSE FORMAT("%'d", rank_grouping) END AS ranking, - APPROX_QUANTILES(third_parties_per_page, 1000)[OFFSET(500)] AS p50_third_parties_per_page, - APPROX_QUANTILES(green_third_parties_per_page, 1000)[OFFSET(500)] AS p50_green_third_parties_per_page, - APPROX_QUANTILES(SAFE_DIVIDE(green_third_parties_per_page, third_parties_per_page), 1000)[OFFSET(500)] AS pct_green + APPROX_QUANTILES(third_parties_per_page, 1000) [OFFSET(500)] AS p50_third_parties_per_page, + APPROX_QUANTILES(green_third_parties_per_page, 1000) [OFFSET(500)] AS p50_green_third_parties_per_page, + APPROX_QUANTILES(SAFE_DIVIDE(green_third_parties_per_page, third_parties_per_page), 1000) [OFFSET(500)] AS pct_green FROM base, UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping @@ -132,4 +132,4 @@ GROUP BY rank_grouping ORDER BY client, - rank_grouping \ No newline at end of file + rank_grouping diff --git a/sql/2025/sustainability/page_byte_pre_type.sql b/sql/2025/sustainability/page_byte_pre_type.sql index 00eed369428..846aaf2e506 100644 --- a/sql/2025/sustainability/page_byte_pre_type.sql +++ b/sql/2025/sustainability/page_byte_pre_type.sql @@ -45,29 +45,29 @@ SELECT client, -- For each resource type, calculate the size in KB and the associated emissions -- Total resources - APPROX_QUANTILES(bytesTotal / 1024, 1000)[OFFSET(percentile * 10)] AS total_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS total_emissions, + APPROX_QUANTILES(bytesTotal / 1024, 1000) [OFFSET(percentile * 10)] AS total_kbytes, + 
APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS total_emissions, -- HTML resources - APPROX_QUANTILES(bytesHtml / 1024, 1000)[OFFSET(percentile * 10)] AS html_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_emissions, + APPROX_QUANTILES(bytesHtml / 1024, 1000) [OFFSET(percentile * 10)] AS html_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_emissions, -- JavaScript resources - APPROX_QUANTILES(bytesJS / 1024, 1000)[OFFSET(percentile * 10)] AS js_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS js_emissions, + APPROX_QUANTILES(bytesJS / 1024, 1000) [OFFSET(percentile * 10)] AS js_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS js_emissions, -- CSS resources - APPROX_QUANTILES(bytesCSS / 1024, 1000)[OFFSET(percentile * 10)] AS css_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS css_emissions, + APPROX_QUANTILES(bytesCSS / 1024, 1000) [OFFSET(percentile * 10)] AS css_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS css_emissions, -- Image resources - APPROX_QUANTILES(bytesImg / 1024, 1000)[OFFSET(percentile * 10)] AS img_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS img_emissions, + APPROX_QUANTILES(bytesImg / 1024, 1000) [OFFSET(percentile * 10)] AS img_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS img_emissions, -- Other resources - APPROX_QUANTILES(bytesOther / 1024, 
1000)[OFFSET(percentile * 10)] AS other_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS other_emissions, + APPROX_QUANTILES(bytesOther / 1024, 1000) [OFFSET(percentile * 10)] AS other_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS other_emissions, -- HTML document - APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000)[OFFSET(percentile * 10)] AS html_doc_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_doc_emissions, + APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000) [OFFSET(percentile * 10)] AS html_doc_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_doc_emissions, -- Font resources - APPROX_QUANTILES(bytesFont / 1024, 1000)[OFFSET(percentile * 10)] AS font_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS font_emissions + APPROX_QUANTILES(bytesFont / 1024, 1000) [OFFSET(percentile * 10)] AS font_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS font_emissions FROM page_data, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile @@ -76,4 +76,4 @@ GROUP BY client ORDER BY client, - percentile \ No newline at end of file + percentile diff --git a/sql/2025/sustainability/script_count.sql b/sql/2025/sustainability/script_count.sql index d4f5260821e..7a78885e329 100644 --- a/sql/2025/sustainability/script_count.sql +++ b/sql/2025/sustainability/script_count.sql @@ -30,11 +30,11 @@ SELECT SUM(external_scripts) AS external_scripts, SAFE_DIVIDE(SUM(external_scripts), SUM(total_scripts)) AS pct_external_script, SAFE_DIVIDE(SUM(inline_scripts), SUM(total_scripts)) AS pct_inline_script, - 
APPROX_QUANTILES(SAFE_DIVIDE(external_scripts, total_scripts), 1000)[OFFSET(500)] AS median_external, - APPROX_QUANTILES(SAFE_DIVIDE(inline_scripts, total_scripts), 1000)[OFFSET(500)] AS median_inline + APPROX_QUANTILES(SAFE_DIVIDE(external_scripts, total_scripts), 1000) [OFFSET(500)] AS median_external, + APPROX_QUANTILES(SAFE_DIVIDE(inline_scripts, total_scripts), 1000) [OFFSET(500)] AS median_inline FROM script_data GROUP BY client ORDER BY - client; \ No newline at end of file + client; diff --git a/sql/2025/sustainability/ssg_bytes_per_type.sql b/sql/2025/sustainability/ssg_bytes_per_type.sql index d83c8549a63..6768e0edc2e 100644 --- a/sql/2025/sustainability/ssg_bytes_per_type.sql +++ b/sql/2025/sustainability/ssg_bytes_per_type.sql @@ -141,22 +141,22 @@ SELECT COUNT(0) AS pages, -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS 
median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions FROM ssg_data @@ -166,4 +166,4 @@ GROUP BY ORDER BY pages DESC, ssg, - client; \ No newline at end of file + client; diff --git a/sql/2025/sustainability/ssg_bytes_per_type_2022.sql b/sql/2025/sustainability/ssg_bytes_per_type_2022.sql index 99ff0f16838..d95fb394ae0 100644 --- a/sql/2025/sustainability/ssg_bytes_per_type_2022.sql +++ b/sql/2025/sustainability/ssg_bytes_per_type_2022.sql @@ -142,22 +142,22 @@ SELECT COUNT(0) AS pages, -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS 
median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions FROM ssg_data @@ -167,4 +167,4 @@ GROUP BY ORDER BY pages DESC, ssg, - client; \ No newline at end of file + client; 
diff --git a/sql/2025/sustainability/stylesheet_count.sql b/sql/2025/sustainability/stylesheet_count.sql index dde471da2cd..c0bb071e246 100644 --- a/sql/2025/sustainability/stylesheet_count.sql +++ b/sql/2025/sustainability/stylesheet_count.sql @@ -32,11 +32,11 @@ SELECT SUM(inline_stylesheets) AS inline_stylesheets, SAFE_DIVIDE(SUM(inline_stylesheets), SUM(inline_stylesheets + external_stylesheets)) AS pct_inline_stylesheets, SAFE_DIVIDE(SUM(external_stylesheets), SUM(inline_stylesheets + external_stylesheets)) AS pct_external_stylesheets, - APPROX_QUANTILES(SAFE_DIVIDE(inline_stylesheets, inline_stylesheets + external_stylesheets), 1000)[OFFSET(500)] AS median_inline_stylesheets, - APPROX_QUANTILES(SAFE_DIVIDE(external_stylesheets, inline_stylesheets + external_stylesheets), 1000)[OFFSET(500)] AS median_external_stylesheets + APPROX_QUANTILES(SAFE_DIVIDE(inline_stylesheets, inline_stylesheets + external_stylesheets), 1000) [OFFSET(500)] AS median_inline_stylesheets, + APPROX_QUANTILES(SAFE_DIVIDE(external_stylesheets, inline_stylesheets + external_stylesheets), 1000) [OFFSET(500)] AS median_external_stylesheets FROM stylesheet_data GROUP BY client ORDER BY - client; \ No newline at end of file + client; diff --git a/sql/2025/sustainability/unminified_css_bytes.sql b/sql/2025/sustainability/unminified_css_bytes.sql index e662388ad82..9abec7684d0 100644 --- a/sql/2025/sustainability/unminified_css_bytes.sql +++ b/sql/2025/sustainability/unminified_css_bytes.sql @@ -4,7 +4,7 @@ SELECT client, percentile, - APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.minify-css.details.overallSavingsBytes') AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS css_kilobytes + APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.minify-css.details.overallSavingsBytes') AS INT64) / 1024, 1000) [OFFSET(percentile * 10)] AS css_kilobytes FROM `httparchive.crawl.pages`, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile @@ -15,4 +15,4 @@ GROUP BY percentile ORDER BY client, - 
percentile \ No newline at end of file + percentile diff --git a/sql/2025/sustainability/unminified_js_bytes.sql b/sql/2025/sustainability/unminified_js_bytes.sql index 03d7a31540a..d4fb9b9697f 100644 --- a/sql/2025/sustainability/unminified_js_bytes.sql +++ b/sql/2025/sustainability/unminified_js_bytes.sql @@ -4,7 +4,7 @@ SELECT client, percentile, - APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.minify-javascript.details.overallSavingsBytes') AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS js_kilobytes + APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.minify-javascript.details.overallSavingsBytes') AS INT64) / 1024, 1000) [OFFSET(percentile * 10)] AS js_kilobytes FROM `httparchive.crawl.pages`, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile @@ -15,4 +15,4 @@ GROUP BY percentile ORDER BY client, - percentile \ No newline at end of file + percentile diff --git a/sql/2025/sustainability/unused_css_bytes.sql b/sql/2025/sustainability/unused_css_bytes.sql index cf2b27ed407..7acbaa1cdfc 100644 --- a/sql/2025/sustainability/unused_css_bytes.sql +++ b/sql/2025/sustainability/unused_css_bytes.sql @@ -4,7 +4,7 @@ SELECT client, percentile, - APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.unused-css-rules.details.overallSavingsBytes') AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS css_kilobytes + APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.unused-css-rules.details.overallSavingsBytes') AS INT64) / 1024, 1000) [OFFSET(percentile * 10)] AS css_kilobytes FROM `httparchive.crawl.pages`, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile @@ -15,4 +15,4 @@ GROUP BY percentile ORDER BY client, - percentile \ No newline at end of file + percentile diff --git a/sql/2025/sustainability/unused_js_bytes.sql b/sql/2025/sustainability/unused_js_bytes.sql index a46a72d9a32..9286e1db667 100644 --- a/sql/2025/sustainability/unused_js_bytes.sql +++ b/sql/2025/sustainability/unused_js_bytes.sql @@ -4,7 +4,7 @@ SELECT client, percentile, - 
APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.unused-javascript.details.overallSavingsBytes') AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS js_kilobytes + APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.unused-javascript.details.overallSavingsBytes') AS INT64) / 1024, 1000) [OFFSET(percentile * 10)] AS js_kilobytes FROM `httparchive.crawl.pages`, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile @@ -15,4 +15,4 @@ GROUP BY percentile ORDER BY client, - percentile \ No newline at end of file + percentile From c5a6a4ecabc6b0624591e938aa466b72013ec9b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Tue, 29 Jul 2025 11:49:25 +0100 Subject: [PATCH 03/16] fix linter issues --- .../sustainability/cache_header_usage.sql | 117 +++-- sql/2025/sustainability/cdn_adoption.sql | 48 +- .../sustainability/cms_bytes_per_type.sql | 433 +++++++++++------ .../cms_bytes_per_type_2022.sql | 432 +++++++++++------ .../sustainability/content-visibility.sql | 69 +-- .../ecommerce_bytes_per_type.sql | 448 ++++++++++++------ .../ecommerce_bytes_per_type_2022.sql | 448 ++++++++++++------ sql/2025/sustainability/favicons.sql | 42 +- .../global_emissions_per_page.sql | 166 ++++--- .../global_emissions_per_page_2022.sql | 166 ++++--- .../green_third_party_requests.sql | 221 ++++----- sql/2025/sustainability/green_web_hosting.sql | 86 ++-- .../sustainability/page_byte_pre_type.sql | 161 ++++--- sql/2025/sustainability/query_run_size.sql | 12 +- sql/2025/sustainability/responsive_images.sql | 65 ++- sql/2025/sustainability/script_count.sql | 129 +++-- .../sustainability/ssg_bytes_per_type.sql | 446 +++++++++++------ .../ssg_bytes_per_type_2022.sql | 446 +++++++++++------ sql/2025/sustainability/stylesheet_count.sql | 146 ++++-- sql/2025/sustainability/text_compression.sql | 66 +-- .../sustainability/unminified_css_bytes.sql | 27 +- .../sustainability/unminified_js_bytes.sql | 28 +- sql/2025/sustainability/unused_css_bytes.sql | 28 +- 
sql/2025/sustainability/unused_js_bytes.sql | 28 +- .../use_of_prefers_dark_mode_usage.sql | 60 +-- .../sustainability/video_autoplay_values.sql | 53 ++- .../sustainability/video_preload_values.sql | 57 ++- 27 files changed, 2888 insertions(+), 1540 deletions(-) diff --git a/sql/2025/sustainability/cache_header_usage.sql b/sql/2025/sustainability/cache_header_usage.sql index 5bc07eccc3c..d3e4f661abb 100644 --- a/sql/2025/sustainability/cache_header_usage.sql +++ b/sql/2025/sustainability/cache_header_usage.sql @@ -2,49 +2,94 @@ # The distribution of cache header adoption on websites by client. SELECT - client, - COUNT(0) AS total_requests, - - COUNTIF(uses_cache_control) AS total_using_cache_control, - COUNTIF(uses_max_age) AS total_using_max_age, - COUNTIF(uses_expires) AS total_using_expires, - COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires, - COUNTIF(uses_cache_control AND uses_expires) AS total_using_both_cc_and_expires, - COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither_cc_and_expires, - COUNTIF(uses_cache_control AND NOT uses_expires) AS total_using_only_cache_control, - COUNTIF(NOT uses_cache_control AND uses_expires) AS total_using_only_expires, - - COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control, - COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age, - COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires, - COUNTIF(uses_max_age AND uses_expires) / COUNT(0) AS pct_using_max_age_and_expires, - COUNTIF(uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_both_cc_and_expires, - COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_neither_cc_nor_expires, - COUNTIF(uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_only_cache_control, - COUNTIF(NOT uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_only_expires + client, + COUNT(*) AS total_requests, + + COUNTIF(uses_cache_control) AS total_using_cache_control, + COUNTIF(uses_max_age) AS 
total_using_max_age, + COUNTIF(uses_expires) AS total_using_expires, + COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires, + COUNTIF( + uses_cache_control AND uses_expires + ) AS total_using_both_cc_and_expires, + COUNTIF( + NOT uses_cache_control AND NOT uses_expires + ) AS total_using_neither_cc_and_expires, + COUNTIF( + uses_cache_control AND NOT uses_expires + ) AS total_using_only_cache_control, + COUNTIF( + NOT uses_cache_control AND uses_expires + ) AS total_using_only_expires, + + COUNTIF(uses_cache_control) / COUNT(*) AS pct_cache_control, + COUNTIF(uses_max_age) / COUNT(*) AS pct_using_max_age, + COUNTIF(uses_expires) / COUNT(*) AS pct_using_expires, + COUNTIF( + uses_max_age AND uses_expires + ) / COUNT(*) AS pct_using_max_age_and_expires, + COUNTIF( + uses_cache_control AND uses_expires + ) / COUNT(*) AS pct_using_both_cc_and_expires, + COUNTIF( + NOT uses_cache_control AND NOT uses_expires + ) / COUNT(*) AS pct_using_neither_cc_nor_expires, + COUNTIF( + uses_cache_control AND NOT uses_expires + ) / COUNT(*) AS pct_using_only_cache_control, + COUNTIF( + NOT uses_cache_control AND uses_expires + ) / COUNT(*) AS pct_using_only_expires FROM ( - SELECT - client, + SELECT + client, - JSON_EXTRACT_SCALAR(summary, '$.resp_expires') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_expires')) != '' AS uses_expires, - JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control')) != '' AS uses_cache_control, - REGEXP_CONTAINS(JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control'), r'(?i)max-age\s*=\s*[0-9]+') AS uses_max_age, + JSON_EXTRACT_SCALAR( + summary, '$.resp_expires' + ) IS NOT NULL AND TRIM( + JSON_EXTRACT_SCALAR(summary, '$.resp_expires') + ) != '' AS uses_expires, + JSON_EXTRACT_SCALAR( + summary, '$.resp_cache_control' + ) IS NOT NULL AND TRIM( + JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') + ) != '' AS uses_cache_control, + REGEXP_CONTAINS( + 
JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control'), + r'(?i)max-age\s*=\s*[0-9]+' + ) AS uses_max_age, - JSON_EXTRACT_SCALAR(summary, '$.resp_etag') IS NULL OR TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')) = '' AS uses_no_etag, - JSON_EXTRACT_SCALAR(summary, '$.resp_etag') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')) != '' AS uses_etag, - JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified')) != '' AS uses_last_modified, + JSON_EXTRACT_SCALAR( + summary, '$.resp_etag' + ) IS NULL OR TRIM( + JSON_EXTRACT_SCALAR(summary, '$.resp_etag') + ) = '' AS uses_no_etag, + JSON_EXTRACT_SCALAR( + summary, '$.resp_etag' + ) IS NOT NULL AND TRIM( + JSON_EXTRACT_SCALAR(summary, '$.resp_etag') + ) != '' AS uses_etag, + JSON_EXTRACT_SCALAR( + summary, '$.resp_last_modified' + ) IS NOT NULL AND TRIM( + JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified') + ) != '' AS uses_last_modified, - REGEXP_CONTAINS(TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^W/".*"') AS uses_weak_etag, - REGEXP_CONTAINS(TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^".*"') AS uses_strong_etag + REGEXP_CONTAINS( + TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^W/".*"' + ) AS uses_weak_etag, + REGEXP_CONTAINS( + TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^".*"' + ) AS uses_strong_etag - FROM - `httparchive.crawl.requests` - WHERE - date = '2025-06-01' + FROM + `httparchive.crawl.requests` + WHERE + date = '2025-06-01' ) GROUP BY - client + client ORDER BY - client; \ No newline at end of file + client; diff --git a/sql/2025/sustainability/cdn_adoption.sql b/sql/2025/sustainability/cdn_adoption.sql index 7123f708bbd..d37b0529d4c 100644 --- a/sql/2025/sustainability/cdn_adoption.sql +++ b/sql/2025/sustainability/cdn_adoption.sql @@ -2,30 +2,32 @@ # The distribution of CDN adoption on websites by client. 
SELECT - client, - IF(cdn = '', 'No CDN', cdn) AS cdn, - COUNT(0) AS freq, - total, - COUNT(0) / total AS pct -FROM ( - SELECT client, - COUNT(0) AS total, - ARRAY_CONCAT_AGG(SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ')) AS cdn_list - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' AND - is_root_page = TRUE - GROUP BY - client + total, + IF(cdn = '', 'No CDN', cdn) AS cdn, + COUNT(*) AS freq, + COUNT(*) / total AS pct +FROM ( + SELECT + client, + COUNT(*) AS total, + ARRAY_CONCAT_AGG( + SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ') + ) AS cdn_list + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' AND + is_root_page = TRUE + GROUP BY + client ), - UNNEST(cdn_list) AS cdn +UNNEST(cdn_list) AS cdn GROUP BY - client, - cdn, - total + client, + cdn, + total ORDER BY - pct DESC, - client, - cdn; \ No newline at end of file + pct DESC, + client ASC, + cdn ASC; diff --git a/sql/2025/sustainability/cms_bytes_per_type.sql b/sql/2025/sustainability/cms_bytes_per_type.sql index ba698179877..7fcb216d16c 100644 --- a/sql/2025/sustainability/cms_bytes_per_type.sql +++ b/sql/2025/sustainability/cms_bytes_per_type.sql @@ -3,7 +3,6 @@ # Declare variables to calculate the carbon emissions of one byte # Source: https://sustainablewebdesign.org/calculating-digital-emissions/ -# The implementation below does not make the assumptions about returning visitors or caching that are present in the Sustainable Web Design model. 
DECLARE grid_intensity NUMERIC DEFAULT 494; DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012; @@ -14,149 +13,301 @@ DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; WITH cms_data AS ( - SELECT - client, - page, - tech.technology AS cms, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - - -- Operational emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices, - - -- Embodied emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, - - -- Total emissions (operational + embodied) - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - 
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, - - -- Proportions of each resource type relative to total bytes - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, - - -- Resource-specific emissions calculations - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 
1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS 
total_img_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, - - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2025-06-01' AND - is_root_page = TRUE AND - 'CMS' IN UNNEST(tech.categories) + SELECT + client, + page, + tech.technology AS cms, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + + -- Operational emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * + grid_intensity AS op_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * + grid_intensity AS op_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * + grid_intensity AS op_emissions_devices, + + -- Embodied emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * + grid_intensity AS em_emissions_dc, + ( + CAST( + 
JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * + grid_intensity AS em_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * + grid_intensity AS em_emissions_devices, + + -- Total emissions (operational + embodied) + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + 
CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, + + -- Proportions of each resource type relative to total bytes + CAST( + JSON_VALUE(summary, '$.bytesHtml') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS html_proportion, + CAST( + JSON_VALUE(summary, '$.bytesJS') AS INT64 + ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST( + JSON_VALUE(summary, '$.bytesCss') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS css_proportion, + CAST( + JSON_VALUE(summary, '$.bytesImg') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS img_proportion, + CAST( + JSON_VALUE(summary, '$.bytesFont') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS font_proportion, + + -- Resource-specific emissions calculations + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * 
grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, + + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + 
CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2025-06-01' AND + is_root_page = TRUE AND + 'CMS' IN UNNEST(tech.categories) ) SELECT - client, - cms, - COUNT(0) AS pages, - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, - - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions + client, + cms, + COUNT(*) AS pages, + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES( + total_operational_emissions, 1000 + ) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES( + total_embodied_emissions, 1000 + ) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES( 
+ total_emissions, 1000 + ) [OFFSET(500)] AS median_total_emissions, + + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES( + total_html_emissions, 1000 + ) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES( + total_js_emissions, 1000 + ) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES( + total_css_emissions, 1000 + ) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES( + total_img_emissions, 1000 + ) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES( + total_font_emissions, 1000 + ) [OFFSET(500)] AS median_total_font_emissions FROM - cms_data + cms_data GROUP BY - client, - cms + client, + cms ORDER BY - pages DESC, - cms, - client; + pages DESC, + cms ASC, + client ASC; diff --git a/sql/2025/sustainability/cms_bytes_per_type_2022.sql b/sql/2025/sustainability/cms_bytes_per_type_2022.sql index 488a738893a..86062822806 100644 --- a/sql/2025/sustainability/cms_bytes_per_type_2022.sql +++ b/sql/2025/sustainability/cms_bytes_per_type_2022.sql @@ -4,7 +4,6 @@ # Declare variables to calculate the carbon emissions of one byte # Source: https://sustainablewebdesign.org/calculating-digital-emissions/ -# The implementation below does not make the assumptions about returning visitors or caching that are present in the Sustainable Web Design model. 
DECLARE grid_intensity NUMERIC DEFAULT 494; DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012; @@ -15,149 +14,300 @@ DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; WITH cms_data AS ( - SELECT - client, - page, - tech.technology AS cms, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - - -- Operational emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices, - - -- Embodied emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, - - -- Total emissions (operational + embodied) - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - 
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, - - -- Proportions of each resource type relative to total bytes - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, - - -- Resource-specific emissions calculations - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 
1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS 
total_img_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, - - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2022-06-01' AND - is_root_page = TRUE AND - 'CMS' IN UNNEST(tech.categories) + SELECT + client, + page, + tech.technology AS cms, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + + -- Operational emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * + grid_intensity AS op_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * + grid_intensity AS op_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * + grid_intensity AS op_emissions_devices, + + -- Embodied emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, + ( + CAST( + 
JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * + grid_intensity AS em_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * + grid_intensity AS em_emissions_devices, + + -- Total emissions (operational + embodied) + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + 
CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, + + -- Proportions of each resource type relative to total bytes + CAST( + JSON_VALUE(summary, '$.bytesHtml') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS html_proportion, + CAST( + JSON_VALUE(summary, '$.bytesJS') AS INT64 + ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST( + JSON_VALUE(summary, '$.bytesCss') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS css_proportion, + CAST( + JSON_VALUE(summary, '$.bytesImg') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS img_proportion, + CAST( + JSON_VALUE(summary, '$.bytesFont') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS font_proportion, + + -- Resource-specific emissions calculations + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * 
grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, + + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + 
CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2022-06-01' AND + is_root_page = TRUE AND + 'CMS' IN UNNEST(tech.categories) ) SELECT - client, - cms, - COUNT(0) AS pages, - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, - - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions + client, + cms, + COUNT(*) AS pages, + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES( + total_operational_emissions, 1000 + ) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES( + total_embodied_emissions, 1000 + ) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES( 
+ total_emissions, 1000 + ) [OFFSET(500)] AS median_total_emissions, + + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES( + total_html_emissions, 1000 + ) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES( + total_js_emissions, 1000 + ) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES( + total_css_emissions, 1000 + ) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES( + total_img_emissions, 1000 + ) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES( + total_font_emissions, 1000 + ) [OFFSET(500)] AS median_total_font_emissions FROM - cms_data + cms_data GROUP BY - client, - cms + client, + cms ORDER BY - pages DESC, - cms, - client; + pages DESC, + cms ASC, + client ASC; diff --git a/sql/2025/sustainability/content-visibility.sql b/sql/2025/sustainability/content-visibility.sql index 65c69831f58..29b709474dd 100644 --- a/sql/2025/sustainability/content-visibility.sql +++ b/sql/2025/sustainability/content-visibility.sql @@ -1,5 +1,5 @@ #standardSQL -CREATE TEMPORARY FUNCTION hasContentVisibility(css STRING) +CREATE TEMPORARY FUNCTION HASCONTENTVISIBILITY(css STRING) RETURNS ARRAY> LANGUAGE js OPTIONS (library = "gs://httparchive/lib/css-utils.js") @@ -27,39 +27,48 @@ try { '''; WITH totals AS ( - SELECT - client, - COUNT(distinct root_page) AS total_pages - FROM - `httparchive.crawl.parsed_css` - WHERE - date = '2025-06-01' AND - is_root_page - GROUP BY - client + SELECT + client, + COUNT(DISTINCT root_page) AS total_pages + FROM + `httparchive.crawl.parsed_css` + WHERE + date = '2025-06-01' AND + is_root_page + GROUP BY + client ), + content_visibility_pages AS ( - SELECT - client, - 
COUNT(distinct root_page) AS pages_with_content_visibility - FROM - `httparchive.crawl.parsed_css`, - UNNEST (hasContentVisibility(css)) - WHERE - date = '2025-06-01' AND - is_root_page - GROUP BY - client + SELECT + client, + COUNT(DISTINCT root_page) AS pages_with_content_visibility + FROM + `httparchive.crawl.parsed_css`, + UNNEST(HASCONTENTVISIBILITY(css)) + WHERE + date = '2025-06-01' AND + is_root_page + GROUP BY + client ) + SELECT - totals.client, - IFNULL(content_visibility_pages.pages_with_content_visibility, 0) AS pages_with_content_visibility, - totals.total_pages, - ROUND(IFNULL(content_visibility_pages.pages_with_content_visibility, 0) * 100.0 / totals.total_pages, 2) AS pct_pages + totals.client, + totals.total_pages, + COALESCE( + content_visibility_pages.pages_with_content_visibility, 0 + ) AS pages_with_content_visibility, + ROUND( + COALESCE( + content_visibility_pages.pages_with_content_visibility, 0 + ) * 100.0 / totals.total_pages, + 2 + ) AS pct_pages FROM - totals + totals LEFT JOIN - content_visibility_pages -USING (client) + content_visibility_pages +ON totals.client = content_visibility_pages.client ORDER BY - totals.client \ No newline at end of file + totals.client diff --git a/sql/2025/sustainability/ecommerce_bytes_per_type.sql b/sql/2025/sustainability/ecommerce_bytes_per_type.sql index 03bdaa059cb..0802327915f 100644 --- a/sql/2025/sustainability/ecommerce_bytes_per_type.sql +++ b/sql/2025/sustainability/ecommerce_bytes_per_type.sql @@ -12,156 +12,310 @@ DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; WITH ecommerce_data AS ( - SELECT - client, - page, - tech.technology AS ecommerce, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - - -- Operational emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, - 
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices, - - -- Embodied emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, - - -- Total emissions (operational + embodied) - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS 
INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, - - -- Proportions of each resource type relative to total bytes - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, - - -- Resource-specific emissions calculations - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - 
operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, 
- - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb - - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2025-06-01' AND - is_root_page = TRUE AND - EXISTS ( - SELECT 1 - FROM UNNEST(tech.categories) AS category - WHERE category = 'Ecommerce' AND - tech.technology NOT IN ('Cart Functionality', 'Google Analytics Enhanced eCommerce') - ) + SELECT + client, + page, + tech.technology AS ecommerce, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + + -- Operational emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * + grid_intensity AS op_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * + grid_intensity AS op_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * + grid_intensity AS op_emissions_devices, + + -- Embodied emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * + grid_intensity AS em_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * + grid_intensity AS em_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * + grid_intensity AS em_emissions_devices, + + -- Total emissions (operational + embodied) + ( + ( + CAST( + JSON_VALUE(summary, 
'$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, + + -- Proportions of each resource type relative to total bytes + CAST( + JSON_VALUE(summary, '$.bytesHtml') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS html_proportion, + CAST( + JSON_VALUE(summary, '$.bytesJS') 
AS INT64 + ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST( + JSON_VALUE(summary, '$.bytesCss') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS css_proportion, + CAST( + JSON_VALUE(summary, '$.bytesImg') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS img_proportion, + CAST( + JSON_VALUE(summary, '$.bytesFont') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS font_proportion, + + -- Resource-specific emissions calculations + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network 
* grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, + + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2025-06-01' AND + is_root_page = TRUE AND + EXISTS ( + SELECT 1 + FROM UNNEST(tech.categories) AS category + WHERE 
category = 'Ecommerce' AND + tech.technology NOT IN ( + 'Cart Functionality', 'Google Analytics Enhanced eCommerce' + ) + ) ) SELECT - client, - ecommerce, - COUNT(0) AS pages, - - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, - - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions + client, + ecommerce, + COUNT(*) AS pages, + + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES( + total_operational_emissions, 1000 + ) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES( + total_embodied_emissions, 1000 + ) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES( + total_emissions, 1000 + ) [OFFSET(500)] AS median_total_emissions, + + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES( + total_html_emissions, 1000 + ) 
[OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES( + total_js_emissions, 1000 + ) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES( + total_css_emissions, 1000 + ) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES( + total_img_emissions, 1000 + ) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES( + total_font_emissions, 1000 + ) [OFFSET(500)] AS median_total_font_emissions FROM - ecommerce_data + ecommerce_data GROUP BY - client, - ecommerce + client, + ecommerce ORDER BY - pages DESC, - ecommerce, - client; + pages DESC, + ecommerce ASC, + client ASC; diff --git a/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql b/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql index eb0cdac3b97..5aad7696fe3 100644 --- a/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql +++ b/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql @@ -13,156 +13,310 @@ DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; WITH ecommerce_data AS ( - SELECT - client, - page, - tech.technology AS ecommerce, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - - -- Operational emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices, - - -- Embodied 
emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, - - -- Total emissions (operational + embodied) - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * 
grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, - - -- Proportions of each resource type relative to total bytes - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, - - -- Resource-specific emissions calculations - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, - 
- (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, - - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, 
'$.bytesFont') AS INT64) / 1024 AS font_kb - - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2022-06-01' AND - is_root_page = TRUE AND - EXISTS ( - SELECT 1 - FROM UNNEST(tech.categories) AS category - WHERE category = 'Ecommerce' AND - tech.technology NOT IN ('Cart Functionality', 'Google Analytics Enhanced eCommerce') - ) + SELECT + client, + page, + tech.technology AS ecommerce, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + + -- Operational emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * + grid_intensity AS op_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * + grid_intensity AS op_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * + grid_intensity AS op_emissions_devices, + + -- Embodied emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * + grid_intensity AS em_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * + grid_intensity AS em_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * + grid_intensity AS em_emissions_devices, + + -- Total emissions (operational + embodied) + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * 
operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, + + -- Proportions of each resource type relative to total bytes + CAST( + JSON_VALUE(summary, '$.bytesHtml') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS html_proportion, + CAST( + JSON_VALUE(summary, '$.bytesJS') AS INT64 + ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST( + JSON_VALUE(summary, '$.bytesCss') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS css_proportion, + CAST( + JSON_VALUE(summary, '$.bytesImg') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS 
img_proportion, + CAST( + JSON_VALUE(summary, '$.bytesFont') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS font_proportion, + + -- Resource-specific emissions calculations + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), + 
CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, + + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2022-06-01' AND + is_root_page = TRUE AND + EXISTS ( + SELECT 1 + FROM UNNEST(tech.categories) AS category + WHERE category = 'Ecommerce' AND + tech.technology NOT IN ( + 'Cart Functionality', 'Google Analytics Enhanced eCommerce' + ) + ) ) SELECT - client, - ecommerce, - COUNT(0) AS pages, - - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 
1000) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, - - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions + client, + ecommerce, + COUNT(*) AS pages, + + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES( + total_operational_emissions, 1000 + ) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES( + total_embodied_emissions, 1000 + ) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES( + total_emissions, 1000 + ) [OFFSET(500)] AS median_total_emissions, + + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES( + total_html_emissions, 1000 + ) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES( + total_js_emissions, 1000 + ) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES( + total_css_emissions, 1000 + ) [OFFSET(500)] AS 
median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES( + total_img_emissions, 1000 + ) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES( + total_font_emissions, 1000 + ) [OFFSET(500)] AS median_total_font_emissions FROM - ecommerce_data + ecommerce_data GROUP BY - client, - ecommerce + client, + ecommerce ORDER BY - pages DESC, - ecommerce, - client; + pages DESC, + ecommerce ASC, + client ASC; diff --git a/sql/2025/sustainability/favicons.sql b/sql/2025/sustainability/favicons.sql index f0455aa4182..bd92270e003 100644 --- a/sql/2025/sustainability/favicons.sql +++ b/sql/2025/sustainability/favicons.sql @@ -1,6 +1,6 @@ #standardSQL # Temporary function to extract favicon image extensions from the JSON payload -CREATE TEMPORARY FUNCTION getFaviconImage(payload STRING) +CREATE TEMPORARY FUNCTION GETFAVICONIMAGE(payload STRING) RETURNS STRING LANGUAGE js AS ''' var result = 'NO_DATA'; try { @@ -45,27 +45,31 @@ return result; # Main query to analyze favicon image extensions with sampling WITH favicons AS ( - SELECT - client, - getFaviconImage(JSON_EXTRACT_SCALAR(payload, '$._almanac')) AS image_type_extension, - COUNT(0) AS freq, - SUM(COUNT(0)) OVER (PARTITION BY client) AS total, - COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS percentage_of_total - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' - GROUP BY - client, - image_type_extension + SELECT + client, + GETFAVICONIMAGE( + JSON_EXTRACT_SCALAR(payload, '$._almanac') + ) AS image_type_extension, + COUNT(*) AS freq, + SUM(COUNT(*)) OVER (PARTITION BY client) AS total, + COUNT( + * + ) / SUM(COUNT(*)) OVER (PARTITION BY client) AS percentage_of_total + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' + GROUP BY + client, + image_type_extension ) SELECT - *, - percentage_of_total AS pct + *, + percentage_of_total AS pct FROM - favicons + 
favicons ORDER BY - pct DESC + pct DESC LIMIT - 1000; \ No newline at end of file + 1000; diff --git a/sql/2025/sustainability/global_emissions_per_page.sql b/sql/2025/sustainability/global_emissions_per_page.sql index afbcd5c3966..395a7bdbf73 100644 --- a/sql/2025/sustainability/global_emissions_per_page.sql +++ b/sql/2025/sustainability/global_emissions_per_page.sql @@ -1,79 +1,123 @@ #standardSQL -# The distribution of page weight by resource type and client, with updated SWDM v4 methodology including both operational and embodied emissions -- Energy consumption factors from SWDM v4 (in kWh/GB) -DECLARE energy_per_GB_datacenter NUMERIC DEFAULT CAST(0.055 + 0.012 AS NUMERIC); -- Operational + Embodied -DECLARE energy_per_GB_network NUMERIC DEFAULT CAST(0.059 + 0.013 AS NUMERIC); -- Operational + Embodied -DECLARE energy_per_GB_device NUMERIC DEFAULT CAST(0.080 + 0.081 AS NUMERIC); -- Operational + Embodied +-- Operational + Embodied +DECLARE ENERGY_PER_GB_DATACENTER NUMERIC DEFAULT CAST(0.055 + 0.012 AS NUMERIC); +-- Operational + Embodied +DECLARE ENERGY_PER_GB_NETWORK NUMERIC DEFAULT CAST(0.059 + 0.013 AS NUMERIC); +-- Operational + Embodied +DECLARE ENERGY_PER_GB_DEVICE NUMERIC DEFAULT CAST(0.080 + 0.081 AS NUMERIC); -- Total energy consumption per GB, calculated by summing the above factors -DECLARE kw_per_GB NUMERIC DEFAULT CAST(energy_per_GB_datacenter + energy_per_GB_network + energy_per_GB_device AS NUMERIC); -- Sum of all operational and embodied energies +-- Sum of all operational and embodied energies +DECLARE KW_PER_GB NUMERIC DEFAULT CAST( + ENERGY_PER_GB_DATACENTER + + ENERGY_PER_GB_NETWORK + + ENERGY_PER_GB_DEVICE AS NUMERIC +); -- Global average carbon intensity of electricity generation (gCO2/kWh) -DECLARE global_grid_intensity NUMERIC DEFAULT 494; +DECLARE GLOBAL_GRID_INTENSITY NUMERIC DEFAULT 494; -- Function to calculate emissions in gCO2 -CREATE TEMP FUNCTION calculate_emissions( - bytes FLOAT64, - kw_per_GB FLOAT64, - grid_intensity FLOAT64 
+CREATE TEMP FUNCTION CALCULATE_EMISSIONS( + bytes FLOAT64, + kw_per_GB FLOAT64, + grid_intensity FLOAT64 ) RETURNS FLOAT64 AS ( - (bytes / 1024 / 1024 / 1024) * -- Convert bytes to GB - (kw_per_GB) * - grid_intensity + (BYTES / 1024 / 1024 / 1024) * -- Convert bytes to GB + (KW_PER_GB) * + GRID_INTENSITY ); -WITH page_data AS ( - SELECT - client, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytesTotal, - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytesHtml, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytesJS, - CAST(COALESCE(JSON_VALUE(summary, '$.bytesCss'), JSON_VALUE(summary, '$.bytesStyle')) AS INT64) AS bytesCSS, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytesImg, - CAST(JSON_VALUE(summary, '$.bytesOther') AS INT64) AS bytesOther, - CAST(JSON_VALUE(summary, '$.bytesHtmlDoc') AS INT64) AS bytesHtmlDoc, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytesFont - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' AND is_root_page +WITH PAGE_DATA AS ( + SELECT + CLIENT, + CAST(JSON_VALUE(SUMMARY, '$.bytesTotal') AS INT64) AS BYTESTOTAL, + CAST(JSON_VALUE(SUMMARY, '$.bytesHtml') AS INT64) AS BYTESHTML, + CAST(JSON_VALUE(SUMMARY, '$.bytesJS') AS INT64) AS BYTESJS, + CAST( + COALESCE( + JSON_VALUE(SUMMARY, '$.bytesCss'), + JSON_VALUE(SUMMARY, '$.bytesStyle') + ) AS INT64 + ) AS BYTESCSS, + CAST(JSON_VALUE(SUMMARY, '$.bytesImg') AS INT64) AS BYTESIMG, + CAST(JSON_VALUE(SUMMARY, '$.bytesOther') AS INT64) AS BYTESOTHER, + CAST(JSON_VALUE(SUMMARY, '$.bytesHtmlDoc') AS INT64) AS BYTESHTMLDOC, + CAST(JSON_VALUE(SUMMARY, '$.bytesFont') AS INT64) AS BYTESFONT + FROM + `httparchive.crawl.pages` + WHERE + DATE = '2025-06-01' AND IS_ROOT_PAGE ) SELECT - percentile, - client, - -- For each resource type, calculate the size in KB and the associated emissions - -- Total resources - APPROX_QUANTILES(bytesTotal / 1024, 1000) [OFFSET(percentile * 10)] AS total_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesTotal, 
kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS total_emissions, - -- HTML resources - APPROX_QUANTILES(bytesHtml / 1024, 1000) [OFFSET(percentile * 10)] AS html_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_emissions, - -- JavaScript resources - APPROX_QUANTILES(bytesJS / 1024, 1000) [OFFSET(percentile * 10)] AS js_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS js_emissions, - -- CSS resources - APPROX_QUANTILES(bytesCSS / 1024, 1000) [OFFSET(percentile * 10)] AS css_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS css_emissions, - -- Image resources - APPROX_QUANTILES(bytesImg / 1024, 1000) [OFFSET(percentile * 10)] AS img_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS img_emissions, - -- Other resources - APPROX_QUANTILES(bytesOther / 1024, 1000) [OFFSET(percentile * 10)] AS other_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS other_emissions, - -- HTML document - APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000) [OFFSET(percentile * 10)] AS html_doc_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_doc_emissions, - -- Font resources - APPROX_QUANTILES(bytesFont / 1024, 1000) [OFFSET(percentile * 10)] AS font_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS font_emissions + PERCENTILE, + CLIENT, + -- Total resources + APPROX_QUANTILES( + BYTESTOTAL / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS TOTAL_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESTOTAL, KW_PER_GB, 
GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS TOTAL_EMISSIONS, + -- HTML resources + APPROX_QUANTILES( + BYTESHTML / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESHTML, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_EMISSIONS, + -- JavaScript resources + APPROX_QUANTILES( + BYTESJS / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS JS_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESJS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS JS_EMISSIONS, + -- CSS resources + APPROX_QUANTILES( + BYTESCSS / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS CSS_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESCSS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS CSS_EMISSIONS, + -- Image resources + APPROX_QUANTILES( + BYTESIMG / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS IMG_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESIMG, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS IMG_EMISSIONS, + -- Other resources + APPROX_QUANTILES( + BYTESOTHER / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS OTHER_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESOTHER, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS OTHER_EMISSIONS, + -- HTML document + APPROX_QUANTILES( + BYTESHTMLDOC / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESHTMLDOC, KW_PER_GB, GLOBAL_GRID_INTENSITY), + 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, + -- Font resources + APPROX_QUANTILES( + BYTESFONT / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS FONT_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESFONT, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS FONT_EMISSIONS FROM - page_data, - UNNEST([10, 25, 50, 75, 90, 100]) AS percentile + PAGE_DATA, + UNNEST([10, 25, 50, 75, 90, 100]) AS PERCENTILE GROUP BY 
- percentile, - client + PERCENTILE, + CLIENT ORDER BY - client, - percentile + CLIENT, + PERCENTILE diff --git a/sql/2025/sustainability/global_emissions_per_page_2022.sql b/sql/2025/sustainability/global_emissions_per_page_2022.sql index 2a2b3f1c7a9..2fc357a33f2 100644 --- a/sql/2025/sustainability/global_emissions_per_page_2022.sql +++ b/sql/2025/sustainability/global_emissions_per_page_2022.sql @@ -1,80 +1,124 @@ #standardSQL # Copied global_emissions_per_page.sql -# The distribution of page weight by resource type and client, with updated SWDM v4 methodology including both operational and embodied emissions -- Energy consumption factors from SWDM v4 (in kWh/GB) -DECLARE energy_per_GB_datacenter NUMERIC DEFAULT CAST(0.055 + 0.012 AS NUMERIC); -- Operational + Embodied -DECLARE energy_per_GB_network NUMERIC DEFAULT CAST(0.059 + 0.013 AS NUMERIC); -- Operational + Embodied -DECLARE energy_per_GB_device NUMERIC DEFAULT CAST(0.080 + 0.081 AS NUMERIC); -- Operational + Embodied +-- Operational + Embodied +DECLARE ENERGY_PER_GB_DATACENTER NUMERIC DEFAULT CAST(0.055 + 0.012 AS NUMERIC); +-- Operational + Embodied +DECLARE ENERGY_PER_GB_NETWORK NUMERIC DEFAULT CAST(0.059 + 0.013 AS NUMERIC); +-- Operational + Embodied +DECLARE ENERGY_PER_GB_DEVICE NUMERIC DEFAULT CAST(0.080 + 0.081 AS NUMERIC); -- Total energy consumption per GB, calculated by summing the above factors -DECLARE kw_per_GB NUMERIC DEFAULT CAST(energy_per_GB_datacenter + energy_per_GB_network + energy_per_GB_device AS NUMERIC); -- Sum of all operational and embodied energies +-- Sum of all operational and embodied energies +DECLARE KW_PER_GB NUMERIC DEFAULT CAST( + ENERGY_PER_GB_DATACENTER + + ENERGY_PER_GB_NETWORK + + ENERGY_PER_GB_DEVICE AS NUMERIC +); -- Global average carbon intensity of electricity generation (gCO2/kWh) -DECLARE global_grid_intensity NUMERIC DEFAULT 494; +DECLARE GLOBAL_GRID_INTENSITY NUMERIC DEFAULT 494; -- Function to calculate emissions in gCO2 -CREATE TEMP FUNCTION 
calculate_emissions( - bytes FLOAT64, - kw_per_GB FLOAT64, - grid_intensity FLOAT64 +CREATE TEMP FUNCTION CALCULATE_EMISSIONS( + bytes FLOAT64, + kw_per_GB FLOAT64, + grid_intensity FLOAT64 ) RETURNS FLOAT64 AS ( - (bytes / 1024 / 1024 / 1024) * -- Convert bytes to GB - (kw_per_GB) * - grid_intensity + (BYTES / 1024 / 1024 / 1024) * -- Convert bytes to GB + (KW_PER_GB) * + GRID_INTENSITY ); -WITH page_data AS ( - SELECT - client, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytesTotal, - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytesHtml, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytesJS, - CAST(COALESCE(JSON_VALUE(summary, '$.bytesCss'), JSON_VALUE(summary, '$.bytesStyle')) AS INT64) AS bytesCSS, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytesImg, - CAST(JSON_VALUE(summary, '$.bytesOther') AS INT64) AS bytesOther, - CAST(JSON_VALUE(summary, '$.bytesHtmlDoc') AS INT64) AS bytesHtmlDoc, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytesFont - FROM - `httparchive.crawl.pages` - WHERE - date = '2022-06-01' AND is_root_page +WITH PAGE_DATA AS ( + SELECT + CLIENT, + CAST(JSON_VALUE(SUMMARY, '$.bytesTotal') AS INT64) AS BYTESTOTAL, + CAST(JSON_VALUE(SUMMARY, '$.bytesHtml') AS INT64) AS BYTESHTML, + CAST(JSON_VALUE(SUMMARY, '$.bytesJS') AS INT64) AS BYTESJS, + CAST( + COALESCE( + JSON_VALUE(SUMMARY, '$.bytesCss'), + JSON_VALUE(SUMMARY, '$.bytesStyle') + ) AS INT64 + ) AS BYTESCSS, + CAST(JSON_VALUE(SUMMARY, '$.bytesImg') AS INT64) AS BYTESIMG, + CAST(JSON_VALUE(SUMMARY, '$.bytesOther') AS INT64) AS BYTESOTHER, + CAST(JSON_VALUE(SUMMARY, '$.bytesHtmlDoc') AS INT64) AS BYTESHTMLDOC, + CAST(JSON_VALUE(SUMMARY, '$.bytesFont') AS INT64) AS BYTESFONT + FROM + `httparchive.crawl.pages` + WHERE + DATE = '2022-06-01' AND IS_ROOT_PAGE ) SELECT - percentile, - client, - -- For each resource type, calculate the size in KB and the associated emissions - -- Total resources - APPROX_QUANTILES(bytesTotal / 1024, 1000) 
[OFFSET(percentile * 10)] AS total_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS total_emissions, - -- HTML resources - APPROX_QUANTILES(bytesHtml / 1024, 1000) [OFFSET(percentile * 10)] AS html_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_emissions, - -- JavaScript resources - APPROX_QUANTILES(bytesJS / 1024, 1000) [OFFSET(percentile * 10)] AS js_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS js_emissions, - -- CSS resources - APPROX_QUANTILES(bytesCSS / 1024, 1000) [OFFSET(percentile * 10)] AS css_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS css_emissions, - -- Image resources - APPROX_QUANTILES(bytesImg / 1024, 1000) [OFFSET(percentile * 10)] AS img_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS img_emissions, - -- Other resources - APPROX_QUANTILES(bytesOther / 1024, 1000) [OFFSET(percentile * 10)] AS other_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS other_emissions, - -- HTML document - APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000) [OFFSET(percentile * 10)] AS html_doc_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_doc_emissions, - -- Font resources - APPROX_QUANTILES(bytesFont / 1024, 1000) [OFFSET(percentile * 10)] AS font_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS font_emissions + PERCENTILE, + CLIENT, + -- Total resources + APPROX_QUANTILES( + BYTESTOTAL / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS 
TOTAL_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESTOTAL, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS TOTAL_EMISSIONS, + -- HTML resources + APPROX_QUANTILES( + BYTESHTML / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESHTML, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_EMISSIONS, + -- JavaScript resources + APPROX_QUANTILES( + BYTESJS / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS JS_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESJS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS JS_EMISSIONS, + -- CSS resources + APPROX_QUANTILES( + BYTESCSS / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS CSS_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESCSS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS CSS_EMISSIONS, + -- Image resources + APPROX_QUANTILES( + BYTESIMG / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS IMG_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESIMG, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS IMG_EMISSIONS, + -- Other resources + APPROX_QUANTILES( + BYTESOTHER / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS OTHER_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESOTHER, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS OTHER_EMISSIONS, + -- HTML document + APPROX_QUANTILES( + BYTESHTMLDOC / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESHTMLDOC, KW_PER_GB, GLOBAL_GRID_INTENSITY), + 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, + -- Font resources + APPROX_QUANTILES( + BYTESFONT / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS FONT_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESFONT, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS FONT_EMISSIONS FROM - page_data, - UNNEST([10, 25, 50, 75, 90, 100]) AS 
percentile + PAGE_DATA, + UNNEST([10, 25, 50, 75, 90, 100]) AS PERCENTILE GROUP BY - percentile, - client + PERCENTILE, + CLIENT ORDER BY - client, - percentile + CLIENT, + PERCENTILE diff --git a/sql/2025/sustainability/green_third_party_requests.sql b/sql/2025/sustainability/green_third_party_requests.sql index b177acba29f..f773ef3395d 100644 --- a/sql/2025/sustainability/green_third_party_requests.sql +++ b/sql/2025/sustainability/green_third_party_requests.sql @@ -1,135 +1,142 @@ #standardSQL -# Median number of third-parties & green third-party requests per websites by rank WITH requests AS ( - SELECT - client, - CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page, - url - FROM - `httparchive.crawl.requests` - WHERE - date = '2025-06-01' + SELECT + client, + url, + CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page + FROM + `httparchive.crawl.requests` + WHERE + date = '2025-06-01' ), green AS ( - SELECT - NET.HOST(url) AS host, - TRUE AS is_green - FROM - `httparchive.almanac.green_web_foundation` - WHERE - date = '2025-09-01' + SELECT + TRUE AS is_green, + NET.HOST(url) AS host + FROM + `httparchive.almanac.green_web_foundation` + WHERE + date = '2025-09-01' ), pages AS ( - SELECT - client, - CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page, - rank - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' + SELECT + client, + rank, + CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' ), third_party AS ( - SELECT - domain, - COUNT(DISTINCT page) AS page_usage - FROM - `httparchive.almanac.third_parties` tp - JOIN - requests r - ON NET.HOST(r.url) = NET.HOST(tp.domain) - WHERE - date = '2025-06-01' AND - category NOT IN ('hosting') - GROUP BY - domain - HAVING - page_usage >= 50 + SELECT + domain, + COUNT(DISTINCT page) AS page_usage + FROM + `httparchive.almanac.third_parties` AS tp + INNER JOIN + requests AS r + ON NET.HOST(r.url) = NET.HOST(tp.domain) + WHERE + date = 
'2025-06-01' AND + category NOT IN ('hosting') + GROUP BY + domain + HAVING + page_usage >= 50 ), green_tp AS ( - SELECT - domain - FROM - `httparchive.almanac.third_parties` tp - JOIN - green g - ON NET.HOST(g.host) = NET.HOST(tp.domain) - WHERE - date = '2025-06-01' AND - category NOT IN ('hosting') - GROUP BY - domain + SELECT domain + FROM + `httparchive.almanac.third_parties` AS tp + INNER JOIN + green AS g + ON NET.HOST(g.host) = NET.HOST(tp.domain) + WHERE + date = '2025-06-01' AND + category NOT IN ('hosting') + GROUP BY + domain ), base AS ( - SELECT - client, - page, - rank, - COUNT(domain) AS third_parties_per_page - FROM - requests - LEFT JOIN - third_party - ON - NET.HOST(requests.url) = NET.HOST(third_party.domain) - INNER JOIN - pages - USING (client, page) - GROUP BY - client, - page, - rank + SELECT + client, + page, + rank, + COUNT(domain) AS third_parties_per_page + FROM + requests + LEFT JOIN + third_party + ON + NET.HOST(requests.url) = NET.HOST(third_party.domain) + INNER JOIN + pages + USING (client, page) + GROUP BY + client, + page, + rank ), base_green AS ( - SELECT - client, - page, - rank, - COUNT(domain) AS green_third_parties_per_page - FROM - requests - LEFT JOIN - green_tp - ON - NET.HOST(requests.url) = NET.HOST(green_tp.domain) - INNER JOIN - pages - USING (client, page) - GROUP BY - client, - page, - rank + SELECT + client, + page, + rank, + COUNT(domain) AS green_third_parties_per_page + FROM + requests + LEFT JOIN + green_tp + ON + NET.HOST(requests.url) = NET.HOST(green_tp.domain) + INNER JOIN + pages + USING (client, page) + GROUP BY + client, + page, + rank ) SELECT - client, - rank_grouping, - CASE - WHEN rank_grouping = 0 THEN '' - WHEN rank_grouping = 100000000 THEN 'all' - ELSE FORMAT("%'d", rank_grouping) - END AS ranking, - APPROX_QUANTILES(third_parties_per_page, 1000) [OFFSET(500)] AS p50_third_parties_per_page, - APPROX_QUANTILES(green_third_parties_per_page, 1000) [OFFSET(500)] AS p50_green_third_parties_per_page, - 
APPROX_QUANTILES(SAFE_DIVIDE(green_third_parties_per_page, third_parties_per_page), 1000) [OFFSET(500)] AS pct_green + base.client, + rank_grouping, + CASE + WHEN rank_grouping = 0 THEN '' + WHEN rank_grouping = 100000000 THEN 'all' + ELSE FORMAT("%'d", rank_grouping) + END AS ranking, + APPROX_QUANTILES( + third_parties_per_page, 1000 + ) [OFFSET(500)] AS p50_third_parties_per_page, + APPROX_QUANTILES( + green_third_parties_per_page, 1000 + ) [OFFSET(500)] AS p50_green_third_parties_per_page, + APPROX_QUANTILES( + SAFE_DIVIDE(green_third_parties_per_page, third_parties_per_page), 1000 + ) [OFFSET(500)] AS pct_green FROM - base, - UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping -JOIN - base_green -USING (client, page, rank) + base, + UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping +INNER JOIN + base_green +ON + base.client = base_green.client AND + base.page = base_green.page AND + base.rank = base_green.rank WHERE - rank <= rank_grouping + base.rank <= rank_grouping GROUP BY - client, - rank_grouping + base.client, + rank_grouping ORDER BY - client, - rank_grouping + base.client, + rank_grouping diff --git a/sql/2025/sustainability/green_web_hosting.sql b/sql/2025/sustainability/green_web_hosting.sql index 41d8355c6fa..9143c1da79d 100644 --- a/sql/2025/sustainability/green_web_hosting.sql +++ b/sql/2025/sustainability/green_web_hosting.sql @@ -2,58 +2,58 @@ # What percentage of URLs are hosted on a known green web hosting provider?
WITH green AS ( - SELECT - NET.HOST(url) AS host, - TRUE AS is_green - FROM - `httparchive.almanac.green_web_foundation` - WHERE - date = '2025-09-01' + SELECT + TRUE AS is_green, + NET.HOST(url) AS host + FROM + `httparchive.almanac.green_web_foundation` + WHERE + date = '2025-09-01' ), pages AS ( - SELECT - client, - NET.HOST(root_page) AS host, - rank - FROM - `httparchive.crawl.pages` - WHERE - is_root_page = TRUE AND - date = '2025-06-01' + SELECT + client, + rank, + NET.HOST(root_page) AS host + FROM + `httparchive.crawl.pages` + WHERE + is_root_page = TRUE AND + date = '2025-06-01' ) -- Apply rank grouping SELECT - client, - rank_grouping, - CASE - WHEN rank_grouping = 0 THEN '' - WHEN rank_grouping = 100000000 THEN 'all' - ELSE FORMAT("%'d", rank_grouping) - END AS ranking, - COUNTIF(is_green) AS total_green, - COUNT(0) AS total_sites, - SAFE_DIVIDE(COUNTIF(is_green), COUNT(0)) AS pct_green + client, + rank_grouping, + CASE + WHEN rank_grouping = 0 THEN '' + WHEN rank_grouping = 100000000 THEN 'all' + ELSE FORMAT("%'d", rank_grouping) + END AS ranking, + COUNTIF(is_green) AS total_green, + COUNT(*) AS total_sites, + SAFE_DIVIDE(COUNTIF(is_green), COUNT(*)) AS pct_green FROM ( - -- Left join green hosting information - SELECT - p.client, - p.host, - p.rank, - g.is_green - FROM - pages p - LEFT JOIN - green g - USING (host) + -- Left join green hosting information + SELECT + p.client, + p.host, + p.rank, + g.is_green + FROM + pages AS p + LEFT JOIN + green AS g + ON p.host = g.host ), - UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping +UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping WHERE - rank <= rank_grouping + rank <= rank_grouping GROUP BY - client, - rank_grouping + client, + rank_grouping ORDER BY - client, - rank_grouping; \ No newline at end of file + client, + rank_grouping; diff --git a/sql/2025/sustainability/page_byte_pre_type.sql b/sql/2025/sustainability/page_byte_pre_type.sql index 
846aaf2e506..48cc2113902 100644 --- a/sql/2025/sustainability/page_byte_pre_type.sql +++ b/sql/2025/sustainability/page_byte_pre_type.sql @@ -1,79 +1,118 @@ #standardSQL -# The distribution of page weight by resource type and client, with updated SWDM v4 methodology -- Energy consumption factors from SWDM v4 (in TWh/ZB) -DECLARE energy_per_GB_datacenter NUMERIC DEFAULT 0.00006829493087557603; # 290 TWh / 5.29 ZB -DECLARE energy_per_GB_network NUMERIC DEFAULT 0.05859598853868195; # 310 TWh / 5.29 ZB -DECLARE energy_per_GB_device NUMERIC DEFAULT 0.07956802188162324; # 421 TWh / 5.29 ZB +# 290 TWh / 5.29 ZB +DECLARE ENERGY_PER_GB_DATACENTER NUMERIC DEFAULT 0.00006829493087557603; +# 310 TWh / 5.29 ZB +DECLARE ENERGY_PER_GB_NETWORK NUMERIC DEFAULT 0.05859598853868195; +# 421 TWh / 5.29 ZB +DECLARE ENERGY_PER_GB_DEVICE NUMERIC DEFAULT 0.07956802188162324; --- Total energy consumption per GB, calculated by summing the above factors and converting to kWh/GB -DECLARE kw_per_GB NUMERIC DEFAULT 0.19300566251415094; # (290 + 310 + 421) TWh / 5.29 ZB * 1000000 kWh/TWh / 1000000000 GB/ZB +# (290 + 310 + 421) TWh / 5.29 ZB * 1000000 kWh/TWh / 1000000000 GB/ZB +DECLARE KW_PER_GB NUMERIC DEFAULT 0.19300566251415094; -- Global average carbon intensity of electricity generation (gCO2/kWh) -DECLARE global_grid_intensity NUMERIC DEFAULT 494; +DECLARE GLOBAL_GRID_INTENSITY NUMERIC DEFAULT 494; -- Function to calculate emissions in gCO2 CREATE TEMP FUNCTION calculate_emissions( - bytes FLOAT64, - kw_per_GB FLOAT64, - grid_intensity FLOAT64 + bytes FLOAT64, + kw_per_GB FLOAT64, + grid_intensity FLOAT64 ) RETURNS FLOAT64 AS ( - (bytes / 1024 / 1024 / 1024) * -- Convert bytes to GB - (kw_per_GB) * - grid_intensity + (BYTES / 1024 / 1024 / 1024) * -- Convert bytes to GB + (KW_PER_GB) * + GRID_INTENSITY ); -WITH page_data AS ( - SELECT - client, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytesTotal, - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytesHtml, - 
CAST(COALESCE(JSON_VALUE(summary, '$.bytesCss'), JSON_VALUE(summary, '$.bytesStyle')) AS INT64) AS bytesCSS, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytesJS, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytesImg, - CAST(JSON_VALUE(summary, '$.bytesOther') AS INT64) AS bytesOther, - CAST(JSON_VALUE(summary, '$.bytesHtmlDoc') AS INT64) AS bytesHtmlDoc, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytesFont - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' AND is_root_page +WITH PAGE_DATA AS ( + SELECT + CLIENT, + cast(json_value(SUMMARY, '$.bytesTotal') AS INT64) AS BYTESTOTAL, + cast(json_value(SUMMARY, '$.bytesHtml') AS INT64) AS BYTESHTML, + cast( + coalesce( + json_value(SUMMARY, '$.bytesCss'), + json_value(SUMMARY, '$.bytesStyle') + ) AS INT64 + ) AS BYTESCSS, + cast(json_value(SUMMARY, '$.bytesJS') AS INT64) AS BYTESJS, + cast(json_value(SUMMARY, '$.bytesImg') AS INT64) AS BYTESIMG, + cast(json_value(SUMMARY, '$.bytesOther') AS INT64) AS BYTESOTHER, + cast(json_value(SUMMARY, '$.bytesHtmlDoc') AS INT64) AS BYTESHTMLDOC, + cast(json_value(SUMMARY, '$.bytesFont') AS INT64) AS BYTESFONT + FROM + `httparchive.crawl.pages` + WHERE + DATE = '2025-06-01' AND IS_ROOT_PAGE ) SELECT - percentile, - client, - -- For each resource type, calculate the size in KB and the associated emissions - -- Total resources - APPROX_QUANTILES(bytesTotal / 1024, 1000) [OFFSET(percentile * 10)] AS total_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS total_emissions, - -- HTML resources - APPROX_QUANTILES(bytesHtml / 1024, 1000) [OFFSET(percentile * 10)] AS html_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_emissions, - -- JavaScript resources - APPROX_QUANTILES(bytesJS / 1024, 1000) [OFFSET(percentile * 10)] AS js_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesJS, 
kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS js_emissions, - -- CSS resources - APPROX_QUANTILES(bytesCSS / 1024, 1000) [OFFSET(percentile * 10)] AS css_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS css_emissions, - -- Image resources - APPROX_QUANTILES(bytesImg / 1024, 1000) [OFFSET(percentile * 10)] AS img_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesImg, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS img_emissions, - -- Other resources - APPROX_QUANTILES(bytesOther / 1024, 1000) [OFFSET(percentile * 10)] AS other_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS other_emissions, - -- HTML document - APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000) [OFFSET(percentile * 10)] AS html_doc_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS html_doc_emissions, - -- Font resources - APPROX_QUANTILES(bytesFont / 1024, 1000) [OFFSET(percentile * 10)] AS font_kbytes, - APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000) [OFFSET(percentile * 10)] AS font_emissions + PERCENTILE, + CLIENT, + -- Total resources + approx_quantiles( + BYTESTOTAL / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS TOTAL_KBYTES, + approx_quantiles( + calculate_emissions(BYTESTOTAL, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS TOTAL_EMISSIONS, + -- HTML resources + approx_quantiles( + BYTESHTML / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS HTML_KBYTES, + approx_quantiles( + calculate_emissions(BYTESHTML, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS HTML_EMISSIONS, + -- JavaScript resources + approx_quantiles( + BYTESJS / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS JS_KBYTES, + approx_quantiles( + calculate_emissions(BYTESJS, 
KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS JS_EMISSIONS, + -- CSS resources + approx_quantiles( + BYTESCSS / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS CSS_KBYTES, + approx_quantiles( + calculate_emissions(BYTESCSS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS CSS_EMISSIONS, + -- Image resources + approx_quantiles( + BYTESIMG / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS IMG_KBYTES, + approx_quantiles( + calculate_emissions(BYTESIMG, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS IMG_EMISSIONS, + -- Other resources + approx_quantiles( + BYTESOTHER / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS OTHER_KBYTES, + approx_quantiles( + calculate_emissions(BYTESOTHER, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS OTHER_EMISSIONS, + -- HTML document + approx_quantiles( + BYTESHTMLDOC / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS HTML_DOC_KBYTES, + approx_quantiles( + calculate_emissions(BYTESHTMLDOC, KW_PER_GB, GLOBAL_GRID_INTENSITY), + 1000 + ) [offset(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, + -- Font resources + approx_quantiles( + BYTESFONT / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS FONT_KBYTES, + approx_quantiles( + calculate_emissions(BYTESFONT, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS FONT_EMISSIONS FROM - page_data, - UNNEST([10, 25, 50, 75, 90, 100]) AS percentile + PAGE_DATA, + unnest([10, 25, 50, 75, 90, 100]) AS PERCENTILE GROUP BY - percentile, - client + PERCENTILE, + CLIENT ORDER BY - client, - percentile + CLIENT, + PERCENTILE diff --git a/sql/2025/sustainability/query_run_size.sql b/sql/2025/sustainability/query_run_size.sql index 4a0a5a1f8e5..bc24c40709f 100644 --- a/sql/2025/sustainability/query_run_size.sql +++ b/sql/2025/sustainability/query_run_size.sql @@ -3,9 +3,13 @@ # (0.012+0.013+0.081+0.055+0.0590.080)x494x [Total TB] *1024 = Total kg CO2e SELECT - AVG(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) 
/ 1048576 AS avg_size_MB, - SUM(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) / 1099511627776 AS total_size_TB + AVG( + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) / 1048576 AS avg_size_mb, + SUM( + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) / 1099511627776 AS total_size_tb FROM - `httparchive.crawl.pages` + `httparchive.crawl.pages` WHERE - date = '2025-06-01' \ No newline at end of file + date = '2025-06-01' diff --git a/sql/2025/sustainability/responsive_images.sql b/sql/2025/sustainability/responsive_images.sql index 1fbec57408e..c3c85e80b54 100644 --- a/sql/2025/sustainability/responsive_images.sql +++ b/sql/2025/sustainability/responsive_images.sql @@ -2,9 +2,9 @@ # percent of sites using images with srcset w/wo sizes, or picture element CREATE TEMPORARY FUNCTION get_media_info(media_string STRING) RETURNS STRUCT< - num_srcset_all INT64, - num_srcset_sizes INT64, - num_picture_img INT64 + num_srcset_all INT64, + num_srcset_sizes INT64, + num_picture_img INT64 > LANGUAGE js AS ''' var result = { num_srcset_all: 0, @@ -22,25 +22,52 @@ return result; '''; WITH page_data AS ( - SELECT - client, - get_media_info(JSON_EXTRACT_SCALAR(payload, '$._media')) AS media_info - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' AND is_root_page + SELECT + client, + get_media_info(json_extract_scalar(payload, '$._media')) AS media_info + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' AND is_root_page ) SELECT - client, - ROUND(SAFE_DIVIDE(COUNTIF(media_info.num_srcset_all > 0), COUNT(0)) * 100, 2) AS pages_with_srcset_pct, - ROUND(SAFE_DIVIDE(COUNTIF(media_info.num_srcset_sizes > 0), COUNT(0)) * 100, 2) AS pages_with_srcset_sizes_pct, - ROUND(SAFE_DIVIDE((COUNTIF(media_info.num_srcset_all > 0) - COUNTIF(media_info.num_srcset_sizes > 0)), COUNT(0)) * 100, 2) AS pages_with_srcset_wo_sizes_pct, - ROUND(SAFE_DIVIDE(SUM(media_info.num_srcset_sizes), SUM(media_info.num_srcset_all)) * 100, 2) AS 
instances_of_srcset_sizes_pct, - ROUND(SAFE_DIVIDE((SUM(media_info.num_srcset_all) - SUM(media_info.num_srcset_sizes)), SUM(media_info.num_srcset_all)) * 100, 2) AS instances_of_srcset_wo_sizes_pct, - ROUND(SAFE_DIVIDE(COUNTIF(media_info.num_picture_img > 0), COUNT(0)) * 100, 2) AS pages_with_picture_pct + client, + round( + safe_divide(countif(media_info.num_srcset_all > 0), count(*)) * 100, 2 + ) AS pages_with_srcset_pct, + round( + safe_divide(countif(media_info.num_srcset_sizes > 0), count(*)) * 100, 2 + ) AS pages_with_srcset_sizes_pct, + round( + safe_divide( + ( + countif( + media_info.num_srcset_all > 0 + ) - countif(media_info.num_srcset_sizes > 0) + ), + count(*) + ) * 100, + 2 + ) AS pages_with_srcset_wo_sizes_pct, + round( + safe_divide( + sum(media_info.num_srcset_sizes), sum(media_info.num_srcset_all) + ) * 100, + 2 + ) AS instances_of_srcset_sizes_pct, + round( + safe_divide( + (sum(media_info.num_srcset_all) - sum(media_info.num_srcset_sizes)), + sum(media_info.num_srcset_all) + ) * 100, + 2 + ) AS instances_of_srcset_wo_sizes_pct, + round( + safe_divide(countif(media_info.num_picture_img > 0), count(*)) * 100, 2 + ) AS pages_with_picture_pct FROM page_data GROUP BY - client + client ORDER BY - client \ No newline at end of file + client diff --git a/sql/2025/sustainability/script_count.sql b/sql/2025/sustainability/script_count.sql index 7a78885e329..10be69b9c0a 100644 --- a/sql/2025/sustainability/script_count.sql +++ b/sql/2025/sustainability/script_count.sql @@ -1,40 +1,107 @@ #standardSQL # Breakdown of inline vs external scripts WITH script_data AS ( - SELECT - client, - page, - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.total') AS INT64) AS total_scripts, - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.inline') AS INT64) AS inline_scripts, - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, 
'$._javascript'), '$.script_tags'), '$.src') AS INT64) AS external_scripts, - SAFE_DIVIDE( - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.inline') AS INT64), - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.total') AS INT64) - ) AS pct_inline_script, - SAFE_DIVIDE( - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.src') AS INT64), - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.total') AS INT64) - ) AS pct_external_script - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' AND - JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags'), '$.total') IS NOT NULL + SELECT + client, + page, + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.total' + ) AS INT64 + ) AS total_scripts, + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.inline' + ) AS INT64 + ) AS inline_scripts, + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.src' + ) AS INT64 + ) AS external_scripts, + SAFE_DIVIDE( + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.inline' + ) AS INT64 + ), + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.total' + ) AS INT64 + ) + ) AS pct_inline_script, + SAFE_DIVIDE( + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.src' + ) AS INT64 + ), + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' 
+ ), + '$.total' + ) AS INT64 + ) + ) AS pct_external_script + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' AND + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags' + ), + '$.total' + ) IS NOT NULL ) SELECT - client, - COUNT(DISTINCT page) AS pages_analyzed, - SUM(total_scripts) AS total_scripts, - SUM(inline_scripts) AS inline_scripts, - SUM(external_scripts) AS external_scripts, - SAFE_DIVIDE(SUM(external_scripts), SUM(total_scripts)) AS pct_external_script, - SAFE_DIVIDE(SUM(inline_scripts), SUM(total_scripts)) AS pct_inline_script, - APPROX_QUANTILES(SAFE_DIVIDE(external_scripts, total_scripts), 1000) [OFFSET(500)] AS median_external, - APPROX_QUANTILES(SAFE_DIVIDE(inline_scripts, total_scripts), 1000) [OFFSET(500)] AS median_inline + client, + COUNT(DISTINCT page) AS pages_analyzed, + SUM(total_scripts) AS total_scripts, + SUM(inline_scripts) AS inline_scripts, + SUM(external_scripts) AS external_scripts, + SAFE_DIVIDE( + SUM(external_scripts), SUM(total_scripts) + ) AS pct_external_script, + SAFE_DIVIDE(SUM(inline_scripts), SUM(total_scripts)) AS pct_inline_script, + APPROX_QUANTILES( + SAFE_DIVIDE(external_scripts, total_scripts), 1000 + ) [OFFSET(500)] AS median_external, + APPROX_QUANTILES( + SAFE_DIVIDE(inline_scripts, total_scripts), 1000 + ) [OFFSET(500)] AS median_inline FROM - script_data + script_data GROUP BY - client + client ORDER BY - client; + client; diff --git a/sql/2025/sustainability/ssg_bytes_per_type.sql b/sql/2025/sustainability/ssg_bytes_per_type.sql index 6768e0edc2e..6a89d8a3b9e 100644 --- a/sql/2025/sustainability/ssg_bytes_per_type.sql +++ b/sql/2025/sustainability/ssg_bytes_per_type.sql @@ -13,157 +13,309 @@ DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; WITH ssg_data AS ( - SELECT - client, - page, - tech.technology AS ssg, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS 
total_kb, - - -- Operational emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices, - - -- Embodied emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, - - -- Total emissions (operational + embodied) - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * 
operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, - - -- Proportions of each resource type relative to total bytes - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, - - -- Resource-specific emissions calculations - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - 
- (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * 
grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, - - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb - - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2025-06-01' AND - is_root_page = TRUE AND - EXISTS ( - SELECT 1 - FROM UNNEST(tech.categories) AS category - WHERE LOWER(category) = 'static site generator' OR - tech.technology IN ('Next.js', 'Nuxt.js') - ) + SELECT + client, + page, + tech.technology AS ssg, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + + -- Operational emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * + grid_intensity AS op_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * + grid_intensity AS op_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * + grid_intensity AS op_emissions_devices, + + -- Embodied emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * + grid_intensity AS em_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * + grid_intensity AS em_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * 
embodied_emissions_user_devices * + grid_intensity AS em_emissions_devices, + + -- Total emissions (operational + embodied) + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, + + -- Proportions of each resource type relative to total bytes + CAST( + 
JSON_VALUE(summary, '$.bytesHtml') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS html_proportion, + CAST( + JSON_VALUE(summary, '$.bytesJS') AS INT64 + ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST( + JSON_VALUE(summary, '$.bytesCss') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS css_proportion, + CAST( + JSON_VALUE(summary, '$.bytesImg') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS img_proportion, + CAST( + JSON_VALUE(summary, '$.bytesFont') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS font_proportion, + + -- Resource-specific emissions calculations + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + 
CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, + + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + + FROM + 
`httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2025-06-01' AND + is_root_page = TRUE AND + EXISTS ( + SELECT 1 + FROM UNNEST(tech.categories) AS category + WHERE LOWER(category) = 'static site generator' OR + tech.technology IN ('Next.js', 'Nuxt.js') + ) ) SELECT - client, - ssg, - COUNT(0) AS pages, - - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, - - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions + client, + ssg, + COUNT(*) AS pages, + + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES( + total_operational_emissions, 1000 + ) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES( + total_embodied_emissions, 1000 + ) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES( + total_emissions, 1000 + ) [OFFSET(500)] AS median_total_emissions, + + -- 
Resource-specific medians + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES( + total_html_emissions, 1000 + ) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES( + total_js_emissions, 1000 + ) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES( + total_css_emissions, 1000 + ) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES( + total_img_emissions, 1000 + ) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES( + total_font_emissions, 1000 + ) [OFFSET(500)] AS median_total_font_emissions FROM - ssg_data + ssg_data GROUP BY - client, - ssg + client, + ssg ORDER BY - pages DESC, - ssg, - client; + pages DESC, + ssg ASC, + client ASC; diff --git a/sql/2025/sustainability/ssg_bytes_per_type_2022.sql b/sql/2025/sustainability/ssg_bytes_per_type_2022.sql index d95fb394ae0..4471dcf439c 100644 --- a/sql/2025/sustainability/ssg_bytes_per_type_2022.sql +++ b/sql/2025/sustainability/ssg_bytes_per_type_2022.sql @@ -14,157 +14,309 @@ DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; WITH ssg_data AS ( - SELECT - client, - page, - tech.technology AS ssg, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - - -- Operational emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * 
operational_emissions_user_devices * grid_intensity AS op_emissions_devices, - - -- Embodied emissions calculations - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, - - -- Total emissions (operational + embodied) - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, - - ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + - 
(CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, - - -- Proportions of each resource type relative to total bytes - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, - - -- Resource-specific emissions calculations - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - 
embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, - - (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( - (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, - - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') 
AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb - - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2022-06-01' AND - is_root_page = TRUE AND - EXISTS ( - SELECT 1 - FROM UNNEST(tech.categories) AS category - WHERE LOWER(category) = 'static site generator' OR - tech.technology IN ('Next.js', 'Nuxt.js') - ) + SELECT + client, + page, + tech.technology AS ssg, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + + -- Operational emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * + grid_intensity AS op_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * + grid_intensity AS op_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * + grid_intensity AS op_emissions_devices, + + -- Embodied emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * + grid_intensity AS em_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * + grid_intensity AS em_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * + grid_intensity AS em_emissions_devices, + + -- Total emissions (operational + embodied) + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + 
CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, + + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, + + -- Proportions of each resource type relative to total bytes + CAST( + JSON_VALUE(summary, '$.bytesHtml') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS html_proportion, + CAST( + JSON_VALUE(summary, '$.bytesJS') AS INT64 + ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST( + JSON_VALUE(summary, '$.bytesCss') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS css_proportion, + CAST( + JSON_VALUE(summary, 
'$.bytesImg') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS img_proportion, + CAST( + JSON_VALUE(summary, '$.bytesFont') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS font_proportion, + + -- Resource-specific emissions calculations + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS 
total_css_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, + + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2022-06-01' AND + is_root_page = TRUE AND + EXISTS ( + SELECT 1 + FROM UNNEST(tech.categories) AS category + WHERE LOWER(category) = 'static site generator' OR + tech.technology IN ('Next.js', 'Nuxt.js') + ) ) SELECT - client, - ssg, - COUNT(0) AS pages, - - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS 
median_total_kb, - APPROX_QUANTILES(total_operational_emissions, 1000) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES(total_embodied_emissions, 1000) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES(total_emissions, 1000) [OFFSET(500)] AS median_total_emissions, - - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES(total_html_emissions, 1000) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES(total_js_emissions, 1000) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES(total_css_emissions, 1000) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES(total_img_emissions, 1000) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES(total_font_emissions, 1000) [OFFSET(500)] AS median_total_font_emissions + client, + ssg, + COUNT(*) AS pages, + + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES( + total_operational_emissions, 1000 + ) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES( + total_embodied_emissions, 1000 + ) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES( + total_emissions, 1000 + ) [OFFSET(500)] AS median_total_emissions, + + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES( + total_html_emissions, 1000 + ) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES( + total_js_emissions, 1000 + ) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + 
APPROX_QUANTILES( + total_css_emissions, 1000 + ) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES( + total_img_emissions, 1000 + ) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES( + total_font_emissions, 1000 + ) [OFFSET(500)] AS median_total_font_emissions FROM - ssg_data + ssg_data GROUP BY - client, - ssg + client, + ssg ORDER BY - pages DESC, - ssg, - client; + pages DESC, + ssg ASC, + client ASC; diff --git a/sql/2025/sustainability/stylesheet_count.sql b/sql/2025/sustainability/stylesheet_count.sql index c0bb071e246..150d75e94a0 100644 --- a/sql/2025/sustainability/stylesheet_count.sql +++ b/sql/2025/sustainability/stylesheet_count.sql @@ -1,42 +1,124 @@ #standardSQL # Breakdown of inline vs external scripts WITH stylesheet_data AS ( - SELECT + SELECT + client, + page, + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' + ), + '$.stylesheets' + ) AS INT64 + ) AS external_stylesheets, + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' + ), + '$.inlineStyles' + ) AS INT64 + ) AS inline_stylesheets, + SAFE_DIVIDE( + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.inlineStyles' + ) AS INT64 + ), + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.stylesheets' + ) AS INT64 + ) + + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.inlineStyles' + ) AS INT64 + ) + ) AS pct_inline_stylesheets, + SAFE_DIVIDE( + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.stylesheets' + ) AS INT64 + ), + CAST( + JSON_EXTRACT_SCALAR( 
+ JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.stylesheets' + ) AS INT64 + ) + + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.inlineStyles' + ) AS INT64 + ) + ) AS pct_external_stylesheets + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' + AND + is_root_page = TRUE AND + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' + ), + '$.stylesheets' + ) IS NOT NULL +) + +SELECT client, - page, - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) AS external_stylesheets, - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) AS inline_stylesheets, + COUNT(DISTINCT page) AS pages_analyzed, + SUM(external_stylesheets) AS external_stylesheets, + SUM(inline_stylesheets) AS inline_stylesheets, SAFE_DIVIDE( - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64), - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) + - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) + SUM(inline_stylesheets), SUM(inline_stylesheets + external_stylesheets) ) AS pct_inline_stylesheets, SAFE_DIVIDE( - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64), - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) + - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) - ) AS pct_external_stylesheets - FROM - 
`httparchive.crawl.pages` - WHERE - date = '2025-06-01' - AND - is_root_page = TRUE AND - JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') IS NOT NULL -) - -SELECT - client, - COUNT(DISTINCT page) AS pages_analyzed, - SUM(external_stylesheets) AS external_stylesheets, - SUM(inline_stylesheets) AS inline_stylesheets, - SAFE_DIVIDE(SUM(inline_stylesheets), SUM(inline_stylesheets + external_stylesheets)) AS pct_inline_stylesheets, - SAFE_DIVIDE(SUM(external_stylesheets), SUM(inline_stylesheets + external_stylesheets)) AS pct_external_stylesheets, - APPROX_QUANTILES(SAFE_DIVIDE(inline_stylesheets, inline_stylesheets + external_stylesheets), 1000) [OFFSET(500)] AS median_inline_stylesheets, - APPROX_QUANTILES(SAFE_DIVIDE(external_stylesheets, inline_stylesheets + external_stylesheets), 1000) [OFFSET(500)] AS median_external_stylesheets + SUM(external_stylesheets), + SUM(inline_stylesheets + external_stylesheets) + ) AS pct_external_stylesheets, + APPROX_QUANTILES( + SAFE_DIVIDE( + inline_stylesheets, inline_stylesheets + external_stylesheets + ), + 1000 + ) [OFFSET(500)] AS median_inline_stylesheets, + APPROX_QUANTILES( + SAFE_DIVIDE( + external_stylesheets, inline_stylesheets + external_stylesheets + ), + 1000 + ) [OFFSET(500)] AS median_external_stylesheets FROM - stylesheet_data + stylesheet_data GROUP BY - client + client ORDER BY - client; + client; diff --git a/sql/2025/sustainability/text_compression.sql b/sql/2025/sustainability/text_compression.sql index f4559675463..dac91ca0bdb 100644 --- a/sql/2025/sustainability/text_compression.sql +++ b/sql/2025/sustainability/text_compression.sql @@ -1,4 +1,4 @@ -CREATE TEMP FUNCTION getContentEncoding(headers STRING) +CREATE TEMP FUNCTION GETCONTENTENCODING(headers STRING) RETURNS STRING LANGUAGE js AS """ try { @@ -13,41 +13,45 @@ LANGUAGE js AS """ """; WITH request_data AS ( - SELECT - client, - getContentEncoding(JSON_EXTRACT(payload, 
'$.response.headers')) AS resp_content_encoding - FROM - `httparchive.crawl.requests` - WHERE - date = '2025-06-01' + SELECT + client, + GETCONTENTENCODING( + JSON_EXTRACT(payload, '$.response.headers') + ) AS resp_content_encoding + FROM + `httparchive.crawl.requests` + WHERE + date = '2025-06-01' ), compression_data AS ( - SELECT - client, - CASE - WHEN resp_content_encoding = 'gzip' THEN 'Gzip' - WHEN resp_content_encoding = 'br' THEN 'Brotli' - WHEN resp_content_encoding IS NULL THEN 'no text compression' - ELSE 'other' - END AS compression_type, - COUNT(0) AS num_requests, - SUM(COUNT(0)) OVER (PARTITION BY client) AS total, - ROUND(COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) * 100, 2) AS pct - FROM - request_data - GROUP BY - client, - compression_type + SELECT + client, + CASE + WHEN resp_content_encoding = 'gzip' THEN 'Gzip' + WHEN resp_content_encoding = 'br' THEN 'Brotli' + WHEN resp_content_encoding IS NULL THEN 'no text compression' + ELSE 'other' + END AS compression_type, + COUNT(*) AS num_requests, + SUM(COUNT(*)) OVER (PARTITION BY client) AS total, + ROUND( + COUNT(*) / SUM(COUNT(*)) OVER (PARTITION BY client) * 100, 2 + ) AS pct + FROM + request_data + GROUP BY + client, + compression_type ) SELECT - client, - compression_type, - num_requests, - total, - pct + client, + compression_type, + num_requests, + total, + pct FROM compression_data ORDER BY - client, - num_requests DESC \ No newline at end of file + client ASC, + num_requests DESC diff --git a/sql/2025/sustainability/unminified_css_bytes.sql b/sql/2025/sustainability/unminified_css_bytes.sql index 9abec7684d0..cd771f2375f 100644 --- a/sql/2025/sustainability/unminified_css_bytes.sql +++ b/sql/2025/sustainability/unminified_css_bytes.sql @@ -2,17 +2,24 @@ # Distribution of unminified CSS request bytes per page SELECT - client, - percentile, - APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.minify-css.details.overallSavingsBytes') AS INT64) / 1024, 1000) [OFFSET(percentile * 
10)] AS css_kilobytes + client, + percentile, + APPROX_QUANTILES( + CAST( + JSON_VALUE( + lighthouse, '$.audits.minify-css.details.overallSavingsBytes' + ) AS INT64 + ) / 1024, + 1000 + ) [OFFSET(percentile * 10)] AS css_kilobytes FROM - `httparchive.crawl.pages`, - UNNEST([10, 25, 50, 75, 90, 100]) AS percentile + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile WHERE - date = '2025-06-01' + date = '2025-06-01' GROUP BY - client, - percentile + client, + percentile ORDER BY - client, - percentile + client, + percentile diff --git a/sql/2025/sustainability/unminified_js_bytes.sql b/sql/2025/sustainability/unminified_js_bytes.sql index d4fb9b9697f..fb6722303e4 100644 --- a/sql/2025/sustainability/unminified_js_bytes.sql +++ b/sql/2025/sustainability/unminified_js_bytes.sql @@ -2,17 +2,25 @@ # Distribution of unminified JS request bytes per page SELECT - client, - percentile, - APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.minify-javascript.details.overallSavingsBytes') AS INT64) / 1024, 1000) [OFFSET(percentile * 10)] AS js_kilobytes + client, + percentile, + APPROX_QUANTILES( + CAST( + JSON_VALUE( + lighthouse, + '$.audits.minify-javascript.details.overallSavingsBytes' + ) AS INT64 + ) / 1024, + 1000 + ) [OFFSET(percentile * 10)] AS js_kilobytes FROM - `httparchive.crawl.pages`, - UNNEST([10, 25, 50, 75, 90, 100]) AS percentile + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile WHERE - date = '2025-06-01' + date = '2025-06-01' GROUP BY - client, - percentile + client, + percentile ORDER BY - client, - percentile + client, + percentile diff --git a/sql/2025/sustainability/unused_css_bytes.sql b/sql/2025/sustainability/unused_css_bytes.sql index 7acbaa1cdfc..a1adf733b81 100644 --- a/sql/2025/sustainability/unused_css_bytes.sql +++ b/sql/2025/sustainability/unused_css_bytes.sql @@ -2,17 +2,25 @@ # Distribution of unused CSS request bytes per page SELECT - client, - percentile, - 
APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.unused-css-rules.details.overallSavingsBytes') AS INT64) / 1024, 1000) [OFFSET(percentile * 10)] AS css_kilobytes + client, + percentile, + APPROX_QUANTILES( + CAST( + JSON_VALUE( + lighthouse, + '$.audits.unused-css-rules.details.overallSavingsBytes' + ) AS INT64 + ) / 1024, + 1000 + ) [OFFSET(percentile * 10)] AS css_kilobytes FROM - `httparchive.crawl.pages`, - UNNEST([10, 25, 50, 75, 90, 100]) AS percentile + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile WHERE - date = '2025-06-01' + date = '2025-06-01' GROUP BY - client, - percentile + client, + percentile ORDER BY - client, - percentile + client, + percentile diff --git a/sql/2025/sustainability/unused_js_bytes.sql b/sql/2025/sustainability/unused_js_bytes.sql index 9286e1db667..600550bf3b8 100644 --- a/sql/2025/sustainability/unused_js_bytes.sql +++ b/sql/2025/sustainability/unused_js_bytes.sql @@ -2,17 +2,25 @@ # Distribution of unused JS request bytes per page SELECT - client, - percentile, - APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.unused-javascript.details.overallSavingsBytes') AS INT64) / 1024, 1000) [OFFSET(percentile * 10)] AS js_kilobytes + client, + percentile, + APPROX_QUANTILES( + CAST( + JSON_VALUE( + lighthouse, + '$.audits.unused-javascript.details.overallSavingsBytes' + ) AS INT64 + ) / 1024, + 1000 + ) [OFFSET(percentile * 10)] AS js_kilobytes FROM - `httparchive.crawl.pages`, - UNNEST([10, 25, 50, 75, 90, 100]) AS percentile + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile WHERE - date = '2025-06-01' + date = '2025-06-01' GROUP BY - client, - percentile + client, + percentile ORDER BY - client, - percentile + client, + percentile diff --git a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql index 53b9727d5b7..ac18b1b30f3 100644 --- a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql 
+++ b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql @@ -1,37 +1,41 @@ #standardSQL -# The distribution of websites by client that use the prefers-color-scheme:dark media query. WITH combined_data AS ( - SELECT - client, - page, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages, - SUM( - CASE - WHEN EXISTS ( - SELECT 1 - FROM UNNEST(JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules')) AS rule - WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND - JSON_EXTRACT_SCALAR(rule, '$.media') = '(prefers-color-scheme:dark)' - ) - THEN 1 - ELSE 0 - END - ) OVER (PARTITION BY client, page) AS is_dark_mode_page - FROM - `httparchive.crawl.parsed_css` - WHERE - date = '2025-06-01' + SELECT + client, + page, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages, + SUM( + CASE + WHEN EXISTS ( + SELECT 1 + FROM + UNNEST( + JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules') + ) AS rule + WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND + JSON_EXTRACT_SCALAR( + rule, '$.media' + ) = '(prefers-color-scheme:dark)' + ) + THEN 1 + ELSE 0 + END + ) OVER (PARTITION BY client, page) AS is_dark_mode_page + FROM + `httparchive.crawl.parsed_css` + WHERE + date = '2025-06-01' ) SELECT - client, - MAX(total_pages) AS total_pages, - SUM(is_dark_mode_page) AS pages_using_dark_mode, - SUM(is_dark_mode_page) / MAX(total_pages) * 100 AS percentage_of_pages + client, + MAX(total_pages) AS total_pages, + SUM(is_dark_mode_page) AS pages_using_dark_mode, + SUM(is_dark_mode_page) / MAX(total_pages) * 100 AS percentage_of_pages FROM - combined_data + combined_data GROUP BY - client + client ORDER BY - percentage_of_pages DESC, client; \ No newline at end of file + percentage_of_pages DESC, client ASC; diff --git a/sql/2025/sustainability/video_autoplay_values.sql b/sql/2025/sustainability/video_autoplay_values.sql index 680a22e4039..98f90a16533 100644 --- a/sql/2025/sustainability/video_autoplay_values.sql +++ b/sql/2025/sustainability/video_autoplay_values.sql @@ 
-1,29 +1,40 @@ WITH video_data AS ( - SELECT - client, - LOWER(IFNULL(JSON_EXTRACT_SCALAR(video_nodes, '$.autoplay'), '(autoplay not used)')) AS autoplay_value - FROM - `httparchive.crawl.pages`, - UNNEST(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.videos.nodes')) AS video_nodes - WHERE - date = '2025-06-01' AND -- Updated date - is_root_page - LIMIT 10000 -- Limit the number of rows processed for faster testing + SELECT + client, + LOWER( + COALESCE( + JSON_EXTRACT_SCALAR(video_nodes, '$.autoplay'), + '(autoplay not used)' + ) + ) AS autoplay_value + FROM + `httparchive.crawl.pages`, + UNNEST( + JSON_EXTRACT_ARRAY( + JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.videos.nodes' + ) + ) AS video_nodes + WHERE + date = '2025-06-01' AND -- Updated date + is_root_page + LIMIT 10000 -- Limit the number of rows processed for faster testing ) SELECT - client, - IF(autoplay_value = '', '(empty)', autoplay_value) AS autoplay_value, - COUNT(0) AS autoplay_value_count, - SUM(COUNT(0)) OVER (PARTITION BY client) AS total_videos, - ROUND(SAFE_DIVIDE(COUNT(0), SUM(COUNT(0)) OVER (PARTITION BY client)) * 100, 2) AS autoplay_value_pct + client, + IF(autoplay_value = '', '(empty)', autoplay_value) AS autoplay_value, + COUNT(*) AS autoplay_value_count, + SUM(COUNT(*)) OVER (PARTITION BY client) AS total_videos, + ROUND( + SAFE_DIVIDE(COUNT(*), SUM(COUNT(*)) OVER (PARTITION BY client)) * 100, 2 + ) AS autoplay_value_pct FROM - video_data + video_data GROUP BY - client, - autoplay_value + client, + autoplay_value QUALIFY - autoplay_value_count > 10 + autoplay_value_count > 10 ORDER BY - client, - autoplay_value_count DESC \ No newline at end of file + client ASC, + autoplay_value_count DESC diff --git a/sql/2025/sustainability/video_preload_values.sql b/sql/2025/sustainability/video_preload_values.sql index 1191a7cddfb..fa88ca1cba4 100644 --- a/sql/2025/sustainability/video_preload_values.sql +++ b/sql/2025/sustainability/video_preload_values.sql @@ -1,31 +1,42 
@@ WITH video_data AS ( - SELECT - date, - client, - LOWER(IFNULL(JSON_EXTRACT_SCALAR(video_nodes, '$.preload'), '(preload not used)')) AS preload_value - FROM - `httparchive.crawl.pages`, - UNNEST(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.videos.nodes')) AS video_nodes - WHERE - date IN ('2025-06-01', '2024-07-01') AND -- Updated dates - is_root_page + SELECT + date, + client, + LOWER( + COALESCE( + JSON_EXTRACT_SCALAR(video_nodes, '$.preload'), + '(preload not used)' + ) + ) AS preload_value + FROM + `httparchive.crawl.pages`, + UNNEST( + JSON_EXTRACT_ARRAY( + JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.videos.nodes' + ) + ) AS video_nodes + WHERE + date IN ('2025-06-01', '2024-07-01') AND -- Updated dates + is_root_page ) SELECT - date, - client, - IF(preload_value = '', '(empty)', preload_value) AS preload_value, - COUNT(0) AS preload_value_count, - SAFE_DIVIDE(COUNT(0), SUM(COUNT(0)) OVER (PARTITION BY date, client)) AS preload_value_pct + date, + client, + IF(preload_value = '', '(empty)', preload_value) AS preload_value, + COUNT(*) AS preload_value_count, + SAFE_DIVIDE( + COUNT(*), SUM(COUNT(*)) OVER (PARTITION BY date, client) + ) AS preload_value_pct FROM - video_data + video_data GROUP BY - date, - client, - preload_value + date, + client, + preload_value QUALIFY - preload_value_count > 10 + preload_value_count > 10 ORDER BY - date, - client, - preload_value_count DESC \ No newline at end of file + date ASC, + client ASC, + preload_value_count DESC From badf4154cd484407bd985f967269e546618b142e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Tue, 29 Jul 2025 13:50:55 +0100 Subject: [PATCH 04/16] fix linter errors for green_third_party --- .../green_third_party_requests.sql | 94 ++++++++++--------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/sql/2025/sustainability/green_third_party_requests.sql b/sql/2025/sustainability/green_third_party_requests.sql index f773ef3395d..9ddd0fc7ebb 100644 
--- a/sql/2025/sustainability/green_third_party_requests.sql +++ b/sql/2025/sustainability/green_third_party_requests.sql @@ -1,4 +1,5 @@ #standardSQL +# Median third-parties & green third-party requests per websites by rank WITH requests AS ( SELECT @@ -34,80 +35,80 @@ pages AS ( third_party AS ( SELECT - domain, - COUNT(DISTINCT page) AS page_usage + tp.domain, + COUNT(DISTINCT r.page) AS page_usage FROM `httparchive.almanac.third_parties` AS tp INNER JOIN requests AS r ON NET.HOST(r.url) = NET.HOST(tp.domain) WHERE - date = '2025-06-01' AND - category NOT IN ('hosting') + tp.date = '2025-06-01' AND + tp.category NOT IN ('hosting') GROUP BY - domain + tp.domain HAVING page_usage >= 50 ), green_tp AS ( - SELECT domain + SELECT tp.domain FROM `httparchive.almanac.third_parties` AS tp INNER JOIN green AS g ON NET.HOST(g.host) = NET.HOST(tp.domain) WHERE - date = '2025-06-01' AND - category NOT IN ('hosting') + tp.date = '2025-06-01' AND + tp.category NOT IN ('hosting') GROUP BY - domain + tp.domain ), base AS ( SELECT - client, - page, - rank, - COUNT(domain) AS third_parties_per_page + r.client, + r.page, + p.rank, + COUNT(tp.domain) AS third_parties_per_page FROM - requests + requests AS r LEFT JOIN - third_party + third_party AS tp ON - NET.HOST(requests.url) = NET.HOST(third_party.domain) + NET.HOST(r.url) = NET.HOST(tp.domain) INNER JOIN - pages - USING (client, page) + pages AS p + ON r.client = p.client AND r.page = p.page GROUP BY - client, - page, - rank + r.client, + r.page, + p.rank ), base_green AS ( SELECT - client, - page, - rank, - COUNT(domain) AS green_third_parties_per_page + r.client, + r.page, + p.rank, + COUNT(gtp.domain) AS green_third_parties_per_page FROM - requests + requests AS r LEFT JOIN - green_tp + green_tp AS gtp ON - NET.HOST(requests.url) = NET.HOST(green_tp.domain) + NET.HOST(r.url) = NET.HOST(gtp.domain) INNER JOIN - pages - USING (client, page) + pages AS p + ON r.client = p.client AND r.page = p.page GROUP BY - client, - page, - 
rank + r.client, + r.page, + p.rank ) SELECT - client, + b.client, rank_grouping, CASE WHEN rank_grouping = 0 THEN '' @@ -115,28 +116,31 @@ SELECT ELSE FORMAT("%'d", rank_grouping) END AS ranking, APPROX_QUANTILES( - third_parties_per_page, 1000 + b.third_parties_per_page, 1000 ) [OFFSET(500)] AS p50_third_parties_per_page, APPROX_QUANTILES( - green_third_parties_per_page, 1000 + bg.green_third_parties_per_page, 1000 ) [OFFSET(500)] AS p50_green_third_parties_per_page, APPROX_QUANTILES( - SAFE_DIVIDE(green_third_parties_per_page, third_parties_per_page), 1000 + SAFE_DIVIDE( + bg.green_third_parties_per_page, + b.third_parties_per_page + ), 1000 ) [OFFSET(500)] AS pct_green FROM - base, + base AS b, UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping INNER JOIN - base_green + base_green AS bg ON - base.client = base_green.client AND - base.page = base_green.page AND - base.rank = base_green.rank + b.client = bg.client AND + b.page = bg.page AND + b.rank = bg.rank WHERE - rank <= rank_grouping + b.rank <= rank_grouping GROUP BY - client, + b.client, rank_grouping ORDER BY - client, + b.client, rank_grouping From b62f1d41a607f02232ed8c26892aefcfa2ee76a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Tue, 29 Jul 2025 15:05:50 +0100 Subject: [PATCH 05/16] update indentation from 4 to 2 --- .../sustainability/cache_header_usage.sql | 158 ++--- sql/2025/sustainability/cdn_adoption.sql | 48 +- .../sustainability/cms_bytes_per_type.sql | 540 ++++++++--------- .../cms_bytes_per_type_2022.sql | 313 ---------- .../sustainability/content-visibility.sql | 68 +-- .../ecommerce_bytes_per_type.sql | 553 +++++++++--------- .../ecommerce_bytes_per_type_2022.sql | 322 ---------- sql/2025/sustainability/favicons.sql | 44 +- .../global_emissions_per_page.sql | 186 +++--- .../global_emissions_per_page_2022.sql | 124 ---- .../green_third_party_requests.sql | 230 ++++---- sql/2025/sustainability/green_web_hosting.sql | 84 +-- 
.../sustainability/page_byte_pre_type.sql | 180 +++--- sql/2025/sustainability/query_run_size.sql | 16 +- sql/2025/sustainability/responsive_images.sql | 92 +-- sql/2025/sustainability/script_count.sql | 194 +++--- .../sustainability/ssg_bytes_per_type.sql | 552 +++++++++-------- .../ssg_bytes_per_type_2022.sql | 322 ---------- sql/2025/sustainability/stylesheet_count.sql | 224 +++---- sql/2025/sustainability/text_compression.sql | 68 +-- .../sustainability/unminified_css_bytes.sql | 34 +- .../sustainability/unminified_js_bytes.sql | 36 +- sql/2025/sustainability/unused_css_bytes.sql | 36 +- sql/2025/sustainability/unused_js_bytes.sql | 36 +- .../use_of_prefers_dark_mode_usage.sql | 64 +- .../sustainability/video_autoplay_values.sql | 64 +- .../sustainability/video_preload_values.sql | 68 +-- 27 files changed, 1786 insertions(+), 2870 deletions(-) delete mode 100644 sql/2025/sustainability/cms_bytes_per_type_2022.sql delete mode 100644 sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql delete mode 100644 sql/2025/sustainability/global_emissions_per_page_2022.sql delete mode 100644 sql/2025/sustainability/ssg_bytes_per_type_2022.sql diff --git a/sql/2025/sustainability/cache_header_usage.sql b/sql/2025/sustainability/cache_header_usage.sql index d3e4f661abb..6f6a2a7087b 100644 --- a/sql/2025/sustainability/cache_header_usage.sql +++ b/sql/2025/sustainability/cache_header_usage.sql @@ -2,94 +2,94 @@ # The distribution of cache header adoption on websites by client. 
SELECT - client, - COUNT(*) AS total_requests, + client, + COUNT(*) AS total_requests, - COUNTIF(uses_cache_control) AS total_using_cache_control, - COUNTIF(uses_max_age) AS total_using_max_age, - COUNTIF(uses_expires) AS total_using_expires, - COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires, - COUNTIF( - uses_cache_control AND uses_expires - ) AS total_using_both_cc_and_expires, - COUNTIF( - NOT uses_cache_control AND NOT uses_expires - ) AS total_using_neither_cc_and_expires, - COUNTIF( - uses_cache_control AND NOT uses_expires - ) AS total_using_only_cache_control, - COUNTIF( - NOT uses_cache_control AND uses_expires - ) AS total_using_only_expires, + COUNTIF(uses_cache_control) AS total_using_cache_control, + COUNTIF(uses_max_age) AS total_using_max_age, + COUNTIF(uses_expires) AS total_using_expires, + COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires, + COUNTIF( + uses_cache_control AND uses_expires + ) AS total_using_both_cc_and_expires, + COUNTIF( + NOT uses_cache_control AND NOT uses_expires + ) AS total_using_neither_cc_and_expires, + COUNTIF( + uses_cache_control AND NOT uses_expires + ) AS total_using_only_cache_control, + COUNTIF( + NOT uses_cache_control AND uses_expires + ) AS total_using_only_expires, - COUNTIF(uses_cache_control) / COUNT(*) AS pct_cache_control, - COUNTIF(uses_max_age) / COUNT(*) AS pct_using_max_age, - COUNTIF(uses_expires) / COUNT(*) AS pct_using_expires, - COUNTIF( - uses_max_age AND uses_expires - ) / COUNT(*) AS pct_using_max_age_and_expires, - COUNTIF( - uses_cache_control AND uses_expires - ) / COUNT(*) AS pct_using_both_cc_and_expires, - COUNTIF( - NOT uses_cache_control AND NOT uses_expires - ) / COUNT(*) AS pct_using_neither_cc_nor_expires, - COUNTIF( - uses_cache_control AND NOT uses_expires - ) / COUNT(*) AS pct_using_only_cache_control, - COUNTIF( - NOT uses_cache_control AND uses_expires - ) / COUNT(*) AS pct_using_only_expires + COUNTIF(uses_cache_control) / COUNT(*) 
AS pct_cache_control, + COUNTIF(uses_max_age) / COUNT(*) AS pct_using_max_age, + COUNTIF(uses_expires) / COUNT(*) AS pct_using_expires, + COUNTIF( + uses_max_age AND uses_expires + ) / COUNT(*) AS pct_using_max_age_and_expires, + COUNTIF( + uses_cache_control AND uses_expires + ) / COUNT(*) AS pct_using_both_cc_and_expires, + COUNTIF( + NOT uses_cache_control AND NOT uses_expires + ) / COUNT(*) AS pct_using_neither_cc_nor_expires, + COUNTIF( + uses_cache_control AND NOT uses_expires + ) / COUNT(*) AS pct_using_only_cache_control, + COUNTIF( + NOT uses_cache_control AND uses_expires + ) / COUNT(*) AS pct_using_only_expires FROM ( - SELECT - client, + SELECT + client, - JSON_EXTRACT_SCALAR( - summary, '$.resp_expires' - ) IS NOT NULL AND TRIM( - JSON_EXTRACT_SCALAR(summary, '$.resp_expires') - ) != '' AS uses_expires, - JSON_EXTRACT_SCALAR( - summary, '$.resp_cache_control' - ) IS NOT NULL AND TRIM( - JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') - ) != '' AS uses_cache_control, - REGEXP_CONTAINS( - JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control'), - r'(?i)max-age\s*=\s*[0-9]+' - ) AS uses_max_age, + JSON_EXTRACT_SCALAR( + summary, '$.resp_expires' + ) IS NOT NULL AND TRIM( + JSON_EXTRACT_SCALAR(summary, '$.resp_expires') + ) != '' AS uses_expires, + JSON_EXTRACT_SCALAR( + summary, '$.resp_cache_control' + ) IS NOT NULL AND TRIM( + JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') + ) != '' AS uses_cache_control, + REGEXP_CONTAINS( + JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control'), + r'(?i)max-age\s*=\s*[0-9]+' + ) AS uses_max_age, - JSON_EXTRACT_SCALAR( - summary, '$.resp_etag' - ) IS NULL OR TRIM( - JSON_EXTRACT_SCALAR(summary, '$.resp_etag') - ) = '' AS uses_no_etag, - JSON_EXTRACT_SCALAR( - summary, '$.resp_etag' - ) IS NOT NULL AND TRIM( - JSON_EXTRACT_SCALAR(summary, '$.resp_etag') - ) != '' AS uses_etag, - JSON_EXTRACT_SCALAR( - summary, '$.resp_last_modified' - ) IS NOT NULL AND TRIM( - JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified') - 
) != '' AS uses_last_modified, + JSON_EXTRACT_SCALAR( + summary, '$.resp_etag' + ) IS NULL OR TRIM( + JSON_EXTRACT_SCALAR(summary, '$.resp_etag') + ) = '' AS uses_no_etag, + JSON_EXTRACT_SCALAR( + summary, '$.resp_etag' + ) IS NOT NULL AND TRIM( + JSON_EXTRACT_SCALAR(summary, '$.resp_etag') + ) != '' AS uses_etag, + JSON_EXTRACT_SCALAR( + summary, '$.resp_last_modified' + ) IS NOT NULL AND TRIM( + JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified') + ) != '' AS uses_last_modified, - REGEXP_CONTAINS( - TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^W/".*"' - ) AS uses_weak_etag, - REGEXP_CONTAINS( - TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^".*"' - ) AS uses_strong_etag + REGEXP_CONTAINS( + TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^W/".*"' + ) AS uses_weak_etag, + REGEXP_CONTAINS( + TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^".*"' + ) AS uses_strong_etag - FROM - `httparchive.crawl.requests` - WHERE - date = '2025-06-01' + FROM + `httparchive.crawl.requests` + WHERE + date = '2025-06-01' ) GROUP BY - client + client ORDER BY - client; + client; diff --git a/sql/2025/sustainability/cdn_adoption.sql b/sql/2025/sustainability/cdn_adoption.sql index d37b0529d4c..6e4c06db59d 100644 --- a/sql/2025/sustainability/cdn_adoption.sql +++ b/sql/2025/sustainability/cdn_adoption.sql @@ -2,32 +2,32 @@ # The distribution of CDN adoption on websites by client. 
SELECT - client, - total, - IF(cdn = '', 'No CDN', cdn) AS cdn, - COUNT(*) AS freq, - COUNT(*) / total AS pct + client, + total, + IF(cdn = '', 'No CDN', cdn) AS cdn, + COUNT(*) AS freq, + COUNT(*) / total AS pct FROM ( - SELECT - client, - COUNT(*) AS total, - ARRAY_CONCAT_AGG( - SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ') - ) AS cdn_list - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' AND - is_root_page = TRUE - GROUP BY - client + SELECT + client, + COUNT(*) AS total, + ARRAY_CONCAT_AGG( + SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ') + ) AS cdn_list + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' AND + is_root_page = TRUE + GROUP BY + client ), UNNEST(cdn_list) AS cdn GROUP BY - client, - cdn, - total + client, + cdn, + total ORDER BY - pct DESC, - client ASC, - cdn ASC; + pct DESC, + client ASC, + cdn ASC; diff --git a/sql/2025/sustainability/cms_bytes_per_type.sql b/sql/2025/sustainability/cms_bytes_per_type.sql index 7fcb216d16c..817cf890e24 100644 --- a/sql/2025/sustainability/cms_bytes_per_type.sql +++ b/sql/2025/sustainability/cms_bytes_per_type.sql @@ -13,301 +13,301 @@ DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; WITH cms_data AS ( - SELECT - client, - page, - tech.technology AS cms, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + SELECT + client, + page, + tech.technology AS cms, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - -- Operational emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * - grid_intensity AS op_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * - grid_intensity AS op_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) 
* operational_emissions_user_devices * - grid_intensity AS op_emissions_devices, + -- Operational emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * + grid_intensity AS op_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * + grid_intensity AS op_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * + grid_intensity AS op_emissions_devices, - -- Embodied emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * - grid_intensity AS em_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * - grid_intensity AS em_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * - grid_intensity AS em_emissions_devices, + -- Embodied emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * + grid_intensity AS em_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * + grid_intensity AS em_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * + grid_intensity AS em_emissions_devices, - -- Total emissions (operational + embodied) - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * 
grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, - - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, + -- Total emissions (operational + embodied) + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * 
grid_intensity - ) AS total_emissions, + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, - -- Proportions of each resource type relative to total bytes + ( + ( CAST( - JSON_VALUE(summary, '$.bytesHtml') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS html_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesJS') AS INT64 - ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesCss') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS css_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesImg') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS img_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesFont') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS font_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * 
embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, + + -- Proportions of each resource type relative to total bytes + CAST( + JSON_VALUE(summary, '$.bytesHtml') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS html_proportion, + CAST( + JSON_VALUE(summary, '$.bytesJS') AS INT64 + ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST( + JSON_VALUE(summary, '$.bytesCss') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS css_proportion, + CAST( + JSON_VALUE(summary, '$.bytesImg') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS img_proportion, + CAST( + JSON_VALUE(summary, '$.bytesFont') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS font_proportion, - -- Resource-specific emissions calculations + -- Resource-specific emissions calculations + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * 
grid_intensity + ) + )) AS total_html_emissions, + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + 
operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * 
grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2025-06-01' AND - is_root_page = TRUE AND - 'CMS' IN UNNEST(tech.categories) + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2025-06-01' AND + is_root_page = TRUE AND + 'CMS' IN UNNEST(tech.categories) ) SELECT - client, - cms, - COUNT(*) AS pages, - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES( - total_operational_emissions, 1000 - ) [OFFSET(500)] AS median_operational_emissions, - 
APPROX_QUANTILES( - total_embodied_emissions, 1000 - ) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES( - total_emissions, 1000 - ) [OFFSET(500)] AS median_total_emissions, + client, + cms, + COUNT(*) AS pages, + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES( + total_operational_emissions, 1000 + ) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES( + total_embodied_emissions, 1000 + ) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES( + total_emissions, 1000 + ) [OFFSET(500)] AS median_total_emissions, - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES( - total_html_emissions, 1000 - ) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES( - total_js_emissions, 1000 - ) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES( - total_css_emissions, 1000 - ) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES( - total_img_emissions, 1000 - ) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES( - total_font_emissions, 1000 - ) [OFFSET(500)] AS median_total_font_emissions + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES( + total_html_emissions, 1000 + ) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES( + total_js_emissions, 1000 + ) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES( + total_css_emissions, 1000 + ) [OFFSET(500)] AS median_total_css_emissions, + 
APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES( + total_img_emissions, 1000 + ) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES( + total_font_emissions, 1000 + ) [OFFSET(500)] AS median_total_font_emissions FROM - cms_data + cms_data GROUP BY - client, - cms + client, + cms ORDER BY - pages DESC, - cms ASC, - client ASC; + pages DESC, + cms ASC, + client ASC; diff --git a/sql/2025/sustainability/cms_bytes_per_type_2022.sql b/sql/2025/sustainability/cms_bytes_per_type_2022.sql deleted file mode 100644 index 86062822806..00000000000 --- a/sql/2025/sustainability/cms_bytes_per_type_2022.sql +++ /dev/null @@ -1,313 +0,0 @@ -#standardSQL -# Copied from cms_bytes_per_type.sql -# Median resource weights by CMS - -# Declare variables to calculate the carbon emissions of one byte -# Source: https://sustainablewebdesign.org/calculating-digital-emissions/ - -DECLARE grid_intensity NUMERIC DEFAULT 494; -DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012; -DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013; -DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081; -DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055; -DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; -DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; - -WITH cms_data AS ( - SELECT - client, - page, - tech.technology AS cms, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - - -- Operational emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * - grid_intensity AS op_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * - grid_intensity AS op_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 
1024 / 1024 - ) * operational_emissions_user_devices * - grid_intensity AS op_emissions_devices, - - -- Embodied emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * - grid_intensity AS em_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * - grid_intensity AS em_emissions_devices, - - -- Total emissions (operational + embodied) - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, - - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, - - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * 
operational_emissions_user_devices * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, - - -- Proportions of each resource type relative to total bytes - CAST( - JSON_VALUE(summary, '$.bytesHtml') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS html_proportion, - CAST( - JSON_VALUE(summary, '$.bytesJS') AS INT64 - ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST( - JSON_VALUE(summary, '$.bytesCss') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS css_proportion, - CAST( - JSON_VALUE(summary, '$.bytesImg') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS img_proportion, - CAST( - JSON_VALUE(summary, '$.bytesFont') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS font_proportion, - - -- Resource-specific emissions calculations - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - 
JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * 
grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, - - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2022-06-01' AND - is_root_page = TRUE AND - 'CMS' IN UNNEST(tech.categories) -) - -SELECT - client, - cms, - COUNT(*) AS pages, - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES( - total_operational_emissions, 1000 - ) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES( - total_embodied_emissions, 1000 - ) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES( - total_emissions, 1000 - ) [OFFSET(500)] AS median_total_emissions, - - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES( - total_html_emissions, 1000 - ) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES( - total_js_emissions, 1000 - ) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES( - total_css_emissions, 1000 - ) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES( - total_img_emissions, 1000 - ) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES( - total_font_emissions, 1000 - ) [OFFSET(500)] AS median_total_font_emissions 
-FROM - cms_data -GROUP BY - client, - cms -ORDER BY - pages DESC, - cms ASC, - client ASC; diff --git a/sql/2025/sustainability/content-visibility.sql b/sql/2025/sustainability/content-visibility.sql index 29b709474dd..1e84d3b5ab8 100644 --- a/sql/2025/sustainability/content-visibility.sql +++ b/sql/2025/sustainability/content-visibility.sql @@ -27,48 +27,48 @@ try { '''; WITH totals AS ( - SELECT - client, - COUNT(DISTINCT root_page) AS total_pages - FROM - `httparchive.crawl.parsed_css` - WHERE - date = '2025-06-01' AND - is_root_page - GROUP BY - client + SELECT + client, + COUNT(DISTINCT root_page) AS total_pages + FROM + `httparchive.crawl.parsed_css` + WHERE + date = '2025-06-01' AND + is_root_page + GROUP BY + client ), content_visibility_pages AS ( - SELECT - client, - COUNT(DISTINCT root_page) AS pages_with_content_visibility - FROM - `httparchive.crawl.parsed_css`, - UNNEST(HASCONTENTVISIBILITY(css)) - WHERE - date = '2025-06-01' AND - is_root_page - GROUP BY - client + SELECT + client, + COUNT(DISTINCT root_page) AS pages_with_content_visibility + FROM + `httparchive.crawl.parsed_css`, + UNNEST(HASCONTENTVISIBILITY(css)) + WHERE + date = '2025-06-01' AND + is_root_page + GROUP BY + client ) SELECT - totals.client, - totals.total_pages, + totals.client, + totals.total_pages, + COALESCE( + content_visibility_pages.pages_with_content_visibility, 0 + ) AS pages_with_content_visibility, + ROUND( COALESCE( - content_visibility_pages.pages_with_content_visibility, 0 - ) AS pages_with_content_visibility, - ROUND( - COALESCE( - content_visibility_pages.pages_with_content_visibility, 0 - ) * 100.0 / totals.total_pages, - 2 - ) AS pct_pages + content_visibility_pages.pages_with_content_visibility, 0 + ) * 100.0 / totals.total_pages, + 2 + ) AS pct_pages FROM - totals + totals LEFT JOIN - content_visibility_pages + content_visibility_pages ON totals.client = content_visibility_pages.client ORDER BY - totals.client + totals.client diff --git 
a/sql/2025/sustainability/ecommerce_bytes_per_type.sql b/sql/2025/sustainability/ecommerce_bytes_per_type.sql index 0802327915f..82b301ee373 100644 --- a/sql/2025/sustainability/ecommerce_bytes_per_type.sql +++ b/sql/2025/sustainability/ecommerce_bytes_per_type.sql @@ -12,310 +12,309 @@ DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; WITH ecommerce_data AS ( - SELECT - client, - page, - tech.technology AS ecommerce, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, + SELECT + client, + page, + tech.technology AS ecommerce, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - -- Operational emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * - grid_intensity AS op_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * - grid_intensity AS op_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * - grid_intensity AS op_emissions_devices, + -- Operational emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * + grid_intensity AS op_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * + grid_intensity AS op_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * + grid_intensity AS op_emissions_devices, - -- Embodied emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * - grid_intensity AS em_emissions_dc, - ( - CAST( - 
JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * - grid_intensity AS em_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * - grid_intensity AS em_emissions_devices, - - -- Total emissions (operational + embodied) - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, + -- Embodied emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * + grid_intensity AS em_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * + grid_intensity AS em_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * + grid_intensity AS em_emissions_devices, - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, + -- Total emissions (operational + embodied) + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + 
+ ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, - -- Proportions of each resource type relative to total bytes + ( + ( CAST( - JSON_VALUE(summary, '$.bytesHtml') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS html_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( CAST( - 
JSON_VALUE(summary, '$.bytesJS') AS INT64 - ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesCss') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS css_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesImg') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS img_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesFont') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS font_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, - -- Resource-specific emissions calculations - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, + -- Proportions of each resource type relative to total bytes + CAST( + JSON_VALUE(summary, '$.bytesHtml') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS 
INT64 + ) AS html_proportion, + CAST( + JSON_VALUE(summary, '$.bytesJS') AS INT64 + ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST( + JSON_VALUE(summary, '$.bytesCss') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS css_proportion, + CAST( + JSON_VALUE(summary, '$.bytesImg') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS img_proportion, + CAST( + JSON_VALUE(summary, '$.bytesFont') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS font_proportion, + -- Resource-specific emissions calculations + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS 
INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) 
+ )) AS total_css_emissions, + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + 
embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2025-06-01' AND - is_root_page = TRUE AND - EXISTS ( - SELECT 1 - FROM UNNEST(tech.categories) AS category - WHERE category = 'Ecommerce' AND - tech.technology NOT IN ( - 'Cart Functionality', 'Google Analytics Enhanced eCommerce' - ) + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2025-06-01' AND + is_root_page = TRUE AND + EXISTS ( + SELECT 1 + FROM UNNEST(tech.categories) AS category + WHERE category = 'Ecommerce' AND + tech.technology NOT IN ( + 'Cart Functionality', 'Google Analytics Enhanced eCommerce' ) + ) ) SELECT - client, - ecommerce, - COUNT(*) AS pages, + client, + ecommerce, + COUNT(*) AS pages, - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES( - total_operational_emissions, 1000 - ) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES( - total_embodied_emissions, 1000 - ) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES( - total_emissions, 1000 - ) [OFFSET(500)] AS median_total_emissions, + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES( + total_operational_emissions, 1000 + ) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES( + total_embodied_emissions, 1000 + ) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES( 
+ total_emissions, 1000 + ) [OFFSET(500)] AS median_total_emissions, - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES( - total_html_emissions, 1000 - ) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES( - total_js_emissions, 1000 - ) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES( - total_css_emissions, 1000 - ) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES( - total_img_emissions, 1000 - ) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES( - total_font_emissions, 1000 - ) [OFFSET(500)] AS median_total_font_emissions + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES( + total_html_emissions, 1000 + ) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES( + total_js_emissions, 1000 + ) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES( + total_css_emissions, 1000 + ) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES( + total_img_emissions, 1000 + ) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES( + total_font_emissions, 1000 + ) [OFFSET(500)] AS median_total_font_emissions FROM - ecommerce_data + ecommerce_data GROUP BY - client, - ecommerce + client, + ecommerce ORDER BY - pages DESC, - ecommerce ASC, - client ASC; + pages DESC, + ecommerce ASC, + client ASC; diff --git 
a/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql b/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql deleted file mode 100644 index 5aad7696fe3..00000000000 --- a/sql/2025/sustainability/ecommerce_bytes_per_type_2022.sql +++ /dev/null @@ -1,322 +0,0 @@ -#standardSQL -# Copied from ecommerce_bytes_per_type.sql -# Median resource weights by ecommerce platform with detailed CO2e breakdown -# Source: https://sustainablewebdesign.org/calculating-digital-emissions/ -# Declare variables to calculate the carbon emissions per gigabyte (kWh/GB) - -DECLARE grid_intensity NUMERIC DEFAULT 494; -DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012; -DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013; -DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081; -DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055; -DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; -DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; - -WITH ecommerce_data AS ( - SELECT - client, - page, - tech.technology AS ecommerce, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - - -- Operational emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * - grid_intensity AS op_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * - grid_intensity AS op_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * - grid_intensity AS op_emissions_devices, - - -- Embodied emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * - grid_intensity AS em_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * 
embodied_emissions_network * - grid_intensity AS em_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * - grid_intensity AS em_emissions_devices, - - -- Total emissions (operational + embodied) - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, - - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, - - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 
- ) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, - - -- Proportions of each resource type relative to total bytes - CAST( - JSON_VALUE(summary, '$.bytesHtml') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS html_proportion, - CAST( - JSON_VALUE(summary, '$.bytesJS') AS INT64 - ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST( - JSON_VALUE(summary, '$.bytesCss') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS css_proportion, - CAST( - JSON_VALUE(summary, '$.bytesImg') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS img_proportion, - CAST( - JSON_VALUE(summary, '$.bytesFont') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS font_proportion, - - -- Resource-specific emissions calculations - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS 
total_js_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, - - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - 
CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb - - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2022-06-01' AND - is_root_page = TRUE AND - EXISTS ( - SELECT 1 - FROM UNNEST(tech.categories) AS category - WHERE category = 'Ecommerce' AND - tech.technology NOT IN ( - 'Cart Functionality', 'Google Analytics Enhanced eCommerce' - ) - ) -) - -SELECT - client, - ecommerce, - COUNT(*) AS pages, - - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES( - total_operational_emissions, 1000 - ) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES( - total_embodied_emissions, 1000 - ) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES( - total_emissions, 1000 - ) [OFFSET(500)] AS median_total_emissions, - - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES( - total_html_emissions, 1000 - ) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES( - total_js_emissions, 1000 - ) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES( - total_css_emissions, 1000 - ) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES( - total_img_emissions, 1000 - ) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES( - total_font_emissions, 1000 - ) [OFFSET(500)] AS median_total_font_emissions -FROM - ecommerce_data -GROUP BY - client, - ecommerce -ORDER BY - pages DESC, - ecommerce ASC, - client ASC; diff --git a/sql/2025/sustainability/favicons.sql b/sql/2025/sustainability/favicons.sql index 
bd92270e003..eed571d33c5 100644 --- a/sql/2025/sustainability/favicons.sql +++ b/sql/2025/sustainability/favicons.sql @@ -45,31 +45,31 @@ return result; # Main query to analyze favicon image extensions with sampling WITH favicons AS ( - SELECT - client, - GETFAVICONIMAGE( - JSON_EXTRACT_SCALAR(payload, '$._almanac') - ) AS image_type_extension, - COUNT(*) AS freq, - SUM(COUNT(*)) OVER (PARTITION BY client) AS total, - COUNT( - * - ) / SUM(COUNT(*)) OVER (PARTITION BY client) AS percentage_of_total - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' - GROUP BY - client, - image_type_extension + SELECT + client, + GETFAVICONIMAGE( + JSON_EXTRACT_SCALAR(payload, '$._almanac') + ) AS image_type_extension, + COUNT(*) AS freq, + SUM(COUNT(*)) OVER (PARTITION BY client) AS total, + COUNT( + * + ) / SUM(COUNT(*)) OVER (PARTITION BY client) AS percentage_of_total + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' + GROUP BY + client, + image_type_extension ) SELECT - *, - percentage_of_total AS pct + *, + percentage_of_total AS pct FROM - favicons + favicons ORDER BY - pct DESC + pct DESC LIMIT - 1000; + 1000; diff --git a/sql/2025/sustainability/global_emissions_per_page.sql b/sql/2025/sustainability/global_emissions_per_page.sql index 395a7bdbf73..3403fb0c233 100644 --- a/sql/2025/sustainability/global_emissions_per_page.sql +++ b/sql/2025/sustainability/global_emissions_per_page.sql @@ -11,9 +11,9 @@ DECLARE ENERGY_PER_GB_DEVICE NUMERIC DEFAULT CAST(0.080 + 0.081 AS NUMERIC); -- Total energy consumption per GB, calculated by summing the above factors -- Sum of all operational and embodied energies DECLARE KW_PER_GB NUMERIC DEFAULT CAST( - ENERGY_PER_GB_DATACENTER + - ENERGY_PER_GB_NETWORK + - ENERGY_PER_GB_DEVICE AS NUMERIC + ENERGY_PER_GB_DATACENTER + + ENERGY_PER_GB_NETWORK + + ENERGY_PER_GB_DEVICE AS NUMERIC ); -- Global average carbon intensity of electricity generation (gCO2/kWh) @@ -21,103 +21,103 @@ DECLARE GLOBAL_GRID_INTENSITY NUMERIC 
DEFAULT 494; -- Function to calculate emissions in gCO2 CREATE TEMP FUNCTION CALCULATE_EMISSIONS( - bytes FLOAT64, - kw_per_GB FLOAT64, - grid_intensity FLOAT64 + bytes FLOAT64, + kw_per_GB FLOAT64, + grid_intensity FLOAT64 ) RETURNS FLOAT64 AS ( - (BYTES / 1024 / 1024 / 1024) * -- Convert bytes to GB - (KW_PER_GB) * - GRID_INTENSITY + (BYTES / 1024 / 1024 / 1024) * -- Convert bytes to GB + (KW_PER_GB) * + GRID_INTENSITY ); WITH PAGE_DATA AS ( - SELECT - CLIENT, - CAST(JSON_VALUE(SUMMARY, '$.bytesTotal') AS INT64) AS BYTESTOTAL, - CAST(JSON_VALUE(SUMMARY, '$.bytesHtml') AS INT64) AS BYTESHTML, - CAST(JSON_VALUE(SUMMARY, '$.bytesJS') AS INT64) AS BYTESJS, - CAST( - COALESCE( - JSON_VALUE(SUMMARY, '$.bytesCss'), - JSON_VALUE(SUMMARY, '$.bytesStyle') - ) AS INT64 - ) AS BYTESCSS, - CAST(JSON_VALUE(SUMMARY, '$.bytesImg') AS INT64) AS BYTESIMG, - CAST(JSON_VALUE(SUMMARY, '$.bytesOther') AS INT64) AS BYTESOTHER, - CAST(JSON_VALUE(SUMMARY, '$.bytesHtmlDoc') AS INT64) AS BYTESHTMLDOC, - CAST(JSON_VALUE(SUMMARY, '$.bytesFont') AS INT64) AS BYTESFONT - FROM - `httparchive.crawl.pages` - WHERE - DATE = '2025-06-01' AND IS_ROOT_PAGE + SELECT + CLIENT, + CAST(JSON_VALUE(SUMMARY, '$.bytesTotal') AS INT64) AS BYTESTOTAL, + CAST(JSON_VALUE(SUMMARY, '$.bytesHtml') AS INT64) AS BYTESHTML, + CAST(JSON_VALUE(SUMMARY, '$.bytesJS') AS INT64) AS BYTESJS, + CAST( + COALESCE( + JSON_VALUE(SUMMARY, '$.bytesCss'), + JSON_VALUE(SUMMARY, '$.bytesStyle') + ) AS INT64 + ) AS BYTESCSS, + CAST(JSON_VALUE(SUMMARY, '$.bytesImg') AS INT64) AS BYTESIMG, + CAST(JSON_VALUE(SUMMARY, '$.bytesOther') AS INT64) AS BYTESOTHER, + CAST(JSON_VALUE(SUMMARY, '$.bytesHtmlDoc') AS INT64) AS BYTESHTMLDOC, + CAST(JSON_VALUE(SUMMARY, '$.bytesFont') AS INT64) AS BYTESFONT + FROM + `httparchive.crawl.pages` + WHERE + DATE = '2025-06-01' AND IS_ROOT_PAGE ) SELECT - PERCENTILE, - CLIENT, - -- Total resources - APPROX_QUANTILES( - BYTESTOTAL / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS TOTAL_KBYTES, - APPROX_QUANTILES( - 
CALCULATE_EMISSIONS(BYTESTOTAL, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS TOTAL_EMISSIONS, - -- HTML resources - APPROX_QUANTILES( - BYTESHTML / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESHTML, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_EMISSIONS, - -- JavaScript resources - APPROX_QUANTILES( - BYTESJS / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS JS_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESJS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS JS_EMISSIONS, - -- CSS resources - APPROX_QUANTILES( - BYTESCSS / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS CSS_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESCSS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS CSS_EMISSIONS, - -- Image resources - APPROX_QUANTILES( - BYTESIMG / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS IMG_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESIMG, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS IMG_EMISSIONS, - -- Other resources - APPROX_QUANTILES( - BYTESOTHER / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS OTHER_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESOTHER, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS OTHER_EMISSIONS, - -- HTML document - APPROX_QUANTILES( - BYTESHTMLDOC / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESHTMLDOC, KW_PER_GB, GLOBAL_GRID_INTENSITY), - 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, - -- Font resources - APPROX_QUANTILES( - BYTESFONT / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS FONT_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESFONT, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS FONT_EMISSIONS + PERCENTILE, + CLIENT, + -- Total resources + APPROX_QUANTILES( + BYTESTOTAL / 1024, 1000 + ) 
[OFFSET(PERCENTILE * 10)] AS TOTAL_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESTOTAL, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS TOTAL_EMISSIONS, + -- HTML resources + APPROX_QUANTILES( + BYTESHTML / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESHTML, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_EMISSIONS, + -- JavaScript resources + APPROX_QUANTILES( + BYTESJS / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS JS_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESJS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS JS_EMISSIONS, + -- CSS resources + APPROX_QUANTILES( + BYTESCSS / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS CSS_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESCSS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS CSS_EMISSIONS, + -- Image resources + APPROX_QUANTILES( + BYTESIMG / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS IMG_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESIMG, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS IMG_EMISSIONS, + -- Other resources + APPROX_QUANTILES( + BYTESOTHER / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS OTHER_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESOTHER, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS OTHER_EMISSIONS, + -- HTML document + APPROX_QUANTILES( + BYTESHTMLDOC / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESHTMLDOC, KW_PER_GB, GLOBAL_GRID_INTENSITY), + 1000 + ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, + -- Font resources + APPROX_QUANTILES( + BYTESFONT / 1024, 1000 + ) [OFFSET(PERCENTILE * 10)] AS FONT_KBYTES, + APPROX_QUANTILES( + CALCULATE_EMISSIONS(BYTESFONT, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [OFFSET(PERCENTILE * 10)] AS FONT_EMISSIONS FROM - PAGE_DATA, - UNNEST([10, 
25, 50, 75, 90, 100]) AS PERCENTILE + PAGE_DATA, + UNNEST([10, 25, 50, 75, 90, 100]) AS PERCENTILE GROUP BY - PERCENTILE, - CLIENT + PERCENTILE, + CLIENT ORDER BY - CLIENT, - PERCENTILE + CLIENT, + PERCENTILE diff --git a/sql/2025/sustainability/global_emissions_per_page_2022.sql b/sql/2025/sustainability/global_emissions_per_page_2022.sql deleted file mode 100644 index 2fc357a33f2..00000000000 --- a/sql/2025/sustainability/global_emissions_per_page_2022.sql +++ /dev/null @@ -1,124 +0,0 @@ -#standardSQL -# Copied global_emissions_per_page.sql - --- Energy consumption factors from SWDM v4 (in kWh/GB) --- Operational + Embodied -DECLARE ENERGY_PER_GB_DATACENTER NUMERIC DEFAULT CAST(0.055 + 0.012 AS NUMERIC); --- Operational + Embodied -DECLARE ENERGY_PER_GB_NETWORK NUMERIC DEFAULT CAST(0.059 + 0.013 AS NUMERIC); --- Operational + Embodied -DECLARE ENERGY_PER_GB_DEVICE NUMERIC DEFAULT CAST(0.080 + 0.081 AS NUMERIC); - --- Total energy consumption per GB, calculated by summing the above factors --- Sum of all operational and embodied energies -DECLARE KW_PER_GB NUMERIC DEFAULT CAST( - ENERGY_PER_GB_DATACENTER + - ENERGY_PER_GB_NETWORK + - ENERGY_PER_GB_DEVICE AS NUMERIC -); - --- Global average carbon intensity of electricity generation (gCO2/kWh) -DECLARE GLOBAL_GRID_INTENSITY NUMERIC DEFAULT 494; - --- Function to calculate emissions in gCO2 -CREATE TEMP FUNCTION CALCULATE_EMISSIONS( - bytes FLOAT64, - kw_per_GB FLOAT64, - grid_intensity FLOAT64 -) RETURNS FLOAT64 AS ( - (BYTES / 1024 / 1024 / 1024) * -- Convert bytes to GB - (KW_PER_GB) * - GRID_INTENSITY -); - -WITH PAGE_DATA AS ( - SELECT - CLIENT, - CAST(JSON_VALUE(SUMMARY, '$.bytesTotal') AS INT64) AS BYTESTOTAL, - CAST(JSON_VALUE(SUMMARY, '$.bytesHtml') AS INT64) AS BYTESHTML, - CAST(JSON_VALUE(SUMMARY, '$.bytesJS') AS INT64) AS BYTESJS, - CAST( - COALESCE( - JSON_VALUE(SUMMARY, '$.bytesCss'), - JSON_VALUE(SUMMARY, '$.bytesStyle') - ) AS INT64 - ) AS BYTESCSS, - CAST(JSON_VALUE(SUMMARY, '$.bytesImg') AS INT64) 
AS BYTESIMG, - CAST(JSON_VALUE(SUMMARY, '$.bytesOther') AS INT64) AS BYTESOTHER, - CAST(JSON_VALUE(SUMMARY, '$.bytesHtmlDoc') AS INT64) AS BYTESHTMLDOC, - CAST(JSON_VALUE(SUMMARY, '$.bytesFont') AS INT64) AS BYTESFONT - FROM - `httparchive.crawl.pages` - WHERE - DATE = '2022-06-01' AND IS_ROOT_PAGE -) - -SELECT - PERCENTILE, - CLIENT, - -- Total resources - APPROX_QUANTILES( - BYTESTOTAL / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS TOTAL_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESTOTAL, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS TOTAL_EMISSIONS, - -- HTML resources - APPROX_QUANTILES( - BYTESHTML / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESHTML, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_EMISSIONS, - -- JavaScript resources - APPROX_QUANTILES( - BYTESJS / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS JS_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESJS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS JS_EMISSIONS, - -- CSS resources - APPROX_QUANTILES( - BYTESCSS / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS CSS_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESCSS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS CSS_EMISSIONS, - -- Image resources - APPROX_QUANTILES( - BYTESIMG / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS IMG_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESIMG, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS IMG_EMISSIONS, - -- Other resources - APPROX_QUANTILES( - BYTESOTHER / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS OTHER_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESOTHER, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS OTHER_EMISSIONS, - -- HTML document - APPROX_QUANTILES( - BYTESHTMLDOC / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_KBYTES, - APPROX_QUANTILES( - 
CALCULATE_EMISSIONS(BYTESHTMLDOC, KW_PER_GB, GLOBAL_GRID_INTENSITY), - 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, - -- Font resources - APPROX_QUANTILES( - BYTESFONT / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS FONT_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESFONT, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS FONT_EMISSIONS -FROM - PAGE_DATA, - UNNEST([10, 25, 50, 75, 90, 100]) AS PERCENTILE -GROUP BY - PERCENTILE, - CLIENT -ORDER BY - CLIENT, - PERCENTILE diff --git a/sql/2025/sustainability/green_third_party_requests.sql b/sql/2025/sustainability/green_third_party_requests.sql index 9ddd0fc7ebb..be5f2ae7475 100644 --- a/sql/2025/sustainability/green_third_party_requests.sql +++ b/sql/2025/sustainability/green_third_party_requests.sql @@ -2,145 +2,145 @@ # Median third-parties & green third-party requests per websites by rank WITH requests AS ( - SELECT - client, - url, - CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page - FROM - `httparchive.crawl.requests` - WHERE - date = '2025-06-01' + SELECT + client, + url, + CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page + FROM + `httparchive.crawl.requests` + WHERE + date = '2025-06-01' ), green AS ( - SELECT - TRUE AS is_green, - NET.HOST(url) AS host - FROM - `httparchive.almanac.green_web_foundation` - WHERE - date = '2025-09-01' + SELECT + TRUE AS is_green, + NET.HOST(url) AS host + FROM + `httparchive.almanac.green_web_foundation` + WHERE + date = '2025-09-01' ), pages AS ( - SELECT - client, - rank, - CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' + SELECT + client, + rank, + CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' ), third_party AS ( - SELECT - tp.domain, - COUNT(DISTINCT r.page) AS page_usage - FROM - `httparchive.almanac.third_parties` AS tp - INNER JOIN - requests AS r - ON NET.HOST(r.url) = 
NET.HOST(tp.domain) - WHERE - tp.date = '2025-06-01' AND - tp.category NOT IN ('hosting') - GROUP BY - tp.domain - HAVING - page_usage >= 50 + SELECT + tp.domain, + COUNT(DISTINCT r.page) AS page_usage + FROM + `httparchive.almanac.third_parties` AS tp + INNER JOIN + requests AS r + ON NET.HOST(r.url) = NET.HOST(tp.domain) + WHERE + tp.date = '2025-06-01' AND + tp.category NOT IN ('hosting') + GROUP BY + tp.domain + HAVING + page_usage >= 50 ), green_tp AS ( - SELECT tp.domain - FROM - `httparchive.almanac.third_parties` AS tp - INNER JOIN - green AS g - ON NET.HOST(g.host) = NET.HOST(tp.domain) - WHERE - tp.date = '2025-06-01' AND - tp.category NOT IN ('hosting') - GROUP BY - tp.domain + SELECT tp.domain + FROM + `httparchive.almanac.third_parties` AS tp + INNER JOIN + green AS g + ON NET.HOST(g.host) = NET.HOST(tp.domain) + WHERE + tp.date = '2025-06-01' AND + tp.category NOT IN ('hosting') + GROUP BY + tp.domain ), base AS ( - SELECT - r.client, - r.page, - p.rank, - COUNT(tp.domain) AS third_parties_per_page - FROM - requests AS r - LEFT JOIN - third_party AS tp - ON - NET.HOST(r.url) = NET.HOST(tp.domain) - INNER JOIN - pages AS p - ON r.client = p.client AND r.page = p.page - GROUP BY - r.client, - r.page, - p.rank + SELECT + r.client, + r.page, + p.rank, + COUNT(tp.domain) AS third_parties_per_page + FROM + requests AS r + LEFT JOIN + third_party AS tp + ON + NET.HOST(r.url) = NET.HOST(tp.domain) + INNER JOIN + pages AS p + ON r.client = p.client AND r.page = p.page + GROUP BY + r.client, + r.page, + p.rank ), base_green AS ( - SELECT - r.client, - r.page, - p.rank, - COUNT(gtp.domain) AS green_third_parties_per_page - FROM - requests AS r - LEFT JOIN - green_tp AS gtp - ON - NET.HOST(r.url) = NET.HOST(gtp.domain) - INNER JOIN - pages AS p - ON r.client = p.client AND r.page = p.page - GROUP BY - r.client, - r.page, - p.rank + SELECT + r.client, + r.page, + p.rank, + COUNT(gtp.domain) AS green_third_parties_per_page + FROM + requests AS r + LEFT JOIN + 
green_tp AS gtp + ON + NET.HOST(r.url) = NET.HOST(gtp.domain) + INNER JOIN + pages AS p + ON r.client = p.client AND r.page = p.page + GROUP BY + r.client, + r.page, + p.rank ) SELECT - b.client, - rank_grouping, - CASE - WHEN rank_grouping = 0 THEN '' - WHEN rank_grouping = 100000000 THEN 'all' - ELSE FORMAT("%'d", rank_grouping) - END AS ranking, - APPROX_QUANTILES( - b.third_parties_per_page, 1000 - ) [OFFSET(500)] AS p50_third_parties_per_page, - APPROX_QUANTILES( - bg.green_third_parties_per_page, 1000 - ) [OFFSET(500)] AS p50_green_third_parties_per_page, - APPROX_QUANTILES( - SAFE_DIVIDE( - bg.green_third_parties_per_page, - b.third_parties_per_page - ), 1000 - ) [OFFSET(500)] AS pct_green + b.client, + rank_grouping, + CASE + WHEN rank_grouping = 0 THEN '' + WHEN rank_grouping = 100000000 THEN 'all' + ELSE FORMAT("%'d", rank_grouping) + END AS ranking, + APPROX_QUANTILES( + b.third_parties_per_page, 1000 + ) [OFFSET(500)] AS p50_third_parties_per_page, + APPROX_QUANTILES( + bg.green_third_parties_per_page, 1000 + ) [OFFSET(500)] AS p50_green_third_parties_per_page, + APPROX_QUANTILES( + SAFE_DIVIDE( + bg.green_third_parties_per_page, + b.third_parties_per_page + ), 1000 + ) [OFFSET(500)] AS pct_green FROM - base AS b, - UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping + base AS b, + UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping INNER JOIN - base_green AS bg + base_green AS bg ON - b.client = bg.client AND - b.page = bg.page AND - b.rank = bg.rank + b.client = bg.client AND + b.page = bg.page AND + b.rank = bg.rank WHERE - b.rank <= rank_grouping + b.rank <= rank_grouping GROUP BY - b.client, - rank_grouping + b.client, + rank_grouping ORDER BY - b.client, - rank_grouping + b.client, + rank_grouping diff --git a/sql/2025/sustainability/green_web_hosting.sql b/sql/2025/sustainability/green_web_hosting.sql index 9143c1da79d..5c2b3086155 100644 --- a/sql/2025/sustainability/green_web_hosting.sql +++ 
b/sql/2025/sustainability/green_web_hosting.sql @@ -2,58 +2,58 @@ # What percentage of URLs are hosted on a known green web hosting provider? WITH green AS ( - SELECT - TRUE AS is_green, - NET.HOST(url) AS host - FROM - `httparchive.almanac.green_web_foundation` - WHERE - date = '2025-09-01' + SELECT + TRUE AS is_green, + NET.HOST(url) AS host + FROM + `httparchive.almanac.green_web_foundation` + WHERE + date = '2025-09-01' ), pages AS ( - SELECT - client, - rank, - NET.HOST(root_page) AS host - FROM - `httparchive.crawl.pages` - WHERE - is_root_page = TRUE AND - date = '2025-06-01' + SELECT + client, + rank, + NET.HOST(root_page) AS host + FROM + `httparchive.crawl.pages` + WHERE + is_root_page = TRUE AND + date = '2025-06-01' ) -- Apply rank grouping SELECT - client, - rank_grouping, - CASE - WHEN rank_grouping = 0 THEN '' - WHEN rank_grouping = 100000000 THEN 'all' - ELSE FORMAT("%'d", rank_grouping) - END AS ranking, - COUNTIF(is_green) AS total_green, - COUNT(*) AS total_sites, - SAFE_DIVIDE(COUNTIF(is_green), COUNT(*)) AS pct_green + client, + rank_grouping, + CASE + WHEN rank_grouping = 0 THEN '' + WHEN rank_grouping = 100000000 THEN 'all' + ELSE FORMAT("%'d", rank_grouping) + END AS ranking, + COUNTIF(is_green) AS total_green, + COUNT(*) AS total_sites, + SAFE_DIVIDE(COUNTIF(is_green), COUNT(*)) AS pct_green FROM ( - -- Left join green hosting information - SELECT - p.client, - p.host, - p.rank, - g.is_green - FROM - pages AS p - LEFT JOIN - green AS g - ON p.host = g.host + -- Left join green hosting information + SELECT + p.client, + p.host, + p.rank, + g.is_green + FROM + pages AS p + LEFT JOIN + green AS g + ON p.host = g.host ), UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping WHERE - rank <= rank_grouping + rank <= rank_grouping GROUP BY - client, - rank_grouping + client, + rank_grouping ORDER BY - client, - rank_grouping; + client, + rank_grouping; diff --git a/sql/2025/sustainability/page_byte_pre_type.sql 
b/sql/2025/sustainability/page_byte_pre_type.sql index 48cc2113902..20b7742218b 100644 --- a/sql/2025/sustainability/page_byte_pre_type.sql +++ b/sql/2025/sustainability/page_byte_pre_type.sql @@ -16,103 +16,103 @@ DECLARE GLOBAL_GRID_INTENSITY NUMERIC DEFAULT 494; -- Function to calculate emissions in gCO2 CREATE TEMP FUNCTION calculate_emissions( - bytes FLOAT64, - kw_per_GB FLOAT64, - grid_intensity FLOAT64 + bytes FLOAT64, + kw_per_GB FLOAT64, + grid_intensity FLOAT64 ) RETURNS FLOAT64 AS ( - (BYTES / 1024 / 1024 / 1024) * -- Convert bytes to GB - (KW_PER_GB) * - GRID_INTENSITY + (BYTES / 1024 / 1024 / 1024) * -- Convert bytes to GB + (KW_PER_GB) * + GRID_INTENSITY ); WITH PAGE_DATA AS ( - SELECT - CLIENT, - cast(json_value(SUMMARY, '$.bytesTotal') AS INT64) AS BYTESTOTAL, - cast(json_value(SUMMARY, '$.bytesHtml') AS INT64) AS BYTESHTML, - cast( - coalesce( - json_value(SUMMARY, '$.bytesCss'), - json_value(SUMMARY, '$.bytesStyle') - ) AS INT64 - ) AS BYTESCSS, - cast(json_value(SUMMARY, '$.bytesJS') AS INT64) AS BYTESJS, - cast(json_value(SUMMARY, '$.bytesImg') AS INT64) AS BYTESIMG, - cast(json_value(SUMMARY, '$.bytesOther') AS INT64) AS BYTESOTHER, - cast(json_value(SUMMARY, '$.bytesHtmlDoc') AS INT64) AS BYTESHTMLDOC, - cast(json_value(SUMMARY, '$.bytesFont') AS INT64) AS BYTESFONT - FROM - `httparchive.crawl.pages` - WHERE - DATE = '2025-06-01' AND IS_ROOT_PAGE + SELECT + CLIENT, + cast(json_value(SUMMARY, '$.bytesTotal') AS INT64) AS BYTESTOTAL, + cast(json_value(SUMMARY, '$.bytesHtml') AS INT64) AS BYTESHTML, + cast( + coalesce( + json_value(SUMMARY, '$.bytesCss'), + json_value(SUMMARY, '$.bytesStyle') + ) AS INT64 + ) AS BYTESCSS, + cast(json_value(SUMMARY, '$.bytesJS') AS INT64) AS BYTESJS, + cast(json_value(SUMMARY, '$.bytesImg') AS INT64) AS BYTESIMG, + cast(json_value(SUMMARY, '$.bytesOther') AS INT64) AS BYTESOTHER, + cast(json_value(SUMMARY, '$.bytesHtmlDoc') AS INT64) AS BYTESHTMLDOC, + cast(json_value(SUMMARY, '$.bytesFont') AS INT64) AS 
BYTESFONT + FROM + `httparchive.crawl.pages` + WHERE + DATE = '2025-06-01' AND IS_ROOT_PAGE ) SELECT - PERCENTILE, - CLIENT, - -- Total resources - approx_quantiles( - BYTESTOTAL / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS TOTAL_KBYTES, - approx_quantiles( - calculate_emissions(BYTESTOTAL, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS TOTAL_EMISSIONS, - -- HTML resources - approx_quantiles( - BYTESHTML / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS HTML_KBYTES, - approx_quantiles( - calculate_emissions(BYTESHTML, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS HTML_EMISSIONS, - -- JavaScript resources - approx_quantiles( - BYTESJS / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS JS_KBYTES, - approx_quantiles( - calculate_emissions(BYTESJS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS JS_EMISSIONS, - -- CSS resources - approx_quantiles( - BYTESCSS / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS CSS_KBYTES, - approx_quantiles( - calculate_emissions(BYTESCSS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS CSS_EMISSIONS, - -- Image resources - approx_quantiles( - BYTESIMG / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS IMG_KBYTES, - approx_quantiles( - calculate_emissions(BYTESIMG, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS IMG_EMISSIONS, - -- Other resources - approx_quantiles( - BYTESOTHER / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS OTHER_KBYTES, - approx_quantiles( - calculate_emissions(BYTESOTHER, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS OTHER_EMISSIONS, - -- HTML document - approx_quantiles( - BYTESHTMLDOC / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS HTML_DOC_KBYTES, - approx_quantiles( - calculate_emissions(BYTESHTMLDOC, KW_PER_GB, GLOBAL_GRID_INTENSITY), - 1000 - ) [offset(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, - -- Font resources - approx_quantiles( - BYTESFONT / 1024, 1000 - ) [offset(PERCENTILE 
* 10)] AS FONT_KBYTES, - approx_quantiles( - calculate_emissions(BYTESFONT, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS FONT_EMISSIONS + PERCENTILE, + CLIENT, + -- Total resources + approx_quantiles( + BYTESTOTAL / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS TOTAL_KBYTES, + approx_quantiles( + calculate_emissions(BYTESTOTAL, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS TOTAL_EMISSIONS, + -- HTML resources + approx_quantiles( + BYTESHTML / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS HTML_KBYTES, + approx_quantiles( + calculate_emissions(BYTESHTML, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS HTML_EMISSIONS, + -- JavaScript resources + approx_quantiles( + BYTESJS / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS JS_KBYTES, + approx_quantiles( + calculate_emissions(BYTESJS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS JS_EMISSIONS, + -- CSS resources + approx_quantiles( + BYTESCSS / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS CSS_KBYTES, + approx_quantiles( + calculate_emissions(BYTESCSS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS CSS_EMISSIONS, + -- Image resources + approx_quantiles( + BYTESIMG / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS IMG_KBYTES, + approx_quantiles( + calculate_emissions(BYTESIMG, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS IMG_EMISSIONS, + -- Other resources + approx_quantiles( + BYTESOTHER / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS OTHER_KBYTES, + approx_quantiles( + calculate_emissions(BYTESOTHER, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS OTHER_EMISSIONS, + -- HTML document + approx_quantiles( + BYTESHTMLDOC / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS HTML_DOC_KBYTES, + approx_quantiles( + calculate_emissions(BYTESHTMLDOC, KW_PER_GB, GLOBAL_GRID_INTENSITY), + 1000 + ) [offset(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, + -- Font resources + 
approx_quantiles( + BYTESFONT / 1024, 1000 + ) [offset(PERCENTILE * 10)] AS FONT_KBYTES, + approx_quantiles( + calculate_emissions(BYTESFONT, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 + ) [offset(PERCENTILE * 10)] AS FONT_EMISSIONS FROM - PAGE_DATA, - unnest([10, 25, 50, 75, 90, 100]) AS PERCENTILE + PAGE_DATA, + unnest([10, 25, 50, 75, 90, 100]) AS PERCENTILE GROUP BY - PERCENTILE, - CLIENT + PERCENTILE, + CLIENT ORDER BY - CLIENT, - PERCENTILE + CLIENT, + PERCENTILE diff --git a/sql/2025/sustainability/query_run_size.sql b/sql/2025/sustainability/query_run_size.sql index bc24c40709f..a79df28f1f3 100644 --- a/sql/2025/sustainability/query_run_size.sql +++ b/sql/2025/sustainability/query_run_size.sql @@ -3,13 +3,13 @@ # (0.012+0.013+0.081+0.055+0.0590.080)x494x [Total TB] *1024 = Total kg CO2e SELECT - AVG( - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) / 1048576 AS avg_size_mb, - SUM( - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) / 1099511627776 AS total_size_tb + AVG( + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) / 1048576 AS avg_size_mb, + SUM( + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) / 1099511627776 AS total_size_tb FROM - `httparchive.crawl.pages` + `httparchive.crawl.pages` WHERE - date = '2025-06-01' + date = '2025-06-01' diff --git a/sql/2025/sustainability/responsive_images.sql b/sql/2025/sustainability/responsive_images.sql index c3c85e80b54..f1052e620a9 100644 --- a/sql/2025/sustainability/responsive_images.sql +++ b/sql/2025/sustainability/responsive_images.sql @@ -2,9 +2,9 @@ # percent of sites using images with srcset w/wo sizes, or picture element CREATE TEMPORARY FUNCTION get_media_info(media_string STRING) RETURNS STRUCT< - num_srcset_all INT64, - num_srcset_sizes INT64, - num_picture_img INT64 + num_srcset_all INT64, + num_srcset_sizes INT64, + num_picture_img INT64 > LANGUAGE js AS ''' var result = { num_srcset_all: 0, @@ -22,52 +22,52 @@ return result; '''; WITH page_data AS ( - SELECT - client, - 
get_media_info(json_extract_scalar(payload, '$._media')) AS media_info - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' AND is_root_page + SELECT + client, + get_media_info(json_extract_scalar(payload, '$._media')) AS media_info + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' AND is_root_page ) SELECT - client, - round( - safe_divide(countif(media_info.num_srcset_all > 0), count(*)) * 100, 2 - ) AS pages_with_srcset_pct, - round( - safe_divide(countif(media_info.num_srcset_sizes > 0), count(*)) * 100, 2 - ) AS pages_with_srcset_sizes_pct, - round( - safe_divide( - ( - countif( - media_info.num_srcset_all > 0 - ) - countif(media_info.num_srcset_sizes > 0) - ), - count(*) - ) * 100, - 2 - ) AS pages_with_srcset_wo_sizes_pct, - round( - safe_divide( - sum(media_info.num_srcset_sizes), sum(media_info.num_srcset_all) - ) * 100, - 2 - ) AS instances_of_srcset_sizes_pct, - round( - safe_divide( - (sum(media_info.num_srcset_all) - sum(media_info.num_srcset_sizes)), - sum(media_info.num_srcset_all) - ) * 100, - 2 - ) AS instances_of_srcset_wo_sizes_pct, - round( - safe_divide(countif(media_info.num_picture_img > 0), count(*)) * 100, 2 - ) AS pages_with_picture_pct + client, + round( + safe_divide(countif(media_info.num_srcset_all > 0), count(*)) * 100, 2 + ) AS pages_with_srcset_pct, + round( + safe_divide(countif(media_info.num_srcset_sizes > 0), count(*)) * 100, 2 + ) AS pages_with_srcset_sizes_pct, + round( + safe_divide( + ( + countif( + media_info.num_srcset_all > 0 + ) - countif(media_info.num_srcset_sizes > 0) + ), + count(*) + ) * 100, + 2 + ) AS pages_with_srcset_wo_sizes_pct, + round( + safe_divide( + sum(media_info.num_srcset_sizes), sum(media_info.num_srcset_all) + ) * 100, + 2 + ) AS instances_of_srcset_sizes_pct, + round( + safe_divide( + (sum(media_info.num_srcset_all) - sum(media_info.num_srcset_sizes)), + sum(media_info.num_srcset_all) + ) * 100, + 2 + ) AS instances_of_srcset_wo_sizes_pct, + round( + 
safe_divide(countif(media_info.num_picture_img > 0), count(*)) * 100, 2 + ) AS pages_with_picture_pct FROM page_data GROUP BY - client + client ORDER BY - client + client diff --git a/sql/2025/sustainability/script_count.sql b/sql/2025/sustainability/script_count.sql index 10be69b9c0a..b7019b73d25 100644 --- a/sql/2025/sustainability/script_count.sql +++ b/sql/2025/sustainability/script_count.sql @@ -1,107 +1,107 @@ #standardSQL # Breakdown of inline vs external scripts WITH script_data AS ( - SELECT - client, - page, - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.script_tags' - ), - '$.total' - ) AS INT64 - ) AS total_scripts, - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.script_tags' - ), - '$.inline' - ) AS INT64 - ) AS inline_scripts, - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.script_tags' - ), - '$.src' - ) AS INT64 - ) AS external_scripts, - SAFE_DIVIDE( - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.script_tags' - ), - '$.inline' - ) AS INT64 - ), - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.script_tags' - ), - '$.total' - ) AS INT64 - ) - ) AS pct_inline_script, - SAFE_DIVIDE( - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.script_tags' - ), - '$.src' - ) AS INT64 - ), - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.script_tags' - ), - '$.total' - ) AS INT64 - ) - ) AS pct_external_script - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-06-01' AND + SELECT + client, + page, + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.total' + ) AS INT64 + ) AS total_scripts, + CAST( + JSON_EXTRACT_SCALAR( + 
JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.inline' + ) AS INT64 + ) AS inline_scripts, + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.src' + ) AS INT64 + ) AS external_scripts, + SAFE_DIVIDE( + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.inline' + ) AS INT64 + ), + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.total' + ) AS INT64 + ) + ) AS pct_inline_script, + SAFE_DIVIDE( + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.src' + ) AS INT64 + ), + CAST( JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags' - ), - '$.total' - ) IS NOT NULL + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.script_tags' + ), + '$.total' + ) AS INT64 + ) + ) AS pct_external_script + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' AND + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags' + ), + '$.total' + ) IS NOT NULL ) SELECT - client, - COUNT(DISTINCT page) AS pages_analyzed, - SUM(total_scripts) AS total_scripts, - SUM(inline_scripts) AS inline_scripts, - SUM(external_scripts) AS external_scripts, - SAFE_DIVIDE( - SUM(external_scripts), SUM(total_scripts) - ) AS pct_external_script, - SAFE_DIVIDE(SUM(inline_scripts), SUM(total_scripts)) AS pct_inline_script, - APPROX_QUANTILES( - SAFE_DIVIDE(external_scripts, total_scripts), 1000 - ) [OFFSET(500)] AS median_external, - APPROX_QUANTILES( - SAFE_DIVIDE(inline_scripts, total_scripts), 1000 - ) [OFFSET(500)] AS median_inline + client, + COUNT(DISTINCT page) AS pages_analyzed, + SUM(total_scripts) AS total_scripts, + SUM(inline_scripts) AS inline_scripts, + 
SUM(external_scripts) AS external_scripts, + SAFE_DIVIDE( + SUM(external_scripts), SUM(total_scripts) + ) AS pct_external_script, + SAFE_DIVIDE(SUM(inline_scripts), SUM(total_scripts)) AS pct_inline_script, + APPROX_QUANTILES( + SAFE_DIVIDE(external_scripts, total_scripts), 1000 + ) [OFFSET(500)] AS median_external, + APPROX_QUANTILES( + SAFE_DIVIDE(inline_scripts, total_scripts), 1000 + ) [OFFSET(500)] AS median_inline FROM - script_data + script_data GROUP BY - client + client ORDER BY - client; + client; diff --git a/sql/2025/sustainability/ssg_bytes_per_type.sql b/sql/2025/sustainability/ssg_bytes_per_type.sql index 6a89d8a3b9e..fefc7d065c6 100644 --- a/sql/2025/sustainability/ssg_bytes_per_type.sql +++ b/sql/2025/sustainability/ssg_bytes_per_type.sql @@ -13,309 +13,307 @@ DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; WITH ssg_data AS ( - SELECT - client, - page, - tech.technology AS ssg, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - - -- Operational emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * - grid_intensity AS op_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * - grid_intensity AS op_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * - grid_intensity AS op_emissions_devices, + SELECT + client, + page, + tech.technology AS ssg, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - -- Embodied emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * - grid_intensity AS em_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 
1024 / 1024 - ) * embodied_emissions_network * - grid_intensity AS em_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * - grid_intensity AS em_emissions_devices, + -- Operational emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * + grid_intensity AS op_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * + grid_intensity AS op_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * + grid_intensity AS op_emissions_devices, - -- Total emissions (operational + embodied) - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, + -- Embodied emissions calculations + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * + grid_intensity AS em_emissions_dc, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * + grid_intensity AS em_emissions_networks, + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * + grid_intensity AS em_emissions_devices, - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - 
JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, + -- Total emissions (operational + embodied) + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + ) AS total_operational_emissions, - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, + ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, 
'$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_embodied_emissions, - -- Proportions of each resource type relative to total bytes + ( + ( CAST( - JSON_VALUE(summary, '$.bytesHtml') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS html_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_data_centers * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesJS') AS INT64 - ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_network * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesCss') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS css_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * operational_emissions_user_devices * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesImg') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS img_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_data_centers * grid_intensity + + ( CAST( - JSON_VALUE(summary, '$.bytesFont') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS font_proportion, + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_network * grid_intensity + + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * embodied_emissions_user_devices * grid_intensity + ) AS total_emissions, - -- Resource-specific emissions calculations - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - 
operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, + -- Proportions of each resource type relative to total bytes + CAST( + JSON_VALUE(summary, '$.bytesHtml') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS html_proportion, + CAST( + JSON_VALUE(summary, '$.bytesJS') AS INT64 + ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST( + JSON_VALUE(summary, '$.bytesCss') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS css_proportion, + CAST( + JSON_VALUE(summary, '$.bytesImg') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS img_proportion, + CAST( + JSON_VALUE(summary, '$.bytesFont') AS INT64 + ) / CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) AS font_proportion, + -- Resource-specific emissions calculations + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + 
operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - 
embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, - - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * 
grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2025-06-01' AND - is_root_page = TRUE AND - EXISTS ( - SELECT 1 - FROM UNNEST(tech.categories) AS category - WHERE LOWER(category) = 'static site generator' OR - tech.technology IN ('Next.js', 'Nuxt.js') + ( + SAFE_DIVIDE( + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) + ) * ( + ( + CAST( + JSON_VALUE(summary, '$.bytesTotal') AS INT64 + ) / 1024 / 1024 / 1024 + ) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity ) + )) AS total_font_emissions, + + -- Resource-specific size in KB + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + FROM + `httparchive.crawl.pages`, + UNNEST(technologies) AS tech + WHERE + date = '2025-06-01' AND + is_root_page = TRUE AND + EXISTS ( + SELECT 1 + FROM UNNEST(tech.categories) AS category + WHERE LOWER(category) = 'static site generator' OR + tech.technology IN ('Next.js', 'Nuxt.js') + ) ) SELECT - client, - ssg, - COUNT(*) AS pages, + client, + ssg, + COUNT(*) AS pages, - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES( - total_operational_emissions, 1000 - ) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES( - total_embodied_emissions, 1000 - ) 
[OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES( - total_emissions, 1000 - ) [OFFSET(500)] AS median_total_emissions, - - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES( - total_html_emissions, 1000 - ) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES( - total_js_emissions, 1000 - ) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES( - total_css_emissions, 1000 - ) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES( - total_img_emissions, 1000 - ) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES( - total_font_emissions, 1000 - ) [OFFSET(500)] AS median_total_font_emissions + -- Median resource weights and emissions + APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES( + total_operational_emissions, 1000 + ) [OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES( + total_embodied_emissions, 1000 + ) [OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES( + total_emissions, 1000 + ) [OFFSET(500)] AS median_total_emissions, + -- Resource-specific medians + APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES( + total_html_emissions, 1000 + ) [OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES( + total_js_emissions, 1000 + ) [OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES( + total_css_emissions, 1000 + ) [OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES( + 
total_img_emissions, 1000 + ) [OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES( + total_font_emissions, 1000 + ) [OFFSET(500)] AS median_total_font_emissions FROM - ssg_data + ssg_data GROUP BY - client, - ssg + client, + ssg ORDER BY - pages DESC, - ssg ASC, - client ASC; + pages DESC, + ssg ASC, + client ASC; diff --git a/sql/2025/sustainability/ssg_bytes_per_type_2022.sql b/sql/2025/sustainability/ssg_bytes_per_type_2022.sql deleted file mode 100644 index 4471dcf439c..00000000000 --- a/sql/2025/sustainability/ssg_bytes_per_type_2022.sql +++ /dev/null @@ -1,322 +0,0 @@ -#standardSQL - -# Copied from ssg_bytes_per_type.sql -# Median resource weights by static site generator with detailed CO2e breakdown -# Source: https://sustainablewebdesign.org/calculating-digital-emissions/ -# Declare variables to calculate the carbon emissions per gigabyte (kWh/GB) - -DECLARE grid_intensity NUMERIC DEFAULT 494; -DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012; -DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013; -DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081; -DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055; -DECLARE operational_emissions_network NUMERIC DEFAULT 0.059; -DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080; - -WITH ssg_data AS ( - SELECT - client, - page, - tech.technology AS ssg, - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, - - -- Operational emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * - grid_intensity AS op_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * - grid_intensity AS op_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * 
operational_emissions_user_devices * - grid_intensity AS op_emissions_devices, - - -- Embodied emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * - grid_intensity AS em_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * - grid_intensity AS em_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * - grid_intensity AS em_emissions_devices, - - -- Total emissions (operational + embodied) - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity - ) AS total_operational_emissions, - - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity - ) AS total_embodied_emissions, - - ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * 
grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity - ) AS total_emissions, - - -- Proportions of each resource type relative to total bytes - CAST( - JSON_VALUE(summary, '$.bytesHtml') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS html_proportion, - CAST( - JSON_VALUE(summary, '$.bytesJS') AS INT64 - ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST( - JSON_VALUE(summary, '$.bytesCss') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS css_proportion, - CAST( - JSON_VALUE(summary, '$.bytesImg') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS img_proportion, - CAST( - JSON_VALUE(summary, '$.bytesFont') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS font_proportion, - - -- Resource-specific emissions calculations - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') 
AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - 
embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, - - -- Resource-specific size in KB - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb, - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb - - FROM - `httparchive.crawl.pages`, - UNNEST(technologies) AS tech - WHERE - date = '2022-06-01' AND - is_root_page = TRUE AND - EXISTS ( - SELECT 1 - FROM UNNEST(tech.categories) AS category - WHERE LOWER(category) = 'static site generator' OR - tech.technology IN ('Next.js', 'Nuxt.js') - ) -) - -SELECT - client, - ssg, - COUNT(*) AS pages, - - -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES( - total_operational_emissions, 1000 - ) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES( - total_embodied_emissions, 1000 - ) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES( - total_emissions, 1000 - ) [OFFSET(500)] AS median_total_emissions, - - -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES( - total_html_emissions, 1000 - ) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES( - total_js_emissions, 1000 - ) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES( - total_css_emissions, 1000 - ) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES( - total_img_emissions, 1000 - ) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] 
AS median_font_kb, - APPROX_QUANTILES( - total_font_emissions, 1000 - ) [OFFSET(500)] AS median_total_font_emissions - -FROM - ssg_data -GROUP BY - client, - ssg -ORDER BY - pages DESC, - ssg ASC, - client ASC; diff --git a/sql/2025/sustainability/stylesheet_count.sql b/sql/2025/sustainability/stylesheet_count.sql index 150d75e94a0..4d9ad7c9595 100644 --- a/sql/2025/sustainability/stylesheet_count.sql +++ b/sql/2025/sustainability/stylesheet_count.sql @@ -1,124 +1,124 @@ #standardSQL # Breakdown of inline vs external scripts WITH stylesheet_data AS ( - SELECT - client, - page, - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' - ), - '$.stylesheets' - ) AS INT64 - ) AS external_stylesheets, - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' - ), - '$.inlineStyles' - ) AS INT64 - ) AS inline_stylesheets, - SAFE_DIVIDE( - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.inlineStyles' - ) AS INT64 - ), - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.stylesheets' - ) AS INT64 - ) + - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.inlineStyles' - ) AS INT64 - ) - ) AS pct_inline_stylesheets, - SAFE_DIVIDE( - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.stylesheets' - ) AS INT64 - ), - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.stylesheets' - ) AS INT64 - ) + - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.inlineStyles' - ) AS INT64 - ) - ) AS pct_external_stylesheets - FROM - `httparchive.crawl.pages` - WHERE - date = 
'2025-06-01' - AND - is_root_page = TRUE AND + SELECT + client, + page, + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' + ), + '$.stylesheets' + ) AS INT64 + ) AS external_stylesheets, + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' + ), + '$.inlineStyles' + ) AS INT64 + ) AS inline_stylesheets, + SAFE_DIVIDE( + CAST( JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' - ), - '$.stylesheets' - ) IS NOT NULL + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.inlineStyles' + ) AS INT64 + ), + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.stylesheets' + ) AS INT64 + ) + + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.inlineStyles' + ) AS INT64 + ) + ) AS pct_inline_stylesheets, + SAFE_DIVIDE( + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.stylesheets' + ) AS INT64 + ), + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.stylesheets' + ) AS INT64 + ) + + CAST( + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), + '$.document' + ), + '$.inlineStyles' + ) AS INT64 + ) + ) AS pct_external_stylesheets + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-06-01' + AND + is_root_page = TRUE AND + JSON_EXTRACT_SCALAR( + JSON_EXTRACT( + JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' + ), + '$.stylesheets' + ) IS NOT NULL ) SELECT - client, - COUNT(DISTINCT page) AS pages_analyzed, - SUM(external_stylesheets) AS external_stylesheets, - SUM(inline_stylesheets) AS inline_stylesheets, + client, + COUNT(DISTINCT page) AS pages_analyzed, + 
SUM(external_stylesheets) AS external_stylesheets, + SUM(inline_stylesheets) AS inline_stylesheets, + SAFE_DIVIDE( + SUM(inline_stylesheets), SUM(inline_stylesheets + external_stylesheets) + ) AS pct_inline_stylesheets, + SAFE_DIVIDE( + SUM(external_stylesheets), + SUM(inline_stylesheets + external_stylesheets) + ) AS pct_external_stylesheets, + APPROX_QUANTILES( SAFE_DIVIDE( - SUM(inline_stylesheets), SUM(inline_stylesheets + external_stylesheets) - ) AS pct_inline_stylesheets, + inline_stylesheets, inline_stylesheets + external_stylesheets + ), + 1000 + ) [OFFSET(500)] AS median_inline_stylesheets, + APPROX_QUANTILES( SAFE_DIVIDE( - SUM(external_stylesheets), - SUM(inline_stylesheets + external_stylesheets) - ) AS pct_external_stylesheets, - APPROX_QUANTILES( - SAFE_DIVIDE( - inline_stylesheets, inline_stylesheets + external_stylesheets - ), - 1000 - ) [OFFSET(500)] AS median_inline_stylesheets, - APPROX_QUANTILES( - SAFE_DIVIDE( - external_stylesheets, inline_stylesheets + external_stylesheets - ), - 1000 - ) [OFFSET(500)] AS median_external_stylesheets + external_stylesheets, inline_stylesheets + external_stylesheets + ), + 1000 + ) [OFFSET(500)] AS median_external_stylesheets FROM - stylesheet_data + stylesheet_data GROUP BY - client + client ORDER BY - client; + client; diff --git a/sql/2025/sustainability/text_compression.sql b/sql/2025/sustainability/text_compression.sql index dac91ca0bdb..345ef8d6c74 100644 --- a/sql/2025/sustainability/text_compression.sql +++ b/sql/2025/sustainability/text_compression.sql @@ -13,45 +13,45 @@ LANGUAGE js AS """ """; WITH request_data AS ( - SELECT - client, - GETCONTENTENCODING( - JSON_EXTRACT(payload, '$.response.headers') - ) AS resp_content_encoding - FROM - `httparchive.crawl.requests` - WHERE - date = '2025-06-01' + SELECT + client, + GETCONTENTENCODING( + JSON_EXTRACT(payload, '$.response.headers') + ) AS resp_content_encoding + FROM + `httparchive.crawl.requests` + WHERE + date = '2025-06-01' ), compression_data AS 
( - SELECT - client, - CASE - WHEN resp_content_encoding = 'gzip' THEN 'Gzip' - WHEN resp_content_encoding = 'br' THEN 'Brotli' - WHEN resp_content_encoding IS NULL THEN 'no text compression' - ELSE 'other' - END AS compression_type, - COUNT(*) AS num_requests, - SUM(COUNT(*)) OVER (PARTITION BY client) AS total, - ROUND( - COUNT(*) / SUM(COUNT(*)) OVER (PARTITION BY client) * 100, 2 - ) AS pct - FROM - request_data - GROUP BY - client, - compression_type + SELECT + client, + CASE + WHEN resp_content_encoding = 'gzip' THEN 'Gzip' + WHEN resp_content_encoding = 'br' THEN 'Brotli' + WHEN resp_content_encoding IS NULL THEN 'no text compression' + ELSE 'other' + END AS compression_type, + COUNT(*) AS num_requests, + SUM(COUNT(*)) OVER (PARTITION BY client) AS total, + ROUND( + COUNT(*) / SUM(COUNT(*)) OVER (PARTITION BY client) * 100, 2 + ) AS pct + FROM + request_data + GROUP BY + client, + compression_type ) SELECT - client, - compression_type, - num_requests, - total, - pct + client, + compression_type, + num_requests, + total, + pct FROM compression_data ORDER BY - client ASC, - num_requests DESC + client ASC, + num_requests DESC diff --git a/sql/2025/sustainability/unminified_css_bytes.sql b/sql/2025/sustainability/unminified_css_bytes.sql index cd771f2375f..c08dd4f7a4e 100644 --- a/sql/2025/sustainability/unminified_css_bytes.sql +++ b/sql/2025/sustainability/unminified_css_bytes.sql @@ -2,24 +2,24 @@ # Distribution of unminified CSS request bytes per page SELECT - client, - percentile, - APPROX_QUANTILES( - CAST( - JSON_VALUE( - lighthouse, '$.audits.minify-css.details.overallSavingsBytes' - ) AS INT64 - ) / 1024, - 1000 - ) [OFFSET(percentile * 10)] AS css_kilobytes + client, + percentile, + APPROX_QUANTILES( + CAST( + JSON_VALUE( + lighthouse, '$.audits.minify-css.details.overallSavingsBytes' + ) AS INT64 + ) / 1024, + 1000 + ) [OFFSET(percentile * 10)] AS css_kilobytes FROM - `httparchive.crawl.pages`, - UNNEST([10, 25, 50, 75, 90, 100]) AS percentile + 
`httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile WHERE - date = '2025-06-01' + date = '2025-06-01' GROUP BY - client, - percentile + client, + percentile ORDER BY - client, - percentile + client, + percentile diff --git a/sql/2025/sustainability/unminified_js_bytes.sql b/sql/2025/sustainability/unminified_js_bytes.sql index fb6722303e4..2c43ba0d6c5 100644 --- a/sql/2025/sustainability/unminified_js_bytes.sql +++ b/sql/2025/sustainability/unminified_js_bytes.sql @@ -2,25 +2,25 @@ # Distribution of unminified JS request bytes per page SELECT - client, - percentile, - APPROX_QUANTILES( - CAST( - JSON_VALUE( - lighthouse, - '$.audits.minify-javascript.details.overallSavingsBytes' - ) AS INT64 - ) / 1024, - 1000 - ) [OFFSET(percentile * 10)] AS js_kilobytes + client, + percentile, + APPROX_QUANTILES( + CAST( + JSON_VALUE( + lighthouse, + '$.audits.minify-javascript.details.overallSavingsBytes' + ) AS INT64 + ) / 1024, + 1000 + ) [OFFSET(percentile * 10)] AS js_kilobytes FROM - `httparchive.crawl.pages`, - UNNEST([10, 25, 50, 75, 90, 100]) AS percentile + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile WHERE - date = '2025-06-01' + date = '2025-06-01' GROUP BY - client, - percentile + client, + percentile ORDER BY - client, - percentile + client, + percentile diff --git a/sql/2025/sustainability/unused_css_bytes.sql b/sql/2025/sustainability/unused_css_bytes.sql index a1adf733b81..f3006d13881 100644 --- a/sql/2025/sustainability/unused_css_bytes.sql +++ b/sql/2025/sustainability/unused_css_bytes.sql @@ -2,25 +2,25 @@ # Distribution of unused CSS request bytes per page SELECT - client, - percentile, - APPROX_QUANTILES( - CAST( - JSON_VALUE( - lighthouse, - '$.audits.unused-css-rules.details.overallSavingsBytes' - ) AS INT64 - ) / 1024, - 1000 - ) [OFFSET(percentile * 10)] AS css_kilobytes + client, + percentile, + APPROX_QUANTILES( + CAST( + JSON_VALUE( + lighthouse, + 
'$.audits.unused-css-rules.details.overallSavingsBytes' + ) AS INT64 + ) / 1024, + 1000 + ) [OFFSET(percentile * 10)] AS css_kilobytes FROM - `httparchive.crawl.pages`, - UNNEST([10, 25, 50, 75, 90, 100]) AS percentile + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile WHERE - date = '2025-06-01' + date = '2025-06-01' GROUP BY - client, - percentile + client, + percentile ORDER BY - client, - percentile + client, + percentile diff --git a/sql/2025/sustainability/unused_js_bytes.sql b/sql/2025/sustainability/unused_js_bytes.sql index 600550bf3b8..c9615e4f906 100644 --- a/sql/2025/sustainability/unused_js_bytes.sql +++ b/sql/2025/sustainability/unused_js_bytes.sql @@ -2,25 +2,25 @@ # Distribution of unused JS request bytes per page SELECT - client, - percentile, - APPROX_QUANTILES( - CAST( - JSON_VALUE( - lighthouse, - '$.audits.unused-javascript.details.overallSavingsBytes' - ) AS INT64 - ) / 1024, - 1000 - ) [OFFSET(percentile * 10)] AS js_kilobytes + client, + percentile, + APPROX_QUANTILES( + CAST( + JSON_VALUE( + lighthouse, + '$.audits.unused-javascript.details.overallSavingsBytes' + ) AS INT64 + ) / 1024, + 1000 + ) [OFFSET(percentile * 10)] AS js_kilobytes FROM - `httparchive.crawl.pages`, - UNNEST([10, 25, 50, 75, 90, 100]) AS percentile + `httparchive.crawl.pages`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile WHERE - date = '2025-06-01' + date = '2025-06-01' GROUP BY - client, - percentile + client, + percentile ORDER BY - client, - percentile + client, + percentile diff --git a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql index ac18b1b30f3..532e28fbb47 100644 --- a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql +++ b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql @@ -1,41 +1,41 @@ #standardSQL WITH combined_data AS ( - SELECT - client, - page, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages, - SUM( - CASE - 
WHEN EXISTS ( - SELECT 1 - FROM - UNNEST( - JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules') - ) AS rule - WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND - JSON_EXTRACT_SCALAR( - rule, '$.media' - ) = '(prefers-color-scheme:dark)' - ) - THEN 1 - ELSE 0 - END - ) OVER (PARTITION BY client, page) AS is_dark_mode_page - FROM - `httparchive.crawl.parsed_css` - WHERE - date = '2025-06-01' + SELECT + client, + page, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages, + SUM( + CASE + WHEN EXISTS ( + SELECT 1 + FROM + UNNEST( + JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules') + ) AS rule + WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND + JSON_EXTRACT_SCALAR( + rule, '$.media' + ) = '(prefers-color-scheme:dark)' + ) + THEN 1 + ELSE 0 + END + ) OVER (PARTITION BY client, page) AS is_dark_mode_page + FROM + `httparchive.crawl.parsed_css` + WHERE + date = '2025-06-01' ) SELECT - client, - MAX(total_pages) AS total_pages, - SUM(is_dark_mode_page) AS pages_using_dark_mode, - SUM(is_dark_mode_page) / MAX(total_pages) * 100 AS percentage_of_pages + client, + MAX(total_pages) AS total_pages, + SUM(is_dark_mode_page) AS pages_using_dark_mode, + SUM(is_dark_mode_page) / MAX(total_pages) * 100 AS percentage_of_pages FROM - combined_data + combined_data GROUP BY - client + client ORDER BY - percentage_of_pages DESC, client ASC; + percentage_of_pages DESC, client ASC; diff --git a/sql/2025/sustainability/video_autoplay_values.sql b/sql/2025/sustainability/video_autoplay_values.sql index 98f90a16533..933e9b0ecdb 100644 --- a/sql/2025/sustainability/video_autoplay_values.sql +++ b/sql/2025/sustainability/video_autoplay_values.sql @@ -1,40 +1,40 @@ WITH video_data AS ( - SELECT - client, - LOWER( - COALESCE( - JSON_EXTRACT_SCALAR(video_nodes, '$.autoplay'), - '(autoplay not used)' - ) - ) AS autoplay_value - FROM - `httparchive.crawl.pages`, - UNNEST( - JSON_EXTRACT_ARRAY( - JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.videos.nodes' - ) - ) AS video_nodes - 
WHERE - date = '2025-06-01' AND -- Updated date - is_root_page - LIMIT 10000 -- Limit the number of rows processed for faster testing + SELECT + client, + LOWER( + COALESCE( + JSON_EXTRACT_SCALAR(video_nodes, '$.autoplay'), + '(autoplay not used)' + ) + ) AS autoplay_value + FROM + `httparchive.crawl.pages`, + UNNEST( + JSON_EXTRACT_ARRAY( + JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.videos.nodes' + ) + ) AS video_nodes + WHERE + date = '2025-06-01' AND -- Updated date + is_root_page + LIMIT 10000 -- Limit the number of rows processed for faster testing ) SELECT - client, - IF(autoplay_value = '', '(empty)', autoplay_value) AS autoplay_value, - COUNT(*) AS autoplay_value_count, - SUM(COUNT(*)) OVER (PARTITION BY client) AS total_videos, - ROUND( - SAFE_DIVIDE(COUNT(*), SUM(COUNT(*)) OVER (PARTITION BY client)) * 100, 2 - ) AS autoplay_value_pct + client, + IF(autoplay_value = '', '(empty)', autoplay_value) AS autoplay_value, + COUNT(*) AS autoplay_value_count, + SUM(COUNT(*)) OVER (PARTITION BY client) AS total_videos, + ROUND( + SAFE_DIVIDE(COUNT(*), SUM(COUNT(*)) OVER (PARTITION BY client)) * 100, 2 + ) AS autoplay_value_pct FROM - video_data + video_data GROUP BY - client, - autoplay_value + client, + autoplay_value QUALIFY - autoplay_value_count > 10 + autoplay_value_count > 10 ORDER BY - client ASC, - autoplay_value_count DESC + client ASC, + autoplay_value_count DESC diff --git a/sql/2025/sustainability/video_preload_values.sql b/sql/2025/sustainability/video_preload_values.sql index fa88ca1cba4..36c9bcab586 100644 --- a/sql/2025/sustainability/video_preload_values.sql +++ b/sql/2025/sustainability/video_preload_values.sql @@ -1,42 +1,42 @@ WITH video_data AS ( - SELECT - date, - client, - LOWER( - COALESCE( - JSON_EXTRACT_SCALAR(video_nodes, '$.preload'), - '(preload not used)' - ) - ) AS preload_value - FROM - `httparchive.crawl.pages`, - UNNEST( - JSON_EXTRACT_ARRAY( - JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.videos.nodes' - ) - ) AS video_nodes 
- WHERE - date IN ('2025-06-01', '2024-07-01') AND -- Updated dates - is_root_page + SELECT + date, + client, + LOWER( + COALESCE( + JSON_EXTRACT_SCALAR(video_nodes, '$.preload'), + '(preload not used)' + ) + ) AS preload_value + FROM + `httparchive.crawl.pages`, + UNNEST( + JSON_EXTRACT_ARRAY( + JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.videos.nodes' + ) + ) AS video_nodes + WHERE + date IN ('2025-06-01', '2024-07-01') AND -- Updated dates + is_root_page ) SELECT - date, - client, - IF(preload_value = '', '(empty)', preload_value) AS preload_value, - COUNT(*) AS preload_value_count, - SAFE_DIVIDE( - COUNT(*), SUM(COUNT(*)) OVER (PARTITION BY date, client) - ) AS preload_value_pct + date, + client, + IF(preload_value = '', '(empty)', preload_value) AS preload_value, + COUNT(*) AS preload_value_count, + SAFE_DIVIDE( + COUNT(*), SUM(COUNT(*)) OVER (PARTITION BY date, client) + ) AS preload_value_pct FROM - video_data + video_data GROUP BY - date, - client, - preload_value + date, + client, + preload_value QUALIFY - preload_value_count > 10 + preload_value_count > 10 ORDER BY - date ASC, - client ASC, - preload_value_count DESC + date ASC, + client ASC, + preload_value_count DESC From ce56199b9652fee7ab8a62019413f90cc94fef91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Tue, 29 Jul 2025 15:13:51 +0100 Subject: [PATCH 06/16] fix linter issues --- sql/2025/sustainability/cache_header_usage.sql | 18 +++++++++--------- sql/2025/sustainability/cdn_adoption.sql | 6 +++--- sql/2025/sustainability/responsive_images.sql | 8 ++++---- sql/2025/sustainability/script_count.sql | 4 ++-- .../sustainability/video_autoplay_values.sql | 6 +++--- .../sustainability/video_preload_values.sql | 4 ++-- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/sql/2025/sustainability/cache_header_usage.sql b/sql/2025/sustainability/cache_header_usage.sql index 6f6a2a7087b..297b1e93059 100644 --- a/sql/2025/sustainability/cache_header_usage.sql +++ 
b/sql/2025/sustainability/cache_header_usage.sql @@ -3,7 +3,7 @@ SELECT client, - COUNT(*) AS total_requests, + COUNT(1) AS total_requests, COUNTIF(uses_cache_control) AS total_using_cache_control, COUNTIF(uses_max_age) AS total_using_max_age, @@ -22,24 +22,24 @@ SELECT NOT uses_cache_control AND uses_expires ) AS total_using_only_expires, - COUNTIF(uses_cache_control) / COUNT(*) AS pct_cache_control, - COUNTIF(uses_max_age) / COUNT(*) AS pct_using_max_age, - COUNTIF(uses_expires) / COUNT(*) AS pct_using_expires, + COUNTIF(uses_cache_control) / COUNT(1) AS pct_cache_control, + COUNTIF(uses_max_age) / COUNT(1) AS pct_using_max_age, + COUNTIF(uses_expires) / COUNT(1) AS pct_using_expires, COUNTIF( uses_max_age AND uses_expires - ) / COUNT(*) AS pct_using_max_age_and_expires, + ) / COUNT(1) AS pct_using_max_age_and_expires, COUNTIF( uses_cache_control AND uses_expires - ) / COUNT(*) AS pct_using_both_cc_and_expires, + ) / COUNT(1) AS pct_using_both_cc_and_expires, COUNTIF( NOT uses_cache_control AND NOT uses_expires - ) / COUNT(*) AS pct_using_neither_cc_nor_expires, + ) / COUNT(1) AS pct_using_neither_cc_nor_expires, COUNTIF( uses_cache_control AND NOT uses_expires - ) / COUNT(*) AS pct_using_only_cache_control, + ) / COUNT(1) AS pct_using_only_cache_control, COUNTIF( NOT uses_cache_control AND uses_expires - ) / COUNT(*) AS pct_using_only_expires + ) / COUNT(1) AS pct_using_only_expires FROM ( SELECT diff --git a/sql/2025/sustainability/cdn_adoption.sql b/sql/2025/sustainability/cdn_adoption.sql index 6e4c06db59d..dedad8629a7 100644 --- a/sql/2025/sustainability/cdn_adoption.sql +++ b/sql/2025/sustainability/cdn_adoption.sql @@ -5,12 +5,12 @@ SELECT client, total, IF(cdn = '', 'No CDN', cdn) AS cdn, - COUNT(*) AS freq, - COUNT(*) / total AS pct + COUNT(1) AS freq, + COUNT(1) / total AS pct FROM ( SELECT client, - COUNT(*) AS total, + COUNT(1) AS total, ARRAY_CONCAT_AGG( SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ') ) AS cdn_list diff --git 
a/sql/2025/sustainability/responsive_images.sql b/sql/2025/sustainability/responsive_images.sql index f1052e620a9..41f4273b8c9 100644 --- a/sql/2025/sustainability/responsive_images.sql +++ b/sql/2025/sustainability/responsive_images.sql @@ -34,10 +34,10 @@ WITH page_data AS ( SELECT client, round( - safe_divide(countif(media_info.num_srcset_all > 0), count(*)) * 100, 2 + safe_divide(countif(media_info.num_srcset_all > 0), count(1)) * 100, 2 ) AS pages_with_srcset_pct, round( - safe_divide(countif(media_info.num_srcset_sizes > 0), count(*)) * 100, 2 + safe_divide(countif(media_info.num_srcset_sizes > 0), count(1)) * 100, 2 ) AS pages_with_srcset_sizes_pct, round( safe_divide( @@ -46,7 +46,7 @@ SELECT media_info.num_srcset_all > 0 ) - countif(media_info.num_srcset_sizes > 0) ), - count(*) + count(1) ) * 100, 2 ) AS pages_with_srcset_wo_sizes_pct, @@ -64,7 +64,7 @@ SELECT 2 ) AS instances_of_srcset_wo_sizes_pct, round( - safe_divide(countif(media_info.num_picture_img > 0), count(*)) * 100, 2 + safe_divide(countif(media_info.num_picture_img > 0), count(1)) * 100, 2 ) AS pages_with_picture_pct FROM page_data GROUP BY diff --git a/sql/2025/sustainability/script_count.sql b/sql/2025/sustainability/script_count.sql index b7019b73d25..2ff06b3afe1 100644 --- a/sql/2025/sustainability/script_count.sql +++ b/sql/2025/sustainability/script_count.sql @@ -95,10 +95,10 @@ SELECT SAFE_DIVIDE(SUM(inline_scripts), SUM(total_scripts)) AS pct_inline_script, APPROX_QUANTILES( SAFE_DIVIDE(external_scripts, total_scripts), 1000 - ) [OFFSET(500)] AS median_external, + )[OFFSET(500)] AS median_external, APPROX_QUANTILES( SAFE_DIVIDE(inline_scripts, total_scripts), 1000 - ) [OFFSET(500)] AS median_inline + )[OFFSET(500)] AS median_inline FROM script_data GROUP BY diff --git a/sql/2025/sustainability/video_autoplay_values.sql b/sql/2025/sustainability/video_autoplay_values.sql index 933e9b0ecdb..695f33c1f56 100644 --- a/sql/2025/sustainability/video_autoplay_values.sql +++ 
b/sql/2025/sustainability/video_autoplay_values.sql @@ -23,10 +23,10 @@ WITH video_data AS ( SELECT client, IF(autoplay_value = '', '(empty)', autoplay_value) AS autoplay_value, - COUNT(*) AS autoplay_value_count, - SUM(COUNT(*)) OVER (PARTITION BY client) AS total_videos, + COUNT(1) AS autoplay_value_count, + SUM(COUNT(1)) OVER (PARTITION BY client) AS total_videos, ROUND( - SAFE_DIVIDE(COUNT(*), SUM(COUNT(*)) OVER (PARTITION BY client)) * 100, 2 + SAFE_DIVIDE(COUNT(1), SUM(COUNT(1)) OVER (PARTITION BY client)) * 100, 2 ) AS autoplay_value_pct FROM video_data diff --git a/sql/2025/sustainability/video_preload_values.sql b/sql/2025/sustainability/video_preload_values.sql index 36c9bcab586..cf4e2fabff7 100644 --- a/sql/2025/sustainability/video_preload_values.sql +++ b/sql/2025/sustainability/video_preload_values.sql @@ -24,9 +24,9 @@ SELECT date, client, IF(preload_value = '', '(empty)', preload_value) AS preload_value, - COUNT(*) AS preload_value_count, + COUNT(1) AS preload_value_count, SAFE_DIVIDE( - COUNT(*), SUM(COUNT(*)) OVER (PARTITION BY date, client) + COUNT(1), SUM(COUNT(1)) OVER (PARTITION BY date, client) ) AS preload_value_pct FROM video_data From adc74c1222268b1b5fc49e81d0a32942b9ef8f9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Tue, 29 Jul 2025 15:36:10 +0100 Subject: [PATCH 07/16] fix CV04 --- .../sustainability/cache_header_usage.sql | 18 +++++++-------- sql/2025/sustainability/cdn_adoption.sql | 8 +++---- sql/2025/sustainability/responsive_images.sql | 8 +++---- .../use_of_prefers_dark_mode_usage.sql | 22 +++++++++---------- .../sustainability/video_autoplay_values.sql | 6 ++--- .../sustainability/video_preload_values.sql | 4 ++-- 6 files changed, 33 insertions(+), 33 deletions(-) diff --git a/sql/2025/sustainability/cache_header_usage.sql b/sql/2025/sustainability/cache_header_usage.sql index 297b1e93059..3d3b414711d 100644 --- a/sql/2025/sustainability/cache_header_usage.sql +++ 
b/sql/2025/sustainability/cache_header_usage.sql @@ -3,7 +3,7 @@ SELECT client, - COUNT(1) AS total_requests, + COUNT(0) AS total_requests, COUNTIF(uses_cache_control) AS total_using_cache_control, COUNTIF(uses_max_age) AS total_using_max_age, @@ -22,24 +22,24 @@ SELECT NOT uses_cache_control AND uses_expires ) AS total_using_only_expires, - COUNTIF(uses_cache_control) / COUNT(1) AS pct_cache_control, - COUNTIF(uses_max_age) / COUNT(1) AS pct_using_max_age, - COUNTIF(uses_expires) / COUNT(1) AS pct_using_expires, + COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control, + COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age, + COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires, COUNTIF( uses_max_age AND uses_expires - ) / COUNT(1) AS pct_using_max_age_and_expires, + ) / COUNT(0) AS pct_using_max_age_and_expires, COUNTIF( uses_cache_control AND uses_expires - ) / COUNT(1) AS pct_using_both_cc_and_expires, + ) / COUNT(0) AS pct_using_both_cc_and_expires, COUNTIF( NOT uses_cache_control AND NOT uses_expires - ) / COUNT(1) AS pct_using_neither_cc_nor_expires, + ) / COUNT(0) AS pct_using_neither_cc_nor_expires, COUNTIF( uses_cache_control AND NOT uses_expires - ) / COUNT(1) AS pct_using_only_cache_control, + ) / COUNT(0) AS pct_using_only_cache_control, COUNTIF( NOT uses_cache_control AND uses_expires - ) / COUNT(1) AS pct_using_only_expires + ) / COUNT(0) AS pct_using_only_expires FROM ( SELECT diff --git a/sql/2025/sustainability/cdn_adoption.sql b/sql/2025/sustainability/cdn_adoption.sql index dedad8629a7..2021dbb1721 100644 --- a/sql/2025/sustainability/cdn_adoption.sql +++ b/sql/2025/sustainability/cdn_adoption.sql @@ -5,12 +5,12 @@ SELECT client, total, IF(cdn = '', 'No CDN', cdn) AS cdn, - COUNT(1) AS freq, - COUNT(1) / total AS pct + COUNT(0) AS freq, + COUNT(0) / total AS pct FROM ( SELECT client, - COUNT(1) AS total, + COUNT(0) AS total, ARRAY_CONCAT_AGG( SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ') ) AS cdn_list @@ -22,7 +22,7 @@ FROM ( GROUP 
BY client ), -UNNEST(cdn_list) AS cdn + UNNEST(cdn_list) AS cdn GROUP BY client, cdn, diff --git a/sql/2025/sustainability/responsive_images.sql b/sql/2025/sustainability/responsive_images.sql index 41f4273b8c9..a513ef6c6de 100644 --- a/sql/2025/sustainability/responsive_images.sql +++ b/sql/2025/sustainability/responsive_images.sql @@ -34,10 +34,10 @@ WITH page_data AS ( SELECT client, round( - safe_divide(countif(media_info.num_srcset_all > 0), count(1)) * 100, 2 + safe_divide(countif(media_info.num_srcset_all > 0), count(0)) * 100, 2 ) AS pages_with_srcset_pct, round( - safe_divide(countif(media_info.num_srcset_sizes > 0), count(1)) * 100, 2 + safe_divide(countif(media_info.num_srcset_sizes > 0), count(0)) * 100, 2 ) AS pages_with_srcset_sizes_pct, round( safe_divide( @@ -46,7 +46,7 @@ SELECT media_info.num_srcset_all > 0 ) - countif(media_info.num_srcset_sizes > 0) ), - count(1) + count(0) ) * 100, 2 ) AS pages_with_srcset_wo_sizes_pct, @@ -64,7 +64,7 @@ SELECT 2 ) AS instances_of_srcset_wo_sizes_pct, round( - safe_divide(countif(media_info.num_picture_img > 0), count(1)) * 100, 2 + safe_divide(countif(media_info.num_picture_img > 0), count(0)) * 100, 2 ) AS pages_with_picture_pct FROM page_data GROUP BY diff --git a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql index 532e28fbb47..1fdfbfe1f40 100644 --- a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql +++ b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql @@ -8,17 +8,17 @@ WITH combined_data AS ( SUM( CASE WHEN EXISTS ( - SELECT 1 - FROM - UNNEST( - JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules') - ) AS rule - WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND - JSON_EXTRACT_SCALAR( - rule, '$.media' - ) = '(prefers-color-scheme:dark)' - ) - THEN 1 + SELECT 1 + FROM + UNNEST( + JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules') + ) AS rule + WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND + JSON_EXTRACT_SCALAR( 
+ rule, '$.media' + ) = '(prefers-color-scheme:dark)' + ) + THEN 1 ELSE 0 END ) OVER (PARTITION BY client, page) AS is_dark_mode_page diff --git a/sql/2025/sustainability/video_autoplay_values.sql b/sql/2025/sustainability/video_autoplay_values.sql index 695f33c1f56..af1cbc5579f 100644 --- a/sql/2025/sustainability/video_autoplay_values.sql +++ b/sql/2025/sustainability/video_autoplay_values.sql @@ -23,10 +23,10 @@ WITH video_data AS ( SELECT client, IF(autoplay_value = '', '(empty)', autoplay_value) AS autoplay_value, - COUNT(1) AS autoplay_value_count, - SUM(COUNT(1)) OVER (PARTITION BY client) AS total_videos, + COUNT(0) AS autoplay_value_count, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total_videos, ROUND( - SAFE_DIVIDE(COUNT(1), SUM(COUNT(1)) OVER (PARTITION BY client)) * 100, 2 + SAFE_DIVIDE(COUNT(0), SUM(COUNT(0)) OVER (PARTITION BY client)) * 100, 2 ) AS autoplay_value_pct FROM video_data diff --git a/sql/2025/sustainability/video_preload_values.sql b/sql/2025/sustainability/video_preload_values.sql index cf4e2fabff7..41ca1e150ef 100644 --- a/sql/2025/sustainability/video_preload_values.sql +++ b/sql/2025/sustainability/video_preload_values.sql @@ -24,9 +24,9 @@ SELECT date, client, IF(preload_value = '', '(empty)', preload_value) AS preload_value, - COUNT(1) AS preload_value_count, + COUNT(0) AS preload_value_count, SAFE_DIVIDE( - COUNT(1), SUM(COUNT(1)) OVER (PARTITION BY date, client) + COUNT(0), SUM(COUNT(0)) OVER (PARTITION BY date, client) ) AS preload_value_pct FROM video_data From 8a71ade83ad9468a0b7eba1eae3dfac8ab091876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Tue, 29 Jul 2025 15:44:45 +0100 Subject: [PATCH 08/16] fix linter --- sql/2025/sustainability/cdn_adoption.sql | 2 +- .../sustainability/cms_bytes_per_type.sql | 45 ++++++++++--------- .../green_third_party_requests.sql | 6 +-- sql/2025/sustainability/green_web_hosting.sql | 6 +-- .../sustainability/page_byte_pre_type.sql | 32 ++++++------- 
.../sustainability/unminified_js_bytes.sql | 2 +- sql/2025/sustainability/unused_css_bytes.sql | 2 +- sql/2025/sustainability/unused_js_bytes.sql | 2 +- .../use_of_prefers_dark_mode_usage.sql | 22 ++++----- 9 files changed, 62 insertions(+), 57 deletions(-) diff --git a/sql/2025/sustainability/cdn_adoption.sql b/sql/2025/sustainability/cdn_adoption.sql index 2021dbb1721..2ae4f1cfe2b 100644 --- a/sql/2025/sustainability/cdn_adoption.sql +++ b/sql/2025/sustainability/cdn_adoption.sql @@ -22,7 +22,7 @@ FROM ( GROUP BY client ), - UNNEST(cdn_list) AS cdn + UNNEST(cdn_list) AS cdn GROUP BY client, cdn, diff --git a/sql/2025/sustainability/cms_bytes_per_type.sql b/sql/2025/sustainability/cms_bytes_per_type.sql index 817cf890e24..b217474b3d3 100644 --- a/sql/2025/sustainability/cms_bytes_per_type.sql +++ b/sql/2025/sustainability/cms_bytes_per_type.sql @@ -172,7 +172,8 @@ WITH cms_data AS ( embodied_emissions_network * grid_intensity + embodied_emissions_user_devices * grid_intensity ) - )) AS total_html_emissions, + ) + ) AS total_html_emissions, ( SAFE_DIVIDE( @@ -191,7 +192,8 @@ WITH cms_data AS ( embodied_emissions_network * grid_intensity + embodied_emissions_user_devices * grid_intensity ) - )) AS total_js_emissions, + ) + ) AS total_js_emissions, ( SAFE_DIVIDE( @@ -210,7 +212,8 @@ WITH cms_data AS ( embodied_emissions_network * grid_intensity + embodied_emissions_user_devices * grid_intensity ) - )) AS total_css_emissions, + ) + ) AS total_css_emissions, ( SAFE_DIVIDE( @@ -229,7 +232,8 @@ WITH cms_data AS ( embodied_emissions_network * grid_intensity + embodied_emissions_user_devices * grid_intensity ) - )) AS total_img_emissions, + ) + ) AS total_img_emissions, ( SAFE_DIVIDE( @@ -248,7 +252,8 @@ WITH cms_data AS ( embodied_emissions_network * grid_intensity + embodied_emissions_user_devices * grid_intensity ) - )) AS total_font_emissions, + ) + ) AS total_font_emissions, -- Resource-specific size in KB CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS 
html_kb, @@ -268,40 +273,40 @@ WITH cms_data AS ( SELECT client, cms, - COUNT(*) AS pages, + COUNT(0) AS pages, -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, APPROX_QUANTILES( total_operational_emissions, 1000 - ) [OFFSET(500)] AS median_operational_emissions, + )[OFFSET(500)] AS median_operational_emissions, APPROX_QUANTILES( total_embodied_emissions, 1000 - ) [OFFSET(500)] AS median_embodied_emissions, + )[OFFSET(500)] AS median_embodied_emissions, APPROX_QUANTILES( total_emissions, 1000 - ) [OFFSET(500)] AS median_total_emissions, + )[OFFSET(500)] AS median_total_emissions, -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, APPROX_QUANTILES( total_html_emissions, 1000 - ) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, + )[OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, APPROX_QUANTILES( total_js_emissions, 1000 - ) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, + )[OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, APPROX_QUANTILES( total_css_emissions, 1000 - ) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, + )[OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, APPROX_QUANTILES( total_img_emissions, 1000 - ) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, + )[OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, APPROX_QUANTILES( total_font_emissions, 
1000 - ) [OFFSET(500)] AS median_total_font_emissions + )[OFFSET(500)] AS median_total_font_emissions FROM cms_data GROUP BY diff --git a/sql/2025/sustainability/green_third_party_requests.sql b/sql/2025/sustainability/green_third_party_requests.sql index be5f2ae7475..ebb236e57e6 100644 --- a/sql/2025/sustainability/green_third_party_requests.sql +++ b/sql/2025/sustainability/green_third_party_requests.sql @@ -117,16 +117,16 @@ SELECT END AS ranking, APPROX_QUANTILES( b.third_parties_per_page, 1000 - ) [OFFSET(500)] AS p50_third_parties_per_page, + )[OFFSET(500)] AS p50_third_parties_per_page, APPROX_QUANTILES( bg.green_third_parties_per_page, 1000 - ) [OFFSET(500)] AS p50_green_third_parties_per_page, + )[OFFSET(500)] AS p50_green_third_parties_per_page, APPROX_QUANTILES( SAFE_DIVIDE( bg.green_third_parties_per_page, b.third_parties_per_page ), 1000 - ) [OFFSET(500)] AS pct_green + )[OFFSET(500)] AS pct_green FROM base AS b, UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping diff --git a/sql/2025/sustainability/green_web_hosting.sql b/sql/2025/sustainability/green_web_hosting.sql index 5c2b3086155..499b45e0858 100644 --- a/sql/2025/sustainability/green_web_hosting.sql +++ b/sql/2025/sustainability/green_web_hosting.sql @@ -33,8 +33,8 @@ SELECT ELSE FORMAT("%'d", rank_grouping) END AS ranking, COUNTIF(is_green) AS total_green, - COUNT(*) AS total_sites, - SAFE_DIVIDE(COUNTIF(is_green), COUNT(*)) AS pct_green + COUNT(0) AS total_sites, + SAFE_DIVIDE(COUNTIF(is_green), COUNT(0)) AS pct_green FROM ( -- Left join green hosting information SELECT @@ -48,7 +48,7 @@ FROM ( green AS g ON p.host = g.host ), -UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping + UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping WHERE rank <= rank_grouping GROUP BY diff --git a/sql/2025/sustainability/page_byte_pre_type.sql b/sql/2025/sustainability/page_byte_pre_type.sql index 20b7742218b..7fce03ed2f0 100644 --- 
a/sql/2025/sustainability/page_byte_pre_type.sql +++ b/sql/2025/sustainability/page_byte_pre_type.sql @@ -53,60 +53,60 @@ SELECT -- Total resources approx_quantiles( BYTESTOTAL / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS TOTAL_KBYTES, + )[offset(PERCENTILE * 10)] AS TOTAL_KBYTES, approx_quantiles( calculate_emissions(BYTESTOTAL, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS TOTAL_EMISSIONS, + )[offset(PERCENTILE * 10)] AS TOTAL_EMISSIONS, -- HTML resources approx_quantiles( BYTESHTML / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS HTML_KBYTES, + )[offset(PERCENTILE * 10)] AS HTML_KBYTES, approx_quantiles( calculate_emissions(BYTESHTML, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS HTML_EMISSIONS, + )[offset(PERCENTILE * 10)] AS HTML_EMISSIONS, -- JavaScript resources approx_quantiles( BYTESJS / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS JS_KBYTES, + )[offset(PERCENTILE * 10)] AS JS_KBYTES, approx_quantiles( calculate_emissions(BYTESJS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS JS_EMISSIONS, + )[offset(PERCENTILE * 10)] AS JS_EMISSIONS, -- CSS resources approx_quantiles( BYTESCSS / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS CSS_KBYTES, + )[offset(PERCENTILE * 10)] AS CSS_KBYTES, approx_quantiles( calculate_emissions(BYTESCSS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS CSS_EMISSIONS, + )[offset(PERCENTILE * 10)] AS CSS_EMISSIONS, -- Image resources approx_quantiles( BYTESIMG / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS IMG_KBYTES, + )[offset(PERCENTILE * 10)] AS IMG_KBYTES, approx_quantiles( calculate_emissions(BYTESIMG, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS IMG_EMISSIONS, + )[offset(PERCENTILE * 10)] AS IMG_EMISSIONS, -- Other resources approx_quantiles( BYTESOTHER / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS OTHER_KBYTES, + )[offset(PERCENTILE * 10)] AS OTHER_KBYTES, approx_quantiles( 
calculate_emissions(BYTESOTHER, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS OTHER_EMISSIONS, + )[offset(PERCENTILE * 10)] AS OTHER_EMISSIONS, -- HTML document approx_quantiles( BYTESHTMLDOC / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS HTML_DOC_KBYTES, + )[offset(PERCENTILE * 10)] AS HTML_DOC_KBYTES, approx_quantiles( calculate_emissions(BYTESHTMLDOC, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, + )[offset(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, -- Font resources approx_quantiles( BYTESFONT / 1024, 1000 - ) [offset(PERCENTILE * 10)] AS FONT_KBYTES, + )[offset(PERCENTILE * 10)] AS FONT_KBYTES, approx_quantiles( calculate_emissions(BYTESFONT, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [offset(PERCENTILE * 10)] AS FONT_EMISSIONS + )[offset(PERCENTILE * 10)] AS FONT_EMISSIONS FROM PAGE_DATA, unnest([10, 25, 50, 75, 90, 100]) AS PERCENTILE diff --git a/sql/2025/sustainability/unminified_js_bytes.sql b/sql/2025/sustainability/unminified_js_bytes.sql index 2c43ba0d6c5..90c3269369a 100644 --- a/sql/2025/sustainability/unminified_js_bytes.sql +++ b/sql/2025/sustainability/unminified_js_bytes.sql @@ -12,7 +12,7 @@ SELECT ) AS INT64 ) / 1024, 1000 - ) [OFFSET(percentile * 10)] AS js_kilobytes + )[OFFSET(percentile * 10)] AS js_kilobytes FROM `httparchive.crawl.pages`, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile diff --git a/sql/2025/sustainability/unused_css_bytes.sql b/sql/2025/sustainability/unused_css_bytes.sql index f3006d13881..39f76c446ef 100644 --- a/sql/2025/sustainability/unused_css_bytes.sql +++ b/sql/2025/sustainability/unused_css_bytes.sql @@ -12,7 +12,7 @@ SELECT ) AS INT64 ) / 1024, 1000 - ) [OFFSET(percentile * 10)] AS css_kilobytes + )[OFFSET(percentile * 10)] AS css_kilobytes FROM `httparchive.crawl.pages`, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile diff --git a/sql/2025/sustainability/unused_js_bytes.sql b/sql/2025/sustainability/unused_js_bytes.sql index 
c9615e4f906..3a945e923dc 100644 --- a/sql/2025/sustainability/unused_js_bytes.sql +++ b/sql/2025/sustainability/unused_js_bytes.sql @@ -12,7 +12,7 @@ SELECT ) AS INT64 ) / 1024, 1000 - ) [OFFSET(percentile * 10)] AS js_kilobytes + )[OFFSET(percentile * 10)] AS js_kilobytes FROM `httparchive.crawl.pages`, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile diff --git a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql index 1fdfbfe1f40..532e28fbb47 100644 --- a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql +++ b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql @@ -8,17 +8,17 @@ WITH combined_data AS ( SUM( CASE WHEN EXISTS ( - SELECT 1 - FROM - UNNEST( - JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules') - ) AS rule - WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND - JSON_EXTRACT_SCALAR( - rule, '$.media' - ) = '(prefers-color-scheme:dark)' - ) - THEN 1 + SELECT 1 + FROM + UNNEST( + JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules') + ) AS rule + WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND + JSON_EXTRACT_SCALAR( + rule, '$.media' + ) = '(prefers-color-scheme:dark)' + ) + THEN 1 ELSE 0 END ) OVER (PARTITION BY client, page) AS is_dark_mode_page From ca8a4b5f4e2f5a7990e8d30ea544691b00d7eca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Tue, 29 Jul 2025 15:52:13 +0100 Subject: [PATCH 09/16] fix LT02 --- .../use_of_prefers_dark_mode_usage.sql | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql index 532e28fbb47..775e799e0c3 100644 --- a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql +++ b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql @@ -1,4 +1,5 @@ #standardSQL +# The distribution of websites by client that use the prefers-color-scheme:dark media query. 
WITH combined_data AS ( SELECT @@ -8,17 +9,12 @@ WITH combined_data AS ( SUM( CASE WHEN EXISTS ( - SELECT 1 - FROM - UNNEST( - JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules') - ) AS rule - WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND - JSON_EXTRACT_SCALAR( - rule, '$.media' - ) = '(prefers-color-scheme:dark)' - ) - THEN 1 + SELECT 1 + FROM UNNEST(JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules')) AS rule + WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND + JSON_EXTRACT_SCALAR(rule, '$.media') = '(prefers-color-scheme:dark)' + ) + THEN 1 ELSE 0 END ) OVER (PARTITION BY client, page) AS is_dark_mode_page From 0245b0c48b970cbdc21969324a15b49d1be931a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Tue, 29 Jul 2025 16:02:43 +0100 Subject: [PATCH 10/16] fix CV04 --- sql/2025/sustainability/ecommerce_bytes_per_type.sql | 2 +- sql/2025/sustainability/favicons.sql | 8 +++----- sql/2025/sustainability/ssg_bytes_per_type.sql | 2 +- sql/2025/sustainability/text_compression.sql | 6 +++--- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/sql/2025/sustainability/ecommerce_bytes_per_type.sql b/sql/2025/sustainability/ecommerce_bytes_per_type.sql index 82b301ee373..85bb8115307 100644 --- a/sql/2025/sustainability/ecommerce_bytes_per_type.sql +++ b/sql/2025/sustainability/ecommerce_bytes_per_type.sql @@ -274,7 +274,7 @@ WITH ecommerce_data AS ( SELECT client, ecommerce, - COUNT(*) AS pages, + COUNT(0) AS pages, -- Median resource weights and emissions APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, diff --git a/sql/2025/sustainability/favicons.sql b/sql/2025/sustainability/favicons.sql index eed571d33c5..9311db0281a 100644 --- a/sql/2025/sustainability/favicons.sql +++ b/sql/2025/sustainability/favicons.sql @@ -50,11 +50,9 @@ WITH favicons AS ( GETFAVICONIMAGE( JSON_EXTRACT_SCALAR(payload, '$._almanac') ) AS image_type_extension, - COUNT(*) AS freq, - SUM(COUNT(*)) OVER (PARTITION BY client) AS total, - COUNT( - * - 
) / SUM(COUNT(*)) OVER (PARTITION BY client) AS percentage_of_total + COUNT(0) AS freq, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS percentage_of_total FROM `httparchive.crawl.pages` WHERE diff --git a/sql/2025/sustainability/ssg_bytes_per_type.sql b/sql/2025/sustainability/ssg_bytes_per_type.sql index fefc7d065c6..66e20262999 100644 --- a/sql/2025/sustainability/ssg_bytes_per_type.sql +++ b/sql/2025/sustainability/ssg_bytes_per_type.sql @@ -273,7 +273,7 @@ WITH ssg_data AS ( SELECT client, ssg, - COUNT(*) AS pages, + COUNT(0) AS pages, -- Median resource weights and emissions APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, diff --git a/sql/2025/sustainability/text_compression.sql b/sql/2025/sustainability/text_compression.sql index 345ef8d6c74..85ef35b8b9e 100644 --- a/sql/2025/sustainability/text_compression.sql +++ b/sql/2025/sustainability/text_compression.sql @@ -33,10 +33,10 @@ compression_data AS ( WHEN resp_content_encoding IS NULL THEN 'no text compression' ELSE 'other' END AS compression_type, - COUNT(*) AS num_requests, - SUM(COUNT(*)) OVER (PARTITION BY client) AS total, + COUNT(0) AS num_requests, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total, ROUND( - COUNT(*) / SUM(COUNT(*)) OVER (PARTITION BY client) * 100, 2 + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) * 100, 2 ) AS pct FROM request_data From df2d968eac161aa8820a216f267613544e9a113c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Tue, 29 Jul 2025 16:18:34 +0100 Subject: [PATCH 11/16] fix linter issues --- .../ecommerce_bytes_per_type.sql | 343 +++++------------- .../global_emissions_per_page.sql | 136 +++---- .../sustainability/ssg_bytes_per_type.sql | 336 +++++------------ sql/2025/sustainability/stylesheet_count.sql | 110 +----- .../sustainability/unminified_css_bytes.sql | 9 +- 5 files changed, 249 insertions(+), 685 deletions(-) diff --git 
a/sql/2025/sustainability/ecommerce_bytes_per_type.sql b/sql/2025/sustainability/ecommerce_bytes_per_type.sql index 85bb8115307..27fff6087d1 100644 --- a/sql/2025/sustainability/ecommerce_bytes_per_type.sql +++ b/sql/2025/sustainability/ecommerce_bytes_per_type.sql @@ -19,235 +19,99 @@ WITH ecommerce_data AS ( CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, -- Operational emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * - grid_intensity AS op_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * - grid_intensity AS op_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * - grid_intensity AS op_emissions_devices, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices, -- Embodied emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * - grid_intensity AS em_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * - grid_intensity AS em_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * - grid_intensity AS em_emissions_devices, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * 
embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, -- Total emissions (operational + embodied) ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity ) AS total_operational_emissions, ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 
1024 / 1024) * embodied_emissions_user_devices * grid_intensity ) AS total_embodied_emissions, ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity ) AS total_emissions, -- Proportions of each resource type relative to total bytes - CAST( - JSON_VALUE(summary, '$.bytesHtml') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS html_proportion, - CAST( - JSON_VALUE(summary, '$.bytesJS') AS INT64 - ) / 
CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST( - JSON_VALUE(summary, '$.bytesCss') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS css_proportion, - CAST( - JSON_VALUE(summary, '$.bytesImg') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS img_proportion, - CAST( - JSON_VALUE(summary, '$.bytesFont') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS font_proportion, + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, -- Resource-specific emissions calculations - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - 
operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_js_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS 
total_font_emissions, + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + 
operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, -- Resource-specific size in KB CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, @@ -255,6 +119,7 @@ WITH ecommerce_data AS ( CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + FROM `httparchive.crawl.pages`, UNNEST(technologies) AS tech @@ -265,9 +130,7 @@ WITH ecommerce_data AS ( SELECT 1 FROM UNNEST(tech.categories) AS category WHERE category = 'Ecommerce' AND - tech.technology NOT IN ( - 'Cart Functionality', 'Google Analytics Enhanced eCommerce' - ) + tech.technology NOT IN ('Cart Functionality', 'Google Analytics Enhanced eCommerce') ) ) @@ -277,38 +140,22 @@ SELECT COUNT(0) AS pages, -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES( - total_operational_emissions, 1000 - ) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES( - total_embodied_emissions, 1000 - ) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES( - total_emissions, 1000 - ) [OFFSET(500)] AS median_total_emissions, + APPROX_QUANTILES(total_kb, 
1000)[OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES( - total_html_emissions, 1000 - ) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES( - total_js_emissions, 1000 - ) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES( - total_css_emissions, 1000 - ) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES( - total_img_emissions, 1000 - ) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES( - total_font_emissions, 1000 - ) [OFFSET(500)] AS median_total_font_emissions + APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions FROM ecommerce_data GROUP BY @@ -316,5 +163,5 @@ GROUP BY ecommerce 
ORDER BY pages DESC, - ecommerce ASC, - client ASC; + ecommerce, + client; diff --git a/sql/2025/sustainability/global_emissions_per_page.sql b/sql/2025/sustainability/global_emissions_per_page.sql index 3403fb0c233..f397e4c9bce 100644 --- a/sql/2025/sustainability/global_emissions_per_page.sql +++ b/sql/2025/sustainability/global_emissions_per_page.sql @@ -1,123 +1,79 @@ #standardSQL +# The distribution of page weight by resource type and client, with updated SWDM v4 methodology including both operational and embodied emissions -- Energy consumption factors from SWDM v4 (in kWh/GB) --- Operational + Embodied -DECLARE ENERGY_PER_GB_DATACENTER NUMERIC DEFAULT CAST(0.055 + 0.012 AS NUMERIC); --- Operational + Embodied -DECLARE ENERGY_PER_GB_NETWORK NUMERIC DEFAULT CAST(0.059 + 0.013 AS NUMERIC); --- Operational + Embodied -DECLARE ENERGY_PER_GB_DEVICE NUMERIC DEFAULT CAST(0.080 + 0.081 AS NUMERIC); +DECLARE energy_per_GB_datacenter NUMERIC DEFAULT CAST(0.055 + 0.012 AS NUMERIC); -- Operational + Embodied +DECLARE energy_per_GB_network NUMERIC DEFAULT CAST(0.059 + 0.013 AS NUMERIC); -- Operational + Embodied +DECLARE energy_per_GB_device NUMERIC DEFAULT CAST(0.080 + 0.081 AS NUMERIC); -- Operational + Embodied -- Total energy consumption per GB, calculated by summing the above factors --- Sum of all operational and embodied energies -DECLARE KW_PER_GB NUMERIC DEFAULT CAST( - ENERGY_PER_GB_DATACENTER + - ENERGY_PER_GB_NETWORK + - ENERGY_PER_GB_DEVICE AS NUMERIC -); +DECLARE kw_per_GB NUMERIC DEFAULT CAST(energy_per_GB_datacenter + energy_per_GB_network + energy_per_GB_device AS NUMERIC); -- Sum of all operational and embodied energies -- Global average carbon intensity of electricity generation (gCO2/kWh) -DECLARE GLOBAL_GRID_INTENSITY NUMERIC DEFAULT 494; +DECLARE global_grid_intensity NUMERIC DEFAULT 494; -- Function to calculate emissions in gCO2 -CREATE TEMP FUNCTION CALCULATE_EMISSIONS( +CREATE TEMP FUNCTION calculate_emissions( bytes FLOAT64, kw_per_GB FLOAT64, 
grid_intensity FLOAT64 ) RETURNS FLOAT64 AS ( - (BYTES / 1024 / 1024 / 1024) * -- Convert bytes to GB - (KW_PER_GB) * - GRID_INTENSITY + (bytes / 1024 / 1024 / 1024) * -- Convert bytes to GB + (kw_per_GB) * + grid_intensity ); -WITH PAGE_DATA AS ( +WITH page_data AS ( SELECT - CLIENT, - CAST(JSON_VALUE(SUMMARY, '$.bytesTotal') AS INT64) AS BYTESTOTAL, - CAST(JSON_VALUE(SUMMARY, '$.bytesHtml') AS INT64) AS BYTESHTML, - CAST(JSON_VALUE(SUMMARY, '$.bytesJS') AS INT64) AS BYTESJS, - CAST( - COALESCE( - JSON_VALUE(SUMMARY, '$.bytesCss'), - JSON_VALUE(SUMMARY, '$.bytesStyle') - ) AS INT64 - ) AS BYTESCSS, - CAST(JSON_VALUE(SUMMARY, '$.bytesImg') AS INT64) AS BYTESIMG, - CAST(JSON_VALUE(SUMMARY, '$.bytesOther') AS INT64) AS BYTESOTHER, - CAST(JSON_VALUE(SUMMARY, '$.bytesHtmlDoc') AS INT64) AS BYTESHTMLDOC, - CAST(JSON_VALUE(SUMMARY, '$.bytesFont') AS INT64) AS BYTESFONT + client, + CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytesTotal, + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytesHtml, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytesJS, + CAST(COALESCE(JSON_VALUE(summary, '$.bytesCss'), JSON_VALUE(summary, '$.bytesStyle')) AS INT64) AS bytesCSS, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytesImg, + CAST(JSON_VALUE(summary, '$.bytesOther') AS INT64) AS bytesOther, + CAST(JSON_VALUE(summary, '$.bytesHtmlDoc') AS INT64) AS bytesHtmlDoc, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytesFont FROM `httparchive.crawl.pages` WHERE - DATE = '2025-06-01' AND IS_ROOT_PAGE + date = '2025-06-01' AND is_root_page ) SELECT - PERCENTILE, - CLIENT, + percentile, + client, + -- For each resource type, calculate the size in KB and the associated emissions -- Total resources - APPROX_QUANTILES( - BYTESTOTAL / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS TOTAL_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESTOTAL, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS TOTAL_EMISSIONS, + 
APPROX_QUANTILES(bytesTotal / 1024, 1000)[OFFSET(percentile * 10)] AS total_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesTotal, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS total_emissions, -- HTML resources - APPROX_QUANTILES( - BYTESHTML / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESHTML, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_EMISSIONS, + APPROX_QUANTILES(bytesHtml / 1024, 1000)[OFFSET(percentile * 10)] AS html_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtml, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_emissions, -- JavaScript resources - APPROX_QUANTILES( - BYTESJS / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS JS_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESJS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS JS_EMISSIONS, + APPROX_QUANTILES(bytesJS / 1024, 1000)[OFFSET(percentile * 10)] AS js_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesJS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS js_emissions, -- CSS resources - APPROX_QUANTILES( - BYTESCSS / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS CSS_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESCSS, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS CSS_EMISSIONS, + APPROX_QUANTILES(bytesCSS / 1024, 1000)[OFFSET(percentile * 10)] AS css_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesCSS, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS css_emissions, -- Image resources - APPROX_QUANTILES( - BYTESIMG / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS IMG_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESIMG, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS IMG_EMISSIONS, + APPROX_QUANTILES(bytesImg / 1024, 1000)[OFFSET(percentile * 10)] AS img_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesImg, 
kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS img_emissions, -- Other resources - APPROX_QUANTILES( - BYTESOTHER / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS OTHER_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESOTHER, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS OTHER_EMISSIONS, + APPROX_QUANTILES(bytesOther / 1024, 1000)[OFFSET(percentile * 10)] AS other_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesOther, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS other_emissions, -- HTML document - APPROX_QUANTILES( - BYTESHTMLDOC / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESHTMLDOC, KW_PER_GB, GLOBAL_GRID_INTENSITY), - 1000 - ) [OFFSET(PERCENTILE * 10)] AS HTML_DOC_EMISSIONS, + APPROX_QUANTILES(bytesHtmlDoc / 1024, 1000)[OFFSET(percentile * 10)] AS html_doc_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesHtmlDoc, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS html_doc_emissions, -- Font resources - APPROX_QUANTILES( - BYTESFONT / 1024, 1000 - ) [OFFSET(PERCENTILE * 10)] AS FONT_KBYTES, - APPROX_QUANTILES( - CALCULATE_EMISSIONS(BYTESFONT, KW_PER_GB, GLOBAL_GRID_INTENSITY), 1000 - ) [OFFSET(PERCENTILE * 10)] AS FONT_EMISSIONS + APPROX_QUANTILES(bytesFont / 1024, 1000)[OFFSET(percentile * 10)] AS font_kbytes, + APPROX_QUANTILES(calculate_emissions(bytesFont, kw_per_GB, global_grid_intensity), 1000)[OFFSET(percentile * 10)] AS font_emissions FROM - PAGE_DATA, - UNNEST([10, 25, 50, 75, 90, 100]) AS PERCENTILE + page_data, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY - PERCENTILE, - CLIENT + percentile, + client ORDER BY - CLIENT, - PERCENTILE + client, + percentile diff --git a/sql/2025/sustainability/ssg_bytes_per_type.sql b/sql/2025/sustainability/ssg_bytes_per_type.sql index 66e20262999..02eec5beeee 100644 --- a/sql/2025/sustainability/ssg_bytes_per_type.sql +++ 
b/sql/2025/sustainability/ssg_bytes_per_type.sql @@ -20,235 +20,99 @@ WITH ssg_data AS ( CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, -- Operational emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * - grid_intensity AS op_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * - grid_intensity AS op_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * - grid_intensity AS op_emissions_devices, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices, -- Embodied emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * - grid_intensity AS em_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * - grid_intensity AS em_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * - grid_intensity AS em_emissions_devices, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, + (CAST(JSON_VALUE(summary, 
'$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, -- Total emissions (operational + embodied) ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity ) AS total_operational_emissions, ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity ) AS total_embodied_emissions, ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - 
CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity ) AS total_emissions, -- Proportions of each resource type relative to total bytes - CAST( - JSON_VALUE(summary, '$.bytesHtml') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS html_proportion, - CAST( - JSON_VALUE(summary, '$.bytesJS') AS INT64 - ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST( - JSON_VALUE(summary, '$.bytesCss') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS css_proportion, - CAST( - JSON_VALUE(summary, '$.bytesImg') AS INT64 
- ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS img_proportion, - CAST( - JSON_VALUE(summary, '$.bytesFont') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS font_proportion, + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, -- Resource-specific emissions calculations - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_html_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - 
)) AS total_js_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_css_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_img_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) - )) AS total_font_emissions, + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * 
grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_html_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_js_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_css_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_img_emissions, + + 
(SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity + ) + )) AS total_font_emissions, -- Resource-specific size in KB CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, @@ -256,6 +120,7 @@ WITH ssg_data AS ( CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / 1024 AS css_kb, CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / 1024 AS img_kb, CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / 1024 AS font_kb + FROM `httparchive.crawl.pages`, UNNEST(technologies) AS tech @@ -276,38 +141,23 @@ SELECT COUNT(0) AS pages, -- Median resource weights and emissions - APPROX_QUANTILES(total_kb, 1000) [OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES( - total_operational_emissions, 1000 - ) [OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES( - total_embodied_emissions, 1000 - ) [OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES( - total_emissions, 1000 - ) [OFFSET(500)] AS median_total_emissions, + APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, + APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, -- Resource-specific medians - APPROX_QUANTILES(html_kb, 1000) [OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES( - total_html_emissions, 1000 - ) [OFFSET(500)] AS median_total_html_emissions, - APPROX_QUANTILES(js_kb, 1000) [OFFSET(500)] AS 
median_js_kb, - APPROX_QUANTILES( - total_js_emissions, 1000 - ) [OFFSET(500)] AS median_total_js_emissions, - APPROX_QUANTILES(css_kb, 1000) [OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES( - total_css_emissions, 1000 - ) [OFFSET(500)] AS median_total_css_emissions, - APPROX_QUANTILES(img_kb, 1000) [OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES( - total_img_emissions, 1000 - ) [OFFSET(500)] AS median_total_img_emissions, - APPROX_QUANTILES(font_kb, 1000) [OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES( - total_font_emissions, 1000 - ) [OFFSET(500)] AS median_total_font_emissions + APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, + APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, + APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, + APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, + APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, + APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, + APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions + FROM ssg_data GROUP BY diff --git a/sql/2025/sustainability/stylesheet_count.sql b/sql/2025/sustainability/stylesheet_count.sql index 4d9ad7c9595..cadb44523e2 100644 --- a/sql/2025/sustainability/stylesheet_count.sql +++ b/sql/2025/sustainability/stylesheet_count.sql @@ -4,92 +4,25 @@ WITH stylesheet_data AS ( SELECT client, page, - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' - ), - '$.stylesheets' - ) AS INT64 - ) AS external_stylesheets, - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' - ), - 
'$.inlineStyles' - ) AS INT64 - ) AS inline_stylesheets, + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) AS external_stylesheets, + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) AS inline_stylesheets, SAFE_DIVIDE( - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.inlineStyles' - ) AS INT64 - ), - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.stylesheets' - ) AS INT64 - ) + - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.inlineStyles' - ) AS INT64 - ) + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64), + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) + + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) ) AS pct_inline_stylesheets, SAFE_DIVIDE( - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.stylesheets' - ) AS INT64 - ), - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.stylesheets' - ) AS INT64 - ) + - CAST( - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), - '$.document' - ), - '$.inlineStyles' - ) AS INT64 - ) + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64), + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) + + 
CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) ) AS pct_external_stylesheets FROM `httparchive.crawl.pages` WHERE - date = '2025-06-01' + date = '2024-06-01' AND is_root_page = TRUE AND - JSON_EXTRACT_SCALAR( - JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document' - ), - '$.stylesheets' - ) IS NOT NULL + JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') IS NOT NULL ) SELECT @@ -97,25 +30,10 @@ SELECT COUNT(DISTINCT page) AS pages_analyzed, SUM(external_stylesheets) AS external_stylesheets, SUM(inline_stylesheets) AS inline_stylesheets, - SAFE_DIVIDE( - SUM(inline_stylesheets), SUM(inline_stylesheets + external_stylesheets) - ) AS pct_inline_stylesheets, - SAFE_DIVIDE( - SUM(external_stylesheets), - SUM(inline_stylesheets + external_stylesheets) - ) AS pct_external_stylesheets, - APPROX_QUANTILES( - SAFE_DIVIDE( - inline_stylesheets, inline_stylesheets + external_stylesheets - ), - 1000 - ) [OFFSET(500)] AS median_inline_stylesheets, - APPROX_QUANTILES( - SAFE_DIVIDE( - external_stylesheets, inline_stylesheets + external_stylesheets - ), - 1000 - ) [OFFSET(500)] AS median_external_stylesheets + SAFE_DIVIDE(SUM(inline_stylesheets), SUM(inline_stylesheets + external_stylesheets)) AS pct_inline_stylesheets, + SAFE_DIVIDE(SUM(external_stylesheets), SUM(inline_stylesheets + external_stylesheets)) AS pct_external_stylesheets, + APPROX_QUANTILES(SAFE_DIVIDE(inline_stylesheets, inline_stylesheets + external_stylesheets), 1000)[OFFSET(500)] AS median_inline_stylesheets, + APPROX_QUANTILES(SAFE_DIVIDE(external_stylesheets, inline_stylesheets + external_stylesheets), 1000)[OFFSET(500)] AS median_external_stylesheets FROM stylesheet_data GROUP BY diff --git a/sql/2025/sustainability/unminified_css_bytes.sql b/sql/2025/sustainability/unminified_css_bytes.sql index c08dd4f7a4e..334fdabd07f 100644 --- 
a/sql/2025/sustainability/unminified_css_bytes.sql +++ b/sql/2025/sustainability/unminified_css_bytes.sql @@ -4,14 +4,7 @@ SELECT client, percentile, - APPROX_QUANTILES( - CAST( - JSON_VALUE( - lighthouse, '$.audits.minify-css.details.overallSavingsBytes' - ) AS INT64 - ) / 1024, - 1000 - ) [OFFSET(percentile * 10)] AS css_kilobytes + APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse, '$.audits.unminified-css.details.overallSavingsBytes') AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS css_kilobytes FROM `httparchive.crawl.pages`, UNNEST([10, 25, 50, 75, 90, 100]) AS percentile From 713336cc736fe58e386ac80ea31c74570a9b2604 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Wed, 30 Jul 2025 10:38:27 +0100 Subject: [PATCH 12/16] Remove limit --- sql/2025/sustainability/favicons.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/2025/sustainability/favicons.sql b/sql/2025/sustainability/favicons.sql index 9311db0281a..13e965f642f 100644 --- a/sql/2025/sustainability/favicons.sql +++ b/sql/2025/sustainability/favicons.sql @@ -69,5 +69,3 @@ FROM favicons ORDER BY pct DESC -LIMIT - 1000; From 50e8b6700e63742442c987434bc558b8705261f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Wed, 30 Jul 2025 13:31:56 +0100 Subject: [PATCH 13/16] update styles and fix cms_bytes_per_type --- .../sustainability/cache_header_usage.sql | 89 ++--- .../sustainability/cms_bytes_per_type.sql | 323 +++++------------- 2 files changed, 107 insertions(+), 305 deletions(-) diff --git a/sql/2025/sustainability/cache_header_usage.sql b/sql/2025/sustainability/cache_header_usage.sql index 3d3b414711d..1c492dfd0ba 100644 --- a/sql/2025/sustainability/cache_header_usage.sql +++ b/sql/2025/sustainability/cache_header_usage.sql @@ -9,84 +9,43 @@ SELECT COUNTIF(uses_max_age) AS total_using_max_age, COUNTIF(uses_expires) AS total_using_expires, COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires, - COUNTIF( - uses_cache_control AND 
uses_expires - ) AS total_using_both_cc_and_expires, - COUNTIF( - NOT uses_cache_control AND NOT uses_expires - ) AS total_using_neither_cc_and_expires, - COUNTIF( - uses_cache_control AND NOT uses_expires - ) AS total_using_only_cache_control, - COUNTIF( - NOT uses_cache_control AND uses_expires - ) AS total_using_only_expires, + COUNTIF(uses_cache_control AND uses_expires) AS total_using_both_cc_and_expires, + COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither_cc_and_expires, + COUNTIF(uses_cache_control AND NOT uses_expires) AS total_using_only_cache_control, + COUNTIF(NOT uses_cache_control AND uses_expires) AS total_using_only_expires, COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control, COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age, COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires, - COUNTIF( - uses_max_age AND uses_expires - ) / COUNT(0) AS pct_using_max_age_and_expires, - COUNTIF( - uses_cache_control AND uses_expires - ) / COUNT(0) AS pct_using_both_cc_and_expires, - COUNTIF( - NOT uses_cache_control AND NOT uses_expires - ) / COUNT(0) AS pct_using_neither_cc_nor_expires, - COUNTIF( - uses_cache_control AND NOT uses_expires - ) / COUNT(0) AS pct_using_only_cache_control, - COUNTIF( - NOT uses_cache_control AND uses_expires - ) / COUNT(0) AS pct_using_only_expires + COUNTIF(uses_max_age AND uses_expires) / COUNT(0) AS pct_using_max_age_and_expires, + COUNTIF(uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_both_cc_and_expires, + COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_neither_cc_nor_expires, + COUNTIF(uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_only_cache_control, + COUNTIF(NOT uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_only_expires FROM ( SELECT client, + url, + LOGICAL_OR(header.name = 'expires' AND header.value IS NOT NULL AND TRIM(header.value) != '') AS uses_expires, + LOGICAL_OR(header.name = 'cache-control' AND 
header.value IS NOT NULL AND TRIM(header.value) != '') AS uses_cache_control, + LOGICAL_OR(header.name = 'cache-control' AND REGEXP_CONTAINS(header.value, r'(?i)max-age\s*=\s*[0-9]+')) AS uses_max_age, - JSON_EXTRACT_SCALAR( - summary, '$.resp_expires' - ) IS NOT NULL AND TRIM( - JSON_EXTRACT_SCALAR(summary, '$.resp_expires') - ) != '' AS uses_expires, - JSON_EXTRACT_SCALAR( - summary, '$.resp_cache_control' - ) IS NOT NULL AND TRIM( - JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') - ) != '' AS uses_cache_control, - REGEXP_CONTAINS( - JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control'), - r'(?i)max-age\s*=\s*[0-9]+' - ) AS uses_max_age, + LOGICAL_OR(header.name = 'etag' AND (header.value IS NULL OR TRIM(header.value) = '')) AS uses_no_etag, + LOGICAL_OR(header.name = 'etag' AND header.value IS NOT NULL AND TRIM(header.value) != '') AS uses_etag, + LOGICAL_OR(header.name = 'last-modified' AND header.value IS NOT NULL AND TRIM(header.value) != '') AS uses_last_modified, - JSON_EXTRACT_SCALAR( - summary, '$.resp_etag' - ) IS NULL OR TRIM( - JSON_EXTRACT_SCALAR(summary, '$.resp_etag') - ) = '' AS uses_no_etag, - JSON_EXTRACT_SCALAR( - summary, '$.resp_etag' - ) IS NOT NULL AND TRIM( - JSON_EXTRACT_SCALAR(summary, '$.resp_etag') - ) != '' AS uses_etag, - JSON_EXTRACT_SCALAR( - summary, '$.resp_last_modified' - ) IS NOT NULL AND TRIM( - JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified') - ) != '' AS uses_last_modified, - - REGEXP_CONTAINS( - TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^W/".*"' - ) AS uses_weak_etag, - REGEXP_CONTAINS( - TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^".*"' - ) AS uses_strong_etag + LOGICAL_OR(header.name = 'etag' AND REGEXP_CONTAINS(TRIM(header.value), '^W/".*"')) AS uses_weak_etag, + LOGICAL_OR(header.name = 'etag' AND REGEXP_CONTAINS(TRIM(header.value), '^".*"')) AS uses_strong_etag FROM - `httparchive.crawl.requests` + `httparchive.crawl.requests`, + UNNEST(response_headers) AS header WHERE - date = '2025-06-01' 
+ date = '2025-07-01' + GROUP BY + client, + url ) GROUP BY diff --git a/sql/2025/sustainability/cms_bytes_per_type.sql b/sql/2025/sustainability/cms_bytes_per_type.sql index b217474b3d3..5dc9b5a650d 100644 --- a/sql/2025/sustainability/cms_bytes_per_type.sql +++ b/sql/2025/sustainability/cms_bytes_per_type.sql @@ -20,240 +20,99 @@ WITH cms_data AS ( CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 AS total_kb, -- Operational emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * - grid_intensity AS op_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * - grid_intensity AS op_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * - grid_intensity AS op_emissions_devices, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity AS op_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity AS op_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity AS op_emissions_devices, -- Embodied emissions calculations - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * - grid_intensity AS em_emissions_dc, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * - grid_intensity AS em_emissions_networks, - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * - grid_intensity AS em_emissions_devices, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 
1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity AS em_emissions_dc, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity AS em_emissions_networks, + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity AS em_emissions_devices, -- Total emissions (operational + embodied) ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity ) AS total_operational_emissions, ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') 
AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity ) AS total_embodied_emissions, ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * operational_emissions_user_devices * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_data_centers * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_network * grid_intensity + - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * embodied_emissions_user_devices * grid_intensity + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * operational_emissions_user_devices * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_data_centers * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_network * grid_intensity + + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * embodied_emissions_user_devices * grid_intensity ) AS total_emissions, -- Proportions of each resource type relative to total bytes - CAST( - JSON_VALUE(summary, '$.bytesHtml') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS html_proportion, - CAST( - JSON_VALUE(summary, '$.bytesJS') AS 
INT64 - ) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, - CAST( - JSON_VALUE(summary, '$.bytesCss') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS css_proportion, - CAST( - JSON_VALUE(summary, '$.bytesImg') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS img_proportion, - CAST( - JSON_VALUE(summary, '$.bytesFont') AS INT64 - ) / CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) AS font_proportion, + CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS html_proportion, + CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS js_proportion, + CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS css_proportion, + CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS img_proportion, + CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) / CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS font_proportion, -- Resource-specific emissions calculations - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity 
+ + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity ) - ) AS total_html_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) + )) AS total_html_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity ) - ) AS total_js_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) + )) AS total_js_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesCss') AS 
INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity ) - ) AS total_css_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity + - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) + )) AS total_css_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity ) - ) AS total_img_emissions, - - ( - SAFE_DIVIDE( - CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), - CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) - ) * ( - ( - CAST( - JSON_VALUE(summary, '$.bytesTotal') AS INT64 - ) / 1024 / 1024 / 1024 - ) * ( - operational_emissions_data_centers * grid_intensity + - operational_emissions_network * grid_intensity + - operational_emissions_user_devices * grid_intensity 
+ - embodied_emissions_data_centers * grid_intensity + - embodied_emissions_network * grid_intensity + - embodied_emissions_user_devices * grid_intensity - ) + )) AS total_img_emissions, + + (SAFE_DIVIDE(CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64), CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64)) * ( + (CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) / 1024 / 1024 / 1024) * ( + operational_emissions_data_centers * grid_intensity + + operational_emissions_network * grid_intensity + + operational_emissions_user_devices * grid_intensity + + embodied_emissions_data_centers * grid_intensity + + embodied_emissions_network * grid_intensity + + embodied_emissions_user_devices * grid_intensity ) - ) AS total_font_emissions, + )) AS total_font_emissions, -- Resource-specific size in KB CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) / 1024 AS html_kb, @@ -265,7 +124,7 @@ WITH cms_data AS ( `httparchive.crawl.pages`, UNNEST(technologies) AS tech WHERE - date = '2025-06-01' AND + date = '2025-07-01' AND is_root_page = TRUE AND 'CMS' IN UNNEST(tech.categories) ) @@ -276,37 +135,21 @@ SELECT COUNT(0) AS pages, -- Median resource weights and emissions APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb, - APPROX_QUANTILES( - total_operational_emissions, 1000 - )[OFFSET(500)] AS median_operational_emissions, - APPROX_QUANTILES( - total_embodied_emissions, 1000 - )[OFFSET(500)] AS median_embodied_emissions, - APPROX_QUANTILES( - total_emissions, 1000 - )[OFFSET(500)] AS median_total_emissions, + APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions, + APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions, + APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions, -- Resource-specific medians APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb, - APPROX_QUANTILES( - total_html_emissions, 1000 - )[OFFSET(500)] AS median_total_html_emissions, + 
APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions, APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb, - APPROX_QUANTILES( - total_js_emissions, 1000 - )[OFFSET(500)] AS median_total_js_emissions, + APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions, APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb, - APPROX_QUANTILES( - total_css_emissions, 1000 - )[OFFSET(500)] AS median_total_css_emissions, + APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions, APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb, - APPROX_QUANTILES( - total_img_emissions, 1000 - )[OFFSET(500)] AS median_total_img_emissions, + APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions, APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb, - APPROX_QUANTILES( - total_font_emissions, 1000 - )[OFFSET(500)] AS median_total_font_emissions + APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions FROM cms_data GROUP BY @@ -314,5 +157,5 @@ GROUP BY cms ORDER BY pages DESC, - cms ASC, - client ASC; + cms, + client; From 2a7bdaeed335ccbeaa2de27ff0c91b3d88b98360 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Fri, 5 Sep 2025 11:25:53 +0200 Subject: [PATCH 14/16] Fix sql queries --- sql/2025/sustainability/cdn_adoption.sql | 2 +- sql/2025/sustainability/content-visibility.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/2025/sustainability/cdn_adoption.sql b/sql/2025/sustainability/cdn_adoption.sql index 2ae4f1cfe2b..2e3676586e5 100644 --- a/sql/2025/sustainability/cdn_adoption.sql +++ b/sql/2025/sustainability/cdn_adoption.sql @@ -6,7 +6,7 @@ SELECT total, IF(cdn = '', 'No CDN', cdn) AS cdn, COUNT(0) AS freq, - COUNT(0) / total AS pct + ROUND(100 * COUNT(0) / total, 2) AS pct FROM ( SELECT client, diff --git 
a/sql/2025/sustainability/content-visibility.sql b/sql/2025/sustainability/content-visibility.sql index 1e84d3b5ab8..7b724c99389 100644 --- a/sql/2025/sustainability/content-visibility.sql +++ b/sql/2025/sustainability/content-visibility.sql @@ -45,7 +45,7 @@ content_visibility_pages AS ( COUNT(DISTINCT root_page) AS pages_with_content_visibility FROM `httparchive.crawl.parsed_css`, - UNNEST(HASCONTENTVISIBILITY(css)) + UNNEST(HASCONTENTVISIBILITY(TO_JSON_STRING(css))) WHERE date = '2025-06-01' AND is_root_page From ab8cddf6f306b5038898ccfee342e64bf39eb724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Thu, 11 Sep 2025 13:09:25 +0200 Subject: [PATCH 15/16] Fix not working SQL queries --- sql/2025/sustainability/favicons.sql | 148 +++++++++++++++--- .../green_third_party_requests.sql | 45 ++++-- sql/2025/sustainability/green_web_hosting.sql | 17 +- sql/2025/sustainability/responsive_images.sql | 59 ++++--- sql/2025/sustainability/script_count.sql | 22 +-- sql/2025/sustainability/stylesheet_count.sql | 24 +-- sql/2025/sustainability/text_compression.sql | 59 ++----- .../sustainability/unminified_js_bytes.sql | 2 +- .../use_of_prefers_dark_mode_usage.sql | 2 +- 9 files changed, 242 insertions(+), 136 deletions(-) diff --git a/sql/2025/sustainability/favicons.sql b/sql/2025/sustainability/favicons.sql index 13e965f642f..6fe6cdfcffd 100644 --- a/sql/2025/sustainability/favicons.sql +++ b/sql/2025/sustainability/favicons.sql @@ -4,16 +4,59 @@ CREATE TEMPORARY FUNCTION GETFAVICONIMAGE(payload STRING) RETURNS STRING LANGUAGE js AS ''' var result = 'NO_DATA'; try { - var almanac = JSON.parse(payload); + var parsed = JSON.parse(payload); + + // If wrapped, unwrap _almanac + if (parsed && typeof parsed === 'object' && parsed._almanac && typeof parsed._almanac === 'object') { + parsed = parsed._almanac; + } - if (Array.isArray(almanac) || typeof almanac != 'object') return result; + // Deep search for any array of link-like nodes anywhere in the object + 
function findLinkNodes(obj) { + if (!obj) return []; + var stack = [obj]; + while (stack.length) { + var current = stack.pop(); + if (!current) continue; + if (Array.isArray(current)) { + // If array of objects with rel/href, treat as nodes + if ( + current.length && typeof current[0] === 'object' && current.some(function(it){return it && (it.href || it.rel);}) + ) { + return current; + } + for (var i = 0; i < current.length; i++) stack.push(current[i]); + } else if (typeof current === 'object') { + // Common patterns: {nodes: [...]} wrappers + if (current.nodes && Array.isArray(current.nodes)) { + var n = current.nodes; + if (n.length && typeof n[0] === 'object' && n.some(function(it){return it && (it.href || it.rel);})){return n;} + } + for (var k in current) if (Object.prototype.hasOwnProperty.call(current, k)) stack.push(current[k]); + } + } + return []; + } - if (almanac["link-nodes"] && almanac["link-nodes"].nodes && almanac["link-nodes"].nodes.find) { - var faviconNode = almanac["link-nodes"].nodes.find(n => n.rel && n.rel.split(' ').find(r => r.trim().toLowerCase() == 'icon')); + var nodes = findLinkNodes(parsed); + if (!nodes || !nodes.length) return result; + + if (nodes && nodes.find) { + var faviconNode = nodes.find(function(n) { + if (!n || !('rel' in n)) return false; + var rels = Array.isArray(n.rel) ? 
n.rel : String(n.rel).split(' '); + for (var j = 0; j < rels.length; j++) { + var r = String(rels[j]).trim().toLowerCase(); + if (r === 'icon' || r === 'shortcut icon' || r === 'apple-touch-icon' || r === 'apple-touch-icon-precomposed') { + return true; + } + } + return false; + }); if (faviconNode) { if (faviconNode.href) { - var temp = faviconNode.href; + var temp = String(faviconNode.href); if (temp.includes('?')) { temp = temp.substring(0, temp.indexOf('?')); @@ -43,29 +86,98 @@ try { return result; '''; -# Main query to analyze favicon image extensions with sampling -WITH favicons AS ( +# Main query to analyze favicon image extensions using requests heuristics +WITH pages AS ( SELECT client, - GETFAVICONIMAGE( - JSON_EXTRACT_SCALAR(payload, '$._almanac') - ) AS image_type_extension, - COUNT(0) AS freq, - SUM(COUNT(0)) OVER (PARTITION BY client) AS total, - COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS percentage_of_total + page FROM `httparchive.crawl.pages` WHERE - date = '2025-06-01' + date = '2025-06-01' AND + is_root_page +), + +reqs AS ( + SELECT + client, + page, + url, + response_headers + FROM + `httparchive.crawl.requests` + WHERE + date = '2025-06-01' AND + is_root_page +), + +candidates AS ( + SELECT + r.client, + r.page, + r.url, + LOWER( + REGEXP_EXTRACT( + REGEXP_REPLACE(SPLIT(r.url, '?')[SAFE_OFFSET(0)], r'/+$', ''), + r'\.([A-Za-z0-9]+)$' + ) + ) AS url_ext, + ( + SELECT LOWER(value) + FROM UNNEST(r.response_headers) + WHERE LOWER(name) = 'content-type' + LIMIT 1 + ) AS content_type + FROM + reqs r + JOIN + pages p + USING (client, page) + WHERE + REGEXP_CONTAINS(LOWER(r.url), r'favicon|apple-touch-icon|android-chrome|mstile|safari-pinned-tab') +), + +resolved AS ( + SELECT + client, + page, + url, + COALESCE( + NULLIF(url_ext, ''), + CASE + WHEN content_type LIKE 'image/svg%' THEN 'svg' + WHEN content_type LIKE 'image/png%' THEN 'png' + WHEN content_type LIKE 'image/webp%' THEN 'webp' + WHEN content_type LIKE 'image/jpeg%' OR 
content_type LIKE 'image/jpg%' THEN 'jpg' + WHEN content_type LIKE 'image/x-icon%' OR content_type LIKE 'image/vnd.microsoft.icon%' THEN 'ico' + ELSE 'unknown' + END + ) AS image_type_extension + FROM + candidates +), + +rollup_data AS ( + SELECT + client, + image_type_extension, + COUNT(DISTINCT page) AS pages + FROM + resolved GROUP BY client, image_type_extension ) SELECT - *, - percentage_of_total AS pct + client, + image_type_extension, + pages AS count, + SUM(pages) OVER (PARTITION BY client) AS total, + ROUND(100 * SAFE_DIVIDE(pages, SUM(pages) OVER (PARTITION BY client)), 2) AS pct FROM - favicons + rollup_data ORDER BY - pct DESC + client ASC, + count DESC, + image_type_extension ASC diff --git a/sql/2025/sustainability/green_third_party_requests.sql b/sql/2025/sustainability/green_third_party_requests.sql index ebb236e57e6..340d8065728 100644 --- a/sql/2025/sustainability/green_third_party_requests.sql +++ b/sql/2025/sustainability/green_third_party_requests.sql @@ -1,11 +1,23 @@ #standardSQL # Median third-parties & green third-party requests per websites by rank -WITH requests AS ( +WITH third_party_date AS ( + SELECT MAX(date) AS date + FROM `httparchive.almanac.third_parties` + WHERE date <= '2025-06-01' +), + +gwf_date AS ( + SELECT MAX(date) AS date + FROM `httparchive.almanac.green_web_foundation` + WHERE date <= '2025-06-01' +), + +requests AS ( SELECT client, url, - CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page + page FROM `httparchive.crawl.requests` WHERE @@ -17,20 +29,21 @@ green AS ( TRUE AS is_green, NET.HOST(url) AS host FROM - `httparchive.almanac.green_web_foundation` - WHERE - date = '2025-09-01' + `httparchive.almanac.green_web_foundation` g + JOIN gwf_date d + ON g.date = d.date ), pages AS ( SELECT client, rank, - CAST(JSON_VALUE(summary, '$.pageid') AS INT64) AS page + page FROM `httparchive.crawl.pages` WHERE - date = '2025-06-01' + date = '2025-06-01' AND + is_root_page ), third_party AS ( @@ -41,9 +54,9 @@ third_party AS 
( `httparchive.almanac.third_parties` AS tp INNER JOIN requests AS r - ON NET.HOST(r.url) = NET.HOST(tp.domain) + ON NET.REG_DOMAIN(r.url) = NET.REG_DOMAIN(tp.domain) WHERE - tp.date = '2025-06-01' AND + tp.date = (SELECT date FROM third_party_date) AND tp.category NOT IN ('hosting') GROUP BY tp.domain @@ -57,9 +70,9 @@ green_tp AS ( `httparchive.almanac.third_parties` AS tp INNER JOIN green AS g - ON NET.HOST(g.host) = NET.HOST(tp.domain) + ON NET.REG_DOMAIN(g.host) = NET.REG_DOMAIN(tp.domain) WHERE - tp.date = '2025-06-01' AND + tp.date = (SELECT date FROM third_party_date) AND tp.category NOT IN ('hosting') GROUP BY tp.domain @@ -70,13 +83,13 @@ base AS ( r.client, r.page, p.rank, - COUNT(tp.domain) AS third_parties_per_page + COUNT(DISTINCT tp.domain) AS third_parties_per_page FROM requests AS r LEFT JOIN third_party AS tp ON - NET.HOST(r.url) = NET.HOST(tp.domain) + NET.REG_DOMAIN(r.url) = NET.REG_DOMAIN(tp.domain) INNER JOIN pages AS p ON r.client = p.client AND r.page = p.page @@ -91,13 +104,13 @@ base_green AS ( r.client, r.page, p.rank, - COUNT(gtp.domain) AS green_third_parties_per_page + COUNT(DISTINCT gtp.domain) AS green_third_parties_per_page FROM requests AS r LEFT JOIN green_tp AS gtp ON - NET.HOST(r.url) = NET.HOST(gtp.domain) + NET.REG_DOMAIN(r.url) = NET.REG_DOMAIN(gtp.domain) INNER JOIN pages AS p ON r.client = p.client AND r.page = p.page @@ -126,7 +139,7 @@ SELECT bg.green_third_parties_per_page, b.third_parties_per_page ), 1000 - )[OFFSET(500)] AS pct_green + )[OFFSET(500)] * 100 AS pct_green FROM base AS b, UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping diff --git a/sql/2025/sustainability/green_web_hosting.sql b/sql/2025/sustainability/green_web_hosting.sql index 499b45e0858..d2611cefcbc 100644 --- a/sql/2025/sustainability/green_web_hosting.sql +++ b/sql/2025/sustainability/green_web_hosting.sql @@ -1,14 +1,21 @@ # standardSQL # What percentage of URLs are hosted on a known green web hosting provider? 
-WITH green AS ( +WITH gwf_date AS ( + SELECT MAX(date) AS date + FROM `httparchive.almanac.green_web_foundation` + WHERE date <= '2025-06-01' +), + +green AS ( SELECT TRUE AS is_green, NET.HOST(url) AS host FROM - `httparchive.almanac.green_web_foundation` - WHERE - date = '2025-09-01' + `httparchive.almanac.green_web_foundation` g + JOIN + gwf_date d + ON g.date = d.date ), pages AS ( @@ -34,7 +41,7 @@ SELECT END AS ranking, COUNTIF(is_green) AS total_green, COUNT(0) AS total_sites, - SAFE_DIVIDE(COUNTIF(is_green), COUNT(0)) AS pct_green + ROUND(100 * SAFE_DIVIDE(COUNTIF(is_green), COUNT(0)), 2) AS pct_green FROM ( -- Left join green hosting information SELECT diff --git a/sql/2025/sustainability/responsive_images.sql b/sql/2025/sustainability/responsive_images.sql index a513ef6c6de..19f21a1224b 100644 --- a/sql/2025/sustainability/responsive_images.sql +++ b/sql/2025/sustainability/responsive_images.sql @@ -1,30 +1,17 @@ #standardSQL # percent of sites using images with srcset w/wo sizes, or picture element -CREATE TEMPORARY FUNCTION get_media_info(media_string STRING) -RETURNS STRUCT< - num_srcset_all INT64, - num_srcset_sizes INT64, - num_picture_img INT64 -> LANGUAGE js AS ''' -var result = { - num_srcset_all: 0, - num_srcset_sizes: 0, - num_picture_img: 0 -}; -try { - var media = JSON.parse(media_string); - if (Array.isArray(media) || typeof media != 'object') return result; - result.num_srcset_all = media.num_srcset_all || 0; - result.num_srcset_sizes = media.num_srcset_sizes || 0; - result.num_picture_img = media.num_picture_img || 0; -} catch (e) {} -return result; -'''; WITH page_data AS ( SELECT client, - get_media_info(json_extract_scalar(payload, '$._media')) AS media_info + -- Totals from markup custom metric + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.markup), '$.images.img.srcset_total') AS INT64) AS img_srcset_total, + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.markup), '$.images.source.srcset_total') AS INT64) AS 
source_srcset_total, + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.markup), '$.images.picture.total') AS INT64) AS picture_total, + + -- Sizes totals (may be missing; will be NULL) + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.markup), '$.images.img.sizes_total') AS INT64) AS img_sizes_total, + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.markup), '$.images.source.sizes_total') AS INT64) AS source_sizes_total FROM `httparchive.crawl.pages` WHERE @@ -34,17 +21,24 @@ WITH page_data AS ( SELECT client, round( - safe_divide(countif(media_info.num_srcset_all > 0), count(0)) * 100, 2 + safe_divide( + countif(coalesce(img_srcset_total, 0) + coalesce(source_srcset_total, 0) > 0), + count(0) + ) * 100, + 2 ) AS pages_with_srcset_pct, round( - safe_divide(countif(media_info.num_srcset_sizes > 0), count(0)) * 100, 2 + safe_divide( + countif(coalesce(img_sizes_total, 0) + coalesce(source_sizes_total, 0) > 0), + count(0) + ) * 100, + 2 ) AS pages_with_srcset_sizes_pct, round( safe_divide( ( - countif( - media_info.num_srcset_all > 0 - ) - countif(media_info.num_srcset_sizes > 0) + countif(coalesce(img_srcset_total, 0) + coalesce(source_srcset_total, 0) > 0) - + countif(coalesce(img_sizes_total, 0) + coalesce(source_sizes_total, 0) > 0) ), count(0) ) * 100, @@ -52,19 +46,24 @@ SELECT ) AS pages_with_srcset_wo_sizes_pct, round( safe_divide( - sum(media_info.num_srcset_sizes), sum(media_info.num_srcset_all) + sum(coalesce(img_sizes_total, 0) + coalesce(source_sizes_total, 0)), + sum(coalesce(img_srcset_total, 0) + coalesce(source_srcset_total, 0)) ) * 100, 2 ) AS instances_of_srcset_sizes_pct, round( safe_divide( - (sum(media_info.num_srcset_all) - sum(media_info.num_srcset_sizes)), - sum(media_info.num_srcset_all) + ( + sum(coalesce(img_srcset_total, 0) + coalesce(source_srcset_total, 0)) - + sum(coalesce(img_sizes_total, 0) + coalesce(source_sizes_total, 0)) + ), + sum(coalesce(img_srcset_total, 0) + coalesce(source_srcset_total, 0)) ) * 100, 2 ) 
AS instances_of_srcset_wo_sizes_pct, round( - safe_divide(countif(media_info.num_picture_img > 0), count(0)) * 100, 2 + safe_divide(countif(coalesce(picture_total, 0) > 0), count(0)) * 100, + 2 ) AS pages_with_picture_pct FROM page_data GROUP BY diff --git a/sql/2025/sustainability/script_count.sql b/sql/2025/sustainability/script_count.sql index 2ff06b3afe1..7a7e83b3e56 100644 --- a/sql/2025/sustainability/script_count.sql +++ b/sql/2025/sustainability/script_count.sql @@ -7,7 +7,7 @@ WITH script_data AS ( CAST( JSON_EXTRACT_SCALAR( JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), + TO_JSON_STRING(custom_metrics.javascript), '$.script_tags' ), '$.total' @@ -16,7 +16,7 @@ WITH script_data AS ( CAST( JSON_EXTRACT_SCALAR( JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), + TO_JSON_STRING(custom_metrics.javascript), '$.script_tags' ), '$.inline' @@ -25,7 +25,7 @@ WITH script_data AS ( CAST( JSON_EXTRACT_SCALAR( JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), + TO_JSON_STRING(custom_metrics.javascript), '$.script_tags' ), '$.src' @@ -35,7 +35,7 @@ WITH script_data AS ( CAST( JSON_EXTRACT_SCALAR( JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), + TO_JSON_STRING(custom_metrics.javascript), '$.script_tags' ), '$.inline' @@ -44,7 +44,7 @@ WITH script_data AS ( CAST( JSON_EXTRACT_SCALAR( JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), + TO_JSON_STRING(custom_metrics.javascript), '$.script_tags' ), '$.total' @@ -55,7 +55,7 @@ WITH script_data AS ( CAST( JSON_EXTRACT_SCALAR( JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), + TO_JSON_STRING(custom_metrics.javascript), '$.script_tags' ), '$.src' @@ -64,7 +64,7 @@ WITH script_data AS ( CAST( JSON_EXTRACT_SCALAR( JSON_EXTRACT( - JSON_EXTRACT_SCALAR(payload, '$._javascript'), + TO_JSON_STRING(custom_metrics.javascript), '$.script_tags' ), '$.total' @@ -77,7 +77,7 @@ WITH script_data AS ( date = '2025-06-01' AND JSON_EXTRACT_SCALAR( JSON_EXTRACT( - 
JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.script_tags' + TO_JSON_STRING(custom_metrics.javascript), '$.script_tags' ), '$.total' ) IS NOT NULL @@ -89,10 +89,10 @@ SELECT SUM(total_scripts) AS total_scripts, SUM(inline_scripts) AS inline_scripts, SUM(external_scripts) AS external_scripts, - SAFE_DIVIDE( + ROUND(100 * SAFE_DIVIDE( SUM(external_scripts), SUM(total_scripts) - ) AS pct_external_script, - SAFE_DIVIDE(SUM(inline_scripts), SUM(total_scripts)) AS pct_inline_script, + ), 2) AS pct_external_script, + ROUND(100 * SAFE_DIVIDE(SUM(inline_scripts), SUM(total_scripts)), 2) AS pct_inline_script, APPROX_QUANTILES( SAFE_DIVIDE(external_scripts, total_scripts), 1000 )[OFFSET(500)] AS median_external, diff --git a/sql/2025/sustainability/stylesheet_count.sql b/sql/2025/sustainability/stylesheet_count.sql index cadb44523e2..34ceba4dce8 100644 --- a/sql/2025/sustainability/stylesheet_count.sql +++ b/sql/2025/sustainability/stylesheet_count.sql @@ -4,25 +4,25 @@ WITH stylesheet_data AS ( SELECT client, page, - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) AS external_stylesheets, - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) AS inline_stylesheets, + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.javascript), '$.document.stylesheets') AS INT64) AS external_stylesheets, + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.javascript), '$.document.inlineStyles') AS INT64) AS inline_stylesheets, SAFE_DIVIDE( - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64), - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) + - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) + 
CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.javascript), '$.document.inlineStyles') AS INT64), + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.javascript), '$.document.stylesheets') AS INT64) + + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.javascript), '$.document.inlineStyles') AS INT64) ) AS pct_inline_stylesheets, SAFE_DIVIDE( - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64), - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') AS INT64) + - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.inlineStyles') AS INT64) + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.javascript), '$.document.stylesheets') AS INT64), + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.javascript), '$.document.stylesheets') AS INT64) + + CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.javascript), '$.document.inlineStyles') AS INT64) ) AS pct_external_stylesheets FROM `httparchive.crawl.pages` WHERE - date = '2024-06-01' + date = '2025-06-01' AND is_root_page = TRUE AND - JSON_EXTRACT_SCALAR(JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._javascript'), '$.document'), '$.stylesheets') IS NOT NULL + JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.javascript), '$.document.stylesheets') IS NOT NULL ) SELECT @@ -30,8 +30,8 @@ SELECT COUNT(DISTINCT page) AS pages_analyzed, SUM(external_stylesheets) AS external_stylesheets, SUM(inline_stylesheets) AS inline_stylesheets, - SAFE_DIVIDE(SUM(inline_stylesheets), SUM(inline_stylesheets + external_stylesheets)) AS pct_inline_stylesheets, - SAFE_DIVIDE(SUM(external_stylesheets), SUM(inline_stylesheets + external_stylesheets)) AS pct_external_stylesheets, + ROUND(100 * SAFE_DIVIDE(SUM(inline_stylesheets), SUM(inline_stylesheets + external_stylesheets)), 2) AS pct_inline_stylesheets, + ROUND(100 * 
SAFE_DIVIDE(SUM(external_stylesheets), SUM(inline_stylesheets + external_stylesheets)), 2) AS pct_external_stylesheets, APPROX_QUANTILES(SAFE_DIVIDE(inline_stylesheets, inline_stylesheets + external_stylesheets), 1000)[OFFSET(500)] AS median_inline_stylesheets, APPROX_QUANTILES(SAFE_DIVIDE(external_stylesheets, inline_stylesheets + external_stylesheets), 1000)[OFFSET(500)] AS median_external_stylesheets FROM diff --git a/sql/2025/sustainability/text_compression.sql b/sql/2025/sustainability/text_compression.sql index 85ef35b8b9e..ddf14470e64 100644 --- a/sql/2025/sustainability/text_compression.sql +++ b/sql/2025/sustainability/text_compression.sql @@ -1,57 +1,32 @@ -CREATE TEMP FUNCTION GETCONTENTENCODING(headers STRING) -RETURNS STRING -LANGUAGE js AS """ - try { - const parsedHeaders = JSON.parse(headers); - for (let i = 0; i < parsedHeaders.length; i++) { - if (parsedHeaders[i].name.toLowerCase() === 'content-encoding') { - return parsedHeaders[i].value.toLowerCase(); - } - } - } catch (e) {} - return null; -"""; - -WITH request_data AS ( +WITH content_encoding AS ( SELECT client, - GETCONTENTENCODING( - JSON_EXTRACT(payload, '$.response.headers') - ) AS resp_content_encoding - FROM - `httparchive.crawl.requests` + LOWER(h.value) AS encoding + FROM `httparchive.crawl.requests`, + UNNEST(response_headers) AS h WHERE date = '2025-06-01' + AND is_root_page + AND is_main_document + AND LOWER(h.name) = 'content-encoding' ), -compression_data AS ( +compression_rollup AS ( SELECT client, CASE - WHEN resp_content_encoding = 'gzip' THEN 'Gzip' - WHEN resp_content_encoding = 'br' THEN 'Brotli' - WHEN resp_content_encoding IS NULL THEN 'no text compression' + WHEN encoding = 'gzip' THEN 'Gzip' + WHEN encoding = 'br' THEN 'Brotli' + WHEN encoding IS NULL OR encoding = '' THEN 'no text compression' ELSE 'other' END AS compression_type, COUNT(0) AS num_requests, SUM(COUNT(0)) OVER (PARTITION BY client) AS total, - ROUND( - COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) 
* 100, 2 - ) AS pct - FROM - request_data - GROUP BY - client, - compression_type + ROUND(COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) * 100, 2) AS pct + FROM content_encoding + GROUP BY client, compression_type ) -SELECT - client, - compression_type, - num_requests, - total, - pct -FROM compression_data -ORDER BY - client ASC, - num_requests DESC +SELECT client, compression_type, num_requests, total, pct +FROM compression_rollup +ORDER BY client ASC, num_requests DESC diff --git a/sql/2025/sustainability/unminified_js_bytes.sql b/sql/2025/sustainability/unminified_js_bytes.sql index 90c3269369a..01c3b2b9c96 100644 --- a/sql/2025/sustainability/unminified_js_bytes.sql +++ b/sql/2025/sustainability/unminified_js_bytes.sql @@ -8,7 +8,7 @@ SELECT CAST( JSON_VALUE( lighthouse, - '$.audits.minify-javascript.details.overallSavingsBytes' + '$.audits.unminified-javascript.details.overallSavingsBytes' ) AS INT64 ) / 1024, 1000 diff --git a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql index 775e799e0c3..e3fd0a65e9d 100644 --- a/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql +++ b/sql/2025/sustainability/use_of_prefers_dark_mode_usage.sql @@ -12,7 +12,7 @@ WITH combined_data AS ( SELECT 1 FROM UNNEST(JSON_EXTRACT_ARRAY(css, '$.stylesheet.rules')) AS rule WHERE JSON_EXTRACT_SCALAR(rule, '$.type') = 'media' AND - JSON_EXTRACT_SCALAR(rule, '$.media') = '(prefers-color-scheme:dark)' + LOWER(JSON_EXTRACT_SCALAR(rule, '$.media')) LIKE '%prefers-color-scheme:dark%' ) THEN 1 ELSE 0 From 1d3f24722e4469e7ee8459dbe3f108a68ec71e7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20G=C3=BCneli?= Date: Mon, 13 Oct 2025 14:12:13 +0200 Subject: [PATCH 16/16] fix some queries --- sql/2025/sustainability/favicons.sql | 151 +++--------------- .../green_third_party_requests.sql | 14 +- sql/2025/sustainability/responsive_images.sql | 36 ++--- sql/2025/sustainability/text_compression.sql | 6 +- 4 files 
changed, 46 insertions(+), 161 deletions(-) diff --git a/sql/2025/sustainability/favicons.sql b/sql/2025/sustainability/favicons.sql index 6fe6cdfcffd..1263fcba109 100644 --- a/sql/2025/sustainability/favicons.sql +++ b/sql/2025/sustainability/favicons.sql @@ -1,62 +1,17 @@ #standardSQL # Temporary function to extract favicon image extensions from the JSON payload -CREATE TEMPORARY FUNCTION GETFAVICONIMAGE(payload STRING) +CREATE TEMPORARY FUNCTION getFaviconImage(almanac JSON) RETURNS STRING LANGUAGE js AS ''' var result = 'NO_DATA'; try { - var parsed = JSON.parse(payload); - - // If wrapped, unwrap _almanac - if (parsed && typeof parsed === 'object' && parsed._almanac && typeof parsed._almanac === 'object') { - parsed = parsed._almanac; - } - - // Deep search for any array of link-like nodes anywhere in the object - function findLinkNodes(obj) { - if (!obj) return []; - var stack = [obj]; - while (stack.length) { - var current = stack.pop(); - if (!current) continue; - if (Array.isArray(current)) { - // If array of objects with rel/href, treat as nodes - if ( - current.length && typeof current[0] === 'object' && current.some(function(it){return it && (it.href || it.rel);}) - ) { - return current; - } - for (var i = 0; i < current.length; i++) stack.push(current[i]); - } else if (typeof current === 'object') { - // Common patterns: {nodes: [...]} wrappers - if (current.nodes && Array.isArray(current.nodes)) { - var n = current.nodes; - if (n.length && typeof n[0] === 'object' && n.some(function(it){return it && (it.href || it.rel);})){return n;} - } - for (var k in current) if (Object.prototype.hasOwnProperty.call(current, k)) stack.push(current[k]); - } - } - return []; - } + if (Array.isArray(almanac) || typeof almanac != 'object') return result; - var nodes = findLinkNodes(parsed); - if (!nodes || !nodes.length) return result; - - if (nodes && nodes.find) { - var faviconNode = nodes.find(function(n) { - if (!n || !('rel' in n)) return false; - var rels = 
Array.isArray(n.rel) ? n.rel : String(n.rel).split(' '); - for (var j = 0; j < rels.length; j++) { - var r = String(rels[j]).trim().toLowerCase(); - if (r === 'icon' || r === 'shortcut icon' || r === 'apple-touch-icon' || r === 'apple-touch-icon-precomposed') { - return true; - } - } - return false; - }); + if (almanac["link-nodes"] && almanac["link-nodes"].nodes && almanac["link-nodes"].nodes.find) { + var faviconNode = almanac["link-nodes"].nodes.find(n => n.rel && n.rel.split(' ').find(r => r.trim().toLowerCase() == 'icon')); if (faviconNode) { if (faviconNode.href) { - var temp = String(faviconNode.href); + var temp = faviconNode.href; if (temp.includes('?')) { temp = temp.substring(0, temp.indexOf('?')); @@ -86,98 +41,30 @@ try { return result; '''; -# Main query to analyze favicon image extensions using requests heuristics -WITH pages AS ( +# Main query to analyze favicon image extensions with sampling +WITH favicons AS ( SELECT client, - page + getFaviconImage(custom_metrics.other.almanac) AS image_type_extension, + COUNT(0) AS freq, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS percentage_of_total FROM `httparchive.crawl.pages` WHERE - date = '2025-06-01' AND - is_root_page -), - -reqs AS ( - SELECT - client, - page, - url, - response_headers - FROM - `httparchive.crawl.requests` - WHERE - date = '2025-06-01' AND - is_root_page -), - -candidates AS ( - SELECT - r.client, - r.page, - r.url, - LOWER( - REGEXP_EXTRACT( - REGEXP_REPLACE(SPLIT(r.url, '?')[SAFE_OFFSET(0)], r'/+$', ''), - r'\.([A-Za-z0-9]+)$' - ) - ) AS url_ext, - ( - SELECT LOWER(value) - FROM UNNEST(r.response_headers) - WHERE LOWER(name) = 'content-type' - LIMIT 1 - ) AS content_type - FROM - reqs r - JOIN - pages p - USING (client, page) - WHERE - REGEXP_CONTAINS(LOWER(r.url), r'favicon|apple-touch-icon|android-chrome|mstile|safari-pinned-tab') -), - -resolved AS ( - SELECT - client, - page, - url, - COALESCE( - NULLIF(url_ext, 
''), - CASE - WHEN content_type LIKE 'image/svg%' THEN 'svg' - WHEN content_type LIKE 'image/png%' THEN 'png' - WHEN content_type LIKE 'image/webp%' THEN 'webp' - WHEN content_type LIKE 'image/jpeg%' OR content_type LIKE 'image/jpg%' THEN 'jpg' - WHEN content_type LIKE 'image/x-icon%' OR content_type LIKE 'image/vnd.microsoft.icon%' THEN 'ico' - ELSE 'unknown' - END - ) AS image_type_extension - FROM - candidates -), - -rollup_data AS ( - SELECT - client, - image_type_extension, - COUNT(DISTINCT page) AS pages - FROM - resolved + date = '2025-07-01' GROUP BY client, image_type_extension ) SELECT - client, - image_type_extension, - pages AS count, - SUM(pages) OVER (PARTITION BY client) AS total, - ROUND(100 * SAFE_DIVIDE(pages, SUM(pages) OVER (PARTITION BY client)), 2) AS pct + *, + percentage_of_total AS pct FROM - rollup_data + favicons ORDER BY - client ASC, - count DESC, - image_type_extension ASC + pct DESC +LIMIT + 1000; + \ No newline at end of file diff --git a/sql/2025/sustainability/green_third_party_requests.sql b/sql/2025/sustainability/green_third_party_requests.sql index 340d8065728..c5f6c4c3104 100644 --- a/sql/2025/sustainability/green_third_party_requests.sql +++ b/sql/2025/sustainability/green_third_party_requests.sql @@ -54,7 +54,7 @@ third_party AS ( `httparchive.almanac.third_parties` AS tp INNER JOIN requests AS r - ON NET.REG_DOMAIN(r.url) = NET.REG_DOMAIN(tp.domain) + ON NET.HOST(r.url) = NET.HOST(tp.domain) WHERE tp.date = (SELECT date FROM third_party_date) AND tp.category NOT IN ('hosting') @@ -70,7 +70,7 @@ green_tp AS ( `httparchive.almanac.third_parties` AS tp INNER JOIN green AS g - ON NET.REG_DOMAIN(g.host) = NET.REG_DOMAIN(tp.domain) + ON NET.HOST(g.host) = NET.HOST(tp.domain) WHERE tp.date = (SELECT date FROM third_party_date) AND tp.category NOT IN ('hosting') @@ -83,13 +83,13 @@ base AS ( r.client, r.page, p.rank, - COUNT(DISTINCT tp.domain) AS third_parties_per_page + COUNT(tp.domain) AS third_parties_per_page FROM requests AS 
r LEFT JOIN third_party AS tp ON - NET.REG_DOMAIN(r.url) = NET.REG_DOMAIN(tp.domain) + NET.HOST(r.url) = NET.HOST(tp.domain) INNER JOIN pages AS p ON r.client = p.client AND r.page = p.page @@ -104,13 +104,13 @@ base_green AS ( r.client, r.page, p.rank, - COUNT(DISTINCT gtp.domain) AS green_third_parties_per_page + COUNT(gtp.domain) AS green_third_parties_per_page FROM requests AS r LEFT JOIN green_tp AS gtp ON - NET.REG_DOMAIN(r.url) = NET.REG_DOMAIN(gtp.domain) + NET.HOST(r.url) = NET.HOST(gtp.domain) INNER JOIN pages AS p ON r.client = p.client AND r.page = p.page @@ -139,7 +139,7 @@ SELECT bg.green_third_parties_per_page, b.third_parties_per_page ), 1000 - )[OFFSET(500)] * 100 AS pct_green + )[OFFSET(500)] AS pct_green FROM base AS b, UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping diff --git a/sql/2025/sustainability/responsive_images.sql b/sql/2025/sustainability/responsive_images.sql index 19f21a1224b..63b3ee93d86 100644 --- a/sql/2025/sustainability/responsive_images.sql +++ b/sql/2025/sustainability/responsive_images.sql @@ -4,32 +4,30 @@ WITH page_data AS ( SELECT client, - -- Totals from markup custom metric - CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.markup), '$.images.img.srcset_total') AS INT64) AS img_srcset_total, - CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.markup), '$.images.source.srcset_total') AS INT64) AS source_srcset_total, - CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.markup), '$.images.picture.total') AS INT64) AS picture_total, - - -- Sizes totals (may be missing; will be NULL) - CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.markup), '$.images.img.sizes_total') AS INT64) AS img_sizes_total, - CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(custom_metrics.markup), '$.images.source.sizes_total') AS INT64) AS source_sizes_total + -- Count occurrences in HTML of the main document + ARRAY_LENGTH(REGEXP_EXTRACT_ALL(COALESCE(response_body, ''), 
r'(?is)<(?:img|source)[^>]*srcset\s*=')) AS num_srcset_all, + ARRAY_LENGTH(REGEXP_EXTRACT_ALL(COALESCE(response_body, ''), r'(?is)<(?:img|source)[^>]*sizes\s*=')) AS num_srcset_sizes, + -- Presence of + IF(REGEXP_CONTAINS(COALESCE(response_body, ''), r'(?is) 0), + countif(num_srcset_all > 0), count(0) ) * 100, 2 ) AS pages_with_srcset_pct, round( safe_divide( - countif(coalesce(img_sizes_total, 0) + coalesce(source_sizes_total, 0) > 0), + countif(num_srcset_sizes > 0), count(0) ) * 100, 2 @@ -37,8 +35,8 @@ SELECT round( safe_divide( ( - countif(coalesce(img_srcset_total, 0) + coalesce(source_srcset_total, 0) > 0) - - countif(coalesce(img_sizes_total, 0) + coalesce(source_sizes_total, 0) > 0) + countif(num_srcset_all > 0) - + countif(num_srcset_sizes > 0) ), count(0) ) * 100, @@ -46,18 +44,18 @@ SELECT ) AS pages_with_srcset_wo_sizes_pct, round( safe_divide( - sum(coalesce(img_sizes_total, 0) + coalesce(source_sizes_total, 0)), - sum(coalesce(img_srcset_total, 0) + coalesce(source_srcset_total, 0)) + sum(num_srcset_sizes), + nullif(sum(num_srcset_all), 0) ) * 100, 2 ) AS instances_of_srcset_sizes_pct, round( safe_divide( ( - sum(coalesce(img_srcset_total, 0) + coalesce(source_srcset_total, 0)) - - sum(coalesce(img_sizes_total, 0) + coalesce(source_sizes_total, 0)) + sum(num_srcset_all) - + sum(num_srcset_sizes) ), - sum(coalesce(img_srcset_total, 0) + coalesce(source_srcset_total, 0)) + nullif(sum(num_srcset_all), 0) ) * 100, 2 ) AS instances_of_srcset_wo_sizes_pct, diff --git a/sql/2025/sustainability/text_compression.sql b/sql/2025/sustainability/text_compression.sql index ddf14470e64..174ac0c6e2f 100644 --- a/sql/2025/sustainability/text_compression.sql +++ b/sql/2025/sustainability/text_compression.sql @@ -2,13 +2,13 @@ WITH content_encoding AS ( SELECT client, LOWER(h.value) AS encoding - FROM `httparchive.crawl.requests`, - UNNEST(response_headers) AS h + FROM `httparchive.crawl.requests` r + LEFT JOIN UNNEST(r.response_headers) AS h + ON LOWER(h.name) = 
'content-encoding' WHERE date = '2025-06-01' AND is_root_page AND is_main_document - AND LOWER(h.name) = 'content-encoding' ), compression_rollup AS (