#standardSQL
# CSS-initiated image px dimension popularity
#
# For each client, counts how often every natural (height, width) pair occurs
# among images whose recorded initiator URL matches a stylesheet request made
# by the same page.
#
# Output columns:
#   client, height, width - the grouping key
#   freq                  - number of joined image rows in the group
#   total                 - sum of freq over all groups of the same client
#   pct                   - freq / total
#
# Only the 500 rows with the highest pct (across all clients) are returned.
WITH image_requests AS (
  # Image requests, together with the URL stored under `_initiator` in their payload.
  SELECT
    client,
    page,
    url AS img_url,
    JSON_VALUE(payload, '$._initiator') AS css_url
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'image'
),

css_requests AS (
  # Stylesheet requests; joining on css_url keeps only images initiated by one of them.
  SELECT
    client,
    page,
    url AS css_url
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'css'
),

image_dimensions AS (
  # One row per element of each page's `_Images` array. SAFE_CAST yields NULL
  # for values that are not valid integers; those rows are filtered out below.
  SELECT
    _TABLE_SUFFIX AS client,
    url AS page,
    JSON_VALUE(image, '$.url') AS img_url,
    SAFE_CAST(JSON_VALUE(image, '$.naturalHeight') AS INT64) AS height,
    SAFE_CAST(JSON_VALUE(image, '$.naturalWidth') AS INT64) AS width
  FROM
    `httparchive.pages.2021_07_01_*`,
    UNNEST(JSON_QUERY_ARRAY(JSON_VALUE(payload, '$._Images'), '$')) AS image
)

SELECT
  client,
  height,
  width,
  COUNT(0) AS freq,
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
  image_requests
JOIN
  css_requests
USING (client, page, css_url)
JOIN
  image_dimensions
USING (client, page, img_url)
WHERE
  height IS NOT NULL AND
  width IS NOT NULL
GROUP BY
  client,
  height,
  width
ORDER BY
  pct DESC,
  # Tie-breakers: make the LIMIT cut-off reproducible when several rows share a pct.
  client,
  height,
  width
LIMIT 500