Skip to content

Commit ead146f

Browse files
Lint only changed SQL files on PRs (#4254)
* Include sql/** in lint workflow and lint only changed SQL files on PRs
* Remove step-level if from Lint SQL
* Add -q to pip install
* Break long shell lines in Lint SQL workflow for readability
* Disable SQLFluff in global super-linter and move Dependabot auto-merge for sqlfluff into Lint SQL workflow
* remove explicit VALIDATE_SQLFLUFF:false
* linting
* Restrict Lint SQL workflow PR path to .sql files
* remove outer query
* unwrapped subqueries
* Update sql/2021/css/keyframes_positions.sql
  Co-authored-by: Barry Pollard <[email protected]>
* Update sql/2021/css/image_dimension_popularity.sql
  Co-authored-by: Barry Pollard <[email protected]>
* Apply suggestion from @tunetheweb
---------
Co-authored-by: Barry Pollard <[email protected]>
1 parent 47bb395 commit ead146f

File tree

14 files changed

+136
-113
lines changed

14 files changed

+136
-113
lines changed

.github/workflows/linter.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ jobs:
4444
VALIDATE_MARKDOWN: true
4545
VALIDATE_PYTHON_PYLINT: true
4646
VALIDATE_PYTHON_FLAKE8: true
47-
VALIDATE_SQLFLUFF: true
4847
VALIDATE_YAML: true
4948

5049
dependabot:
@@ -73,7 +72,6 @@ jobs:
7372
steps.metadata.outputs.update-type == 'version-update:semver-minor'
7473
) && (
7574
contains(steps.metadata.outputs.dependency-names, 'prettier') ||
76-
contains(steps.metadata.outputs.dependency-names, 'sqlfluff') ||
7775
contains(steps.metadata.outputs.dependency-names, 'super-linter')
7876
)
7977
run: gh pr merge --admin --squash "$PR_URL"

.github/workflows/lintsql.yml

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ on:
77
workflow_dispatch:
88
pull_request:
99
paths:
10-
src/requirements.txt
10+
- src/requirements.txt
11+
- 'sql/**.sql'
1112
jobs:
1213
lint:
1314
name: Lint SQL
@@ -23,9 +24,46 @@ jobs:
2324
with:
2425
python-version: '3.12'
2526
- name: Lint SQL code
26-
if: |
27-
github.event_name == 'workflow_dispatch' ||
28-
startsWith(github.event.pull_request.title,'Bump sqlfluff') == true
2927
run: |
30-
pip install -r src/requirements.txt
31-
sqlfluff lint sql -p 4
28+
pip install -r src/requirements.txt -q
29+
if [ "${{ github.event_name }}" == "workflow_dispatch" ] || \
30+
[[ "${{ github.event.pull_request.title }}" == Bump\ sqlfluff* ]]; then
31+
# Lint all SQL files for workflow_dispatch or sqlfluff bumps
32+
sqlfluff lint sql -p 4
33+
else
34+
# Lint only changed SQL files
35+
git diff --name-only --diff-filter=ACMRT origin/${{ github.base_ref }}...HEAD \
36+
| grep '\.sql$' \
37+
| xargs -r sqlfluff lint
38+
fi
39+
40+
41+
dependabot:
42+
name: Dependabot auto-merge
43+
runs-on: ubuntu-latest
44+
needs: lint
45+
if: |
46+
github.event.pull_request.user.login == 'dependabot[bot]' &&
47+
github.repository == 'HTTPArchive/almanac.httparchive.org'
48+
49+
permissions:
50+
contents: write
51+
pull-requests: write
52+
53+
steps:
54+
- name: Dependabot metadata
55+
id: metadata
56+
uses: dependabot/fetch-metadata@v2
57+
with:
58+
github-token: "${{ secrets.GITHUB_TOKEN }}"
59+
60+
- name: Enable auto-merge for Dependabot PRs
61+
if: |
62+
(
63+
steps.metadata.outputs.update-type == 'version-update:semver-patch' ||
64+
steps.metadata.outputs.update-type == 'version-update:semver-minor'
65+
) && contains(steps.metadata.outputs.dependency-names, 'sqlfluff')
66+
run: gh pr merge --admin --squash "$PR_URL"
67+
env:
68+
PR_URL: ${{github.event.pull_request.html_url}}
69+
GH_TOKEN: ${{secrets.GITHUB_TOKEN}}

sql/2019/fonts/06_32.sql

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,19 @@
11
#standardSQL
22
# 06_32: Top font hosts
33
SELECT
4-
*
5-
FROM (
6-
SELECT
7-
client,
8-
NET.HOST(url) AS host,
9-
COUNT(0) AS freq,
10-
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
11-
ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct
12-
FROM
13-
`httparchive.almanac.requests`
14-
WHERE
15-
date = '2019-07-01' AND
16-
type = 'font'
17-
GROUP BY
18-
client,
19-
host
20-
ORDER BY
21-
freq / total DESC
22-
)
4+
client,
5+
NET.HOST(url) AS host,
6+
COUNT(0) AS freq,
7+
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
8+
ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct
9+
FROM
10+
`httparchive.almanac.requests`
11+
WHERE
12+
date = '2019-07-01' AND
13+
type = 'font'
14+
GROUP BY
15+
client,
16+
host
17+
ORDER BY
18+
freq / total DESC
2319
LIMIT 100

sql/2021/css/image_dimension_popularity.sql

Lines changed: 45 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,55 @@
11
#standardSQL
22
# CSS-initiated image px dimension popularity
33
SELECT
4-
*
4+
client,
5+
height,
6+
width,
7+
COUNT(0) AS freq,
8+
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
9+
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
510
FROM (
611
SELECT
712
client,
8-
height,
9-
width,
10-
COUNT(0) AS freq,
11-
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
12-
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
13-
FROM (
14-
SELECT
15-
client,
16-
page,
17-
url AS img_url,
18-
JSON_VALUE(payload, '$._initiator') AS css_url
19-
FROM
20-
`httparchive.almanac.requests`
21-
WHERE
22-
date = '2021-07-01' AND
23-
type = 'image'
24-
)
25-
JOIN (
26-
SELECT
27-
client,
28-
page,
29-
url AS css_url
30-
FROM
31-
`httparchive.almanac.requests`
32-
WHERE
33-
date = '2021-07-01' AND
34-
type = 'css'
35-
)
36-
USING (client, page, css_url)
37-
JOIN (
38-
SELECT
39-
_TABLE_SUFFIX AS client,
40-
url AS page,
41-
JSON_EXTRACT_SCALAR(image, '$.url') AS img_url,
42-
SAFE_CAST(JSON_EXTRACT_SCALAR(image, '$.naturalHeight') AS INT64) AS height,
43-
SAFE_CAST(JSON_EXTRACT_SCALAR(image, '$.naturalWidth') AS INT64) AS width
44-
FROM
45-
`httparchive.pages.2021_07_01_*`,
46-
UNNEST(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._Images'), '$')) AS image
47-
)
48-
USING (client, page, img_url)
13+
page,
14+
url AS img_url,
15+
JSON_VALUE(payload, '$._initiator') AS css_url
16+
FROM
17+
`httparchive.almanac.requests`
4918
WHERE
50-
height IS NOT NULL AND
51-
width IS NOT NULL
52-
GROUP BY
19+
date = '2021-07-01' AND
20+
type = 'image'
21+
)
22+
JOIN (
23+
SELECT
5324
client,
54-
height,
55-
width
56-
ORDER BY
57-
pct DESC
25+
page,
26+
url AS css_url
27+
FROM
28+
`httparchive.almanac.requests`
29+
WHERE
30+
date = '2021-07-01' AND
31+
type = 'css'
32+
)
33+
USING (client, page, css_url)
34+
JOIN (
35+
SELECT
36+
_TABLE_SUFFIX AS client,
37+
url AS page,
38+
JSON_EXTRACT_SCALAR(image, '$.url') AS img_url,
39+
SAFE_CAST(JSON_EXTRACT_SCALAR(image, '$.naturalHeight') AS INT64) AS height,
40+
SAFE_CAST(JSON_EXTRACT_SCALAR(image, '$.naturalWidth') AS INT64) AS width
41+
FROM
42+
`httparchive.pages.2021_07_01_*`,
43+
UNNEST(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._Images'), '$')) AS image
5844
)
45+
USING (client, page, img_url)
46+
WHERE
47+
height IS NOT NULL AND
48+
width IS NOT NULL
49+
GROUP BY
50+
client,
51+
height,
52+
width
53+
ORDER BY
54+
pct DESC
5955
LIMIT 500

sql/2021/css/keyframes_positions.sql

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -24,36 +24,32 @@ try {
2424
''';
2525

2626
SELECT
27-
*
28-
FROM (
27+
client,
28+
position,
29+
COUNT(DISTINCT page) AS pages,
30+
ANY_VALUE(total) AS total_pages,
31+
COUNT(DISTINCT page) / ANY_VALUE(total) AS pct_pages,
32+
COUNT(0) AS freq,
33+
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
34+
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
35+
FROM
36+
`httparchive.almanac.parsed_css`,
37+
UNNEST(getKeyframePositions(css)) AS position
38+
JOIN (
2939
SELECT
30-
client,
31-
position,
32-
COUNT(DISTINCT page) AS pages,
33-
ANY_VALUE(total) AS total_pages,
34-
COUNT(DISTINCT page) / ANY_VALUE(total) AS pct_pages,
35-
COUNT(0) AS freq,
36-
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
37-
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
40+
_TABLE_SUFFIX AS client,
41+
COUNT(0) AS total
3842
FROM
39-
`httparchive.almanac.parsed_css`,
40-
UNNEST(getKeyframePositions(css)) AS position
41-
JOIN (
42-
SELECT
43-
_TABLE_SUFFIX AS client,
44-
COUNT(0) AS total
45-
FROM
46-
`httparchive.summary_pages.2021_07_01_*`
47-
GROUP BY
48-
client
49-
)
50-
USING (client)
51-
WHERE
52-
date = '2021-07-01'
43+
`httparchive.summary_pages.2021_07_01_*`
5344
GROUP BY
54-
client,
55-
position
56-
ORDER BY
57-
pct DESC
45+
client
5846
)
47+
USING (client)
48+
WHERE
49+
date = '2021-07-01'
50+
GROUP BY
51+
client,
52+
position
53+
ORDER BY
54+
pct DESC
5955
LIMIT 500

sql/2022/jamstack/jamstack_random_1000_urls.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ FROM
55
WHERE
66
methodology = '2022' AND
77
date = '2022-06-01'
8-
LIMIT 1000
8+
LIMIT 1000 -- noqa: AM09

sql/2024/jamstack/hugo_astro_next.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING,
99
UNNEST(response_headers) AS header
1010
WHERE
1111
LOWER(header.name) = 'cache-control'
12-
LIMIT 1
12+
LIMIT 1 -- noqa: AM09
1313
),
1414
r'max-age=(\d+)'
1515
) AS INT64

sql/2024/jamstack/jamstack-overview.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING,
99
UNNEST(response_headers) AS header
1010
WHERE
1111
LOWER(header.name) = 'cache-control'
12-
LIMIT 1
12+
LIMIT 1 -- noqa: AM09
1313
),
1414
r'max-age=(\d+)'
1515
) AS INT64

sql/2024/jamstack/jamstack_distribution_by_rank.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING,
99
UNNEST(response_headers) AS header
1010
WHERE
1111
LOWER(header.name) = 'cache-control'
12-
LIMIT 1
12+
LIMIT 1 -- noqa: AM09
1313
),
1414
r'max-age=(\d+)'
1515
) AS INT64

sql/2024/jamstack/js_frameworks.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING,
99
UNNEST(response_headers) AS header
1010
WHERE
1111
LOWER(header.name) = 'cache-control'
12-
LIMIT 1
12+
LIMIT 1 -- noqa: AM09
1313
),
1414
r'max-age=(\d+)'
1515
) AS INT64

0 commit comments

Comments
 (0)