Skip to content

Commit 85a04ea

Browse files
authored
New PR check for disallowed files (microsoft#14365)
1 parent f82d13b commit 85a04ea

File tree

2 files changed

+220
-0
lines changed

2 files changed

+220
-0
lines changed

.github/workflows/check-files.yml

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
name: Check Disallowed Files

# Runs on pushes to, and pull requests targeting, the long-lived branches.
on:
  push:
    branches: [main, 2.0*, 3.0*, fasttrack/*]
  pull_request:
    branches: [main, 2.0*, 3.0*, fasttrack/*]

# Least privilege: the steps in this workflow only check out the repository
# and read git metadata, so the token needs nothing beyond read access.
permissions:
  contents: read

jobs:

  build:
    name: Check Disallowed Files
    runs-on: ubuntu-latest
    steps:

      # Default (shallow) checkout of the commit under test; later steps
      # fetch the base commit they need to diff against.
      - name: Check out code
        uses: actions/checkout@v4
21+
22+
# Resolves the tip of the pull request's target branch and exports it as
# `base_sha` (via GITHUB_ENV) for the later steps of this job.
- name: Get base commit for PRs
  if: ${{ github.event_name == 'pull_request' }}
  env:
    # Hand context values to the script as environment variables rather than
    # splicing them into the script text, so that an unusual branch name can
    # never be parsed as shell syntax.
    BASE_REF: ${{ github.base_ref }}
    HEAD_SHA: ${{ github.sha }}
  run: |
    git fetch origin "${BASE_REF}"
    # Resolve in a separate assignment: if rev-parse fails the step must
    # fail here instead of exporting an empty base_sha.
    base_sha=$(git rev-parse "origin/${BASE_REF}")
    echo "base_sha=${base_sha}" >> "${GITHUB_ENV}"
    echo "Merging ${HEAD_SHA} into ${BASE_REF}"
28+
29+
# Determines the commit that the pushed ref pointed at before this push and
# exports it as `base_sha` (via GITHUB_ENV) for the later steps of this job.
- name: Get base commit for Pushes
  if: ${{ github.event_name == 'push' }}
  env:
    # Context values are passed as environment variables instead of being
    # interpolated into the script text.
    BEFORE_SHA: ${{ github.event.before }}
    HEAD_SHA: ${{ github.sha }}
  run: |
    zero_sha="0000000000000000000000000000000000000000"

    if [[ "${BEFORE_SHA}" == "${zero_sha}" ]]; then
      # First push of a new branch: there is no previous commit, and fetching
      # the all-zero SHA would fail. Fall back to the parent of the pushed
      # commit, so only the tip commit is validated in this case.
      # NOTE(review): confirm that checking just the tip commit is acceptable
      # for newly created branches.
      git fetch --depth=2 origin "${HEAD_SHA}"
      if ! base_sha=$(git rev-parse --verify --quiet "${HEAD_SHA}^"); then
        # Root commit (no parent): diff against git's empty tree instead.
        base_sha=$(git hash-object -t tree /dev/null)
      fi
    else
      git fetch origin "${BEFORE_SHA}"
      base_sha="${BEFORE_SHA}"
    fi

    echo "base_sha=${base_sha}" >> "${GITHUB_ENV}"
    echo "Merging ${HEAD_SHA} into ${base_sha}"
35+
36+
# Lists the files added or modified between `base_sha` and the commit under
# test and exports the newline-separated list as the `changed-files`
# environment entry for the following step.
- name: Get the changed files
  env:
    HEAD_SHA: ${{ github.sha }}
  run: |
    # base_sha was written to GITHUB_ENV by one of the "Get base commit"
    # steps, so it is a regular environment variable here.
    : "${base_sha:?base_sha was not set by a previous step}"

    # core.quotePath=false keeps non-ASCII paths verbatim. With git's default
    # they are emitted as quoted octal escapes, which never match a file on
    # disk, so the content checks would be skipped for those files.
    # (Paths containing control characters, quotes or backslashes are still
    # quoted by git.)
    all_changes=$(git -c core.quotePath=false diff-tree --no-commit-id --name-only -r "${base_sha}" "${HEAD_SHA}")
    echo "Files changed: '${all_changes}'"

    # Only Added/Modified files are validated; deleted files no longer exist.
    changed_files=$(git -c core.quotePath=false diff-tree --diff-filter=AM --no-commit-id --name-only -r "${base_sha}" "${HEAD_SHA}")
    echo "Files to validate: '${changed_files}'"

    # Use a per-run delimiter for the multi-line value so that a changed file
    # whose name is literally "EOF" cannot terminate the value early.
    delimiter="EOF_${RANDOM}_${RANDOM}_$$"
    {
      echo "changed-files<<${delimiter}"
      echo "${changed_files}"
      echo "${delimiter}"
    } >> "${GITHUB_ENV}"
44+
45+
# Fails the job when any added/modified file looks like it belongs in the
# blob store: it has a disallowed extension, is a binary
# executable/archive/compressed payload, or is larger than 1MB.
- name: Check for disallowed file types
  env:
    # The file list comes from the pushed/PR content. Pass it through the
    # environment instead of interpolating it into the script text, where a
    # file name containing quotes, backticks or $(...) would be executed.
    CHANGED_FILES: ${{ env.changed-files }}
  run: |
    if [[ -z "${CHANGED_FILES}" ]]; then
      echo "No files to validate. Exiting."
      exit 0
    fi

    echo "Checking files..."
    error_found=0
    config_file=".github/workflows/disallowed-extensions.txt"
    max_size_bytes=1048576 # 1MB

    # Read disallowed extensions from the configuration file
    if [[ ! -f "${config_file}" ]]; then
      echo "Configuration file '${config_file}' not found. Skipping check."
      exit 0
    fi

    # Test mapfile directly: the run shell exits on error, so a separate
    # "$?" check after the command would never be reached on failure.
    if ! mapfile -t raw_extensions < "${config_file}"; then
      echo "Error occurred while reading disallowed extensions. Exiting."
      exit 1
    fi

    # Normalize the list once, not once per changed file: drop comments,
    # whitespace (including a stray CR), an optional leading dot, and
    # lowercase what is left.
    disallowed_extensions=()
    for raw_ext in "${raw_extensions[@]}"; do
      clean_ext="${raw_ext%%#*}"
      clean_ext="${clean_ext//[[:space:]]/}"
      clean_ext="${clean_ext#.}"
      if [[ -n "${clean_ext}" ]]; then
        disallowed_extensions+=("${clean_ext,,}")
      fi
    done

    # Check each changed file
    while IFS= read -r file; do
      if [[ -z "${file}" ]]; then
        continue
      fi

      echo "Checking file: ${file}"

      # Compare against the lowercase base name so directory names play no
      # part in the match.
      base_name="${file##*/}"
      lower_name="${base_name,,}"

      # Check if file should be in blob store
      should_be_in_blob_store=false

      # Suffix match on ".<ext>": this supports multi-part entries such as
      # "tar.gz" and does not treat an extensionless file named e.g. "bin"
      # as having the extension "bin".
      for disallowed_ext in "${disallowed_extensions[@]}"; do
        if [[ "${lower_name}" == *".${disallowed_ext}" ]]; then
          should_be_in_blob_store=true
          break
        fi
      done

      # Additional checks for binary files and large files
      if [[ -f "${file}" ]]; then
        # --brief keeps the path out of the description so a directory or
        # file name like "archive" cannot trigger a match. The encoding test
        # keeps text files out: `file` describes scripts as
        # "... text executable", which would otherwise flag every script.
        description=$(file --brief -- "${file}" 2>/dev/null || true)
        encoding=$(file --brief --mime-encoding -- "${file}" 2>/dev/null || true)
        if [[ "${encoding}" == "binary" && "${description}" =~ binary|executable|archive|compressed ]]; then
          should_be_in_blob_store=true
        fi

        # Check file size (files > 1MB should be in blob store).
        # The job runs on ubuntu-latest, so the GNU stat form is used;
        # an unreadable size is treated as 0.
        file_size=$(stat -c%s -- "${file}" 2>/dev/null || echo 0)
        if (( file_size > max_size_bytes )); then
          should_be_in_blob_store=true
        fi
      fi

      if [[ "${should_be_in_blob_store}" == "true" ]]; then
        echo "**** ERROR ****" >&2
        echo "File '${file}' should be stored in blob store, not in git repository." >&2
        echo "Reason: Images, Large files, binaries, tarballs, and non-text files slow down git operations" >&2
        echo "and cannot be efficiently diffed. Please upload to blob store instead." >&2
        echo "**** ERROR ****" >&2
        error_found=1
      fi
    done <<< "${CHANGED_FILES}"

    if [[ ${error_found} -eq 1 ]]; then
      echo ""
      echo "=========================================="
      echo "FILES THAT SHOULD BE IN BLOB STORE DETECTED"
      echo "=========================================="
      echo "The following file types should be stored in blob store:"
      echo "- Source tarballs (.tar.gz, .tar.xz, .zip, etc.)"
      echo "- Binary files (.bin, .exe, .so, .dll, etc.)"
      echo "- Images (.gif, .bmp, etc.)"
      echo "- Archives (.rar, .7z, .tar, etc.)"
      echo "- Large files (> 1MB)"
      echo "- Any non-text files that cannot be efficiently diffed"
      echo ""
      echo "Please upload these files to the blob store and reference them"
      echo "in your spec files or configuration instead of checking them into git."
      echo "=========================================="
      exit 1
    fi

    echo "All files are appropriate for git storage."
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# File extensions that should be stored in blob store instead of git repository
2+
# Lines starting with # are comments and will be ignored
3+
# Extensions should be lowercase without the leading dot
4+
5+
# Source tarballs and archives
6+
tar
7+
gz
8+
tgz
9+
bz2
10+
xz
11+
zip
12+
rar
13+
7z
14+
tar.gz
15+
tar.xz
16+
tar.bz2
17+
18+
# Binary executables
19+
bin
20+
exe
21+
dll
22+
so
23+
dylib
24+
a
25+
lib
26+
obj
27+
o
28+
29+
# Image files
30+
gif
31+
bmp
32+
tiff
33+
tif
34+
webp
35+
raw
36+
heif
37+
38+
39+
# Audio/Video files
40+
mp3
41+
wav
42+
avi
43+
mp4
44+
mkv
45+
mov
46+
wmv
47+
flv
48+
ogg
49+
m4a
50+
aac
51+
52+
# Package files
53+
rpm
54+
deb
55+
msi
56+
pkg
57+
dmg
58+
iso
59+
60+
# Compressed source packages
61+
gem
62+
whl
63+
egg
64+
65+
# Database files
66+
db
67+
sqlite
68+
sqlite3
69+
70+
# Fonts
71+
ttf
72+
otf
73+
woff
74+
woff2
75+
76+
# Other binary formats
77+
jar
78+
war
79+
ear
80+
class

0 commit comments

Comments
 (0)