Skip to content

Commit 60a01d0

Browse files
committed
Update copyright year and binary files check
1 parent b309204 commit 60a01d0

File tree

1 file changed

+118
-38
lines changed

1 file changed

+118
-38
lines changed

.github/workflows/apache-rat-audit.yml

Lines changed: 118 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,15 @@
1717
# permissions and limitations under the License.
1818
#
1919
# --------------------------------------------------------------------
20-
# Apache Rat Audit Workflow
21-
# Checks if all files comply with Apache licensing requirements
22-
# This workflow is based on the Apache Rat tool, you can run it locally
23-
# using the command: `mvn clean verify -Drat.consoleOutput=true`
20+
# Apache Cloudberry (Incubating) Compliance Workflow
21+
#
22+
# Comprehensive compliance checks for Apache Cloudberry:
23+
# 1. Apache RAT license header validation
24+
# 2. Copyright year verification (NOTICE and src/bin/psql/help.c)
25+
# 3. Binary file presence detection with approved whitelist
26+
#
27+
# Based on the Apache Rat tool; run it locally with:
28+
# `mvn clean verify -Drat.consoleOutput=true`
2429
# --------------------------------------------------------------------
2530

2631
name: Apache Rat License Check
@@ -74,61 +79,117 @@ jobs:
7479
echo "rat_failed=false" >> $GITHUB_OUTPUT
7580
echo "Apache Rat check passed successfully"
7681
77-
- name: Check NOTICE year is up-to-date
82+
- name: Check copyright years are up-to-date
7883
run: |
79-
echo "📅 Checking NOTICE file year..."
84+
echo "Checking copyright years..."
8085
current_year=$(date -u +"%Y")
8186
echo "CURRENT_YEAR=$current_year" >> $GITHUB_ENV
82-
# Check if the NOTICE file contains the current year
87+
88+
# Check NOTICE file
89+
echo "Checking NOTICE file..."
8390
if ! grep -q "Copyright 2024-$current_year The Apache Software Foundation" NOTICE; then
84-
echo "❌ NOTICE file does not contain the current year ($current_year)"
8591
echo "::error::NOTICE file does not contain the current year ($current_year)"
8692
echo "NOTICE_CHECK=fail" >> $GITHUB_ENV
8793
exit 1
8894
else
89-
echo " NOTICE file contains the current year ($current_year)"
95+
echo "PASS: NOTICE file contains the current year ($current_year)"
9096
echo "NOTICE_CHECK=pass" >> $GITHUB_ENV
9197
fi
98+
99+
# Check psql help.c file
100+
echo "Checking src/bin/psql/help.c..."
101+
if ! grep -q "Copyright 2024-$current_year The Apache Software Foundation" src/bin/psql/help.c; then
102+
echo "::error::src/bin/psql/help.c does not contain the current year ($current_year)"
103+
echo "PSQL_HELP_CHECK=fail" >> $GITHUB_ENV
104+
exit 1
105+
else
106+
echo "PASS: src/bin/psql/help.c contains the current year ($current_year)"
107+
echo "PSQL_HELP_CHECK=pass" >> $GITHUB_ENV
108+
fi
109+
110+
echo "All copyright year checks passed"
92111
93112
- name: Check for binary files
94113
run: |
95-
echo "📦 Checking for binary files..."
96-
echo "Checking extensions: class, jar, tar, tgz, zip, exe, dll, so"
114+
echo "Checking for binary files..."
115+
echo "Checking extensions: class, jar, tar, tgz, zip, exe, dll, so, gz, bz2"
97116
echo "----------------------------------------------------------------------"
98117
118+
# Whitelist of approved binary files (used for testing); see README.apache.md for details
119+
WHITELIST=(
120+
"contrib/formatter_fixedwidth/data/fixedwidth_small_correct.tbl.gz"
121+
"gpMgmt/demo/gppkg/sample-sources.tar.gz"
122+
"src/bin/gpfdist/regress/data/exttab1/nation.tbl.gz"
123+
"src/bin/gpfdist/regress/data/gpfdist2/gz_multi_chunk.tbl.gz"
124+
"src/bin/gpfdist/regress/data/gpfdist2/gz_multi_chunk_2.tbl.gz"
125+
"src/bin/gpfdist/regress/data/gpfdist2/lineitem.tbl.bz2"
126+
"src/bin/gpfdist/regress/data/gpfdist2/lineitem.tbl.gz"
127+
)
128+
99129
# Check for specific binary file extensions
100-
binary_extensions="class jar tar tgz zip exe dll so"
130+
binary_extensions="class jar tar tgz zip exe dll so gz bz2"
101131
echo "BINARY_EXTENSIONS=${binary_extensions}" >> $GITHUB_ENV
102132
binary_results=""
103133
binaryfiles_found=false
104134
105135
for extension in ${binary_extensions}; do
106136
printf "Checking *.%-4s files..." "${extension}"
107-
found=$(find . -name "*.${extension}" || true)
137+
found=$(find . -name "*.${extension}" -type f || true)
138+
139+
# Filter out whitelisted files
108140
if [ -n "$found" ]; then
109-
echo "❌ FOUND"
110-
echo "::error::${extension} files should not exist"
111-
echo "For ASF compatibility: the source tree should not contain"
112-
echo "binary files as users have a hard time verifying their contents."
113-
echo "Found files:"
114-
echo "$found" | sed 's/^/ /'
115-
echo "${extension}:${found}" >> binary_results.txt
116-
binaryfiles_found=true
141+
filtered_found=""
142+
while IFS= read -r file; do
143+
is_whitelisted=false
144+
for whitelist_file in "${WHITELIST[@]}"; do
145+
if [ "$file" = "./$whitelist_file" ]; then
146+
is_whitelisted=true
147+
echo "Whitelisted: $file" >> binary_whitelist.txt
148+
break
149+
fi
150+
done
151+
if [ "$is_whitelisted" = false ]; then
152+
filtered_found+="$file"$'\n'
153+
fi
154+
done <<< "$found"
155+
156+
filtered_found=$(echo "$filtered_found" | sed '/^$/d')
157+
158+
if [ -n "$filtered_found" ]; then
159+
echo "FOUND"
160+
echo "::error::${extension} files should not exist"
161+
echo "For ASF compatibility: the source tree should not contain"
162+
echo "binary files as users have a hard time verifying their contents."
163+
echo "Found files:"
164+
echo "$filtered_found" | sed 's/^/ /'
165+
echo "${extension}:${filtered_found}" >> binary_results.txt
166+
binaryfiles_found=true
167+
else
168+
echo "NONE (all whitelisted)"
169+
echo "${extension}:none" >> binary_results.txt
170+
fi
117171
else
118-
echo "NONE"
172+
echo "NONE"
119173
echo "${extension}:none" >> binary_results.txt
120174
fi
121175
done
122176
123177
echo "----------------------------------------------------------------------"
124178
if [ "$binaryfiles_found" = true ]; then
125-
echo "❌ Binary files were found in the source tree"
179+
echo "ERROR: Non-whitelisted binary files were found in the source tree"
126180
echo "BINARY_CHECK=fail" >> $GITHUB_ENV
127181
exit 1
128182
else
129-
echo " No binary files found"
183+
echo "PASS: No non-whitelisted binary files found"
130184
echo "BINARY_CHECK=pass" >> $GITHUB_ENV
131185
fi
186+
187+
# Show whitelist summary if any whitelisted files were found
188+
if [ -f binary_whitelist.txt ]; then
189+
echo ""
190+
echo "Whitelisted binary files (approved):"
191+
cat binary_whitelist.txt | sed 's/^/ /'
192+
fi
132193
133194
- name: Upload Rat check results
134195
if: always()
@@ -146,38 +207,57 @@ jobs:
146207
echo "- Run Time: $(date -u +'%Y-%m-%d %H:%M:%S UTC')"
147208
echo ""
148209
149-
# NOTICE Year Check Summary
150-
echo "### 📅 NOTICE Year Check"
210+
# Copyright Year Check Summary
211+
echo "### Copyright Year Checks"
212+
echo "**NOTICE file:**"
151213
if [ "$NOTICE_CHECK" = "pass" ]; then
152-
echo "✅ NOTICE file contains the current year ($CURRENT_YEAR)"
214+
echo "PASS: Contains current year ($CURRENT_YEAR)"
153215
else
154-
echo "❌ NOTICE file does not contain the current year ($CURRENT_YEAR)"
216+
echo "ERROR: Does not contain current year ($CURRENT_YEAR)"
217+
fi
218+
echo ""
219+
echo "**psql help.c:**"
220+
if [ "$PSQL_HELP_CHECK" = "pass" ]; then
221+
echo "PASS: Contains current year ($CURRENT_YEAR)"
222+
else
223+
echo "ERROR: Does not contain current year ($CURRENT_YEAR)"
155224
fi
156225
echo ""
157226
158227
# Binary Files Check Summary
159-
echo "### 📦 Binary Files Check"
228+
echo "### Binary Files Check"
160229
echo "Checked extensions: \`${BINARY_EXTENSIONS}\`"
161230
echo ""
162231
echo "Results:"
163232
echo "\`\`\`"
164233
if [ -f binary_results.txt ]; then
165234
while IFS=: read -r ext files; do
166235
if [ "$files" = "none" ]; then
167-
echo " No .${ext} files found"
236+
echo "PASS: No .${ext} files found"
168237
else
169-
echo " Found .${ext} files:"
238+
echo "ERROR: Found .${ext} files:"
170239
echo "$files" | sed 's/^/ /'
171240
fi
172241
done < binary_results.txt
173242
fi
174243
echo "\`\`\`"
175244
echo ""
245+
246+
# Whitelist summary
247+
if [ -f binary_whitelist.txt ]; then
248+
echo "#### Whitelisted Binary Files"
249+
echo "The following binary files are approved for testing purposes:"
250+
echo "You can see [README.apache.md](https://github.com/apache/cloudberry/blob/main/README.apache.md) for details."
251+
echo "\`\`\`"
252+
cat binary_whitelist.txt | sed 's/Whitelisted: //'
253+
echo "\`\`\`"
254+
echo ""
255+
fi
176256
177257
if [[ -f rat-output.log ]]; then
178258
# First extract and display summary statistics (only once)
179259
if grep -q "Rat check: Summary over all files" rat-output.log; then
180-
echo "#### 📊 License Summary"
260+
echo "#### License Summary"
181261
summary_line=$(grep "Rat check: Summary over all files" rat-output.log)
182262
echo "\`\`\`"
183263
echo "$summary_line"
@@ -187,12 +267,12 @@ jobs:
187267
188268
# Then determine the result status
189269
if grep -q "\[INFO\] BUILD FAILURE" rat-output.log; then
190-
echo "### Check Failed - License Compliance Issues Detected"
270+
echo "### Check Failed - License Compliance Issues Detected"
191271
echo ""
192272
193273
# Extract and display files with unapproved licenses
194274
if grep -q "Files with unapproved licenses:" rat-output.log; then
195-
echo "#### 🚫 Files with Unapproved Licenses"
275+
echo "#### Files with Unapproved Licenses"
196276
echo "\`\`\`"
197277
# Get the line with "Files with unapproved licenses:" and all following lines until the dashed line
198278
sed -n '/Files with unapproved licenses:/,/\[INFO\] ------------------------------------------------------------------------/p' rat-output.log | \
@@ -203,7 +283,7 @@ jobs:
203283
echo ""
204284
fi
205285
206-
echo "💡 **How to fix:**"
286+
echo "**How to fix:**"
207287
echo ""
208288
echo "**For new original files you created:**"
209289
echo "- Add the standard Apache License header to each file"
@@ -218,14 +298,14 @@ jobs:
218298
echo "- Email dev@cloudberry.apache.org if you have questions about license compatibility"
219299
220300
elif grep -q "\[INFO\] BUILD SUCCESS" rat-output.log; then
221-
echo "### Check Passed - All Files Comply with Apache License Requirements"
301+
echo "### Check Passed - All Files Comply with Apache License Requirements"
222302
223303
else
224-
echo "### ⚠️ Indeterminate Result"
304+
echo "### Indeterminate Result"
225305
echo "Check the uploaded log file for details."
226306
fi
227307
else
228-
echo "### ⚠️ No Output Log Found"
308+
echo "### No Output Log Found"
229309
echo "The rat-output.log file was not generated."
230310
fi
231311
} >> "$GITHUB_STEP_SUMMARY"

0 commit comments

Comments
 (0)