1717# permissions and limitations under the License.
1818#
1919# --------------------------------------------------------------------
20- # Apache Rat Audit Workflow
21- # Checks if all files comply with Apache licensing requirements
22- # This workflow is based on the Apache Rat tool, you can run it locally
23- # using the command: `mvn clean verify -Drat.consoleOutput=true`
20+ # Apache Cloudberry (Incubating) Compliance Workflow
21+ #
22+ # Comprehensive compliance checks for Apache Cloudberry:
23+ # 1. Apache RAT license header validation
24+ # 2. Copyright year verification (NOTICE and psql help.c)
25+ # 3. Binary file presence detection with approved whitelist
26+ #
27+ # The license header check (1) is based on the Apache Rat tool; run it locally with:
28+ # `mvn clean verify -Drat.consoleOutput=true`
2429# --------------------------------------------------------------------
2530
2631name : Apache Rat License Check
@@ -74,61 +79,117 @@ jobs:
7479 echo "rat_failed=false" >> $GITHUB_OUTPUT
7580 echo "Apache Rat check passed successfully"
7681
# Verifies that the ASF copyright line in NOTICE and src/bin/psql/help.c
# ends with the current (UTC) year.
# Exports to $GITHUB_ENV for later steps:
#   CURRENT_YEAR      the year that was checked against
#   NOTICE_CHECK      pass|fail
#   PSQL_HELP_CHECK   pass|fail
# Both files are always checked, so each *_CHECK variable reflects a real
# result even when the other file fails; the step exits 1 if either failed.
- name: Check copyright years are up-to-date
  run: |
    echo "Checking copyright years ..."
    current_year=$(date -u +"%Y")
    echo "CURRENT_YEAR=$current_year" >> "$GITHUB_ENV"

    # The exact line every checked file must contain.
    expected="Copyright 2024-$current_year The Apache Software Foundation"

    # check_copyright_year FILE LABEL ENV_VAR
    #   FILE     path to search
    #   LABEL    human-readable name used in log messages
    #   ENV_VAR  name of the pass/fail variable written to $GITHUB_ENV
    # Returns 0 when FILE contains the expected line, 1 otherwise
    # (an unreadable or missing FILE also counts as a failure).
    check_copyright_year() {
      local file=$1 label=$2 env_var=$3
      echo "Checking ${label}..."
      # -F: match the expected line literally rather than as a regex.
      if grep -qF -- "$expected" "$file"; then
        echo "PASS: ${label} contains the current year ($current_year)"
        echo "${env_var}=pass" >> "$GITHUB_ENV"
        return 0
      fi
      echo "::error::${label} does not contain the current year ($current_year)"
      echo "${env_var}=fail" >> "$GITHUB_ENV"
      return 1
    }

    # Run every check before failing so the summary step gets a genuine
    # result for each file instead of an unset variable.
    checks_failed=false
    check_copyright_year NOTICE "NOTICE file" NOTICE_CHECK || checks_failed=true
    check_copyright_year src/bin/psql/help.c "src/bin/psql/help.c" PSQL_HELP_CHECK || checks_failed=true

    if [ "$checks_failed" = true ]; then
      exit 1
    fi

    echo "All copyright year checks passed"
92111
# Fails the job when the checkout contains files with a binary-looking
# extension that are not on the approved whitelist.
# Outputs:
#   $GITHUB_ENV           BINARY_EXTENSIONS, BINARY_CHECK=pass|fail
#   binary_results.txt    exactly one line per extension:
#                         "<ext>:none" or "<ext>:<file>, <file>, ..."
#   binary_whitelist.txt  one "Whitelisted: <file>" line per approved file
#                         found (created only if at least one was found)
- name: Check for binary files
  run: |
    # Single source of truth for the extensions to scan.
    binary_extensions="class jar tar tgz zip exe dll so gz bz2"

    echo "Checking for binary files..."
    echo "Checking extensions: ${binary_extensions// /, }"
    echo "----------------------------------------------------------------------"

    # Both result files are appended to below; remove leftovers so a
    # reused workspace cannot leak entries from an earlier run.
    rm -f binary_results.txt binary_whitelist.txt

    # Binary file whitelist, see README.apache.md
    WHITELIST=(
      "contrib/formatter_fixedwidth/data/fixedwidth_small_correct.tbl.gz"
      "gpMgmt/demo/gppkg/sample-sources.tar.gz"
      "src/bin/gpfdist/regress/data/exttab1/nation.tbl.gz"
      "src/bin/gpfdist/regress/data/gpfdist2/gz_multi_chunk.tbl.gz"
      "src/bin/gpfdist/regress/data/gpfdist2/gz_multi_chunk_2.tbl.gz"
      "src/bin/gpfdist/regress/data/gpfdist2/lineitem.tbl.bz2"
      "src/bin/gpfdist/regress/data/gpfdist2/lineitem.tbl.gz"
    )

    # is_whitelisted PATH
    # Returns 0 when PATH (as printed by `find .`, i.e. "./"-prefixed)
    # is one of the approved files, 1 otherwise.
    is_whitelisted() {
      local candidate=$1 approved
      for approved in "${WHITELIST[@]}"; do
        if [ "$candidate" = "./$approved" ]; then
          return 0
        fi
      done
      return 1
    }

    echo "BINARY_EXTENSIONS=${binary_extensions}" >> "$GITHUB_ENV"
    binaryfiles_found=false

    # Intentionally unquoted: split the list into one word per extension.
    for extension in ${binary_extensions}; do
      printf "Checking *.%-4s files..." "${extension}"

      # Skip .git: refs are stored as regular files named after branches
      # and tags, so a ref such as "foo.zip" would be a false positive.
      found=$(find . -path ./.git -prune -o -type f -name "*.${extension}" -print || true)

      # Split matches into approved (logged) and unapproved (reported).
      unapproved=()
      while IFS= read -r file; do
        [ -n "$file" ] || continue
        if is_whitelisted "$file"; then
          echo "Whitelisted: $file" >> binary_whitelist.txt
        else
          unapproved+=("$file")
        fi
      done <<< "$found"

      if [ "${#unapproved[@]}" -gt 0 ]; then
        echo "FOUND"
        echo "::error::${extension} files should not exist"
        echo "For ASF compatibility: the source tree should not contain"
        echo "binary files as users have a hard time verifying their contents."
        echo "Found files:"
        printf ' %s\n' "${unapproved[@]}"
        # Keep the record on ONE line: the summary step reads this file
        # line by line as "<ext>:<files>", so embedded newlines would be
        # misparsed as additional extensions.
        joined=$(printf '%s, ' "${unapproved[@]}")
        echo "${extension}:${joined%, }" >> binary_results.txt
        binaryfiles_found=true
      elif [ -n "$found" ]; then
        echo "NONE (all whitelisted)"
        echo "${extension}:none" >> binary_results.txt
      else
        echo "NONE"
        echo "${extension}:none" >> binary_results.txt
      fi
    done

    echo "----------------------------------------------------------------------"
    if [ "$binaryfiles_found" = true ]; then
      echo "ERROR: Non-whitelisted binary files were found in the source tree"
      echo "BINARY_CHECK=fail" >> "$GITHUB_ENV"
    else
      echo "PASS: No non-whitelisted binary files found"
      echo "BINARY_CHECK=pass" >> "$GITHUB_ENV"
    fi

    # Show whitelist summary if any whitelisted files were found. This runs
    # before the exit below so it is printed on failure as well.
    if [ -f binary_whitelist.txt ]; then
      echo ""
      echo "Whitelisted binary files (approved):"
      sed 's/^/ /' binary_whitelist.txt
    fi

    if [ "$binaryfiles_found" = true ]; then
      exit 1
    fi
132193
133194 - name : Upload Rat check results
134195 if : always()
@@ -146,38 +207,57 @@ jobs:
146207 echo "- Run Time: $(date -u +'%Y-%m-%d %H:%M:%S UTC')"
147208 echo ""
148209
149- # NOTICE Year Check Summary
150- echo "### 📅 NOTICE Year Check"
210+ # Copyright Year Check Summary
211+ echo "### Copyright Year Checks"
212+ echo "**NOTICE file:**"
151213 if [ "$NOTICE_CHECK" = "pass" ]; then
152- echo "✅ NOTICE file contains the current year ($CURRENT_YEAR)"
214+ echo "PASS: Contains current year ($CURRENT_YEAR)"
153215 else
154- echo "❌ NOTICE file does not contain the current year ($CURRENT_YEAR)"
216+ echo "ERROR: Does not contain current year ($CURRENT_YEAR)"
217+ fi
218+ echo ""
219+ echo "**psql help.c:**"
220+ if [ "$PSQL_HELP_CHECK" = "pass" ]; then
221+ echo "PASS: Contains current year ($CURRENT_YEAR)"
222+ else
223+ echo "ERROR: Does not contain current year ($CURRENT_YEAR)"
155224 fi
156225 echo ""
157226
158227 # Binary Files Check Summary
159- echo "### 📦 Binary Files Check"
228+ echo "### Binary Files Check"
160229 echo "Checked extensions: \`${BINARY_EXTENSIONS}\`"
161230 echo ""
162231 echo "Results:"
163232 echo "\`\`\`"
164233 if [ -f binary_results.txt ]; then
165234 while IFS=: read -r ext files; do
166235 if [ "$files" = "none" ]; then
167- echo "✅ No .${ext} files found"
236+ echo "PASS: No .${ext} files found"
168237 else
169- echo "❌ Found .${ext} files:"
238+ echo "ERROR: Found .${ext} files:"
170239 echo "$files" | sed 's/^/ /'
171240 fi
172241 done < binary_results.txt
173242 fi
174243 echo "\`\`\`"
175244 echo ""
245+
246+ # Whitelist summary
247+ if [ -f binary_whitelist.txt ]; then
248+ echo "#### Whitelisted Binary Files"
249+ echo "The following binary files are approved for testing purposes:"
250+ echo "You can see [README.apache.md](https://github.com/apache/cloudberry/blob/main/README.apache.md) for details."
251+ echo "\`\`\`"
252+ cat binary_whitelist.txt | sed 's/Whitelisted: //'
253+ echo "\`\`\`"
254+ echo ""
255+ fi
176256
177257 if [[ -f rat-output.log ]]; then
178258 # First extract and display summary statistics (only once)
179259 if grep -q "Rat check: Summary over all files" rat-output.log; then
180- echo "#### 📊 License Summary"
260+ echo "#### License Summary"
181261 summary_line=$(grep "Rat check: Summary over all files" rat-output.log)
182262 echo "\`\`\`"
183263 echo "$summary_line"
@@ -187,12 +267,12 @@ jobs:
187267
188268 # Then determine the result status
189269 if grep -q "\[INFO\] BUILD FAILURE" rat-output.log; then
190- echo "### ❌ Check Failed - License Compliance Issues Detected"
270+ echo "### Check Failed - License Compliance Issues Detected"
191271 echo ""
192272
193273 # Extract and display files with unapproved licenses
194274 if grep -q "Files with unapproved licenses:" rat-output.log; then
195- echo "#### 🚫 Files with Unapproved Licenses"
275+ echo "#### Files with Unapproved Licenses"
196276 echo "\`\`\`"
197277 # Get the line with "Files with unapproved licenses:" and all following lines until the dashed line
198278 sed -n '/Files with unapproved licenses:/,/\[INFO\] ------------------------------------------------------------------------/p' rat-output.log | \
@@ -203,7 +283,7 @@ jobs:
203283 echo ""
204284 fi
205285
206- echo "💡 **How to fix:**"
286+ echo "**How to fix:**"
207287 echo ""
208288 echo "**For new original files you created:**"
209289 echo "- Add the standard Apache License header to each file"
@@ -218,14 +298,14 @@ jobs:
218298 echo "- Email dev@cloudberry.apache.org if you have questions about license compatibility"
219299
220300 elif grep -q "\[INFO\] BUILD SUCCESS" rat-output.log; then
221- echo "### ✅ Check Passed - All Files Comply with Apache License Requirements"
301+ echo "### Check Passed - All Files Comply with Apache License Requirements"
222302
223303 else
224- echo "### ⚠️ Indeterminate Result"
304+ echo "### Indeterminate Result"
225305 echo "Check the uploaded log file for details."
226306 fi
227307 else
228- echo "### ⚠️ No Output Log Found"
308+ echo "### No Output Log Found"
229309 echo "The rat-output.log file was not generated."
230310 fi
231311 } >> "$GITHUB_STEP_SUMMARY"
0 commit comments