Skip to content

Commit bae8e92

Browse files
authored
Add workflow to automate copyright maintenance (#522)
- add maintain_copyrights.py script - add maintain-copyrights.yml workflow
1 parent 5e9d9c5 commit bae8e92

File tree

2 files changed

+312
-0
lines changed

2 files changed

+312
-0
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Runs scripts/maintain_copyrights.py to bring copyright end years up to date,
# uploads the staged changes as a patch artifact, and opens a pull request
# against `develop` when at least one year was updated.
name: Maintain Copyright Date

on:
  schedule:
    # 14:00 UTC on January 1st of every year.
    - cron: '0 14 1 1 *'
  workflow_dispatch:
    inputs:
      year:
        description: 'Year for copyright update (optional)'
        required: false
        type: string

permissions:
  contents: write
  pull-requests: write

jobs:
  maintain-copyrights:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      - name: Update Copyright Dates
        id: update-copyright-dates
        env:
          # Hand the user-supplied value over through the environment instead
          # of interpolating it into the script text, so it can never be
          # parsed as shell code.
          REQUESTED_YEAR: ${{ inputs.year }}
        run: |
          # Resolve the target year once so the script, the commit message and
          # the PR title all agree. Scheduled runs have no input and fall back
          # to the current year.
          YEAR="${REQUESTED_YEAR:-$(date +%Y)}"
          if ! [[ "$YEAR" =~ ^[0-9]{4}$ ]]; then
            echo "::error::The 'year' input must be a four-digit number."
            exit 1
          fi
          OUTPUT=$(python scripts/maintain_copyrights.py -uaw -y "$YEAR")
          echo "$OUTPUT" >> "$GITHUB_STEP_SUMMARY"
          OUTDATED=$(echo "$OUTPUT" | sed -n 's/^Total outdated copyright years: \([0-9]*\).*/\1/p')
          UPDATED=$(echo "$OUTPUT" | sed -n 's/^Total updated copyright years: \([0-9]*\).*/\1/p')
          WARNINGS=$(echo "$OUTPUT" | sed -n 's/^Total warnings: \([0-9]*\).*/\1/p')
          # sed exits 0 even when nothing matches, so a missing total is
          # defaulted here rather than with `|| echo 0`.
          echo "year=$YEAR" >> "$GITHUB_OUTPUT"
          echo "outdated_count=${OUTDATED:-0}" >> "$GITHUB_OUTPUT"
          echo "updated_count=${UPDATED:-0}" >> "$GITHUB_OUTPUT"
          echo "warning_count=${WARNINGS:-0}" >> "$GITHUB_OUTPUT"
      - name: Generate Patch
        # The script was run with -a, so the updates are staged in the index.
        run: git diff --cached > "${{ runner.temp }}/copyright_update.patch"
      - name: Upload Patch
        if: steps.update-copyright-dates.outputs.updated_count > 0
        uses: actions/upload-artifact@v4
        with:
          name: esmf_copyright_update
          path: ${{ runner.temp }}/copyright_update.patch
      - name: Create Branch and Commit
        if: steps.update-copyright-dates.outputs.updated_count > 0
        run: |
          git config user.name "esmf-actions[bot]"
          git config user.email "esmf_support@ucar.edu"
          git checkout -b esmf-bot/copyright/${{ github.run_id }}
          git commit -m "Update copyright year to ${{ steps.update-copyright-dates.outputs.year }}"
          git push origin esmf-bot/copyright/${{ github.run_id }}
      - name: Create Pull Request
        if: steps.update-copyright-dates.outputs.updated_count > 0
        run: |
          cat <<EOF > "${{ runner.temp }}/pr_body.md"
          This PR was created automatically by
          [${{ github.workflow }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/workflow)

          See warnings and details in
          [GitHub Actions Run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})

          Outdated copyrights: ${{ steps.update-copyright-dates.outputs.outdated_count }}
          Updated copyrights: ${{ steps.update-copyright-dates.outputs.updated_count }}
          Warnings: ${{ steps.update-copyright-dates.outputs.warning_count }}
          EOF
          gh pr create \
            --base develop \
            --head esmf-bot/copyright/${{ github.run_id }} \
            --title "Update copyright year to ${{ steps.update-copyright-dates.outputs.year }}" \
            --reviewer @esmf-org/esmf-reviewers \
            --body-file "${{ runner.temp }}/pr_body.md"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

scripts/maintain_copyrights.py

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import re
4+
import argparse
5+
from datetime import datetime
6+
7+
# Expected notice format: "Copyright (c) YYYY-YYYY", an optional comma, then
# the UCAR name. A dated line that does not match this gets a format warning.
FORMAT_REGEX = re.compile(
    r"Copyright \(c\) (\d{4})-(\d{4})(?:,)? "
    r"University Corporation for Atmospheric Research"
)
# The word "copyright" in any letter case.
COPYRIGHT_REGEX = re.compile(r"(?i)copyright")
# A year or year range, optionally introduced by "(c)", "=" or "= u'".
# Group 1 is the first year, group 2 the end year of a range (if any).
DATE_REGEX = re.compile(r"\s*(?:\(c\)\s*|=\s*(?:u\')?)?(\d{4})(?:-(\d{4}))?")
# Same as DATE_REGEX but anchored at the start of a line, allowing one leading
# comment marker (!, %, // or *). Used for a date on the line after the notice.
NLDATE_REGEX = re.compile(r"^\s*(?:\!|\%|\/\/|\*)?\s*(?:\(c\)\s*|=\s*(?:u\')?|u\')?(\d{4})(?:-(\d{4}))?")

# Root directories excluded from the scan
SKIP_ROOTDIRS = ['.git', 'obj', 'mod', 'lib']

# File extensions excluded from the scan
SKIP_EXTENSIONS = ['.vsd', '.eps']

# Directory trees excluded from the scan (relative to the repository root)
SKIP_DIRS = [
    os.path.normpath(skip_dir)
    for skip_dir in (
        'src/Infrastructure/Mesh/src/Zoltan',
        'src/Infrastructure/Mesh/src/Lapack',
        'src/Infrastructure/Mesh/src/Moab',
        'src/Infrastructure/Base/include/nlohmann',
        'src/prologue/yaml-cpp',
        'src/Infrastructure/IO/PIO',
        'src/Infrastructure/Mesh/include/sacado',
        'src/addon/esmpy/src/esmpy/util/enum',
    )
]

# Individual files excluded from the scan (relative to the repository root)
SKIP_FILES = [
    os.path.normpath(skip_file)
    for skip_file in (
        'scripts/maintain_copyrights.py',
        'src/doc/ESMF_quickstart.tex',
        'src/Infrastructure/Trace/include/ESMCI_HashNode.h',
        'src/Infrastructure/Trace/include/ESMCI_HashMap.h',
        'src/Infrastructure/Trace/include/ESMCI_KeyHash.h',
        'src/Infrastructure/Trace/include/esmftrc.h',
        'src/Infrastructure/Trace/include/metadata',
        'src/Infrastructure/Trace/include/esmftrc-bitfield.h',
        'src/Infrastructure/Trace/src/esmftrc.c',
        'src/Infrastructure/Util/include/dmp_diff.hpp',
    )
]

# Copyright lines containing any of these strings are left untouched
SKIP_MATCHES = [
    "HashMap Development Team",
    "Philippe Proulx",
    "Mathieu Desnoyers",
    "The diff-match-patch Authors",
    "Victor Grishchenko",
]
53+
54+
def print_exclusions():
55+
"""
56+
Print the lists of excluded file extensions, directories, files, and line
57+
matches.
58+
"""
59+
print("### Exclusions")
60+
print("\nExclude file extensions:")
61+
for d in SKIP_EXTENSIONS:
62+
print("- " + d)
63+
print("\nExclude directories:")
64+
for d in SKIP_DIRS:
65+
print("- " + d)
66+
print("\nExclude files:")
67+
for f in SKIP_FILES:
68+
print("- " + f)
69+
print("\nExclude lines matching:")
70+
for m in SKIP_MATCHES:
71+
print("- " + m)
72+
print("")
73+
74+
def find_outdated_copyrights(search_year, loutdated=False):
    """
    Find all copyright years in the codebase that differ from the given
    search year.

    Walks the current working directory, skipping the configured
    directories, file extensions and files, and inspects every line that
    contains the word "copyright" (any letter case) unless the line contains
    one of SKIP_MATCHES. The year is read from that line or, when the line
    carries no year, from the start of the following line. Files that cannot
    be decoded as UTF-8 are skipped.

    Parameters:
        search_year (int): year every copyright notice is expected to end in.
        loutdated (bool): if True, print the findings as a markdown table.

    Returns:
        list of (path, line, pos, year, fwarn) tuples, where line and pos are
        the 1-based line number and column of the end year, year is the end
        year that was found, and fwarn is True when the dated line does not
        match FORMAT_REGEX.
    """
    outdated = []
    for root, dirs, files in os.walk('.'):
        rel_root = os.path.normpath(os.path.relpath(root, '.'))
        # Prune in place so os.walk never descends into excluded trees.
        # Names are compared whole: '.git' must not also exclude '.github',
        # nor 'mod' a directory called 'models'.
        dirs[:] = [
            d for d in dirs
            if d not in SKIP_ROOTDIRS
            and not _is_skipped_dir(os.path.normpath(os.path.join(rel_root, d)))
        ]
        for fname in files:
            # Skip files with extensions in SKIP_EXTENSIONS
            if fname.lower().endswith(tuple(SKIP_EXTENSIONS)):
                continue
            path = os.path.join(root, fname)
            # Skip specific files
            rel_path = os.path.normpath(os.path.relpath(path, '.'))
            if rel_path in SKIP_FILES:
                continue
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    lines = f.readlines()
            except UnicodeDecodeError:
                # Skip files that cannot be decoded as UTF-8
                continue
            for index, line in enumerate(lines):
                cmatch = COPYRIGHT_REGEX.search(line)
                if not cmatch:
                    continue
                # Skip lines matching SKIP_MATCHES
                if any(skip_str in line for skip_str in SKIP_MATCHES):
                    continue
                match = DATE_REGEX.search(line, pos=cmatch.end())
                if match:
                    lineno, text = index + 1, line
                else:
                    # No year after "copyright": try the start of the next line.
                    text = lines[index + 1] if index + 1 < len(lines) else ""
                    match = NLDATE_REGEX.search(text)
                    if not match:
                        continue
                    lineno = index + 2
                # A single year counts as its own end year.
                eyear = int(match.group(2) or match.group(1))
                if eyear != search_year:
                    # The match ends right after the 4-digit end year, so
                    # end()-4 is its 0-based start and end()-3 the 1-based column.
                    pos = match.end() - 3
                    fwarn = not FORMAT_REGEX.search(text)
                    outdated.append((path, lineno, pos, eyear, fwarn))
    if loutdated:
        print_outdated_copyrights(outdated)
    return outdated


def _is_skipped_dir(rel_dir):
    """Return True if rel_dir is one of SKIP_DIRS or lies beneath one."""
    return any(
        rel_dir == skip_dir or rel_dir.startswith(skip_dir + os.sep)
        for skip_dir in SKIP_DIRS
    )
131+
132+
def print_outdated_copyrights(outdated_list):
    """
    Print the outdated copyright years as a markdown table.

    Each entry of outdated_list is a (file, line, pos, year, fwarn) tuple;
    rows whose fwarn is true are flagged with "!" in the warning column.
    Nothing is printed for an empty list.
    """
    if not outdated_list:
        return
    output = [
        "### Outdated Copyright Years",
        "\n| file:line:position | year | warning |",
        "|--------------------|------|---------|",
    ]
    for file, line, pos, year, fwarn in outdated_list:
        marker = "! |" if fwarn else "|"
        output.append(f"| {file}:{line}:{pos} | {year} | {marker}")
    # Empty final element yields the blank line that closes the table.
    output.append("")
    print("\n".join(output))
146+
147+
def format_warnings(outdated_list):
    """
    Collect and print the entries whose copyright line may not be in the
    expected format.

    Each entry of outdated_list is a (file, line, pos, year, fwarn) tuple.
    Entries with a true fwarn are printed as a markdown table and returned
    as a list of (file, line, pos, year) tuples; the list is empty when
    there is nothing to warn about.
    """
    fwarnings = [
        (file, line, pos, year)
        for file, line, pos, year, fwarn in (outdated_list or [])
        if fwarn
    ]
    if not fwarnings:
        return fwarnings
    print("### Format Warnings")
    # The two spaces before the newline force a markdown line break.
    print("\nCheck Copyright Format: "
          " \n Copyright (c) YYYY-YYYY, University Corporation for"
          " Atmospheric Research")
    print("\n| file:line:position | year | warning |")
    print("|--------------------|------|---------|")
    for file, line, pos, year in fwarnings:
        print(f"|{file}:{line}:{pos} | {year} | ! |")
    print("")
    return fwarnings
167+
168+
def update_copyrights(update_list, new_year, add=False, lupdated=False):
    """
    For each copyright year in update list, replace the old
    copyright year with the new copyright year.

    Parameters:
        update_list: iterable of (file, line, pos, year, fwarn) tuples, where
            line and pos are the 1-based line number and column of the year
            to replace and year is the value currently found there.
        new_year (int): year written in place of the old one.
        add (bool): if True, run `git add` on every file that was rewritten.
        lupdated (bool): if True, print the updates as a markdown table.

    Returns:
        list of (file, line, pos, year, new_year, fwarn) tuples, one per
        entry of update_list, in the same order.
    """
    import subprocess
    import sys

    updated = []
    for file, line, pos, year, fwarn in update_list or []:
        # newline="" on both read and write keeps the file's own line endings
        # byte-for-byte; UTF-8 matches the encoding the scanner reads with.
        with open(file, 'r', encoding='utf-8', newline="") as f:
            lines = f.readlines()
        oldline = lines[line-1]
        start = pos - 1
        lines[line-1] = (oldline[:start] + str(new_year)
                         + oldline[start + len(str(year)):])
        with open(file, 'w', encoding='utf-8', newline="") as f:
            f.writelines(lines)
        updated.append((file, line, pos, year, new_year, fwarn))
        if add:
            # Argument list (no shell) so paths with spaces or shell
            # metacharacters are staged correctly; "--" ends option parsing.
            try:
                staged = subprocess.run(["git", "add", "--", file],
                                        check=False).returncode == 0
            except OSError:
                staged = False
            if not staged:
                # Staging stays best-effort; report on stderr so the totals
                # printed on stdout remain parseable.
                print(f"warning: git add failed for {file}", file=sys.stderr)
    if lupdated:
        print_updated_copyrights(updated)
    return updated
189+
190+
def print_updated_copyrights(updated_list):
    """
    Print the updated copyright years as a markdown table.

    Each entry of updated_list is a (file, line, pos, old year, new year,
    warning flag) tuple; rows with a true flag get "!" in the warning
    column. Nothing is printed for an empty list.
    """
    if not updated_list:
        return
    output = [
        "### Updated Copyright Years",
        "\n| file:line:position | old year | new year | warning |",
        "|--------------------|----------|----------|---------|",
    ]
    for file, line, pos, oyear, nyear, ewarn in updated_list:
        marker = "! |" if ewarn else "|"
        output.append(f"{file}:{line}:{pos} | {oyear} | {nyear} | {marker}")
    # Empty final element yields the blank line that closes the table.
    output.append("")
    print("\n".join(output))
204+
205+
if __name__ == "__main__":
    # Command-line entry point: scan for outdated years, optionally update
    # them, then print the totals.
    parser = argparse.ArgumentParser(description="check and update copyright years.")
    parser.add_argument('-y', '--year', type=int, default=datetime.now().year, help='year to check copyright against (default: current year)')
    # The remaining options are plain on/off switches, registered in the
    # order they should appear in --help.
    switches = (
        ('-l', '--list', 'list outdated copyright years'),
        ('-w', '--warnings', 'list warnings for updating copyright years'),
        ('-u', '--update', 'update outdated copyright years'),
        ('-a', '--add', 'run git add on updated files'),
        ('-x', '--exclusions', 'list excluded extensions, directories, files, and patterns'),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_argument(short_flag, long_flag, action='store_true', default=False, help=help_text)

    args = parser.parse_args()

    if args.exclusions:
        print_exclusions()

    # When updating, the "updated" table replaces the "outdated" listing.
    list_outdated = args.list and not args.update
    outdated = find_outdated_copyrights(args.year, list_outdated)
    updated = update_copyrights(outdated, args.year, args.add, args.list) if args.update else []
    fwarnings = format_warnings(outdated) if args.warnings else []

    print(f"Total outdated copyright years: {len(outdated)}")
    if args.update:
        print(f"Total updated copyright years: {len(updated)}")
    if args.warnings:
        print(f"Total warnings: {len(fwarnings)}")

0 commit comments

Comments
 (0)