Skip to content

Commit 8ba235d

Browse files
authored
feat: check headers of files imported from Apache repos (#14)
## What's Changed

adbc-drivers/bigquery#43 (comment)

Closes #13.
1 parent b877c65 commit 8ba235d

File tree

2 files changed

+87
-0
lines changed

2 files changed

+87
-0
lines changed

.rat-apache

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright (c) 2025 ADBC Drivers Contributors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
.github/workflows/dev_issues.yaml
16+
adbc_drivers_dev/title_check.py

adbc_drivers_dev/rat/cli.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
import argparse
1616
import fnmatch
1717
import io
18+
import re
1819
import subprocess
20+
import tarfile
1921
import tempfile
2022
import xml.etree.ElementTree as ET
2123
from pathlib import Path
@@ -81,6 +83,18 @@ def main():
8183
if line and not line.startswith("#"):
8284
exclusions.append(line)
8385

86+
# ------------------------------------------------------------
87+
# Load the file listing files imported from Apache repos
88+
# ------------------------------------------------------------
89+
apache_file = root / ".rat-apache"
90+
needs_apache_header = set()
91+
if apache_file.is_file():
92+
with apache_file.open("r") as f:
93+
for line in f:
94+
line = line.strip()
95+
if line and not line.startswith("#"):
96+
needs_apache_header.add(line)
97+
8498
with tempfile.TemporaryDirectory() as scratch:
8599
scratch = Path(scratch).resolve()
86100
archive = scratch / "rat.tar"
@@ -122,4 +136,61 @@ def main():
122136
unapproved += 1
123137
print("-", filename)
124138

139+
missing_copyright = []
140+
missing_apache_header = []
141+
should_not_have_apache_header = []
142+
copyright_re = re.compile(r"Copyright \(c\) [0-9]{4} ADBC Drivers Contributors")
143+
header_re = re.compile(
144+
r"This file has been modified from its original version, which is under the Apache License: Licensed to the Apache Software Foundation"
145+
)
146+
sep_re = re.compile(r"[^a-zA-Z0-9,:()]+")
147+
with tarfile.open(archive, "r") as tar:
148+
for member in tar.getmembers():
149+
if not member.isfile():
150+
continue
151+
152+
with tar.extractfile(member) as f:
153+
lines = []
154+
for _ in range(20):
155+
lines.append(f.readline())
156+
157+
content = b" ".join(lines).decode("utf-8")
158+
content = sep_re.sub(" ", content)
159+
160+
if not copyright_re.search(content):
161+
if (
162+
not member.name.endswith("LICENSE.txt")
163+
and not member.name.endswith("NOTICE.txt")
164+
and not any(
165+
fnmatch.fnmatch(member.name, exclusion)
166+
for exclusion in exclusions
167+
)
168+
):
169+
missing_copyright.append(member.name)
170+
171+
if member.name in needs_apache_header:
172+
if not header_re.search(content):
173+
missing_apache_header.append(member.name)
174+
elif header_re.search(content):
175+
should_not_have_apache_header.append(member.name)
176+
177+
if missing_copyright:
178+
print("Files missing ADBC Drivers Contributors copyright header:")
179+
for name in missing_copyright:
180+
print("-", name)
181+
182+
if missing_apache_header:
183+
print("Files missing 'This file has been modified' header:")
184+
for name in missing_apache_header:
185+
print("-", name)
186+
187+
if should_not_have_apache_header:
188+
print("Files that should not have 'This file has been modified' header:")
189+
for name in should_not_have_apache_header:
190+
print("-", name)
191+
192+
unapproved += len(missing_copyright)
193+
unapproved += len(missing_apache_header)
194+
unapproved += len(should_not_have_apache_header)
195+
125196
return unapproved

0 commit comments

Comments
 (0)