|
15 | 15 | import argparse |
16 | 16 | import fnmatch |
17 | 17 | import io |
| 18 | +import re |
18 | 19 | import subprocess |
| 20 | +import tarfile |
19 | 21 | import tempfile |
20 | 22 | import xml.etree.ElementTree as ET |
21 | 23 | from pathlib import Path |
@@ -81,6 +83,18 @@ def main(): |
81 | 83 | if line and not line.startswith("#"): |
82 | 84 | exclusions.append(line) |
83 | 85 |
|
| 86 | + # ------------------------------------------------------------ |
| 87 | + # Load the file listing files imported from Apache repos |
| 88 | + # ------------------------------------------------------------ |
| 89 | + apache_file = root / ".rat-apache" |
| 90 | + needs_apache_header = set() |
| 91 | + if apache_file.is_file(): |
| 92 | + with apache_file.open("r") as f: |
| 93 | + for line in f: |
| 94 | + line = line.strip() |
| 95 | + if line and not line.startswith("#"): |
| 96 | + needs_apache_header.add(line) |
| 97 | + |
84 | 98 | with tempfile.TemporaryDirectory() as scratch: |
85 | 99 | scratch = Path(scratch).resolve() |
86 | 100 | archive = scratch / "rat.tar" |
@@ -122,4 +136,61 @@ def main(): |
122 | 136 | unapproved += 1 |
123 | 137 | print("-", filename) |
124 | 138 |
|
| 139 | + missing_copyright = [] |
| 140 | + missing_apache_header = [] |
| 141 | + should_not_have_apache_header = [] |
| 142 | + copyright_re = re.compile(r"Copyright \(c\) [0-9]{4} ADBC Drivers Contributors") |
| 143 | + header_re = re.compile( |
| 144 | + r"This file has been modified from its original version, which is under the Apache License: Licensed to the Apache Software Foundation" |
| 145 | + ) |
| 146 | + sep_re = re.compile(r"[^a-zA-Z0-9,:()]+") |
| 147 | + with tarfile.open(archive, "r") as tar: |
| 148 | + for member in tar.getmembers(): |
| 149 | + if not member.isfile(): |
| 150 | + continue |
| 151 | + |
| 152 | + with tar.extractfile(member) as f: |
| 153 | + lines = [] |
| 154 | + for _ in range(20): |
| 155 | + lines.append(f.readline()) |
| 156 | + |
| 157 | + content = b" ".join(lines).decode("utf-8") |
| 158 | + content = sep_re.sub(" ", content) |
| 159 | + |
| 160 | + if not copyright_re.search(content): |
| 161 | + if ( |
| 162 | + not member.name.endswith("LICENSE.txt") |
| 163 | + and not member.name.endswith("NOTICE.txt") |
| 164 | + and not any( |
| 165 | + fnmatch.fnmatch(member.name, exclusion) |
| 166 | + for exclusion in exclusions |
| 167 | + ) |
| 168 | + ): |
| 169 | + missing_copyright.append(member.name) |
| 170 | + |
| 171 | + if member.name in needs_apache_header: |
| 172 | + if not header_re.search(content): |
| 173 | + missing_apache_header.append(member.name) |
| 174 | + elif header_re.search(content): |
| 175 | + should_not_have_apache_header.append(member.name) |
| 176 | + |
| 177 | + if missing_copyright: |
| 178 | + print("Files missing ADBC Drivers Contributors copyright header:") |
| 179 | + for name in missing_copyright: |
| 180 | + print("-", name) |
| 181 | + |
| 182 | + if missing_apache_header: |
| 183 | + print("Files missing 'This file has been modified' header:") |
| 184 | + for name in missing_apache_header: |
| 185 | + print("-", name) |
| 186 | + |
| 187 | + if should_not_have_apache_header: |
| 188 | + print("Files that should not have 'This file has been modified' header:") |
| 189 | + for name in should_not_have_apache_header: |
| 190 | + print("-", name) |
| 191 | + |
| 192 | + unapproved += len(missing_copyright) |
| 193 | + unapproved += len(missing_apache_header) |
| 194 | + unapproved += len(should_not_have_apache_header) |
| 195 | + |
125 | 196 | return unapproved |
0 commit comments