Skip to content

Commit 3bedc08

Browse files
authored
LUCENE-9977: rat task corrections (proper up-to-date checks, cleanup and rewrite of the task itself). (#178)
1 parent 69ab144 commit 3bedc08

File tree

15 files changed

+359
-136
lines changed

15 files changed

+359
-136
lines changed

buildSrc/scriptDepVersions.gradle

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
// Declare script dependency versions outside of palantir's
219
// version unification control. These are not our main dependencies
320
// but are reused in buildSrc and across applied scripts.

dev-tools/scripts/README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
-->
17+
118
# Developer Scripts
219

320
This folder contains various useful scripts for developers, mostly related to

dev-tools/scripts/create_line_file_docs.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
# Licensed to the Apache Software Foundation (ASF) under one or more
4+
# contributor license agreements. See the NOTICE file distributed with
5+
# this work for additional information regarding copyright ownership.
6+
# The ASF licenses this file to You under the Apache License, Version 2.0
7+
# (the "License"); you may not use this file except in compliance with
8+
# the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
118
import os
219
import gzip
320
import time

gradle/validation/rat-sources.gradle

Lines changed: 111 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import groovy.xml.NamespaceBuilder
1919

20+
// Configure rat dependencies for use in the custom task.
2021
configure(rootProject) {
2122
configurations {
2223
ratDeps
@@ -27,139 +28,126 @@ configure(rootProject) {
2728
}
2829
}
2930

31+
// Configure the rat validation task and all scanned directories.
3032
allprojects {
3133
task("rat", type: RatTask) {
3234
group = 'Verification'
3335
description = 'Runs Apache Rat checks.'
34-
}
35-
}
36-
37-
configure(rootProject) {
38-
rat {
39-
includes += [
40-
"buildSrc/**/*.java",
41-
"gradle/**/*.gradle",
42-
"lucene/tools/forbiddenApis/**",
43-
"lucene/tools/prettify/**",
44-
]
45-
excludes += [
46-
// Unclear if this needs ASF header, depends on how much was copied from ElasticSearch
47-
"**/ErrorReportingTestListener.java"
48-
]
49-
}
50-
}
51-
52-
configure(project(":lucene:analysis:common")) {
53-
rat {
54-
srcExcludes += [
55-
"**/*.aff",
56-
"**/*.dic",
57-
"**/*.wrong",
58-
"**/*.good",
59-
"**/*.sug",
60-
"**/charfilter/*.htm*",
61-
"**/*LuceneResourcesWikiPage.html"
62-
]
63-
}
64-
}
65-
66-
configure(project(":lucene:analysis:kuromoji")) {
67-
rat {
68-
srcExcludes += [
69-
// whether rat detects this as binary or not is platform dependent?!
70-
"**/bocchan.utf-8"
71-
]
72-
}
73-
}
7436

75-
configure(project(":lucene:analysis:opennlp")) {
76-
rat {
77-
excludes += [
78-
"src/tools/test-model-data/*.txt",
79-
]
80-
}
81-
}
82-
83-
configure(project(":lucene:highlighter")) {
84-
rat {
85-
srcExcludes += [
86-
"**/CambridgeMA.utf8"
87-
]
88-
}
89-
}
90-
91-
configure(project(":lucene:suggest")) {
92-
rat {
93-
srcExcludes += [
94-
"**/Top50KWiki.utf8",
95-
"**/stop-snowball.txt"
96-
]
37+
def defaultScanFileTree = project.fileTree(projectDir, {
38+
// Don't check under the project's build folder.
39+
exclude project.buildDir.name
40+
41+
// Exclude any generated stuff.
42+
exclude "src/generated"
43+
44+
// Don't check any of the subprojects - they have their own rat tasks.
45+
exclude subprojects.collect { it.projectDir.name }
46+
47+
// At the module scope we only check selected file patterns as folks have various .gitignore-d resources
48+
// generated by IDEs, etc.
49+
include "**/*.gradle"
50+
include "**/*.xml"
51+
include "**/*.md"
52+
include "**/*.py"
53+
include "**/*.sh"
54+
include "**/*.bat"
55+
56+
// Include selected patterns from any source folders. We could make this
57+
// relative to source sets but it seems to be of little value - all our source sets
58+
// live under 'src' anyway.
59+
include "src/**"
60+
exclude "src/**/*.png"
61+
exclude "src/**/*.txt"
62+
exclude "src/**/*.zip"
63+
exclude "src/**/*.properties"
64+
exclude "src/**/*.utf8"
65+
66+
// Conditionally apply module-specific patterns. We do it here instead
67+
// of reconfiguring each project because the provider can be made lazy
68+
// and it's easier to manage this way.
69+
switch (project.path) {
70+
case ":":
71+
include "gradlew"
72+
include "gradlew.bat"
73+
exclude ".gradle"
74+
exclude ".idea"
75+
exclude ".muse"
76+
exclude ".git"
77+
78+
// Exclude github stuff (templates, workflows).
79+
exclude ".github"
80+
81+
// The root project also includes patterns for the bootstrap (buildSrc) and composite
82+
// projects. Include their sources in the scan.
83+
include "buildSrc/src/**"
84+
include "dev-tools/missing-doclet/src/**"
85+
break
86+
87+
case ":lucene:analysis:morfologik":
88+
exclude "src/**/*.info"
89+
exclude "src/**/*.input"
90+
break
91+
92+
case ":lucene:analysis:opennlp":
93+
exclude "src/**/en-test-lemmas.dict"
94+
break
95+
96+
case ":lucene:test-framework":
97+
exclude "src/**/europarl.lines.txt.seek"
98+
break
99+
100+
case ":lucene:analysis:common":
101+
exclude "src/**/*.aff"
102+
exclude "src/**/*.dic"
103+
exclude "src/**/*.good"
104+
exclude "src/**/*.sug"
105+
exclude "src/**/*.wrong"
106+
exclude "src/**/charfilter/*.htm*"
107+
exclude "src/**/*LuceneResourcesWikiPage.html"
108+
exclude "src/**/*.rslp"
109+
break
110+
111+
case ":lucene:benchmark":
112+
exclude "data/"
113+
break
114+
}
115+
})
116+
inputFileTrees.add(defaultScanFileTree)
97117
}
98118
}
99119

100-
// Structure inspired by existing task from Apache Kafka, heavily modified since then.
120+
/**
121+
* An Apache RAT adapter that validates whether files contain acceptable licenses.
122+
*/
101123
class RatTask extends DefaultTask {
102-
@Input
103-
List<String> includes = [
104-
"*.gradle",
105-
"*.xml",
106-
"src/tools/**"
107-
]
108-
109-
@Input
110-
List<String> excludes = []
111-
112-
@Input
113-
List<String> srcExcludes = [
114-
"**/TODO",
115-
"**/*.txt",
116-
"**/*.md",
117-
"**/*.iml",
118-
"build/**"
119-
]
124+
@InputFiles
125+
ListProperty<ConfigurableFileTree> inputFileTrees = project.objects.listProperty(ConfigurableFileTree)
120126

121127
@OutputFile
122-
def xmlReport = new File(new File(project.buildDir, 'rat'), 'rat-report.xml')
128+
RegularFileProperty xmlReport = project.objects.fileProperty().convention(
129+
project.layout.buildDirectory.file("rat/rat-report.xml"))
123130

124-
def generateXmlReport() {
131+
def generateReport(File reportFile) {
132+
// Set up ant rat task.
125133
def uri = 'antlib:org.apache.rat.anttasks'
126134
def ratClasspath = project.rootProject.configurations.ratDeps.asPath
127135
ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', uri: uri, classpath: ratClasspath)
128-
129136
def rat = NamespaceBuilder.newInstance(ant, uri)
130-
rat.report(format: 'xml', reportFile: xmlReport, addDefaultLicenseMatchers: true) {
131-
ant.fileset(dir: "${project.projectDir}") {
132-
includes.each { pattern -> ant.include(name: pattern) }
133-
excludes.each { pattern -> ant.exclude(name: pattern) }
134-
}
135137

136-
if (project.plugins.findPlugin(JavaPlugin)) {
137-
def checkSets = [
138-
project.sourceSets.main.java.srcDirs,
139-
project.sourceSets.test.java.srcDirs,
140-
]
141-
142-
project.sourceSets.matching { it.name == 'tools' }.all {
143-
checkSets += project.sourceSets.tools.java.srcDirs
144-
}
145-
146-
checkSets.flatten().each { srcLocation ->
147-
ant.fileset(dir: srcLocation, erroronmissingdir: false) {
148-
srcExcludes.each { pattern -> ant.exclude(name: pattern) }
149-
}
150-
}
151-
152-
[
153-
project.sourceSets.main.resources.srcDirs
154-
].flatten().each { srcLocation ->
155-
ant.fileset(dir: srcLocation, erroronmissingdir: false) {
156-
ant.include(name: "META-INF/**")
157-
}
158-
}
138+
// Collect all input files for debugging.
139+
String inputFileList = inputFileTrees.get().collectMany { fileTree ->
140+
fileTree.asList()
141+
}.sort().join("\n")
142+
project.file(reportFile.path.replaceAll('.xml$', '-filelist.txt')).setText(inputFileList, "UTF-8")
143+
144+
// Run rat via ant.
145+
rat.report(format: 'xml', reportFile: reportFile, addDefaultLicenseMatchers: true) {
146+
// Pass all gradle file trees to the ant task (Gradle's internal adapters are used).
147+
inputFileTrees.get().each { fileTree ->
148+
fileTree.addToAntBuilder(ant, 'resources', FileCollection.AntType.ResourceCollection)
159149
}
160150

161-
// The license rules below were manually copied from lucene/common-build.xml, there is currently no mechanism to sync them
162-
163151
// BSD 4-clause stuff (is disallowed below)
164152
substringMatcher(licenseFamilyCategory: "BSD4 ", licenseFamilyName: "Original BSD License (with advertising clause)") {
165153
pattern(substring: "All advertising materials")
@@ -188,7 +176,7 @@ class RatTask extends DefaultTask {
188176
// ICU license
189177
pattern(substring: "Permission is hereby granted, free of charge, to any person obtaining a copy")
190178
// ui-grid
191-
pattern(substring: " ; License: MIT")
179+
pattern(substring: " ; License: MIT")
192180
}
193181

194182
// Apache
@@ -214,8 +202,8 @@ class RatTask extends DefaultTask {
214202
}
215203
}
216204

217-
def printUnknownFiles() {
218-
def ratXml = new XmlParser().parse(xmlReport)
205+
def printUnknownFiles(File reportFile) {
206+
def ratXml = new XmlParser().parse(reportFile)
219207
def errors = []
220208
ratXml.resource.each { resource ->
221209
if (resource.'license-approval'.@name[0] == "false") {
@@ -224,19 +212,20 @@ class RatTask extends DefaultTask {
224212
}
225213
if (errors) {
226214
throw new GradleException("Found " + errors.size() + " file(s) with errors:\n" +
227-
errors.collect{ msg -> " - ${msg}" }.join("\n"))
215+
errors.collect{ msg -> " - ${msg}" }.join("\n"))
228216
}
229217
}
230218

231219
@TaskAction
232-
def rat() {
220+
def execute() {
233221
def origEncoding = System.getProperty("file.encoding")
234222
try {
235-
generateXmlReport()
236-
printUnknownFiles()
223+
File reportFile = xmlReport.get().asFile
224+
generateReport(reportFile)
225+
printUnknownFiles(reportFile)
237226
} finally {
238227
if (System.getProperty("file.encoding") != origEncoding) {
239-
throw new GradleException("Insane: rat changed file.encoding to ${System.getProperty('file.encoding')}?")
228+
throw new GradleException("Something is wrong: Apache RAT changed file.encoding to ${System.getProperty('file.encoding')}?")
240229
}
241230
}
242231
}

lucene/JRE_VERSION_MIGRATION.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
-->
17+
118
# JRE Version Migration Guide
219

320
If possible, use the same JRE major version at both index and search time.

lucene/MIGRATE.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
-->
17+
118
# Apache Lucene Migration Guide
219

320
## NativeUnixDirectory removed and replaced by DirectIODirectory (LUCENE-8982)

0 commit comments

Comments
 (0)