Skip to content

Commit bd8f182

Browse files
authored
LUCENE-9933: Add non-file properties to wrapped regenerate checksums (#95)
1 parent 936b345 commit bd8f182

File tree

16 files changed

+23682
-23758
lines changed

16 files changed

+23682
-23758
lines changed

gradle/generation/icu.gradle

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ configure(project(":lucene:analysis:icu")) {
6767
icupkg = file("${icuBinDir}/icupkg")
6868
}
6969

70+
// Resolve version lazily (can't resolve at configuration time).
71+
def icu4jVersionProvider = project.provider { getVersion('com.ibm.icu', 'icu4j') }
72+
// Lazy GString: the ICU version is resolved only when the string is evaluated.
73+
def icu4jVersion = "${-> icu4jVersionProvider.get()}"
74+
7075
def icuCompileTask = Os.isFamily(Os.FAMILY_WINDOWS) ? "compileIcuWindows" : "compileIcuLinux"
7176

7277
task genUtr30DataFilesInternal() {
@@ -80,19 +85,19 @@ configure(project(":lucene:analysis:icu")) {
8085
def outputFile = file("src/resources/org/apache/lucene/analysis/icu/utr30.nrm")
8186

8287
inputs.files inputFiles
88+
inputs.property "icu4j", icu4jVersionProvider
8389
outputs.file outputFile
8490

8591
doFirst {
8692
// all these steps must be done sequentially: it's a pipeline resulting in utr30.nrm
87-
def v = getVersion('com.ibm.icu', 'icu4j');
8893
project.javaexec {
8994
main = "org.apache.lucene.analysis.icu.GenerateUTR30DataFiles"
9095
classpath = sourceSets.tools.runtimeClasspath
9196

9297
ignoreExitValue false
9398
workingDir utr30DataDir
9499
args = [
95-
"release-${v.replace(".", "-")}"
100+
"release-${icu4jVersion.replace(".", "-")}"
96101
]
97102
}
98103

@@ -127,6 +132,7 @@ configure(project(":lucene:analysis:icu")) {
127132
def targetDir = file("src/resources/org/apache/lucene/analysis/icu/segmentation")
128133

129134
inputs.files fileTree(dir: sourceDir, include: "*.rbbi")
135+
inputs.property "icu4j", icu4jVersionProvider
130136
outputs.files fileTree(dir: targetDir, include: "*.brk")
131137

132138
doFirst {
@@ -146,8 +152,7 @@ configure(project(":lucene:analysis:icu")) {
146152

147153
task compileIcuWindows() {
148154
doFirst {
149-
def v = getVersion('com.ibm.icu', 'icu4j');
150-
155+
def v = icu4jVersion
151156
def icuBinZip = file("${icuBuildDir}/icu4c-${v.replace(".", "_")}.zip")
152157
if (!icuBinZip.exists()) {
153158
icuBuildDir.mkdirs()
@@ -175,7 +180,7 @@ configure(project(":lucene:analysis:icu")) {
175180
throw new GradleException("ICU compilation not supported on Windows.")
176181
}
177182

178-
def v = getVersion('com.ibm.icu', 'icu4j');
183+
def v = icu4jVersion
179184
def icuSrcTgz = file("${icuBuildDir}/icu4c-${v.replace(".", "_")}-src.tgz")
180185

181186
// Download sources for version matching icu4j version in version.props
@@ -244,6 +249,8 @@ configure(project(":lucene:analysis:common")) {
244249
group "generation"
245250

246251
dependsOn icuConfig
252+
253+
inputs.property "icuConfig", icuConfig.name
247254
outputs.file outputFile
248255

249256
doFirst {
@@ -274,6 +281,8 @@ configure(project(":lucene:core")) {
274281
group "generation"
275282

276283
dependsOn icuConfig
284+
285+
inputs.property "icuConfig", icuConfig.name
277286
outputs.file outputFile
278287

279288
doFirst {

gradle/generation/jflex.gradle

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import java.nio.file.Files
2+
13
/*
24
* Licensed to the Apache Software Foundation (ASF) under one or more
35
* contributor license agreements. See the NOTICE file distributed with
@@ -63,22 +65,44 @@ configure(project(":lucene:analysis:common")) {
6365

6466
dependsOn { sourceSets.tools.runtimeClasspath }
6567

68+
inputs.property "tldZones", tldZones
6669
outputs.files jflexMacro, tldList
6770

6871
doFirst {
72+
File tmpJflexMacro = File.createTempFile(jflexMacro.getName(), ".tmp", getTemporaryDir())
73+
File tmpTldList = File.createTempFile(tldList.getName(), ".tmp", getTemporaryDir())
74+
6975
project.javaexec {
7076
main = "org.apache.lucene.analysis.standard.GenerateJflexTLDMacros"
7177
classpath = sourceSets.tools.runtimeClasspath
7278

7379
ignoreExitValue false
7480
args = [
7581
tldZones,
76-
jflexMacro,
77-
tldList
82+
tmpJflexMacro,
83+
tmpTldList
7884
]
7985
}
8086

81-
logger.lifecycle("You've regenerated the TLD include file, remember to regenerate UAX29URLEmailTokenizerImpl too.")
87+
// LUCENE-9926: tldZones is regenerated daily. Compare the generated content (excluding comments) so that
88+
// we only update actual output files if non-comments have changed.
89+
def contentLines = { File file ->
90+
if (file.exists()) {
91+
List<String> lines = file.readLines("UTF-8")
92+
lines.removeIf { line -> line.isBlank() || line.startsWith("//") }
93+
return lines
94+
} else {
95+
return []
96+
}
97+
}
98+
99+
if (contentLines(tmpTldList).equals(contentLines(tldList))) {
100+
logger.lifecycle("Generated TLD content identical as before, not updating.")
101+
} else {
102+
tldList.setBytes tmpTldList.bytes
103+
jflexMacro.setBytes tmpJflexMacro.bytes
104+
logger.lifecycle("You've regenerated the TLD include file, remember to regenerate UAX29URLEmailTokenizerImpl too.")
105+
}
82106
}
83107
}
84108

gradle/generation/regenerate.gradle

Lines changed: 54 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,47 @@ import org.apache.commons.codec.digest.DigestUtils
2121

2222
// Create common 'regenerate' task sub-tasks can hook into.
2323

24+
/**
25+
* Compute all "checksummed" key-value pairs.
26+
*/
27+
def computeChecksummedEntries = { Task sourceTask ->
28+
// A flat, ordered map of key-value pairs.
29+
Map<String, String> allEntries = new TreeMap<>()
30+
31+
// Make sure all input properties are either simple strings
32+
// or closures returning simple strings.
33+
//
34+
// Don't overcomplicate things with other serializable types.
35+
Map<String, Object> props = sourceTask.inputs.properties
36+
props.forEach { key, val ->
37+
// Handle closures and other lazy providers.
38+
if (val instanceof Provider<?>) {
39+
val = val.get()
40+
}
41+
if (val instanceof Closure<?>) {
42+
val = val.call()
43+
}
44+
45+
if (!(val instanceof String)) {
46+
throw new GradleException("Input properties of wrapped tasks must all be " +
47+
"strings: ${key} in ${sourceTask.name} is not.")
48+
}
49+
allEntries.put("property:" + key, (String) val)
50+
}
51+
52+
// Collect all of the task's input and output files and compute their checksums.
53+
FileCollection allFiles = sourceTask.inputs.files + sourceTask.outputs.files
54+
55+
// Compute checksums for root-project relative paths
56+
allFiles.files.forEach { file ->
57+
allEntries.put(
58+
sourceTask.project.rootDir.relativePath(file),
59+
file.exists() ? new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim() : "--")
60+
}
61+
62+
return allEntries
63+
}
64+
2465
configure([
2566
project(":lucene:analysis:common"),
2667
project(":lucene:core"),
@@ -86,28 +127,18 @@ configure([
86127
}
87128

88129
doFirst {
89-
// Collect all of task inputs/ outputs.
90-
FileCollection allFiles = sourceTask.inputs.files + sourceTask.outputs.files
91-
ext.allFiles = allFiles
92-
93-
// Compute checksums for root-project relative paths
94-
Map<String, String> actualChecksums = allFiles.files.collectEntries { file ->
95-
[
96-
sourceTask.project.rootDir.relativePath(file),
97-
file.exists() ? new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim() : "--"
98-
]
99-
}
100-
ext.actualChecksums = actualChecksums
130+
// Current persisted task input/outputs (file checksums, properties)
131+
ext.currentChecksums = computeChecksummedEntries(sourceTask)
101132

102133
// Load any previously written checksums
134+
ext.savedChecksums = new TreeMap<>()
103135
ext.checksumsFile = project.file("src/generated/checksums/${sourceTaskName}.json")
104-
Map<String, String> savedChecksums = [:]
105136
if (checksumsFile.exists()) {
106-
savedChecksums = new JsonSlurper().parse(checksumsFile) as Map
137+
savedChecksums.putAll(new JsonSlurper().parse(checksumsFile) as Map)
107138
}
108-
ext.savedChecksums = savedChecksums
109139

110-
ext.checksumMatch = (savedChecksums.equals(actualChecksums))
140+
// Compare saved and current checksums for subsequent tasks.
141+
ext.checksumMatch = (savedChecksums.equals(currentChecksums))
111142
}
112143
})
113144

@@ -117,16 +148,16 @@ configure([
117148
doFirst {
118149
if (!checksumLoadTask.checksumMatch) {
119150
// This can be made prettier but leave it verbose for now:
120-
Map<String, String> actual = checksumLoadTask.actualChecksums
151+
Map<String, String> current = checksumLoadTask.currentChecksums
121152
Map<String, String> expected = checksumLoadTask.savedChecksums
122153

123-
def same = actual.intersect(expected)
124-
actual = actual - same
154+
def same = current.intersect(expected)
155+
current = current - same
125156
expected = expected - same
126157

127158
throw new GradleException("Checksums mismatch for derived resources; you might have" +
128159
" modified a generated resource (regenerate task: ${sourceTask.name}):\n" +
129-
"Actual:\n ${actual.entrySet().join('\n ')}\n\n" +
160+
"Current:\n ${current.entrySet().join('\n ')}\n\n" +
130161
"Expected:\n ${expected.entrySet().join('\n ')}"
131162
)
132163
}
@@ -141,16 +172,10 @@ configure([
141172
File checksumsFile = checksumLoadTask.ext.checksumsFile
142173
checksumsFile.parentFile.mkdirs()
143174

144-
// Recompute checksums for root-project relative paths
145-
Map<String, String> actualChecksums = checksumLoadTask.ext.allFiles.files.collectEntries { file ->
146-
[
147-
sourceTask.project.rootDir.relativePath(file),
148-
new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim()
149-
]
150-
}
151-
175+
// Recompute checksums after the task has completed and write them.
176+
def updatedChecksums = computeChecksummedEntries(sourceTask)
152177
checksumsFile.setText(
153-
JsonOutput.prettyPrint(JsonOutput.toJson(new TreeMap<String, String>(actualChecksums))), "UTF-8")
178+
JsonOutput.prettyPrint(JsonOutput.toJson(new TreeMap<String, String>(updatedChecksums))), "UTF-8")
154179

155180
logger.warn("Updated generated file checksums for task ${sourceTask.path}.")
156181
}

help/regeneration.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,7 @@ Finally, if you do feel like force-regenerating everything, remember to exclude
146146
monster...
147147

148148
gradlew regenerate -x generateUAX29URLEmailTokenizerInternal --rerun-tasks
149+
150+
and on Windows, exclude snowball regeneration (requires bash):
151+
152+
gradlew regenerate -x generateUAX29URLEmailTokenizerInternal -x snowball --rerun-tasks
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
2-
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex": "521338e15fbd3fbdd2c1f8fd9c9fc365d4bcce9d",
3-
"lucene/analysis/common/src/test/org/apache/lucene/analysis/email/TLDs.txt": "1c5a201efff431be1c62150aa6bd3dac0f3a21e2"
2+
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex": "aae1ea12f09aa2efcf7611df2dd11cda32869cda",
3+
"lucene/analysis/common/src/test/org/apache/lucene/analysis/email/TLDs.txt": "54d9a32e6dbac42aee8b3aa0d1133ed2fb5f5259",
4+
"property:tldZones": "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
45
}
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"gradle/generation/jflex/skeleton.disable.buffer.expansion.txt": "68263ff0a014904c6e89b040d868d8f399408908",
3-
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex": "521338e15fbd3fbdd2c1f8fd9c9fc365d4bcce9d",
4-
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.java": "e437900d9570ca007f9c02c9ea286222b644c329",
3+
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex": "aae1ea12f09aa2efcf7611df2dd11cda32869cda",
4+
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.java": "d890462c065c2b66ce0e58d95fae64ecb64049d2",
55
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.jflex": "56a751d27e481fb55388f91ebf34f5a0cb8cb1b2"
66
}
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
{
2-
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "7d2cf5f959c2dfc5b83295e359212a1228f761c4"
2+
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "7d2cf5f959c2dfc5b83295e359212a1228f761c4",
3+
"property:icuConfig": "icu_68"
34
}

lucene/analysis/common/src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* limitations under the License.
1616
*/
1717
// Generated from IANA TLD Database <https://data.iana.org/TLD/tlds-alpha-by-domain.txt>
18-
// file version from 2021 Apr 12, Mon 07:07:01 Coordinated Universal Time
18+
// file version from 2021 Apr 18, Sun 07:07:01 Coordinated Universal Time
1919
// generated by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
2020

2121
// LUCENE-8278: None of the TLDs in {ASCIITLD} is a 1-character-shorter prefix of another TLD
@@ -777,7 +777,6 @@ ASCIITLD = "." (
777777
| [nN][aA][bB]
778778
| [nN][aA][gG][oO][yY][aA]
779779
| [nN][aA][mM][eE]
780-
| [nN][aA][tT][iI][oO][nN][wW][iI][dD][eE]
781780
| [nN][aA][tT][uU][rR][aA]
782781
| [nN][aA][vV][yY]
783782
| [nN][bB][aA]
@@ -831,7 +830,6 @@ ASCIITLD = "." (
831830
| [oO][nN][gG]
832831
| [oO][nN][lL]
833832
| [oO][nN][lL][iI][nN][eE]
834-
| [oO][nN][yY][oO][uU][rR][sS][iI][dD][eE]
835833
| [oO][oO][oO]
836834
| [oO][pP][eE][nN]
837835
| [oO][rR][aA][cC][lL][eE]

0 commit comments

Comments
 (0)