Skip to content

Commit a26f539

Browse files
committed
Add transformer to deduplicate identical files based on content
Adds a new `DeduplicatingResourceTransformer` that works differently from `PreserveFirstFoundResourceTransformer`. `PreserveFirstFoundResourceTransformer` preserves the first resource that matches the configured paths and ignores all other ones. `DeduplicatingResourceTransformer` preserves resources by path _and_ identical content, and fails for all resources with different content that are not explicitly allowed (excluded). It intentionally works against all resources. The new one is intended to guard against a couple of unexpected situations: * A (transitive) dependency brings a non-relocated version of a dependency that is also included elsewhere but with a different version. This could normally lead to unexpected exceptions during runtime. * Unintended inclusion or removal of legally important license information, see also `MergeLicenseResourceTransformer` (#1858). * Unintended removal or (false) inclusion of shaded dependency information via `META-INF/x/y/pom.xml`/`.properties` files, which can be important for dependency/license analysis tools. Adding the functionality of `DeduplicatingResourceTransformer` to `PreserveFirstFoundResourceTransformer` became a bit too difficult without breaking the existing behavior of the latter.
1 parent 6b5f04b commit a26f539

File tree

8 files changed

+403
-0
lines changed

8 files changed

+403
-0
lines changed

api/shadow.api

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,15 @@ public class com/github/jengelman/gradle/plugins/shadow/transformers/ComponentsX
308308
public final class com/github/jengelman/gradle/plugins/shadow/transformers/ComponentsXmlResourceTransformer$Companion {
309309
}
310310

311+
public class com/github/jengelman/gradle/plugins/shadow/transformers/DeduplicatingResourceTransformer : com/github/jengelman/gradle/plugins/shadow/transformers/PatternFilterableResourceTransformer {
312+
public fun <init> (Lorg/gradle/api/model/ObjectFactory;)V
313+
public fun <init> (Lorg/gradle/api/model/ObjectFactory;Lorg/gradle/api/tasks/util/PatternSet;)V
314+
public fun canTransformResource (Lorg/gradle/api/file/FileTreeElement;)Z
315+
public final fun getObjectFactory ()Lorg/gradle/api/model/ObjectFactory;
316+
public fun hasTransformedResource ()Z
317+
public fun modifyOutputStream (Lorg/apache/tools/zip/ZipOutputStream;Z)V
318+
}
319+
311320
public class com/github/jengelman/gradle/plugins/shadow/transformers/DontIncludeResourceTransformer : com/github/jengelman/gradle/plugins/shadow/transformers/ResourceTransformer {
312321
public fun <init> (Lorg/gradle/api/model/ObjectFactory;)V
313322
public fun canTransformResource (Lorg/gradle/api/file/FileTreeElement;)Z

docs/changes/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
- Expose `patternSet` of `ApacheNoticeResourceTransformer` as `public`. ([#1850](https://github.com/GradleUp/shadow/pull/1850))
1212
- Expose `patternSet` of `PreserveFirstFoundResourceTransformer` as `public`. ([#1855](https://github.com/GradleUp/shadow/pull/1855))
1313
- Support overriding output path of `ApacheNoticeResourceTransformer`. ([#1851](https://github.com/GradleUp/shadow/pull/1851))
14+
- Add `DeduplicatingResourceTransformer` to deduplicate on path _and_ content. ([#1859](https://github.com/GradleUp/shadow/pull/1859))
1415

1516
### Changed
1617

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package com.github.jengelman.gradle.plugins.shadow.transformers
2+
3+
import assertk.assertThat
4+
import assertk.assertions.any
5+
import assertk.assertions.containsExactlyInAnyOrder
6+
import assertk.assertions.containsSubList
7+
import assertk.assertions.endsWith
8+
import assertk.assertions.isEqualTo
9+
import assertk.assertions.isSameInstanceAs
10+
import com.github.jengelman.gradle.plugins.shadow.testkit.containsExactlyInAnyOrder
11+
import com.github.jengelman.gradle.plugins.shadow.testkit.containsOnly
12+
import com.github.jengelman.gradle.plugins.shadow.testkit.getContent
13+
import com.github.jengelman.gradle.plugins.shadow.testkit.getContents
14+
import kotlin.booleanArrayOf
15+
import kotlin.io.path.appendText
16+
import org.gradle.testkit.runner.TaskOutcome
17+
import org.junit.jupiter.params.ParameterizedTest
18+
import org.junit.jupiter.params.provider.ValueSource
19+
20+
class DeduplicatingResourceTransformerTest : BaseTransformerTest() {
  /**
   * Builds two jars that overlap on several paths and shades them with
   * [DeduplicatingResourceTransformer].
   *
   * `multiple-contents` is always excluded from the duplicate check. `differing-content-2` is
   * excluded only when [excludeAll] is `true`; in that case the build must succeed and both
   * differing copies must end up in the jar. Otherwise the `shadowJar` task must fail and report
   * `differing-content-2` together with both source files and their hashes.
   */
  @ParameterizedTest
  @ValueSource(booleans = [false, true])
  fun conflictExclusion(excludeAll: Boolean) {
    val jarOne = buildJarOne {
      insert("multiple-contents", "content")
      insert("single-source", "content")
      insert("same-content-twice", "content")
      insert("differing-content-2", "content")
    }
    val jarTwo = buildJarTwo {
      insert("multiple-contents", "content-is-different")
      insert("same-content-twice", "content")
      insert("differing-content-2", "content-is-different")
    }

    // Second exclusion is only emitted for the "everything conflicting is excluded" variant.
    val optionalExclude = if (excludeAll) "exclude(\"differing-content-2\")" else ""
    projectScript.appendText(
      transform<DeduplicatingResourceTransformer>(
        dependenciesBlock = implementationFiles(jarOne, jarTwo),
        transformerBlock = """
          exclude("multiple-contents")
          $optionalExclude
        """.trimIndent(),
      ),
    )

    if (!excludeAll) {
      // `differing-content-2` is still checked, so the task has to fail and name both sources.
      val failure = runWithFailure(shadowJarPath)
      assertThat(failure.task(":shadowJar")!!.outcome).isSameInstanceAs(TaskOutcome.FAILED)

      val lines = failure.output.lines()
      // Keep this list approach for Unix/Windows test compatibility.
      val expectedHeader = listOf(
        "Execution failed for task ':shadowJar'.",
        "> Found 1 path duplicate(s) with different content in the shadow JAR:",
        " * differing-content-2",
      )
      assertThat(lines).containsSubList(expectedHeader)
      assertThat(lines).any {
        it.endsWith("/differing-content-2 (Hash: -1337566116240053116)")
      }
      assertThat(lines).any {
        it.endsWith("/differing-content-2 (Hash: -6159701213549668473)")
      }
      return
    }

    runWithSuccess(shadowJarPath)
    assertThat(outputShadowedJar).useAll {
      containsExactlyInAnyOrder(
        // Excluded paths with differing content are kept once per content, i.e. twice.
        "multiple-contents",
        "multiple-contents",
        "differing-content-2",
        "differing-content-2",
        // Unique or identical-content paths appear exactly once.
        "single-source",
        "same-content-twice",
        "META-INF/",
        "META-INF/MANIFEST.MF",
      )
      getContents("multiple-contents").containsExactlyInAnyOrder("content", "content-is-different")
      getContents("differing-content-2").containsExactlyInAnyOrder("content", "content-is-different")
      getContent("single-source").isEqualTo("content")
      getContent("same-content-twice").isEqualTo("content")
    }
  }
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
package com.github.jengelman.gradle.plugins.shadow.transformers
2+
3+
import java.io.File
4+
import java.nio.ByteBuffer
5+
import java.security.MessageDigest
6+
import javax.inject.Inject
7+
import org.apache.tools.zip.ZipOutputStream
8+
import org.gradle.api.GradleException
9+
import org.gradle.api.file.FileTreeElement
10+
import org.gradle.api.model.ObjectFactory
11+
import org.gradle.api.tasks.Internal
12+
import org.gradle.api.tasks.util.PatternSet
13+
14+
/**
 * Transformer to include files with identical content only once in the shadow JAR.
 *
 * Multiple files with the same path but different content lead to an error.
 *
 * Some scenarios for duplicate resources in a shadow jar:
 * * Duplicate `.class` files.
 *   Having duplicate `.class` files with different content is a situation indicating that the resulting jar is
 *   built with _incompatible_ classes, likely leading to issues during runtime.
 *   This situation can happen when one dependency is (also) included in an uber jar.
 * * Duplicate `META-INF/<group-id>/<artifact-id>/pom.properties`/`xml` files.
 *   Some dependencies contain shaded variants of other dependencies.
 *   Tools that inspect jar files to extract the included dependencies, for example, for license auditing
 *   use cases or tools that collect information of all included dependencies, may rely on these files.
 *   Hence, it is desirable to retain the duplicate resource `pom.properties`/`xml` resources.
 *
 * `DeduplicatingResourceTransformer` checks all entries in the resulting jar.
 * It is generally not recommended to use any of the [include] configuration functions.
 *
 * There are reasons to retain duplicate resources with different contents in the resulting jar.
 * This can be achieved with the [exclude] configuration functions.
 *
 * To exclude a path or pattern from being deduplicated, for example, legit
 * `META-INF/<group-id>/<artifact-id>/pom.properties`/`xml`, configure the transformer with an exclusion
 * like the following:
 * ```kotlin
 * tasks.named<ShadowJar>("shadowJar").configure {
 *   transform<DeduplicatingResourceTransformer> {
 *     // Keep pom.* files from different Guava versions in the jar.
 *     exclude("META-INF/maven/com.google.guava/guava/pom.*")
 *     // Duplicates with different content for all other resource paths will raise an error.
 *   }
 * }
 * ```
 *
 * *Tip*: the [com.github.jengelman.gradle.plugins.shadow.tasks.FindResourceInClasspath] convenience task
 * can be used to find resources in a Gradle classpath/configuration.
 *
 * *Warning* Do **not** combine [PreserveFirstFoundResourceTransformer] with this transformer.
 */
@CacheableTransformer
public open class DeduplicatingResourceTransformer(
  final override val objectFactory: ObjectFactory,
  patternSet: PatternSet,
) : PatternFilterableResourceTransformer(patternSet) {
  /** Everything seen so far, keyed by the resource path inside the jar. */
  @get:Internal
  internal val sources: MutableMap<String, PathInfos> = mutableMapOf()

  @Inject
  public constructor(objectFactory: ObjectFactory) : this(objectFactory, PatternSet())

  /**
   * Bookkeeping for a single resource path.
   *
   * @property failOnDuplicateContent whether more than one distinct content under this path is an error.
   */
  internal data class PathInfos(val failOnDuplicateContent: Boolean) {
    /** Source files grouped by content hash, in the order the hashes were first seen. */
    val filesPerHash: MutableMap<Long, MutableList<File>> = mutableMapOf()

    /** Number of distinct content hashes recorded for this path. */
    fun uniqueContentCount() = filesPerHash.size

    /**
     * Records [file] under [hash].
     *
     * @return `true` if this is the first file with that hash for the path, `false` otherwise.
     */
    fun addFile(hash: Long, file: File): Boolean {
      val firstWithThisContent = hash !in filesPerHash
      filesPerHash.getOrPut(hash) { mutableListOf() }.add(file)
      return firstWithThisContent
    }
  }

  /**
   * Hashes the element's file and records it under the element's path.
   *
   * The path's [PathInfos] is created on first sight; whether differing content is an error for the
   * path is decided then, from the transformer's include/exclude patterns.
   *
   * @return `false` when this content is new for the path, so the resource is retained in the
   *   output, and `true` when identical content was already recorded for the path.
   */
  override fun canTransformResource(element: FileTreeElement): Boolean {
    val file = element.file
    val hash = hashForFile(file)

    val pathInfos = sources.computeIfAbsent(element.path) { PathInfos(patternSpec.isSatisfiedBy(element)) }
    val retainInOutput = pathInfos.addFile(hash, file)

    return !retainInOutput
  }

  // Always `true`, so `modifyOutputStream` (which performs the final violation check) is not skipped.
  override fun hasTransformedResource(): Boolean = true

  /** Paths that must not have differing content but were seen with more than one distinct content. */
  internal fun duplicateContentViolations(): Map<String, PathInfos> =
    sources.filter { (_, pathInfos) -> pathInfos.failOnDuplicateContent && pathInfos.uniqueContentCount() > 1 }

  /**
   * Writes nothing to [os]; fails the build when [duplicateContentViolations] is not empty.
   *
   * The error lists each offending path, followed by one line per source file with its content hash.
   *
   * @throws GradleException if at least one path was seen with differing content.
   */
  override fun modifyOutputStream(os: ZipOutputStream, preserveFileTimestamps: Boolean) {
    val duplicatePaths = duplicateContentViolations()
    if (duplicatePaths.isEmpty()) return

    val message = buildString {
      append("Found ${duplicatePaths.size} path duplicate(s) with different content in the shadow JAR:")
      for ((path, infos) in duplicatePaths) {
        append("\n * $path")
        for ((hash, files) in infos.filesPerHash) {
          // One line per file, also when several files share the same hash.
          for (file in files) {
            append("\n * ${file.path} (Hash: $hash)")
          }
        }
      }
    }
    throw GradleException(message)
  }

  // Gradle's configuration uses Java serialization, which cannot serialize `MessageDigest` instances.
  // Using a rather dirty mechanism to memoize the MD instance for task/transformer execution.
  @Transient
  private var digest: MessageDigest? = null

  /**
   * Computes a content hash for [file]: the first 8 bytes of its SHA-256 digest, read as a `Long`.
   *
   * @throws RuntimeException if the file cannot be read or hashing fails.
   */
  internal fun hashForFile(file: File): Long {
    val d = digest ?: MessageDigest.getInstance("SHA-256").also { digest = it }
    // The instance is reused across files: drop any partial state left behind if an earlier
    // read failed half-way, so it cannot leak into this file's hash.
    d.reset()
    try {
      file.inputStream().use { input ->
        val buffer = ByteArray(8192)
        while (true) {
          val read = input.read(buffer)
          if (read == -1) break
          d.update(buffer, 0, read)
        }
      }
      return ByteBuffer.wrap(d.digest()).getLong(0)
    } catch (e: Exception) {
      throw RuntimeException("Failed to read data or calculate hash for $file", e)
    }
  }
}

src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/PreserveFirstFoundResourceTransformer.kt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ import org.gradle.api.tasks.util.PatternSet
1818
* want to ensure that only the first found resource is included in the final JAR. If there are multiple resources with
1919
* the same path in a project and its dependencies, the first one found should be the project's.
2020
*
21+
* This transformer deduplicates included resources based on the path name.
22+
* See [DeduplicatingResourceTransformer] for a transformer that deduplicates based on the paths and contents of
23+
* the resources.
24+
*
25+
* *Warning* Do **not** combine [DeduplicatingResourceTransformer] with this transformer.
26+
*
2127
* @see [DuplicatesStrategy]
2228
* @see [ShadowJar.getDuplicatesStrategy]
2329
*/

src/test/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/BaseTransformerTest.kt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import com.github.jengelman.gradle.plugins.shadow.testkit.requireResourceAsStrea
66
import com.github.jengelman.gradle.plugins.shadow.transformers.ResourceTransformer.Companion.create
77
import com.github.jengelman.gradle.plugins.shadow.util.noOpDelegate
88
import com.github.jengelman.gradle.plugins.shadow.util.testObjectFactory
9+
import java.io.File
910
import java.lang.reflect.ParameterizedType
1011
import java.util.Locale
1112
import kotlin.io.path.createTempFile
@@ -14,6 +15,7 @@ import org.apache.tools.zip.ZipOutputStream
1415
import org.gradle.api.file.FileTreeElement
1516
import org.gradle.api.file.RelativePath
1617
import org.junit.jupiter.api.BeforeEach
18+
import org.junit.jupiter.api.io.TempDir
1719

1820
abstract class BaseTransformerTest<T : ResourceTransformer> {
1921
protected lateinit var transformer: T
@@ -41,6 +43,16 @@ abstract class BaseTransformerTest<T : ResourceTransformer> {
4143
return canTransformResource(element)
4244
}
4345

46+
/**
 * Calls [ResourceTransformer.canTransformResource] with a stub [FileTreeElement] that reports
 * [path] (parsed via `RelativePath.parse(true, path)`) as its path / relative path and [file] as
 * its backing file. All other members are left to `noOpDelegate()`.
 */
fun ResourceTransformer.canTransformResource(path: String, file: File): Boolean {
  val parsedPath = RelativePath.parse(true, path)
  return canTransformResource(
    object : FileTreeElement by noOpDelegate() {
      override fun getFile(): File = file
      override fun getRelativePath(): RelativePath = parsedPath
      override fun getPath(): String = parsedPath.pathString
    },
  )
}
55+
4456
fun JarPath.readContentLines(resourceName: String = MANIFEST_NAME): List<String> {
4557
return use { it.getStream(resourceName).bufferedReader().readLines() }
4658
}

0 commit comments

Comments
 (0)