Skip to content

Commit d9b6282

Browse files
committed
fix(model): Correctly handle license files from (Maven) source artifacts
Fixes #10607. Signed-off-by: Sebastian Schuberth <[email protected]>
1 parent 11a106a commit d9b6282

File tree

4 files changed

+81
-22
lines changed

4 files changed

+81
-22
lines changed

model/src/main/kotlin/licenses/LicenseInfoResolver.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import org.ossreviewtoolkit.model.Identifier
2626
import org.ossreviewtoolkit.model.KnownProvenance
2727
import org.ossreviewtoolkit.model.LicenseSource
2828
import org.ossreviewtoolkit.model.Provenance
29-
import org.ossreviewtoolkit.model.RepositoryProvenance
3029
import org.ossreviewtoolkit.model.TextLocation
3130
import org.ossreviewtoolkit.model.UnknownProvenance
3231
import org.ossreviewtoolkit.model.config.CopyrightGarbage
@@ -36,6 +35,7 @@ import org.ossreviewtoolkit.model.utils.FileArchiver
3635
import org.ossreviewtoolkit.model.utils.FindingCurationMatcher
3736
import org.ossreviewtoolkit.model.utils.FindingsMatcher
3837
import org.ossreviewtoolkit.model.utils.PathLicenseMatcher
38+
import org.ossreviewtoolkit.model.utils.getSubdirectoryForProvenance
3939
import org.ossreviewtoolkit.model.utils.prependedPath
4040
import org.ossreviewtoolkit.utils.common.safeDeleteRecursively
4141
import org.ossreviewtoolkit.utils.ort.createOrtTempDir
@@ -259,7 +259,7 @@ class LicenseInfoResolver(
259259
// Register the (empty) `archiveDir` for deletion on JVM exit.
260260
archiveDir.deleteOnExit()
261261

262-
val directory = (provenance as? RepositoryProvenance)?.vcsInfo?.path.orEmpty()
262+
val directory = getSubdirectoryForProvenance(provenance, id)
263263
val rootLicenseFiles = pathLicenseMatcher.getApplicableLicenseFilesForDirectories(
264264
relativeFilePaths = archiveDir.walk().filter { it.isFile }.mapTo(mutableSetOf()) {
265265
it.relativeTo(archiveDir).invariantSeparatorsPath

model/src/main/kotlin/utils/FileArchiver.kt

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@ import kotlin.time.measureTimedValue
2727
import org.apache.logging.log4j.kotlin.logger
2828
import org.apache.tika.Tika
2929

30+
import org.ossreviewtoolkit.model.ArtifactProvenance
31+
import org.ossreviewtoolkit.model.Identifier
3032
import org.ossreviewtoolkit.model.KnownProvenance
33+
import org.ossreviewtoolkit.model.RepositoryProvenance
3134
import org.ossreviewtoolkit.utils.common.FileMatcher
3235
import org.ossreviewtoolkit.utils.common.collectMessages
3336
import org.ossreviewtoolkit.utils.common.div
@@ -68,35 +71,40 @@ class FileArchiver(
6871
fun hasArchive(provenance: KnownProvenance): Boolean = storage.hasData(provenance)
6972

7073
/**
71-
* Archive all files in [directory] matching any of the configured patterns in the [storage].
74+
* Archive the files matching [patterns] inside [rootDirectory] and put them to the [storage]. The archive is
75+
* associated with the given [provenance] and [id].
7276
*/
73-
fun archive(directory: File, provenance: KnownProvenance) {
74-
logger.info { "Archiving files matching ${matcher.patterns} from '$directory'..." }
77+
fun archive(rootDirectory: File, provenance: KnownProvenance, id: Identifier) {
78+
val subdirectory = getSubdirectoryForProvenance(provenance, id)
79+
val directories = setOf(rootDirectory, rootDirectory / subdirectory)
80+
81+
logger.info { "Archiving files matching ${matcher.patterns} from '$rootDirectory'..." }
7582

7683
val zipFile = createOrtTempFile(suffix = ".zip")
7784
val tika = Tika()
7885

7986
val zipDuration = measureTime {
80-
directory.packZip(zipFile, overwrite = true) { file ->
81-
val relativePath = file.relativeTo(directory).invariantSeparatorsPath
82-
83-
if (!matcher.matches(relativePath)) return@packZip false
87+
rootDirectory.packZip(zipFile, overwrite = true) { file ->
88+
val matchingFile = directories.find {
89+
val relativePath = file.relativeTo(it).invariantSeparatorsPath
90+
matcher.matches(relativePath)
91+
} ?: return@packZip false
8492

8593
if (!tika.detect(file).startsWith("text/")) {
86-
logger.info { "Not adding file '$relativePath' to archive because it is not a text file." }
94+
logger.info { "Not adding file '$matchingFile' to archive because it is not a text file." }
8795
return@packZip false
8896
}
8997

90-
logger.debug { "Adding '$relativePath' to archive." }
98+
logger.debug { "Adding '$matchingFile' to archive." }
9199
true
92100
}
93101
}
94102

95-
logger.info { "Archived directory '$directory' in $zipDuration." }
103+
logger.info { "Archived directory '$rootDirectory' in $zipDuration." }
96104

97105
val writeDuration = measureTime { storage.putData(provenance, zipFile.inputStream(), zipFile.length()) }
98106

99-
logger.info { "Wrote archive of directory '$directory' to storage in $writeDuration." }
107+
logger.info { "Wrote archive of directory '$rootDirectory' to storage in $writeDuration." }
100108

101109
zipFile.parentFile.safeDeleteRecursively()
102110
}
@@ -129,3 +137,21 @@ class FileArchiver(
129137
}.isSuccess
130138
}
131139
}
140+
141+
/**
142+
* Get the (potentially [id]-type specific) nested path directory for the given [provenance].
143+
*/
144+
fun getSubdirectoryForProvenance(provenance: KnownProvenance, id: Identifier): String =
145+
when (provenance) {
146+
// In case of a repository, match paths relative to the VCS path.
147+
is RepositoryProvenance -> provenance.vcsInfo.path
148+
149+
// In case of a source artifact, match paths relative to the archive root or a type-specific directory.
150+
is ArtifactProvenance -> {
151+
// Java Archives (JARs) by convention (see e.g. the Apache Release Policy) often contain licensing
152+
// information as part of the "META-INF" directory.
153+
if (id.type == "Maven") "META-INF" else ""
154+
155+
// TODO: Check if more types need special handling.
156+
}
157+
}

model/src/test/kotlin/utils/FileArchiverTest.kt

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ import io.kotest.matchers.shouldNot
3232

3333
import java.io.File
3434

35+
import org.ossreviewtoolkit.model.ArtifactProvenance
36+
import org.ossreviewtoolkit.model.Identifier
37+
import org.ossreviewtoolkit.model.RemoteArtifact
3538
import org.ossreviewtoolkit.model.RepositoryProvenance
3639
import org.ossreviewtoolkit.model.VcsInfo
3740
import org.ossreviewtoolkit.model.VcsType
@@ -50,6 +53,10 @@ private val PROVENANCE = RepositoryProvenance(
5053
resolvedRevision = "0000000000000000000000000000000000000000"
5154
)
5255

56+
private val ARTIFACT_PROVENANCE = ArtifactProvenance(
57+
sourceArtifact = RemoteArtifact.EMPTY
58+
)
59+
5360
class FileArchiverTest : StringSpec() {
5461
private lateinit var workingDir: File
5562
private lateinit var storageDir: File
@@ -84,7 +91,7 @@ class FileArchiverTest : StringSpec() {
8491
createFile("path/LICENSE")
8592

8693
val archiver = FileArchiver(setOf("**/LICENSE"), storage)
87-
archiver.archive(workingDir, PROVENANCE)
94+
archiver.archive(workingDir, PROVENANCE, Identifier.EMPTY)
8895
val result = archiver.unarchive(targetDir, PROVENANCE)
8996

9097
result shouldBe true
@@ -101,7 +108,7 @@ class FileArchiverTest : StringSpec() {
101108
createFile("d/LiCeNsE")
102109

103110
val archiver = FileArchiver(setOf("**/LICENSE"), storage)
104-
archiver.archive(workingDir, PROVENANCE)
111+
archiver.archive(workingDir, PROVENANCE, Identifier.EMPTY)
105112
val result = archiver.unarchive(targetDir, PROVENANCE)
106113

107114
result shouldBe true
@@ -121,7 +128,7 @@ class FileArchiverTest : StringSpec() {
121128

122129
val archiver = FileArchiver(setOf("a", "**/a"), storage)
123130

124-
archiver.archive(workingDir, PROVENANCE)
131+
archiver.archive(workingDir, PROVENANCE, Identifier.EMPTY)
125132
val result = archiver.unarchive(targetDir, PROVENANCE)
126133

127134
result shouldBe true
@@ -144,7 +151,7 @@ class FileArchiverTest : StringSpec() {
144151
createFile("c/b")
145152

146153
val archiver = FileArchiver(setOf("**"), storage)
147-
archiver.archive(workingDir, PROVENANCE)
154+
archiver.archive(workingDir, PROVENANCE, Identifier.EMPTY)
148155

149156
val result = archiver.unarchive(targetDir, PROVENANCE)
150157

@@ -160,7 +167,7 @@ class FileArchiverTest : StringSpec() {
160167
"Empty archives can be handled" {
161168
val archiver = FileArchiver(LicenseFilePatterns.DEFAULT.allLicenseFilenames, storage)
162169

163-
archiver.archive(workingDir, PROVENANCE)
170+
archiver.archive(workingDir, PROVENANCE, Identifier.EMPTY)
164171

165172
archiver.unarchive(targetDir, PROVENANCE) shouldBe true
166173
targetDir shouldContainNFiles 0
@@ -170,7 +177,7 @@ class FileArchiverTest : StringSpec() {
170177
createFile("License") { writeBytes(byteArrayOf(0xFF.toByte(), 0xD8.toByte())) }
171178

172179
val archiver = FileArchiver(LicenseFilePatterns.DEFAULT.allLicenseFilenames, storage)
173-
archiver.archive(workingDir, PROVENANCE)
180+
archiver.archive(workingDir, PROVENANCE, Identifier.EMPTY)
174181
val result = archiver.unarchive(targetDir, PROVENANCE)
175182

176183
result shouldBe true
@@ -181,7 +188,7 @@ class FileArchiverTest : StringSpec() {
181188
createFile("License") { writeText("ぁあぃいぅうぇえぉおかが") }
182189

183190
val archiver = FileArchiver(LicenseFilePatterns.DEFAULT.allLicenseFilenames, storage)
184-
archiver.archive(workingDir, PROVENANCE)
191+
archiver.archive(workingDir, PROVENANCE, Identifier.EMPTY)
185192
val result = archiver.unarchive(targetDir, PROVENANCE)
186193

187194
result shouldBe true
@@ -192,11 +199,37 @@ class FileArchiverTest : StringSpec() {
192199
createFile("License.md") { writeText("# Heading level 1") }
193200

194201
val archiver = FileArchiver(LicenseFilePatterns.DEFAULT.allLicenseFilenames, storage)
195-
archiver.archive(workingDir, PROVENANCE)
202+
archiver.archive(workingDir, PROVENANCE, Identifier.EMPTY)
196203
val result = archiver.unarchive(targetDir, PROVENANCE)
197204

198205
result shouldBe true
199206
targetDir should containFile("License.md")
200207
}
208+
209+
"LICENSE files from source artifacts are archived" {
210+
createFile("LICENSE")
211+
212+
val archiver = FileArchiver(LicenseFilePatterns.DEFAULT.allLicenseFilenames, storage)
213+
archiver.archive(workingDir, ARTIFACT_PROVENANCE, Identifier.EMPTY)
214+
val result = archiver.unarchive(targetDir, ARTIFACT_PROVENANCE)
215+
216+
result shouldBe true
217+
with(targetDir) {
218+
shouldContainFileWithContent("LICENSE")
219+
}
220+
}
221+
222+
"LICENSE files from Maven artifacts with META-INF are archived" {
223+
createFile("META-INF/LICENSE")
224+
225+
val archiver = FileArchiver(LicenseFilePatterns.DEFAULT.allLicenseFilenames, storage)
226+
archiver.archive(workingDir, ARTIFACT_PROVENANCE, Identifier("Maven:::"))
227+
val result = archiver.unarchive(targetDir, ARTIFACT_PROVENANCE)
228+
229+
result shouldBe true
230+
with(targetDir) {
231+
shouldContainFileWithContent("META-INF/LICENSE")
232+
}
233+
}
201234
}
202235
}

scanner/src/main/kotlin/Scanner.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -745,7 +745,7 @@ class Scanner(
745745
// runCatching has a bug with smart-cast, see https://youtrack.jetbrains.com/issue/KT-27748.
746746
try {
747747
dir = provenanceDownloader.downloadRecursively(nestedProvenance)
748-
archiver.archive(dir, nestedProvenance.root)
748+
archiver.archive(dir, nestedProvenance.root, pkg.id)
749749
} catch (e: DownloadException) {
750750
controller.addIssue(
751751
pkg.id,

0 commit comments

Comments
 (0)