Skip to content

Commit b809c5e

Browse files
committed
fix(scanoss): Prevent duplicate licenses in SnippetFindings
Adds the getUniqueLicenseExpression function, which combines the licenses of a scan result into a single expression while preventing duplicates. Signed-off-by: Agustin Isasmendi <[email protected]>
1 parent 84c5684 commit b809c5e

File tree

2 files changed

+73
-7
lines changed

2 files changed

+73
-7
lines changed

plugins/scanners/scanoss/src/main/kotlin/ScanOssResultParser.kt

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.ossreviewtoolkit.plugins.scanners.scanoss
2121

22+
import com.scanoss.dto.LicenseDetails
2223
import com.scanoss.dto.ScanFileDetails
2324
import com.scanoss.dto.ScanFileResult
2425
import com.scanoss.dto.enums.MatchType
@@ -36,7 +37,6 @@ import org.ossreviewtoolkit.model.TextLocation
3637
import org.ossreviewtoolkit.utils.spdx.SpdxConstants
3738
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
3839
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
39-
import org.ossreviewtoolkit.utils.spdx.toExpression
4040

4141
/**
4242
* Generate a summary from the given SCANOSS [result], using [startTime], [endTime] as metadata. This variant can be
@@ -134,7 +134,7 @@ private fun getCopyrightFindings(details: ScanFileDetails): List<CopyrightFindin
134134
}
135135

136136
/**
137-
* Get the snippet findings from the given [details]. If a snippet returned by ScanOSS contains several Purls,
137+
* Get the snippet findings from the given [details]. If a snippet returned by SCANOSS contains several Purls,
138138
* several snippets are created in ORT each containing a single Purl.
139139
*/
140140
private fun getSnippets(details: ScanFileDetails): Set<Snippet> {
@@ -144,9 +144,7 @@ private fun getSnippets(details: ScanFileDetails): Set<Snippet> {
144144
val url = requireNotNull(details.url)
145145
val purls = requireNotNull(details.purls)
146146

147-
val licenses = details.licenseDetails.orEmpty().mapTo(mutableSetOf()) { license ->
148-
SpdxExpression.parse(license.name)
149-
}
147+
val license = getUniqueLicenseExpression(details.licenseDetails.toList())
150148

151149
val score = matched.substringBeforeLast("%").toFloat()
152150
val locations = convertLines(fileUrl, ossLines)
@@ -157,8 +155,6 @@ private fun getSnippets(details: ScanFileDetails): Set<Snippet> {
157155
return buildSet {
158156
purls.forEach { purl ->
159157
locations.forEach { snippetLocation ->
160-
val license = licenses.toExpression()?.sorted() ?: SpdxLicenseIdExpression(SpdxConstants.NOASSERTION)
161-
162158
add(Snippet(score, snippetLocation, provenance, purl, license))
163159
}
164160
}
@@ -178,3 +174,36 @@ private fun convertLines(file: String, lineRanges: String): List<TextLocation> =
178174
else -> throw IllegalArgumentException("Unsupported line range '$lineRange'.")
179175
}
180176
}
177+
178+
/**
 * Combine the licenses named in [licensesDetails] into a single SPDX expression. The individual license names are
 * parsed as SPDX expressions, joined with the AND operator and the result is simplified, so that a license reported
 * several times shows up only once in the returned expression.
 *
 * If [licensesDetails] is empty, an [SpdxLicenseIdExpression] for [SpdxConstants.NOASSERTION] is returned.
 *
 * Example: the license names "MIT", "Apache-2.0" and "MIT" result in an expression representing
 * "MIT AND Apache-2.0".
 */
fun getUniqueLicenseExpression(licensesDetails: List<LicenseDetails>): SpdxExpression =
    licensesDetails
        .map { details -> SpdxExpression.parse(details.name) }
        // `reduceOrNull` yields null for an empty list, which is mapped to NOASSERTION below.
        .reduceOrNull { combined, next -> combined and next }
        ?.simplify()
        ?: SpdxLicenseIdExpression(SpdxConstants.NOASSERTION)

plugins/scanners/scanoss/src/test/kotlin/ScanOssResultParserTest.kt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.ossreviewtoolkit.plugins.scanners.scanoss
2121

22+
import com.scanoss.dto.LicenseDetails
2223
import com.scanoss.utils.JsonUtils
2324

2425
import io.kotest.core.spec.style.WordSpec
@@ -27,6 +28,7 @@ import io.kotest.matchers.collections.containExactlyInAnyOrder
2728
import io.kotest.matchers.collections.haveSize
2829
import io.kotest.matchers.collections.shouldContain
2930
import io.kotest.matchers.should
31+
import io.kotest.matchers.shouldBe
3032

3133
import java.io.File
3234
import java.time.Instant
@@ -39,9 +41,44 @@ import org.ossreviewtoolkit.model.SnippetFinding
3941
import org.ossreviewtoolkit.model.TextLocation
4042
import org.ossreviewtoolkit.model.VcsInfo
4143
import org.ossreviewtoolkit.model.VcsType
44+
import org.ossreviewtoolkit.utils.spdx.SpdxConstants
4245
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
46+
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
4347

4448
class ScanOssResultParserTest : WordSpec({
49+
// Tests for getUniqueLicenseExpression(); the group is named after the function under test so that test reports
// point to the right code.
"getUniqueLicenseExpression()" should {
    "deduplicate complex license expressions" {
        // The input repeats MIT and GPL-2.0-only, both as plain licenses and inside a compound expression.
        val uniqueLicenses = getUniqueLicenseExpression(
            listOf(
                LicenseDetails.builder().name("MIT").build(),
                LicenseDetails.builder().name("MIT").build(),
                LicenseDetails.builder().name("GPL-2.0-only").build(),
                LicenseDetails.builder().name("GPL-2.0-only WITH Linux-syscall-note").build(),
                LicenseDetails.builder().name("GPL-2.0-only AND MIT").build()
            )
        )

        // Compare on the string form of each single license contained in the combined expression.
        val expressionStrings = uniqueLicenses.decompose().map { it.toString() }

        // Check that each license appears exactly once
        expressionStrings.count { it == "MIT" } shouldBe 1
        expressionStrings.count { it == "GPL-2.0-only" } shouldBe 1
        expressionStrings.count { it == "GPL-2.0-only WITH Linux-syscall-note" } shouldBe 1

        // Ensure no unexpected elements
        expressionStrings.size shouldBe 3
    }

    "handle empty license list" {
        val emptyLicenses = getUniqueLicenseExpression(listOf())

        // Verify empty license list returns NOASSERTION
        emptyLicenses shouldBe SpdxLicenseIdExpression(SpdxConstants.NOASSERTION)
    }
}
81+
4582
"generateSummary()" should {
4683
"properly summarize JUnit 4.12 findings" {
4784
val results = File("src/test/assets/scanoss-junit-4.12.json").readText().let {

0 commit comments

Comments
 (0)