Skip to content

Commit bedb564

Browse files
isasmendiagus authored and sschuberth committed
fix(scanoss): Snippet generation logic to correctly represent match data
Replace the cartesian product approach (one snippet per PURL-location pair) with one snippet per location, using the primary PURL as the identifier and storing all related PURLs in metadata.

Signed-off-by: Agustin Isasmendi <[email protected]>
1 parent 3c5ead0 commit bedb564

File tree

3 files changed

+126
-14
lines changed

3 files changed

+126
-14
lines changed

plugins/scanners/scanoss/src/main/kotlin/ScanOssResultParser.kt

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,11 @@ import com.scanoss.dto.ScanFileDetails
2323
import com.scanoss.dto.ScanFileResult
2424
import com.scanoss.dto.enums.MatchType
2525

26+
import java.lang.invoke.MethodHandles
2627
import java.time.Instant
2728

29+
import org.apache.logging.log4j.kotlin.loggerOf
30+
2831
import org.ossreviewtoolkit.downloader.VcsHost
2932
import org.ossreviewtoolkit.model.CopyrightFinding
3033
import org.ossreviewtoolkit.model.LicenseFinding
@@ -38,6 +41,8 @@ import org.ossreviewtoolkit.utils.spdx.SpdxExpression
3841
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
3942
import org.ossreviewtoolkit.utils.spdx.toExpression
4043

44+
private val logger = loggerOf(MethodHandles.lookup().lookupClass())
45+
4146
/**
4247
* Generate a summary from the given SCANOSS [results], using [startTime], [endTime] as metadata. This variant can be
4348
* used if the result is not read from a local file.
@@ -62,12 +67,19 @@ internal fun generateSummary(startTime: Instant, endTime: Instant, results: List
6267
val sourceLocations = convertLines(localFile, localLines)
6368
val snippets = getSnippets(details)
6469

65-
snippets.forEach { snippet ->
66-
sourceLocations.forEach { sourceLocation ->
67-
// TODO: Aggregate the snippet by source file location.
68-
snippetFindings += SnippetFinding(sourceLocation, setOf(snippet))
70+
// The number of snippets should match the number of source locations.
71+
if (sourceLocations.size != snippets.size) {
72+
logger.warn {
73+
"Unexpected mismatch in '$localFile': " +
74+
"${sourceLocations.size} source locations vs ${snippets.size} snippets. " +
75+
"This indicates a potential issue with line range conversion."
6976
}
7077
}
78+
79+
// Associate each source location with its corresponding snippet.
80+
sourceLocations.zip(snippets).forEach { (location, snippet) ->
81+
snippetFindings += SnippetFinding(location, setOf(snippet))
82+
}
7183
}
7284

7385
MatchType.none -> {
@@ -132,15 +144,17 @@ private fun getCopyrightFindings(details: ScanFileDetails, path: String): List<C
132144
}
133145

134146
/**
135-
* Get the snippet findings from the given [details]. If a snippet returned by ScanOSS contains several Purls,
136-
* several snippets are created in ORT each containing a single Purl.
147+
* Get the snippet findings from the given [details]. If a snippet returned by ScanOSS contains several PURLs,
148+
* the function extracts the first PURL as the primary identifier while storing the remaining PURLs in additionalData
149+
* to preserve the complete information.
137150
*/
138-
private fun getSnippets(details: ScanFileDetails): Set<Snippet> {
151+
private fun getSnippets(details: ScanFileDetails): List<Snippet> {
139152
val matched = requireNotNull(details.matched)
140153
val ossFile = requireNotNull(details.file)
141154
val ossLines = requireNotNull(details.ossLines)
142155
val url = requireNotNull(details.url)
143-
val purls = requireNotNull(details.purls)
156+
val purls = requireNotNull(details.purls).toMutableList()
157+
val primaryPurl = purls.removeFirstOrNull().orEmpty()
144158

145159
val license = details.licenseDetails.orEmpty()
146160
.map { license -> SpdxExpression.parse(license.name) }
@@ -152,12 +166,11 @@ private fun getSnippets(details: ScanFileDetails): Set<Snippet> {
152166
val vcsInfo = VcsHost.parseUrl(url.takeUnless { it == "none" }.orEmpty())
153167
val provenance = RepositoryProvenance(vcsInfo, ".")
154168

155-
return buildSet {
156-
purls.forEach { purl ->
157-
ossLocations.forEach { snippetLocation ->
158-
add(Snippet(score, snippetLocation, provenance, purl, license))
159-
}
160-
}
169+
val additionalData = purls.associateWith { "" }
170+
171+
// Create one snippet per location, using the first PURL as the primary identifier.
172+
return ossLocations.map { snippetLocation ->
173+
Snippet(score, snippetLocation, provenance, primaryPurl, license, additionalData)
161174
}
162175
}
163176

plugins/scanners/scanoss/src/test/kotlin/ScanOssResultParserTest.kt

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,45 @@ class ScanOssResultParserTest : WordSpec({
137137
)
138138
}
139139

140+
"handle multiple PURLs by extracting first as primary and storing remaining in additionalData" {
141+
val results = readResource("/scanoss-multiple-purls.json").let {
142+
JsonUtils.toScanFileResultsFromObject(JsonUtils.toJsonObject(it))
143+
}
144+
145+
val time = Instant.now()
146+
val summary = generateSummary(time, time, results)
147+
148+
// Verify we have one finding per source location, not per PURL.
149+
summary.snippetFindings should haveSize(2)
150+
151+
with(summary.snippetFindings.first()) {
152+
// Check source location (local file).
153+
sourceLocation shouldBe TextLocation("hung_task.c", 12, 150)
154+
155+
// Verify first PURL is extracted as primary identifier.
156+
snippets should haveSize(1)
157+
snippets.first().purl shouldBe "pkg:github/kdrag0n/proton_bluecross"
158+
159+
// Verify remaining PURLs are stored in additionalData.
160+
snippets.first().additionalData shouldBe
161+
mapOf(
162+
"pkg:github/fake/fake_repository" to ""
163+
)
164+
165+
// Check OSS location.
166+
snippets.first().location shouldBe
167+
TextLocation("kernel/hung_task.c", 10, 148)
168+
}
169+
170+
// Verify same behavior for second snippet.
171+
with(summary.snippetFindings.last()) {
172+
sourceLocation shouldBe TextLocation("hung_task.c", 540, 561)
173+
snippets.first().purl shouldBe "pkg:github/kdrag0n/proton_bluecross"
174+
snippets.first().location shouldBe
175+
TextLocation("kernel/hung_task.c", 86, 107)
176+
}
177+
}
178+
140179
"combine the same license from different sources into a single expression" {
141180
// When the same license appears in multiple sources (like scancode and file_header),
142181
// combine them into a single expression rather than duplicating.
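scanoss-multiple-purls.json (test resource read by ScanOssResultParserTest; the full path is not shown on this page)

Lines changed: 60 additions & 0 deletions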
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
1+
{
2+
"hung_task.c": [
3+
{
4+
"component": "proton_bluecross",
5+
"file": "kernel/hung_task.c",
6+
"file_hash": "581734935cfbe570d280a1265aaa2a6b",
7+
"file_url": "https://api.scanoss.com/file_contents/581734935cfbe570d280a1265aaa2a6b",
8+
"id": "snippet",
9+
"latest": "17",
10+
"licenses": [
11+
{
12+
"checklist_url": "https://www.osadl.org/fileadmin/checklists/unreflicenses/GPL-2.0-only.txt",
13+
"copyleft": "yes",
14+
"incompatible_with": "Apache-1.0, Apache-1.1, Apache-2.0, BSD-4-Clause, BSD-4-Clause-UC, BSD-4.3TAHOE, ECL-2.0, FTL, IJG, LicenseRef-scancode-bsla-no-advert, Minpack, OpenSSL, PHP-3.01, Python-2.0, zlib-acknowledgement, XFree86-1.1",
15+
"name": "GPL-2.0-only",
16+
"osadl_updated": "2025-02-10T14:26:00+0000",
17+
"patent_hints": "yes",
18+
"source": "scancode",
19+
"url": "https://spdx.org/licenses/GPL-2.0-only.html"
20+
},
21+
{
22+
"name": "GPL-2.0-only WITH Linux-syscall-note",
23+
"source": "scancode",
24+
"url": "https://spdx.org/licenses/GPL-2.0-only WITH Linux-syscall-note.html"
25+
},
26+
{
27+
"checklist_url": "https://www.osadl.org/fileadmin/checklists/unreflicenses/GPL-2.0-only.txt",
28+
"copyleft": "yes",
29+
"incompatible_with": "Apache-1.0, Apache-1.1, Apache-2.0, BSD-4-Clause, BSD-4-Clause-UC, BSD-4.3TAHOE, ECL-2.0, FTL, IJG, LicenseRef-scancode-bsla-no-advert, Minpack, OpenSSL, PHP-3.01, Python-2.0, zlib-acknowledgement, XFree86-1.1",
30+
"name": "GPL-2.0-only",
31+
"osadl_updated": "2025-02-10T14:26:00+0000",
32+
"patent_hints": "yes",
33+
"source": "scancode",
34+
"url": "https://spdx.org/licenses/GPL-2.0-only.html"
35+
}
36+
],
37+
"lines": "12-150,540-561",
38+
"matched": "35%",
39+
"oss_lines": "10-148,86-107",
40+
"purl": [
41+
"pkg:github/kdrag0n/proton_bluecross",
42+
"pkg:github/fake/fake_repository"
43+
],
44+
"release_date": "2019-02-21",
45+
"server": {
46+
"kb_version": {
47+
"daily": "25.03.27",
48+
"monthly": "25.03"
49+
},
50+
"version": "5.4.10"
51+
},
52+
"source_hash": "45dd1e50621a8a32f88fbe0251a470ab",
53+
"status": "pending",
54+
"url": "https://github.com/kdrag0n/proton_bluecross",
55+
"url_hash": "a9c1c67f0930dc42dbd40c29e565bcdd",
56+
"vendor": "kdrag0n",
57+
"version": "15"
58+
}
59+
]
60+
}

0 commit comments

Comments
 (0)