Skip to content

Commit 1683430

Browse files
committed
fix(scanoss): Fix SCANOSS-to-ORT model mapping in generateSummary()
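* Fix the association between source locations and snippets in `generateSummary()`: each source location is now paired with its corresponding snippet instead of combining every snippet with every source location.
* Address the confusion when creating source locations: the local `details.lines` are now paired with the local `result.filePath` instead of the remote `details.file`.
* Extract the first PURL as the primary identifier while storing all remaining PURLs in `additionalData` for reference.

Signed-off-by: Agustin Isasmendi <[email protected]>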
1 parent 4c52f61 commit 1683430

File tree

4 files changed

+135
-23
lines changed

4 files changed

+135
-23
lines changed

plugins/scanners/scanoss/src/main/kotlin/ScanOssResultParser.kt

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,11 @@ import com.scanoss.dto.ScanFileDetails
2323
import com.scanoss.dto.ScanFileResult
2424
import com.scanoss.dto.enums.MatchType
2525

26+
import java.lang.invoke.MethodHandles
2627
import java.time.Instant
2728

29+
import org.apache.logging.log4j.kotlin.loggerOf
30+
2831
import org.ossreviewtoolkit.downloader.VcsHost
2932
import org.ossreviewtoolkit.model.CopyrightFinding
3033
import org.ossreviewtoolkit.model.LicenseFinding
@@ -38,6 +41,8 @@ import org.ossreviewtoolkit.utils.spdx.SpdxExpression
3841
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
3942
import org.ossreviewtoolkit.utils.spdx.toExpression
4043

44+
private val logger = loggerOf(MethodHandles.lookup().lookupClass())
45+
4146
/**
4247
* Generate a summary from the given SCANOSS [result], using [startTime], [endTime] as metadata. This variant can be
4348
* used if the result is not read from a local file.
@@ -56,17 +61,24 @@ internal fun generateSummary(startTime: Instant, endTime: Instant, results: List
5661
}
5762

5863
MatchType.snippet -> {
59-
val file = requireNotNull(details.file)
60-
val lines = requireNotNull(details.lines)
61-
val sourceLocations = convertLines(file, lines)
64+
val localFile = requireNotNull(result.filePath)
65+
val localLines = requireNotNull(details.lines)
66+
val sourceLocations = convertLines(localFile, localLines)
6267
val snippets = getSnippets(details)
6368

64-
snippets.forEach { snippet ->
65-
sourceLocations.forEach { sourceLocation ->
66-
// TODO: Aggregate the snippet by source file location.
67-
snippetFindings += SnippetFinding(sourceLocation, setOf(snippet))
69+
// The number of snippets should match the number of source locations.
70+
if (sourceLocations.size != snippets.size) {
71+
logger.warn {
72+
"Unexpected mismatch in '$localFile': " +
73+
"${sourceLocations.size} source locations vs ${snippets.size} snippets. " +
74+
"This indicates a potential issue with line range conversion."
6875
}
6976
}
77+
78+
// Associate each source location with its corresponding snippet.
79+
sourceLocations.zip(snippets).forEach { (location, snippet) ->
80+
snippetFindings += SnippetFinding(location, setOf(snippet))
81+
}
7082
}
7183

7284
MatchType.none -> {
@@ -134,32 +146,33 @@ private fun getCopyrightFindings(details: ScanFileDetails): List<CopyrightFindin
134146
}
135147

136148
/**
137-
* Get the snippet findings from the given [details]. If a snippet returned by ScanOSS contains several Purls,
138-
* several snippets are created in ORT each containing a single Purl.
149+
* Get the snippets from the given [details], creating one snippet per OSS line range. If a snippet returned by ScanOSS
150+
* contains several PURLs, the first PURL is used as the primary identifier and the remaining PURLs are stored as keys
151+
* in additionalData to preserve the complete information.
139152
*/
140-
private fun getSnippets(details: ScanFileDetails): Set<Snippet> {
153+
private fun getSnippets(details: ScanFileDetails): List<Snippet> {
141154
val matched = requireNotNull(details.matched)
142-
val fileUrl = requireNotNull(details.fileUrl)
155+
val ossFile = requireNotNull(details.file)
143156
val ossLines = requireNotNull(details.ossLines)
144157
val url = requireNotNull(details.url)
145-
val purls = requireNotNull(details.purls)
158+
val purls = requireNotNull(details.purls).toMutableList()
159+
val primaryPurl = purls.removeFirstOrNull().orEmpty()
146160

147161
val license = details.licenseDetails.orEmpty()
148162
.map { license -> SpdxExpression.parse(license.name) }
149163
.toExpression()?.sorted() ?: SpdxLicenseIdExpression(SpdxConstants.NOASSERTION)
150164

151165
val score = matched.substringBeforeLast("%").toFloat()
152-
val locations = convertLines(fileUrl, ossLines)
166+
val ossLocations = convertLines(ossFile, ossLines)
153167
// TODO: No resolved revision is available. Should an ArtifactProvenance be created instead?
154168
val vcsInfo = VcsHost.parseUrl(url.takeUnless { it == "none" }.orEmpty())
155169
val provenance = RepositoryProvenance(vcsInfo, ".")
156170

157-
return buildSet {
158-
purls.forEach { purl ->
159-
locations.forEach { snippetLocation ->
160-
add(Snippet(score, snippetLocation, provenance, purl, license))
161-
}
162-
}
171+
val additionalData = purls.associateWith { "" }
172+
173+
// Create one snippet per location, using the first PURL as the primary identifier.
174+
return ossLocations.map { snippetLocation ->
175+
Snippet(score, snippetLocation, provenance, primaryPurl, license, additionalData)
163176
}
164177
}
165178

plugins/scanners/scanoss/src/test/kotlin/ScanOssResultParserTest.kt

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ class ScanOssResultParserTest : WordSpec({
119119
Snippet(
120120
98.0f,
121121
TextLocation(
122-
"https://osskb.org/api/file_contents/6ff2427335b985212c9b79dfa795799f",
122+
"src/main/java/com/vdurmont/semver4j/Requirement.java",
123123
1,
124124
710
125125
),
@@ -135,8 +135,47 @@ class ScanOssResultParserTest : WordSpec({
135135
)
136136
}
137137

138+
"handle multiple PURLs by extracting first as primary and storing remaining in additionalData" {
139+
val results = readResource("/scanoss-multiple-purls.json").let {
140+
JsonUtils.toScanFileResultsFromObject(JsonUtils.toJsonObject(it))
141+
}
142+
143+
val time = Instant.now()
144+
val summary = generateSummary(time, time, results)
145+
146+
// Verify we have one finding per source location, not per PURL.
147+
summary.snippetFindings should haveSize(2)
148+
149+
with(summary.snippetFindings.first()) {
150+
// Check source location (local file).
151+
sourceLocation shouldBe TextLocation("hung_task.c", 12, 150)
152+
153+
// Verify first PURL is extracted as primary identifier.
154+
snippets should haveSize(1)
155+
snippets.first().purl shouldBe "pkg:github/kdrag0n/proton_bluecross"
156+
157+
// Verify remaining PURLs are stored in additionalData.
158+
snippets.first().additionalData shouldBe
159+
mapOf(
160+
"pkg:github/fake/fake_repository" to ""
161+
)
162+
163+
// Check OSS location.
164+
snippets.first().location shouldBe
165+
TextLocation("kernel/hung_task.c", 10, 148)
166+
}
167+
168+
// Verify same behavior for second snippet.
169+
with(summary.snippetFindings.last()) {
170+
sourceLocation shouldBe TextLocation("hung_task.c", 540, 561)
171+
snippets.first().purl shouldBe "pkg:github/kdrag0n/proton_bluecross"
172+
snippets.first().location shouldBe
173+
TextLocation("kernel/hung_task.c", 86, 107)
174+
}
175+
}
176+
138177
"combine the same license from different sources into a single expression" {
139-
// When the same license appears in multiple sources (like scancode and file_header),
178+
// When a license appears in multiple sources (like scancode and file_header),
140179
// combine them into a single expression rather than duplicating.
141180
val results = readResource("/scanoss-snippet-same-license-multiple-sources.json").let {
142181
JsonUtils.toScanFileResultsFromObject(JsonUtils.toJsonObject(it))
@@ -161,7 +200,7 @@ class ScanOssResultParserTest : WordSpec({
161200
}
162201
}
163202

164-
"handle empty license array with NOASSERTION" {
203+
"handle empty license array in snippet findings with NOASSERTION" {
165204
// When a component has an empty licenses array, use NOASSERTION.
166205

167206
val results = readResource("/scanoss-snippet-no-license-data.json").let {

plugins/scanners/scanoss/src/test/kotlin/ScanOssScannerDirectoryTest.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ class ScanOssScannerDirectoryTest : StringSpec({
103103
Snippet(
104104
99.0f,
105105
TextLocation(
106-
"https://osskb.org/api/file_contents/871fb0c5188c2f620d9b997e225b0095",
106+
"utils/src/main/kotlin/random-data-05-06-11.kt", // This is the remote oss filepath
107107
128,
108108
367
109109
),
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
{
2+
"hung_task.c": [
3+
{
4+
"component": "proton_bluecross",
5+
"file": "kernel/hung_task.c",
6+
"file_hash": "581734935cfbe570d280a1265aaa2a6b",
7+
"file_url": "https://api.scanoss.com/file_contents/581734935cfbe570d280a1265aaa2a6b",
8+
"id": "snippet",
9+
"latest": "17",
10+
"licenses": [
11+
{
12+
"checklist_url": "https://www.osadl.org/fileadmin/checklists/unreflicenses/GPL-2.0-only.txt",
13+
"copyleft": "yes",
14+
"incompatible_with": "Apache-1.0, Apache-1.1, Apache-2.0, BSD-4-Clause, BSD-4-Clause-UC, BSD-4.3TAHOE, ECL-2.0, FTL, IJG, LicenseRef-scancode-bsla-no-advert, Minpack, OpenSSL, PHP-3.01, Python-2.0, zlib-acknowledgement, XFree86-1.1",
15+
"name": "GPL-2.0-only",
16+
"osadl_updated": "2025-02-10T14:26:00+0000",
17+
"patent_hints": "yes",
18+
"source": "scancode",
19+
"url": "https://spdx.org/licenses/GPL-2.0-only.html"
20+
},
21+
{
22+
"name": "GPL-2.0-only WITH Linux-syscall-note",
23+
"source": "scancode",
24+
"url": "https://spdx.org/licenses/GPL-2.0-only WITH Linux-syscall-note.html"
25+
},
26+
{
27+
"checklist_url": "https://www.osadl.org/fileadmin/checklists/unreflicenses/GPL-2.0-only.txt",
28+
"copyleft": "yes",
29+
"incompatible_with": "Apache-1.0, Apache-1.1, Apache-2.0, BSD-4-Clause, BSD-4-Clause-UC, BSD-4.3TAHOE, ECL-2.0, FTL, IJG, LicenseRef-scancode-bsla-no-advert, Minpack, OpenSSL, PHP-3.01, Python-2.0, zlib-acknowledgement, XFree86-1.1",
30+
"name": "GPL-2.0-only",
31+
"osadl_updated": "2025-02-10T14:26:00+0000",
32+
"patent_hints": "yes",
33+
"source": "scancode",
34+
"url": "https://spdx.org/licenses/GPL-2.0-only.html"
35+
}
36+
],
37+
"lines": "12-150,540-561",
38+
"matched": "35%",
39+
"oss_lines": "10-148,86-107",
40+
"purl": [
41+
"pkg:github/kdrag0n/proton_bluecross",
42+
"pkg:github/fake/fake_repository"
43+
],
44+
"release_date": "2019-02-21",
45+
"server": {
46+
"kb_version": {
47+
"daily": "25.03.27",
48+
"monthly": "25.03"
49+
},
50+
"version": "5.4.10"
51+
},
52+
"source_hash": "45dd1e50621a8a32f88fbe0251a470ab",
53+
"status": "pending",
54+
"url": "https://github.com/kdrag0n/proton_bluecross",
55+
"url_hash": "a9c1c67f0930dc42dbd40c29e565bcdd",
56+
"vendor": "kdrag0n",
57+
"version": "15"
58+
}
59+
]
60+
}

0 commit comments

Comments
 (0)