Skip to content

Commit dbf8667

Browse files
committed
fix(scanoss): Fix SCANOSS-to-ORT model mapping in generateSummary()
* Fix the association between sourceLocations and snippets in generateSummary(). * Address the confusion between details.file and result.filePath when creating source locations. * Store all PURLs in additionalData while using the first PURL as the primary identifier. Signed-off-by: Agustin Isasmendi <[email protected]>
1 parent b809c5e commit dbf8667

File tree

4 files changed

+140
-22
lines changed

4 files changed

+140
-22
lines changed

plugins/scanners/scanoss/src/main/kotlin/ScanOssResultParser.kt

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@ import com.scanoss.dto.ScanFileDetails
2424
import com.scanoss.dto.ScanFileResult
2525
import com.scanoss.dto.enums.MatchType
2626

27+
import java.lang.invoke.MethodHandles
2728
import java.time.Instant
2829

30+
import org.apache.logging.log4j.kotlin.loggerOf
31+
2932
import org.ossreviewtoolkit.downloader.VcsHost
3033
import org.ossreviewtoolkit.model.CopyrightFinding
3134
import org.ossreviewtoolkit.model.LicenseFinding
@@ -38,6 +41,8 @@ import org.ossreviewtoolkit.utils.spdx.SpdxConstants
3841
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
3942
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
4043

44+
private val logger = loggerOf(MethodHandles.lookup().lookupClass())
45+
4146
/**
4247
* Generate a summary from the given SCANOSS [result], using [startTime], [endTime] as metadata. This variant can be
4348
* used if the result is not read from a local file.
@@ -56,17 +61,24 @@ internal fun generateSummary(startTime: Instant, endTime: Instant, results: List
5661
}
5762

5863
MatchType.snippet -> {
59-
val file = requireNotNull(details.file)
60-
val lines = requireNotNull(details.lines)
61-
val sourceLocations = convertLines(file, lines)
64+
val localFile = requireNotNull(result.filePath)
65+
val localLines = requireNotNull(details.lines)
66+
val sourceLocations = convertLines(localFile, localLines)
6267
val snippets = getSnippets(details)
6368

64-
snippets.forEach { snippet ->
65-
sourceLocations.forEach { sourceLocation ->
66-
// TODO: Aggregate the snippet by source file location.
67-
snippetFindings += SnippetFinding(sourceLocation, setOf(snippet))
69+
// The number of snippets should match the number of source locations.
70+
if (sourceLocations.size != snippets.size) {
71+
logger.warn {
72+
"Unexpected mismatch in '$localFile': " +
73+
"${sourceLocations.size} source locations vs ${snippets.size} snippets. " +
74+
"This indicates a potential issue with line range conversion."
6875
}
6976
}
77+
78+
// Associate each source location with its corresponding snippet.
79+
sourceLocations.zip(snippets).forEach { (location, snippet) ->
80+
snippetFindings += SnippetFinding(location, setOf(snippet))
81+
}
7082
}
7183

7284
MatchType.none -> {
@@ -135,34 +147,37 @@ private fun getCopyrightFindings(details: ScanFileDetails): List<CopyrightFindin
135147

136148
/**
137149
* Get the snippet findings from the given [details]. If a snippet returned by SCANOSS contains several Purls,
138-
* several snippets are created in ORT each containing a single Purl.
150+
* the function uses the first PURL as the primary identifier while storing all PURLs in additionalData
151+
* to preserve the complete information.
139152
*/
140-
private fun getSnippets(details: ScanFileDetails): Set<Snippet> {
153+
private fun getSnippets(details: ScanFileDetails): List<Snippet> {
141154
val matched = requireNotNull(details.matched)
142-
val fileUrl = requireNotNull(details.fileUrl)
155+
val ossFile = requireNotNull(details.file)
143156
val ossLines = requireNotNull(details.ossLines)
144157
val url = requireNotNull(details.url)
145158
val purls = requireNotNull(details.purls)
146159

147160
val license = getUniqueLicenseExpression(details.licenseDetails.toList())
148161

149162
val score = matched.substringBeforeLast("%").toFloat()
150-
val locations = convertLines(fileUrl, ossLines)
163+
val ossLocations = convertLines(ossFile, ossLines)
151164
// TODO: No resolved revision is available. Should an ArtifactProvenance be created instead?
152165
val vcsInfo = VcsHost.parseUrl(url.takeUnless { it == "none" }.orEmpty())
153166
val provenance = RepositoryProvenance(vcsInfo, ".")
154167

155-
return buildSet {
156-
purls.forEach { purl ->
157-
locations.forEach { snippetLocation ->
158-
add(Snippet(score, snippetLocation, provenance, purl, license))
159-
}
160-
}
168+
// Store all PURLs in additionalData to preserve the complete information.
169+
val additionalData = mapOf(
170+
"all_purls" to purls.joinToString(", ")
171+
)
172+
173+
// Create one snippet per location, using the first PURL as the primary identifier.
174+
return ossLocations.map { snippetLocation ->
175+
Snippet(score, snippetLocation, provenance, purls.firstOrNull().orEmpty(), license, additionalData)
161176
}
162177
}
163178

164179
/**
165-
* Split [lineRanges] returned by ScanOSS such as "32-105,117-199" into [TextLocation]s for the given [file].
180+
* Split [lineRanges] returned by SCANOSS such as "32-105,117-199" into [TextLocation]s for the given [file].
166181
*/
167182
private fun convertLines(file: String, lineRanges: String): List<TextLocation> =
168183
lineRanges.split(',').map { lineRange ->
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
{
2+
"hung_task.c": [
3+
{
4+
"component": "proton_bluecross",
5+
"file": "kernel/hung_task.c",
6+
"file_hash": "581734935cfbe570d280a1265aaa2a6b",
7+
"file_url": "https://api.scanoss.com/file_contents/581734935cfbe570d280a1265aaa2a6b",
8+
"id": "snippet",
9+
"latest": "17",
10+
"licenses": [
11+
{
12+
"checklist_url": "https://www.osadl.org/fileadmin/checklists/unreflicenses/GPL-2.0-only.txt",
13+
"copyleft": "yes",
14+
"incompatible_with": "Apache-1.0, Apache-1.1, Apache-2.0, BSD-4-Clause, BSD-4-Clause-UC, BSD-4.3TAHOE, ECL-2.0, FTL, IJG, LicenseRef-scancode-bsla-no-advert, Minpack, OpenSSL, PHP-3.01, Python-2.0, zlib-acknowledgement, XFree86-1.1",
15+
"name": "GPL-2.0-only",
16+
"osadl_updated": "2025-02-10T14:26:00+0000",
17+
"patent_hints": "yes",
18+
"source": "scancode",
19+
"url": "https://spdx.org/licenses/GPL-2.0-only.html"
20+
},
21+
{
22+
"name": "GPL-2.0-only WITH Linux-syscall-note",
23+
"source": "scancode",
24+
"url": "https://spdx.org/licenses/GPL-2.0-only WITH Linux-syscall-note.html"
25+
},
26+
{
27+
"checklist_url": "https://www.osadl.org/fileadmin/checklists/unreflicenses/GPL-2.0-only.txt",
28+
"copyleft": "yes",
29+
"incompatible_with": "Apache-1.0, Apache-1.1, Apache-2.0, BSD-4-Clause, BSD-4-Clause-UC, BSD-4.3TAHOE, ECL-2.0, FTL, IJG, LicenseRef-scancode-bsla-no-advert, Minpack, OpenSSL, PHP-3.01, Python-2.0, zlib-acknowledgement, XFree86-1.1",
30+
"name": "GPL-2.0-only",
31+
"osadl_updated": "2025-02-10T14:26:00+0000",
32+
"patent_hints": "yes",
33+
"source": "scancode",
34+
"url": "https://spdx.org/licenses/GPL-2.0-only.html"
35+
}
36+
],
37+
"lines": "12-150,540-561",
38+
"matched": "35%",
39+
"oss_lines": "10-148,86-107",
40+
"purl": [
41+
"pkg:github/kdrag0n/proton_bluecross",
42+
"pkg:github/fake/fake_repository"
43+
],
44+
"release_date": "2019-02-21",
45+
"server": {
46+
"kb_version": {
47+
"daily": "25.03.27",
48+
"monthly": "25.03"
49+
},
50+
"version": "5.4.10"
51+
},
52+
"source_hash": "45dd1e50621a8a32f88fbe0251a470ab",
53+
"status": "pending",
54+
"url": "https://github.com/kdrag0n/proton_bluecross",
55+
"url_hash": "a9c1c67f0930dc42dbd40c29e565bcdd",
56+
"vendor": "kdrag0n",
57+
"version": "15"
58+
}
59+
]
60+
}

plugins/scanners/scanoss/src/test/kotlin/ScanOssResultParserTest.kt

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ class ScanOssResultParserTest : WordSpec({
153153
Snippet(
154154
98.0f,
155155
TextLocation(
156-
"https://osskb.org/api/file_contents/6ff2427335b985212c9b79dfa795799f",
156+
"src/main/java/com/vdurmont/semver4j/Requirement.java",
157157
1,
158158
710
159159
),
@@ -162,11 +162,51 @@ class ScanOssResultParserTest : WordSpec({
162162
"."
163163
),
164164
"pkg:github/vdurmont/semver4j",
165-
SpdxExpression.parse("CC-BY-SA-2.0")
165+
SpdxExpression.parse("CC-BY-SA-2.0"),
166+
mapOf(
167+
"all_purls" to "pkg:github/vdurmont/semver4j"
168+
)
166169
)
167170
)
168171
)
169172
)
170173
}
174+
175+
"should handle multiple PURLs by selecting first as primary and preserving all in metadata" {
176+
val results = File("src/test/assets/scanoss-multiple-purls.json").readText().let {
177+
JsonUtils.toScanFileResultsFromObject(JsonUtils.toJsonObject(it))
178+
}
179+
180+
val time = Instant.now()
181+
val summary = generateSummary(time, time, results)
182+
183+
// Should have one finding per source location, not per PURL.
184+
summary.snippetFindings should haveSize(2)
185+
186+
with(summary.snippetFindings.first()) {
187+
// Check source location (local file).
188+
sourceLocation shouldBe TextLocation("hung_task.c", 12, 150)
189+
190+
// Should use first PURL as primary identifier.
191+
snippets should haveSize(1)
192+
snippets.first().purl shouldBe "pkg:github/kdrag0n/proton_bluecross"
193+
194+
// Should preserve all PURLs in additionalData.
195+
snippets.first().additionalData["all_purls"] shouldBe
196+
"pkg:github/kdrag0n/proton_bluecross, pkg:github/fake/fake_repository"
197+
198+
// Check OSS location.
199+
snippets.first().location shouldBe
200+
TextLocation("kernel/hung_task.c", 10, 148)
201+
}
202+
203+
// Verify same behavior for second snippet.
204+
with(summary.snippetFindings.last()) {
205+
sourceLocation shouldBe TextLocation("hung_task.c", 540, 561)
206+
snippets.first().purl shouldBe "pkg:github/kdrag0n/proton_bluecross"
207+
snippets.first().location shouldBe
208+
TextLocation("kernel/hung_task.c", 86, 107)
209+
}
210+
}
171211
}
172212
})

plugins/scanners/scanoss/src/test/kotlin/ScanOssScannerDirectoryTest.kt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,18 @@ class ScanOssScannerDirectoryTest : StringSpec({
103103
Snippet(
104104
99.0f,
105105
TextLocation(
106-
"https://osskb.org/api/file_contents/871fb0c5188c2f620d9b997e225b0095",
106+
"utils/src/main/kotlin/ArchiveUtils.kt", // This is the remote oss filepath
107107
128,
108108
367
109109
),
110110
RepositoryProvenance(
111111
VcsInfo(VcsType.GIT, "https://github.com/scanoss/ort.git", ""), "."
112112
),
113113
"pkg:github/scanoss/ort",
114-
SpdxExpression.parse("Apache-2.0")
114+
SpdxExpression.parse("Apache-2.0"),
115+
mapOf(
116+
"all_purls" to "pkg:github/scanoss/ort"
117+
)
115118
)
116119
)
117120
)

0 commit comments

Comments
 (0)