Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 9 additions & 65 deletions plugins/scanners/scanoss/src/main/kotlin/ScanOss.kt
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,20 @@

package org.ossreviewtoolkit.plugins.scanners.scanoss

import com.scanoss.Winnowing
import com.scanoss.dto.ScanFileResult
import com.scanoss.rest.ScanApi
import com.scanoss.Scanner
import com.scanoss.utils.JsonUtils
import com.scanoss.utils.PackageDetails

import java.io.File
import java.time.Instant
import java.util.UUID

import org.apache.logging.log4j.kotlin.logger

import org.ossreviewtoolkit.model.ScanSummary
import org.ossreviewtoolkit.plugins.api.OrtPlugin
import org.ossreviewtoolkit.plugins.api.PluginDescriptor
import org.ossreviewtoolkit.scanner.PathScannerWrapper
import org.ossreviewtoolkit.scanner.ScanContext
import org.ossreviewtoolkit.scanner.ScannerMatcher
import org.ossreviewtoolkit.scanner.ScannerMatcherConfig
import org.ossreviewtoolkit.scanner.ScannerWrapperFactory
import org.ossreviewtoolkit.utils.common.VCS_DIRECTORIES

@OrtPlugin(
id = "SCANOSS",
Expand All @@ -51,11 +44,10 @@ class ScanOss(
override val descriptor: PluginDescriptor = ScanOssFactory.descriptor,
config: ScanOssConfig
) : PathScannerWrapper {
private val service = ScanApi.builder()
private val scanossBuilder = Scanner.builder()
// As there is only a single endpoint, the SCANOSS API client expects the path to be part of the API URL.
.url(config.apiUrl.removeSuffix("/") + "/scan/direct")
.apiKey(config.apiKey.value)
.build()

override val version: String by lazy {
// TODO: Find out the best / cheapest way to query the SCANOSS server for its version.
Expand All @@ -64,73 +56,25 @@ class ScanOss(

override val configuration = ""

override val matcher by lazy {
ScannerMatcher.create(
details,
ScannerMatcherConfig(
config.regScannerName,
config.minVersion,
config.maxVersion,
configuration
)
)
}
override val matcher: ScannerMatcher? = null
Copy link
Member

@mnonnenmacher mnonnenmacher May 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@isasmendiagus You should also change the property below to `override val readFromStorage = false` and remove the `readFromStorage` property from the config class, as is also done in `FossId`.
I'm not requesting a change because you can also do it separately.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hadn't seen that. I'll implement the change in PR #10287.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@isasmendiagus, please create a separate PR for this, as I want to get this in before our discussion about the report generation PR comes to a conclusion.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #10291.


override val readFromStorage = config.readFromStorage

override val writeToStorage = config.writeToStorage

/**
* The name of the file corresponding to the fingerprints can be sent to SCANOSS for more precise matches.
* However, for anonymity, a unique identifier should be generated and used instead. This property holds the
* mapping between the file paths and the unique identifiers. When receiving the response, the UUID will be
* replaced by the actual file path.
*
* TODO: This behavior should be driven by a configuration parameter enabled by default.
*/
private val fileNamesAnonymizationMapping = mutableMapOf<UUID, String>()

override fun scanPath(path: File, context: ScanContext): ScanSummary {
val startTime = Instant.now()

val wfpString = buildString {
path.walk()
.onEnter { it.name !in VCS_DIRECTORIES }
.filterNot { it.isDirectory }
.forEach {
logger.info { "Computing fingerprint for file ${it.absolutePath}..." }
append(createWfpForFile(it))
}
}

val result = service.scan(
wfpString,
context.labels["scanOssContext"],
context.labels["scanOssId"]?.toIntOrNull() ?: Thread.currentThread().threadId().toInt()
)

// Replace the anonymized UUIDs by their file paths.
val results = JsonUtils.toScanFileResultsFromObject(JsonUtils.toJsonObject(result)).map {
val uuid = UUID.fromString(it.filePath)
// Build the scanner at function level in case any path-specific settings or filters are needed later.
val scanoss = scanossBuilder.build()

val fileName = fileNamesAnonymizationMapping[uuid] ?: throw IllegalArgumentException(
"The ${descriptor.id} server returned UUID '$uuid' which is not present in the mapping."
)

ScanFileResult(fileName, it.fileDetails)
val rawResults = when {
path.isFile -> listOf(scanoss.scanFile(path.toString()))
else -> scanoss.scanFolder(path.toString())
}

val results = JsonUtils.toScanFileResults(rawResults)
val endTime = Instant.now()
return generateSummary(startTime, endTime, results)
}

internal fun generateRandomUUID() = UUID.randomUUID()

internal fun createWfpForFile(file: File): String {
generateRandomUUID().let { uuid ->
// TODO: Let's keep the original file extension to give SCANOSS some hint about the mime type.
fileNamesAnonymizationMapping[uuid] = file.path
return Winnowing.builder().build().wfpForFile(file.path, uuid.toString())
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
},
"response" : {
"status" : 200,
"body" : "{ \"c198b884-f6cf-496f-95eb-0e7968dd2ec6\": [ { \"id\": \"snippet\", \"status\": \"pending\", \"lines\": \"1-240\", \"oss_lines\": \"128-367\", \"matched\": \"99%\", \"purl\": [ \"pkg:github/scanoss/ort\" ], \"vendor\": \"scanoss\", \"component\": \"ort\", \"version\": \"e654028\", \"latest\": \"b12f8ee\", \"url\": \"https://github.com/scanoss/ort\", \"release_date\": \"2021-03-18\", \"file\": \"utils/src/main/kotlin/ArchiveUtils.kt\", \"url_hash\": \"37faa38a820322fa93bf7a8fa8290bb8\", \"file_hash\": \"871fb0c5188c2f620d9b997e225b0095\", \"source_hash\": \"2e91edbe430c4eb195a977d326d6d6c0\", \"file_url\": \"https://osskb.org/api/file_contents/871fb0c5188c2f620d9b997e225b0095\", \"licenses\": [ { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"file_spdx_tag\" }, { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"scancode\" } ], \"server\": { \"version\": \"4.4.2\", \"kb_version\": { \"monthly\": \"22.02\", \"daily\": \"22.03.25\" } } } ], \"5530105e-0752-4750-9c07-4e4604b879a5\": [ { \"id\": \"file\", \"status\": \"pending\", \"lines\": \"all\", \"oss_lines\": \"all\", \"matched\": \"100%\", \"purl\": [ \"pkg:github/scanoss/ort\" ], \"vendor\": \"scanoss\", \"component\": \"ort\", \"version\": \"e654028\", \"latest\": \"b12f8ee\", \"url\": \"https://github.com/scanoss/ort\", \"release_date\": \"2021-03-18\", \"file\": \"scanner/src/main/kotlin/ScannerFactory.kt\", \"url_hash\": \"37faa38a820322fa93bf7a8fa8290bb8\", \"file_hash\": \"5c8ab9be40df937e46c53509481107cd\", \"source_hash\": \"5c8ab9be40df937e46c53509481107cd\", \"file_url\": \"https://osskb.org/api/file_contents/5c8ab9be40df937e46c53509481107cd\", 
\"licenses\": [ { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"file_spdx_tag\" }, { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"scancode\" } ], \"server\": { \"version\": \"4.4.2\", \"kb_version\": { \"monthly\": \"22.02\", \"daily\": \"22.03.25\" } } } ]}",
"body" : "{ \"utils/src/main/kotlin/ArchiveUtils.kt\": [ { \"id\": \"snippet\", \"status\": \"pending\", \"lines\": \"1-240\", \"oss_lines\": \"128-367\", \"matched\": \"99%\", \"purl\": [ \"pkg:github/scanoss/ort\" ], \"vendor\": \"scanoss\", \"component\": \"ort\", \"version\": \"e654028\", \"latest\": \"b12f8ee\", \"url\": \"https://github.com/scanoss/ort\", \"release_date\": \"2021-03-18\", \"file\": \"utils/src/main/kotlin/ArchiveUtils.kt\", \"url_hash\": \"37faa38a820322fa93bf7a8fa8290bb8\", \"file_hash\": \"871fb0c5188c2f620d9b997e225b0095\", \"source_hash\": \"2e91edbe430c4eb195a977d326d6d6c0\", \"file_url\": \"https://osskb.org/api/file_contents/871fb0c5188c2f620d9b997e225b0095\", \"licenses\": [ { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"file_spdx_tag\" }, { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"scancode\" } ], \"server\": { \"version\": \"4.4.2\", \"kb_version\": { \"monthly\": \"22.02\", \"daily\": \"22.03.25\" } } } ], \"5530105e-0752-4750-9c07-4e4604b879a5\": [ { \"id\": \"file\", \"status\": \"pending\", \"lines\": \"all\", \"oss_lines\": \"all\", \"matched\": \"100%\", \"purl\": [ \"pkg:github/scanoss/ort\" ], \"vendor\": \"scanoss\", \"component\": \"ort\", \"version\": \"e654028\", \"latest\": \"b12f8ee\", \"url\": \"https://github.com/scanoss/ort\", \"release_date\": \"2021-03-18\", \"file\": \"scanner/src/main/kotlin/ScannerFactory.kt\", \"url_hash\": \"37faa38a820322fa93bf7a8fa8290bb8\", \"file_hash\": \"5c8ab9be40df937e46c53509481107cd\", \"source_hash\": \"5c8ab9be40df937e46c53509481107cd\", \"file_url\": 
\"https://osskb.org/api/file_contents/5c8ab9be40df937e46c53509481107cd\", \"licenses\": [ { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"file_spdx_tag\" }, { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"scancode\" } ], \"server\": { \"version\": \"4.4.2\", \"kb_version\": { \"monthly\": \"22.02\", \"daily\": \"22.03.25\" } } } ]}",
"headers" : {
"Server" : "nginx/1.14.2",
"Date" : "Wed, 16 Mar 2022 13:07:04 GMT",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,9 @@ import io.kotest.matchers.collections.containExactly
import io.kotest.matchers.collections.containExactlyInAnyOrder
import io.kotest.matchers.should

import io.mockk.every
import io.mockk.spyk
import io.mockk.verify

import java.io.File
import java.util.UUID

import org.ossreviewtoolkit.model.LicenseFinding
import org.ossreviewtoolkit.model.PackageType
Expand Down Expand Up @@ -74,25 +71,11 @@ class ScanOssScannerDirectoryTest : StringSpec({
}

"The scanner should scan a directory" {
// Manipulate the UUID generation to have the same IDs as in the response.
every {
scanner.generateRandomUUID()
} answers {
UUID.fromString("5530105e-0752-4750-9c07-4e4604b879a5")
} andThenAnswer {
UUID.fromString("c198b884-f6cf-496f-95eb-0e7968dd2ec6")
}

val summary = scanner.scanPath(
TEST_DIRECTORY_TO_SCAN,
ScanContext(labels = emptyMap(), packageType = PackageType.PACKAGE)
)

verify(exactly = 1) {
scanner.createWfpForFile(TEST_DIRECTORY_TO_SCAN.resolve("ArchiveUtils.kt"))
scanner.createWfpForFile(TEST_DIRECTORY_TO_SCAN.resolve("ScannerFactory.kt"))
}

with(summary) {
licenseFindings should containExactlyInAnyOrder(
LicenseFinding(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,9 @@ import io.kotest.core.spec.style.StringSpec
import io.kotest.matchers.collections.containExactly
import io.kotest.matchers.should

import io.mockk.every
import io.mockk.spyk
import io.mockk.verify

import java.io.File
import java.util.UUID

import org.ossreviewtoolkit.model.LicenseFinding
import org.ossreviewtoolkit.model.PackageType
Expand Down Expand Up @@ -67,22 +64,11 @@ class ScanOssScannerFileTest : StringSpec({
}

"The scanner should scan a single file" {
// Manipulate the UUID generation to have the same IDs as in the response.
every {
scanner.generateRandomUUID()
} answers {
UUID.fromString("bf5401e9-03b3-4c91-906c-cadb90487b8c")
}

val summary = scanner.scanPath(
TEST_FILE_TO_SCAN,
ScanContext(labels = emptyMap(), packageType = PackageType.PACKAGE)
)

verify(exactly = 1) {
scanner.createWfpForFile(TEST_FILE_TO_SCAN)
}

with(summary) {
licenseFindings should containExactly(
LicenseFinding(
Expand Down
Loading