Skip to content

Commit 47e00dc

Browse files
isasmendiagus authored and sschuberth committed
refactor(scanoss): Remove path anonymization from SCANOSS implementation
Remove the path anonymization functionality from the existing SCANOSS implementation as preparation for migrating to the Java SCANOSS SDK.

This is a temporary removal. While path anonymization is not yet available in the SDK, we plan to implement this feature in the upstream SDK in the future. This approach allows us to consolidate all SCANOSS functionality in the SDK rather than maintaining custom implementations.

Signed-off-by: Agustin Isasmendi <[email protected]>
1 parent b13bfe0 commit 47e00dc

File tree

4 files changed

+3
-49
lines changed

4 files changed

+3
-49
lines changed

plugins/scanners/scanoss/src/main/kotlin/ScanOss.kt

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,12 @@
2020
package org.ossreviewtoolkit.plugins.scanners.scanoss
2121

2222
import com.scanoss.Winnowing
23-
import com.scanoss.dto.ScanFileResult
2423
import com.scanoss.rest.ScanApi
2524
import com.scanoss.utils.JsonUtils
2625
import com.scanoss.utils.PackageDetails
2726

2827
import java.io.File
2928
import java.time.Instant
30-
import java.util.UUID
3129

3230
import org.apache.logging.log4j.kotlin.logger
3331

@@ -80,16 +78,6 @@ class ScanOss(
8078

8179
override val writeToStorage = config.writeToStorage
8280

83-
/**
84-
* The name of the file corresponding to the fingerprints can be sent to SCANOSS for more precise matches.
85-
* However, for anonymity, a unique identifier should be generated and used instead. This property holds the
86-
* mapping between the file paths and the unique identifiers. When receiving the response, the UUID will be
87-
* replaced by the actual file path.
88-
*
89-
* TODO: This behavior should be driven by a configuration parameter enabled by default.
90-
*/
91-
private val fileNamesAnonymizationMapping = mutableMapOf<UUID, String>()
92-
9381
override fun scanPath(path: File, context: ScanContext): ScanSummary {
9482
val startTime = Instant.now()
9583

@@ -110,27 +98,13 @@ class ScanOss(
11098
)
11199

112100
// Parse the JSON response into per-file scan results.
113-
val results = JsonUtils.toScanFileResultsFromObject(JsonUtils.toJsonObject(result)).map {
114-
val uuid = UUID.fromString(it.filePath)
115-
116-
val fileName = fileNamesAnonymizationMapping[uuid] ?: throw IllegalArgumentException(
117-
"The ${descriptor.id} server returned UUID '$uuid' which is not present in the mapping."
118-
)
119-
120-
ScanFileResult(fileName, it.fileDetails)
121-
}
101+
val results = JsonUtils.toScanFileResultsFromObject(JsonUtils.toJsonObject(result))
122102

123103
val endTime = Instant.now()
124104
return generateSummary(startTime, endTime, results)
125105
}
126106

127-
internal fun generateRandomUUID() = UUID.randomUUID()
128-
129107
internal fun createWfpForFile(file: File): String {
130-
generateRandomUUID().let { uuid ->
131-
// TODO: Let's keep the original file extension to give SCANOSS some hint about the mime type.
132-
fileNamesAnonymizationMapping[uuid] = file.path
133-
return Winnowing.builder().build().wfpForFile(file.path, uuid.toString())
134-
}
108+
return Winnowing.builder().build().wfpForFile(file.path, file.path)
135109
}
136110
}

plugins/scanners/scanoss/src/test/assets/scanMulti/mappings/scanoss-multi-response.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
},
1111
"response" : {
1212
"status" : 200,
13-
"body" : "{ \"c198b884-f6cf-496f-95eb-0e7968dd2ec6\": [ { \"id\": \"snippet\", \"status\": \"pending\", \"lines\": \"1-240\", \"oss_lines\": \"128-367\", \"matched\": \"99%\", \"purl\": [ \"pkg:github/scanoss/ort\" ], \"vendor\": \"scanoss\", \"component\": \"ort\", \"version\": \"e654028\", \"latest\": \"b12f8ee\", \"url\": \"https://github.com/scanoss/ort\", \"release_date\": \"2021-03-18\", \"file\": \"utils/src/main/kotlin/ArchiveUtils.kt\", \"url_hash\": \"37faa38a820322fa93bf7a8fa8290bb8\", \"file_hash\": \"871fb0c5188c2f620d9b997e225b0095\", \"source_hash\": \"2e91edbe430c4eb195a977d326d6d6c0\", \"file_url\": \"https://osskb.org/api/file_contents/871fb0c5188c2f620d9b997e225b0095\", \"licenses\": [ { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"file_spdx_tag\" }, { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"scancode\" } ], \"server\": { \"version\": \"4.4.2\", \"kb_version\": { \"monthly\": \"22.02\", \"daily\": \"22.03.25\" } } } ], \"5530105e-0752-4750-9c07-4e4604b879a5\": [ { \"id\": \"file\", \"status\": \"pending\", \"lines\": \"all\", \"oss_lines\": \"all\", \"matched\": \"100%\", \"purl\": [ \"pkg:github/scanoss/ort\" ], \"vendor\": \"scanoss\", \"component\": \"ort\", \"version\": \"e654028\", \"latest\": \"b12f8ee\", \"url\": \"https://github.com/scanoss/ort\", \"release_date\": \"2021-03-18\", \"file\": \"scanner/src/main/kotlin/ScannerFactory.kt\", \"url_hash\": \"37faa38a820322fa93bf7a8fa8290bb8\", \"file_hash\": \"5c8ab9be40df937e46c53509481107cd\", \"source_hash\": \"5c8ab9be40df937e46c53509481107cd\", \"file_url\": \"https://osskb.org/api/file_contents/5c8ab9be40df937e46c53509481107cd\", 
\"licenses\": [ { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"file_spdx_tag\" }, { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"scancode\" } ], \"server\": { \"version\": \"4.4.2\", \"kb_version\": { \"monthly\": \"22.02\", \"daily\": \"22.03.25\" } } } ]}",
13+
"body" : "{ \"utils/src/main/kotlin/ArchiveUtils.kt\": [ { \"id\": \"snippet\", \"status\": \"pending\", \"lines\": \"1-240\", \"oss_lines\": \"128-367\", \"matched\": \"99%\", \"purl\": [ \"pkg:github/scanoss/ort\" ], \"vendor\": \"scanoss\", \"component\": \"ort\", \"version\": \"e654028\", \"latest\": \"b12f8ee\", \"url\": \"https://github.com/scanoss/ort\", \"release_date\": \"2021-03-18\", \"file\": \"utils/src/main/kotlin/ArchiveUtils.kt\", \"url_hash\": \"37faa38a820322fa93bf7a8fa8290bb8\", \"file_hash\": \"871fb0c5188c2f620d9b997e225b0095\", \"source_hash\": \"2e91edbe430c4eb195a977d326d6d6c0\", \"file_url\": \"https://osskb.org/api/file_contents/871fb0c5188c2f620d9b997e225b0095\", \"licenses\": [ { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"file_spdx_tag\" }, { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"scancode\" } ], \"server\": { \"version\": \"4.4.2\", \"kb_version\": { \"monthly\": \"22.02\", \"daily\": \"22.03.25\" } } } ], \"5530105e-0752-4750-9c07-4e4604b879a5\": [ { \"id\": \"file\", \"status\": \"pending\", \"lines\": \"all\", \"oss_lines\": \"all\", \"matched\": \"100%\", \"purl\": [ \"pkg:github/scanoss/ort\" ], \"vendor\": \"scanoss\", \"component\": \"ort\", \"version\": \"e654028\", \"latest\": \"b12f8ee\", \"url\": \"https://github.com/scanoss/ort\", \"release_date\": \"2021-03-18\", \"file\": \"scanner/src/main/kotlin/ScannerFactory.kt\", \"url_hash\": \"37faa38a820322fa93bf7a8fa8290bb8\", \"file_hash\": \"5c8ab9be40df937e46c53509481107cd\", \"source_hash\": \"5c8ab9be40df937e46c53509481107cd\", \"file_url\": 
\"https://osskb.org/api/file_contents/5c8ab9be40df937e46c53509481107cd\", \"licenses\": [ { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"file_spdx_tag\" }, { \"name\": \"Apache-2.0\", \"patent_hints\": \"yes\", \"copyleft\": \"no\", \"checklist_url\": \"https://www.osadl.org/fileadmin/checklists/unreflicenses/Apache-2.0.txt\", \"osadl_updated\": \"2022-03-17 13:38\", \"source\": \"scancode\" } ], \"server\": { \"version\": \"4.4.2\", \"kb_version\": { \"monthly\": \"22.02\", \"daily\": \"22.03.25\" } } } ]}",
1414
"headers" : {
1515
"Server" : "nginx/1.14.2",
1616
"Date" : "Wed, 16 Mar 2022 13:07:04 GMT",

plugins/scanners/scanoss/src/test/kotlin/ScanOssScannerDirectoryTest.kt

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,10 @@ import io.kotest.matchers.collections.containExactly
2727
import io.kotest.matchers.collections.containExactlyInAnyOrder
2828
import io.kotest.matchers.should
2929

30-
import io.mockk.every
3130
import io.mockk.spyk
3231
import io.mockk.verify
3332

3433
import java.io.File
35-
import java.util.UUID
3634

3735
import org.ossreviewtoolkit.model.LicenseFinding
3836
import org.ossreviewtoolkit.model.PackageType
@@ -74,15 +72,6 @@ class ScanOssScannerDirectoryTest : StringSpec({
7472
}
7573

7674
"The scanner should scan a directory" {
77-
// Manipulate the UUID generation to have the same IDs as in the response.
78-
every {
79-
scanner.generateRandomUUID()
80-
} answers {
81-
UUID.fromString("5530105e-0752-4750-9c07-4e4604b879a5")
82-
} andThenAnswer {
83-
UUID.fromString("c198b884-f6cf-496f-95eb-0e7968dd2ec6")
84-
}
85-
8675
val summary = scanner.scanPath(
8776
TEST_DIRECTORY_TO_SCAN,
8877
ScanContext(labels = emptyMap(), packageType = PackageType.PACKAGE)

plugins/scanners/scanoss/src/test/kotlin/ScanOssScannerFileTest.kt

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,10 @@ import io.kotest.core.spec.style.StringSpec
2626
import io.kotest.matchers.collections.containExactly
2727
import io.kotest.matchers.should
2828

29-
import io.mockk.every
3029
import io.mockk.spyk
3130
import io.mockk.verify
3231

3332
import java.io.File
34-
import java.util.UUID
3533

3634
import org.ossreviewtoolkit.model.LicenseFinding
3735
import org.ossreviewtoolkit.model.PackageType
@@ -67,13 +65,6 @@ class ScanOssScannerFileTest : StringSpec({
6765
}
6866

6967
"The scanner should scan a single file" {
70-
// Manipulate the UUID generation to have the same IDs as in the response.
71-
every {
72-
scanner.generateRandomUUID()
73-
} answers {
74-
UUID.fromString("bf5401e9-03b3-4c91-906c-cadb90487b8c")
75-
}
76-
7768
val summary = scanner.scanPath(
7869
TEST_FILE_TO_SCAN,
7970
ScanContext(labels = emptyMap(), packageType = PackageType.PACKAGE)

0 commit comments

Comments
 (0)