Skip to content

Commit 79c41c8

Browse files
committed
feat(scanoss): Add exclusion pattern support to SCANOSS
Implement exclusion filtering to respect path patterns specified in the configuration. The scanner now properly excludes files matching the patterns during the scan process. Signed-off-by: Agustin Isasmendi <[email protected]>
1 parent b996823 commit 79c41c8

File tree

5 files changed

+181
-1
lines changed

5 files changed

+181
-1
lines changed

plugins/scanners/scanoss/src/main/kotlin/ScanOss.kt

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,15 @@
2020
package org.ossreviewtoolkit.plugins.scanners.scanoss
2121

2222
import com.scanoss.Scanner
23+
import com.scanoss.filters.FilterConfig
2324
import com.scanoss.utils.JsonUtils
2425
import com.scanoss.utils.PackageDetails
2526

2627
import java.io.File
2728
import java.time.Instant
2829

30+
import org.apache.logging.log4j.kotlin.logger
31+
2932
import org.ossreviewtoolkit.model.ScanSummary
3033
import org.ossreviewtoolkit.plugins.api.OrtPlugin
3134
import org.ossreviewtoolkit.plugins.api.PluginDescriptor
@@ -65,8 +68,27 @@ class ScanOss(
6568
override fun scanPath(path: File, context: ScanContext): ScanSummary {
6669
val startTime = Instant.now()
6770

71+
val filterConfig = FilterConfig.builder()
72+
.customFilter { currentPath ->
73+
// The "currentPath" variable contains a path object representing the file or directory being evaluated
74+
// by the filter.
75+
// This is provided by the Scanner and represents individual files/directories during traversal.
76+
try {
77+
val relativePath = currentPath.toFile().toRelativeString(path)
78+
val isExcluded = context.excludes?.isPathExcluded(relativePath) ?: false
79+
logger.debug { "Path: $currentPath, relative: $relativePath, isExcluded: $isExcluded" }
80+
isExcluded
81+
} catch (e: IllegalArgumentException) {
82+
logger.warn { "Error processing path $currentPath: ${e.message}" }
83+
false
84+
}
85+
}
86+
.build()
87+
6888
// Build the scanner at function level because the exclusion filter depends on this scan's path and context.
69-
val scanoss = scanossBuilder.build()
89+
val scanoss = scanossBuilder
90+
.filterConfig(filterConfig)
91+
.build()
7092

7193
val rawResults = when {
7294
path.isFile -> listOf(scanoss.scanFile(path.toString()))
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* This file contains random data generated using the following command:
3+
* head -c 1k < /dev/urandom | base64
4+
*
5+
* The command takes 1 kilobyte of random bytes from the /dev/urandom device
6+
* and then encodes it as base64 text.
7+
*
8+
* Generated on: Fri Mar 14 05:04:43 PM CET 2025
9+
* Purpose: To create test data with completely random content that cannot
10+
* match any existing code in repositories, thereby avoiding false
11+
* positives when scanning ORT source code.
12+
*/
13+
14+
7m8Y06QhHzmQ4ePs0UUUasqsc8SP1ayNTFdQb6wffQwMu605hXOGHbOoy5pUv7ksgf6sw5ET2qXp
15+
T23LF2yA1cdNeDt8DBDd3IDmLX/wGgXcQjcaCtfSsMWB7oqHBMGkzwC5fMcDKPLK6ec2MwX6WPkw
16+
E18ImifWtAmGPEFGxWuqIinhE1yGSN+ImqJPVmpYfMOaDIAaS3JpiHZDmJW5uyQ5DB6W7lpm0q+f
17+
ZbtGPBeimy1jWF0H6kEW/TIve8RzUjdHU/t//O9r0b2AP08shSrSDWGlbQzxTniLOp2VZxNUEcVM
18+
c9/Lx4OXEaM/3NDCdr4qQS/1kZpGKFrv06zzC8tlncGaxBfdZSCsh1i+LbZtvUmTSv/wz7g+mld5
19+
WB2lSzF1Ervzqnm2+3iY+9TvVxDWzZ27LWsd1kvFrJCM03jI1q0c7uJrnnovAOoZkH2QiMPNBQmB
20+
wShT36h3su/aiOXEquXi+DoTSYDNgXeHVGI2joLVWYLfeTcTTfdvZiwp0K+XQp6fKtWX8tpUibNq
21+
ngp2dOlzl5yiT+WAD2ETGuyEML/wM3oz+wB93me2YYLJqz/1gtlnnRvGnAukbFLpxxXGK7Vnz+FF
22+
KfcPWF+O/FNV3nJD+m2nlMVj1n4lRM/mUEdVDhDDtxhywvi6DdNQMcUoeXZRT3dLk27+efNLvMDk
23+
7TsW/asvMoPrioAkDiTHqWvy+OUImWqqzNpzxIMuTWZrApSklw2UeyXknvHBORUN95AM6Oe9iKb0
24+
7B2g8U9dIFo7v/AhaDqoQMw+Dz1KfH6+fPaqZEy2H1U7/9RSorKz0fycz7n7BtqWxjenqw11LLxy
25+
lO36udPuvtr2b/WB/4ch0LuoI2eA11iTeIG4DuTxvizU3lExBXP+e8EAjkWx6F2ymDrI21PYPp++
26+
uidSk3g/RmaRZGk8akcXbs3pDO/twfjaH3YWYZzBf8aP1TRYDp4NF5v2OhWDa5d2dqdQGDRGg/wy
27+
Gf0W8txn/fQ3QN7SS9qPftgD6OYIpxKjIWBq/zb5+SAzhBZVjFYw+KVi+zu/P7he7xLRko6APCum
28+
Ugk7wqohWVdbl2IG2RIuPUOH2zQdzVJvLisKhfq3q6ydGmjD/WRNOxbebpmSKcmZWVg0Ko7/e0ys
29+
ymV2Ud0tIZwfIH/7476SZAh0ym1U7mgyzm/jlxKm5gUIF1+NWQiqa80GmsAJfquf1Yj4i0ftF+eO
30+
6OPJqbkZERpu24u2HIfL6CvlUkx08mS+eqLzyRiRuidDcQGFOK+0xPUk01jOZnGiY1ptG4W+Fo5K
31+
OhcT2H14wQqiHsthzMhpSLXwMG2ddM7P69rHEAXB3iXyWopgdWopVekxHEuar0mv3D6uO/4HKQ==
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* This file contains random data generated using the following command:
3+
* head -c 1k < /dev/urandom | base64
4+
*
5+
* The command takes 1 kilobyte of random bytes from the /dev/urandom device
6+
* and then encodes it as base64 text.
7+
*
8+
* Generated on: Fri Mar 14 05:05:29 PM CET 2025
9+
* Purpose: To create test data with completely random content that cannot
10+
* match any existing code in repositories, thereby avoiding false
11+
* positives when scanning ORT source code.
12+
*/
13+
14+
IyaayUoMK28Ib11Z55hC2OShY3p3HzWQyGy199hx20oqZrypl9AuDhKtBdl+qozcZBNajzvkU3H/
15+
jh3vV/P9I2VLQNVqMCpjelQoXyVq/nmbwxdQBXGbLgcC4J05ujQ2hoXuF4jdtEttDxca8P/EpUub
16+
nmSO3zmz86LqyyYgFj3imketFw0GvnCYU/8VDjmLxnigspEVI7ZDOacKOshObwH+Br/XgFHr5tyc
17+
ulGqACTjGY3EEdAjC2+tcTqoI+4mXVxx4CcBD4lRn90khfFOcAM8Iu2pGaERHnAtUrf9EX3rLsOW
18+
wV+wYllChP71rI/4ueEch9X8ph1dA0nQQN1tLUi58pQlkCHY6K0QNFiD6K+RxaBl3yBt1IZqjfZi
19+
UVjTb7xJDrYnLPIASlPd0AduDik8pKn+GTqIFWgkkRr5mY6c9jTqHxY7rASDNi7LGKUE9gPFd1LD
20+
xPJmsl+8L+lcVJjJNU7Tkps/ZZJuo/EqlwbUd/Wq45S++YBBfYlFaOXn/bVMhxXi1SH3xMHSAjH1
21+
aYj0YHEdBHnEF1ouahyS4607cundZcSR29kITrUnFSi/ZP3zKREa3MGm/qrJS7qFSxlHVsYHBIjy
22+
VRx+teV4nQWKJyA6x/T9Sx63lM7duwhVRdh9JxhxnrKAyUBH5HwhpFXHreMjudNdY9nMaWaKP9Ge
23+
oD4Rr4iA3kvaHjtqSfhB55PgQO7Od/KLNTRfMMPl7IjbouQNCai++hV+p7BRAjtGUwTOXp9FHbv7
24+
YFGBFl3a0e1+YEoQA+0Psf1x2lENCJwH87DuZxuKI3kbcY6XA5kebt43m/eztRa17z/vmwyiQ/up
25+
+RpMU9Xp1bv39h84QbyvZYN40xzHc8togJmPtSKCyEPcmHdt9t0LF6TCsb4k+kIBRUXMfnYpEDqv
26+
E6dldgWHjVy/4llWqyj3SsToERP1VhaloWyq8QRNke6lKzxMXOhmupKX195V2cA+6EGY3sK/ykhl
27+
fYOofbKcHwevHKgOJyj7Tj6+9qUgda/EI01lcJicTO8Nqb0LW+FfwIiws7WlsZWuxQGUZ0SOMBU4
28+
MnR9NPbS6rUSx2rMfSPn18Jd82D5eoM32ogRQb7C2pgXQbQoAegl98vtOjkze4wsa6CmW0rmbQrJ
29+
bpgPoWiZ1t/BlAUvxjRuzQSNNhnyvaC5nib6NYZAcr9BCm3yJ0sR/uSOUG8cCoJptMYhH9XxHqKl
30+
ACwfHgq7/mHBTxhQCmw5hkDWvY7FqzDPME3igab1Mda4lxOyUjJ3PeVzZbWZY2s/oUaSbntsSqRM
31+
z+zutj83Nm76iOSS0MXxCfi5VKYThzGdfXkYB2tZP8yPhh+sw0CpqeV5KB810C76abZbVZ+EDw==
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* This file contains random data generated using the following command:
3+
* head -c 1k < /dev/urandom | base64
4+
*
5+
* The command takes 1 kilobyte of random bytes from the /dev/urandom device
6+
* and then encodes it as base64 text.
7+
*
8+
* Generated on: Fri Mar 14 05:05:29 PM CET 2025
9+
* Purpose: To create test data with completely random content that cannot
10+
* match any existing code in repositories, thereby avoiding false
11+
* positives when scanning ORT source code.
12+
*/
13+
14+
IyaayUoMK28Ib11Z55hC2OShY3p3HzWQyGy199hx20oqZrypl9AuDhKtBdl+qozcZBNajzvkU3H/
15+
jh3vV/P9I2VLQNVqMCpjelQoXyVq/nmbwxdQBXGbLgcC4J05ujQ2hoXuF4jdtEttDxca8P/EpUub
16+
nmSO3zmz86LqyyYgFj3imketFw0GvnCYU/8VDjmLxnigspEVI7ZDOacKOshObwH+Br/XgFHr5tyc
17+
ulGqACTjGY3EEdAjC2+tcTqoI+4mXVxx4CcBD4lRn90khfFOcAM8Iu2pGaERHnAtUrf9EX3rLsOW
18+
wV+wYllChP71rI/4ueEch9X8ph1dA0nQQN1tLUi58pQlkCHY6K0QNFiD6K+RxaBl3yBt1IZqjfZi
19+
UVjTb7xJDrYnLPIASlPd0AduDik8pKn+GTqIFWgkkRr5mY6c9jTqHxY7rASDNi7LGKUE9gPFd1LD
20+
xPJmsl+8L+lcVJjJNU7Tkps/ZZJuo/EqlwbUd/Wq45S++YBBfYlFaOXn/bVMhxXi1SH3xMHSAjH1
21+
aYj0YHEdBHnEF1ouahyS4607cundZcSR29kITrUnFSi/ZP3zKREa3MGm/qrJS7qFSxlHVsYHBIjy
22+
VRx+teV4nQWKJyA6x/T9Sx63lM7duwhVRdh9JxhxnrKAyUBH5HwhpFXHreMjudNdY9nMaWaKP9Ge
23+
oD4Rr4iA3kvaHjtqSfhB55PgQO7Od/KLNTRfMMPl7IjbouQNCai++hV+p7BRAjtGUwTOXp9FHbv7
24+
YFGBFl3a0e1+YEoQA+0Psf1x2lENCJwH87DuZxuKI3kbcY6XA5kebt43m/eztRa17z/vmwyiQ/up
25+
+RpMU9Xp1bv39h84QbyvZYN40xzHc8togJmPtSKCyEPcmHdt9t0LF6TCsb4k+kIBRUXMfnYpEDqv
26+
E6dldgWHjVy/4llWqyj3SsToERP1VhaloWyq8QRNke6lKzxMXOhmupKX195V2cA+6EGY3sK/ykhl
27+
fYOofbKcHwevHKgOJyj7Tj6+9qUgda/EI01lcJicTO8Nqb0LW+FfwIiws7WlsZWuxQGUZ0SOMBU4
28+
MnR9NPbS6rUSx2rMfSPn18Jd82D5eoM32ogRQb7C2pgXQbQoAegl98vtOjkze4wsa6CmW0rmbQrJ
29+
bpgPoWiZ1t/BlAUvxjRuzQSNNhnyvaC5nib6NYZAcr9BCm3yJ0sR/uSOUG8cCoJptMYhH9XxHqKl
30+
ACwfHgq7/mHBTxhQCmw5hkDWvY7FqzDPME3igab1Mda4lxOyUjJ3PeVzZbWZY2s/oUaSbntsSqRM
31+
z+zutj83Nm76iOSS0MXxCfi5VKYThzGdfXkYB2tZP8yPhh+sw0CpqeV5KB810C76abZbVZ+EDw==

plugins/scanners/scanoss/src/test/kotlin/ScanOssScannerDirectoryTest.kt

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,15 @@
2020
package org.ossreviewtoolkit.plugins.scanners.scanoss
2121

2222
import com.github.tomakehurst.wiremock.WireMockServer
23+
import com.github.tomakehurst.wiremock.client.WireMock
2324
import com.github.tomakehurst.wiremock.core.WireMockConfiguration
2425

26+
import io.kotest.assertions.fail
2527
import io.kotest.core.spec.style.StringSpec
2628
import io.kotest.matchers.collections.containExactly
2729
import io.kotest.matchers.collections.containExactlyInAnyOrder
2830
import io.kotest.matchers.should
31+
import io.kotest.matchers.shouldBe
2932

3033
import io.mockk.spyk
3134

@@ -39,11 +42,16 @@ import org.ossreviewtoolkit.model.SnippetFinding
3942
import org.ossreviewtoolkit.model.TextLocation
4043
import org.ossreviewtoolkit.model.VcsInfo
4144
import org.ossreviewtoolkit.model.VcsType
45+
import org.ossreviewtoolkit.model.config.Excludes
46+
import org.ossreviewtoolkit.model.config.PathExclude
47+
import org.ossreviewtoolkit.model.config.PathExcludeReason
4248
import org.ossreviewtoolkit.plugins.api.Secret
4349
import org.ossreviewtoolkit.scanner.ScanContext
4450
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
4551

52+
// Define separate directories for different test scenarios.
4653
private val TEST_DIRECTORY_TO_SCAN = File("src/test/assets/filesToScan")
54+
private val EXCLUSION_TEST_DIRECTORY = File("src/test/assets/exclusionTest")
4755

4856
/**
4957
* A test for scanning a directory with the [ScanOss] scanner.
@@ -110,4 +118,61 @@ class ScanOssScannerDirectoryTest : StringSpec({
110118
)
111119
}
112120
}
121+
122+
"Scanner should exclude only files matching the specified path pattern (**/*.kt)" {
123+
val pathExcludes = listOf(
124+
PathExclude(
125+
pattern = "**/*.kt", // Glob pattern to match all .kt files in any directory.
126+
reason = PathExcludeReason.BUILD_TOOL_OF,
127+
comment = "Excluding .kt source files from scanning"
128+
)
129+
)
130+
131+
// Verify our test file exists. This file should be included in the scan since it does not match the exclusion
132+
// pattern (it is a .go file, not a .kt file).
133+
val includedFile = File(EXCLUSION_TEST_DIRECTORY, "server.go")
134+
if (!includedFile.isFile) {
135+
fail("The file ${includedFile.absolutePath} does not exist - test environment may not be properly set up")
136+
}
137+
138+
// Run the scanner with our exclusion pattern. This will traverse the directory and should skip .kt files.
139+
scanner.scanPath(
140+
EXCLUSION_TEST_DIRECTORY,
141+
ScanContext(
142+
labels = emptyMap(),
143+
packageType = PackageType.PACKAGE,
144+
excludes = Excludes(paths = pathExcludes)
145+
)
146+
)
147+
148+
// Retrieve all HTTP POST requests captured by WireMock during the scan.
149+
val requests = server.findAll(WireMock.postRequestedFor(WireMock.anyUrl()))
150+
val requestBodies = requests.map { it.bodyAsString }
151+
152+
// The scanner sends files to the API in a multipart/form-data POST request with this format:
153+
// --boundary
154+
// Content-Disposition: form-data; name="file"; filename="[UUID].wfp"
155+
// Content-Type: text/plain; charset=utf-8
156+
// Content-Length: [length]
157+
//
158+
// file=[hash],[size],[filename]
159+
// [fingerprint data for the file]
160+
// --boundary--
161+
162+
// Extract the included filenames from the ScanOSS HTTP POST request bodies using a regex pattern.
163+
// The pattern matches lines starting with "file=" followed by hash and size, then captures the filename.
164+
val filenamePattern = "file=.*?,.*?,(.+)".toRegex(RegexOption.MULTILINE)
165+
val includedFiles = requestBodies.flatMap { body ->
166+
filenamePattern.findAll(body).map { it.groupValues[1] }.toList()
167+
}
168+
169+
// Verify that .kt files were excluded from the scan.
170+
// These assertions check that Kotlin files are not present in the API requests.
171+
includedFiles.any { it.contains("ArchiveUtils.kt") } shouldBe false
172+
includedFiles.any { it.contains("ScannerFactory.kt") } shouldBe false
173+
174+
// Verify that non-.kt files were included in the scan.
175+
// This assertion checks that our Go file was sent to the API.
176+
includedFiles.any { it.contains("server.go") } shouldBe true
177+
}
113178
})

0 commit comments

Comments
 (0)