Skip to content

Commit 457657b

Browse files
committed
feat(scanoss): Add exclusion pattern support to SCANOSS
Implement exclusion filtering to respect path patterns specified in the `.ort.yml` configuration. The scanner now properly excludes files matching the patterns during the scan process. Signed-off-by: Agustin Isasmendi <[email protected]>
1 parent 6a56c15 commit 457657b

File tree

5 files changed

+511
-1
lines changed

5 files changed

+511
-1
lines changed

plugins/scanners/scanoss/src/main/kotlin/ScanOss.kt

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,15 @@
2020
package org.ossreviewtoolkit.plugins.scanners.scanoss
2121

2222
import com.scanoss.Scanner
23+
import com.scanoss.filters.FilterConfig
2324
import com.scanoss.utils.JsonUtils
2425
import com.scanoss.utils.PackageDetails
2526

2627
import java.io.File
2728
import java.time.Instant
2829

30+
import org.apache.logging.log4j.kotlin.logger
31+
2932
import org.ossreviewtoolkit.model.ScanSummary
3033
import org.ossreviewtoolkit.plugins.api.OrtPlugin
3134
import org.ossreviewtoolkit.plugins.api.PluginDescriptor
@@ -65,8 +68,29 @@ class ScanOss(
6568
override fun scanPath(path: File, context: ScanContext): ScanSummary {
6669
val startTime = Instant.now()
6770

71+
val basePath = path.toPath()
72+
73+
val filterConfig = FilterConfig.builder()
74+
.customFilter { currentPath ->
75+
// The "currentPath" variable contains a path object representing the file or directory being evaluated
76+
// by the filter.
77+
// This is provided by the Scanner and represents individual files/directories during traversal.
78+
try {
79+
val relativePath = basePath.relativize(currentPath).toString()
80+
val isExcluded = context.excludes?.isPathExcluded(relativePath) ?: false
81+
logger.debug { "Path: $currentPath, relative: $relativePath, isExcluded: $isExcluded" }
82+
isExcluded
83+
} catch (e: IllegalArgumentException) {
84+
logger.warn { "Error processing path $currentPath: ${e.message}" }
85+
false
86+
}
87+
}
88+
.build()
89+
6890
// Build the scanner at function level in case any path-specific settings or filters are needed later
69-
val scanoss = scanossBuilder.build()
91+
val scanoss = scanossBuilder
92+
.filterConfig(filterConfig)
93+
.build()
7094

7195
val rawResults = when {
7296
path.isFile -> listOf(scanoss.scanFile(path.toString()))
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
/*
2+
* Copyright (C) 2017 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-License-Identifier: Apache-2.0
17+
* License-Filename: LICENSE
18+
*/
19+
20+
@file:Suppress("MatchingDeclarationName")
21+
22+
package org.ossreviewtoolkit.utils
23+
24+
import java.io.File
25+
import java.io.IOException
26+
import java.io.InputStream
27+
import java.nio.file.FileVisitResult
28+
import java.nio.file.Files
29+
import java.nio.file.Path
30+
import java.nio.file.SimpleFileVisitor
31+
import java.nio.file.attribute.BasicFileAttributes
32+
import java.util.zip.Deflater
33+
34+
import org.apache.commons.compress.archivers.ArchiveEntry
35+
import org.apache.commons.compress.archivers.ArchiveInputStream
36+
import org.apache.commons.compress.archivers.sevenz.SevenZFile
37+
import org.apache.commons.compress.archivers.tar.TarArchiveEntry
38+
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
39+
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry
40+
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream
41+
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream
42+
import org.apache.commons.compress.archivers.zip.ZipFile
43+
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream
44+
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
45+
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream
46+
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel
47+
48+
enum class ArchiveType(vararg val extensions: String) {
49+
TAR(".gem", ".tar"),
50+
TAR_BZIP2(".tar.bz2", ".tbz2"),
51+
TAR_GZIP(".crate", ".tar.gz", ".tgz"),
52+
TAR_XZ(".tar.xz", ".txz"),
53+
ZIP(".aar", ".egg", ".jar", ".war", ".whl", ".zip"),
54+
SEVENZIP(".7z"),
55+
NONE("");
56+
57+
companion object {
58+
fun getType(filename: String): ArchiveType {
59+
val lowerName = filename.toLowerCase()
60+
return (ArchiveType.entries - NONE).find { type ->
61+
type.extensions.any { lowerName.endsWith(it) }
62+
} ?: NONE
63+
}
64+
}
65+
}
66+
67+
/**
 * Unpack the [File] to [targetDirectory]. The archive format is guessed from the file name via
 * [ArchiveType.getType].
 *
 * @throws IOException if the file name does not indicate a known archive format.
 */
fun File.unpack(targetDirectory: File) =
    when (ArchiveType.getType(name)) {
        ArchiveType.SEVENZIP -> unpack7Zip(targetDirectory)
        ArchiveType.ZIP -> unpackZip(targetDirectory)

        // The decompressor constructors already read from the stream and may throw on a malformed header. Keep the
        // file stream in its own "use" block so that it is closed even if wrapping it fails.
        ArchiveType.TAR -> inputStream().use { it.unpackTar(targetDirectory) }
        ArchiveType.TAR_BZIP2 -> inputStream().use { BZip2CompressorInputStream(it).unpackTar(targetDirectory) }
        ArchiveType.TAR_GZIP -> inputStream().use { GzipCompressorInputStream(it).unpackTar(targetDirectory) }
        ArchiveType.TAR_XZ -> inputStream().use { XZCompressorInputStream(it).unpackTar(targetDirectory) }

        ArchiveType.NONE -> {
            throw IOException("Unable to guess compression scheme from file name '$name'.")
        }
    }
84+
85+
/**
 * Unpack the [File] to [targetDirectory] assuming it is a 7-Zip archive. Directory entries and anti-items are skipped,
 * so empty directories are not created.
 *
 * @throws IOException if the name of an entry would place it outside of [targetDirectory].
 */
fun File.unpack7Zip(targetDirectory: File) {
    val canonicalTargetDirectory = targetDirectory.canonicalFile

    SevenZFile(this).use { zipFile ->
        while (true) {
            val entry = zipFile.nextEntry ?: break

            if (entry.isDirectory || entry.isAntiItem) continue

            val target = targetDirectory.resolve(entry.name)

            // Entry names come from the (potentially untrusted) archive, so refuse names like "../../file" or
            // absolute paths that would escape the target directory.
            if (!target.canonicalFile.startsWith(canonicalTargetDirectory)) {
                throw IOException("Archive entry '${entry.name}' would be unpacked outside of '$targetDirectory'.")
            }

            // There is no guarantee that directory entries appear before file entries, so ensure that the parent
            // directory for a file exists.
            target.parentFile.safeMkdirs()

            target.outputStream().use { output ->
                // Close the per-entry stream explicitly instead of relying on the archive being closed eventually.
                zipFile.getInputStream(entry).use { input -> input.copyTo(output) }
            }
        }
    }
}
109+
110+
/**
 * Treat the [File] as a Zip archive and extract its entries to [targetDirectory].
 */
fun File.unpackZip(targetDirectory: File) {
    ZipFile(this).unpack(targetDirectory)
}
114+
115+
/**
 * Treat the content of the [ByteArray] as a Zip archive and extract its entries to [targetDirectory].
 */
fun ByteArray.unpackZip(targetDirectory: File) {
    val channel = SeekableInMemoryByteChannel(this)
    ZipFile(channel).unpack(targetDirectory)
}
119+
120+
/**
 * Pack the file into a ZIP [targetFile] using [Deflater.BEST_COMPRESSION]. If the file is a directory its content is
 * recursively added to the archive. Only regular files are added, e.g. symbolic links or directories are skipped. If
 * a [prefix] is specified, it is added to the file names in the ZIP file. Entry names always use forward slashes as
 * separators, independently of the platform.
 * If not all files shall be added to the archive a [filter] can be provided.
 *
 * @throws IllegalArgumentException if [targetFile] already exists and [overwrite] is false.
 */
fun File.packZip(
    targetFile: File,
    prefix: String = "",
    overwrite: Boolean = false,
    filter: (Path) -> Boolean = { true }
) {
    require(overwrite || !targetFile.exists()) {
        "The target ZIP file '${targetFile.absolutePath}' must not exist."
    }

    // Convert the root only once instead of for every visited file.
    val rootPath = toPath()

    ZipArchiveOutputStream(targetFile).use { output ->
        output.setLevel(Deflater.BEST_COMPRESSION)

        Files.walkFileTree(rootPath, object : SimpleFileVisitor<Path>() {
            override fun visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult {
                if (attrs.isRegularFile && filter(file)) {
                    // The ZIP format requires forward slashes in entry names, but the string representation of a
                    // path uses the platform separator, so normalize it explicitly.
                    val relativeName = rootPath.relativize(file).toFile().invariantSeparatorsPath
                    val entry = ZipArchiveEntry(file.toFile(), "$prefix$relativeName")

                    output.putArchiveEntry(entry)
                    file.toFile().inputStream().use { input -> input.copyTo(output) }
                    output.closeArchiveEntry()
                }

                return FileVisitResult.CONTINUE
            }
        })
    }
}
152+
153+
/**
 * Read the [InputStream] as a tape archive (TAR) and extract it to [targetDirectory]. All entries that are not
 * regular files are skipped, and the mode of each TAR entry is passed on as the file mode bits.
 */
fun InputStream.unpackTar(targetDirectory: File) {
    val archive = TarArchiveInputStream(this)

    archive.unpack(
        targetDirectory,
        shouldSkip = { !(it as TarArchiveEntry).isFile },
        mode = { (it as TarArchiveEntry).mode }
    )
}
163+
164+
/**
 * Read the [InputStream] as a ZIP archive and extract it to [targetDirectory]. Directory entries and Unix symbolic
 * links are skipped, and the Unix mode of each ZIP entry is passed on as the file mode bits.
 */
fun InputStream.unpackZip(targetDirectory: File) {
    val archive = ZipArchiveInputStream(this)

    archive.unpack(
        targetDirectory,
        shouldSkip = { entry ->
            val zipEntry = entry as ZipArchiveEntry
            zipEntry.isDirectory || zipEntry.isUnixSymlink
        },
        mode = { entry -> (entry as ZipArchiveEntry).unixMode }
    )
}
174+
175+
/**
 * Transfer the executable permission encoded in the Unix [mode] bits to the [target] file. Nothing is done on
 * Windows.
 */
private fun copyExecutableModeBit(target: File, mode: Int) {
    if (Os.isWindows) return

    // Kotlin has no octal literals (see https://kotlinlang.org/docs/reference/basic-types.html#literal-constants), so
    // the masks are spelled out in binary. From left to right the bit-triplets stand for user, group, other.
    val isExecutableByOwner = (mode and 0b001_000_000) != 0
    val isExecutableByOthers = (mode and 0b000_000_001) != 0

    if (isExecutableByOwner || isExecutableByOthers) {
        // Restrict the permission to the owner unless others are allowed to execute the file, too.
        target.setExecutable(true, !isExecutableByOthers)
    }
}
188+
189+
/**
 * Unpack this [ArchiveInputStream] to the [targetDirectory], skipping all entries for which [shouldSkip] returns true,
 * and using what [mode] returns as the file mode bits. The stream is closed afterwards.
 *
 * @throws IOException if the name of an entry would place it outside of [targetDirectory].
 */
private fun ArchiveInputStream.unpack(
    targetDirectory: File,
    shouldSkip: (ArchiveEntry) -> Boolean,
    mode: (ArchiveEntry) -> Int
) =
    use { input ->
        val canonicalTargetDirectory = targetDirectory.canonicalFile

        while (true) {
            val entry = input.nextEntry ?: break

            if (shouldSkip(entry)) continue

            val target = targetDirectory.resolve(entry.name)

            // Entry names come from the (potentially untrusted) archive, so refuse names like "../../file" or
            // absolute paths that would escape the target directory.
            if (!target.canonicalFile.startsWith(canonicalTargetDirectory)) {
                throw IOException("Archive entry '${entry.name}' would be unpacked outside of '$targetDirectory'.")
            }

            // There is no guarantee that directory entries appear before file entries, so ensure that the parent
            // directory for a file exists.
            target.parentFile.safeMkdirs()

            // The archive stream is positioned at the current entry's data, so copy it straight to the target.
            target.outputStream().use { output ->
                input.copyTo(output)
            }

            copyExecutableModeBit(target, mode(entry))
        }
    }
217+
218+
/**
 * Unpack the [ZipFile] to [targetDirectory] and close it afterwards. Directory entries and Unix symbolic links are
 * skipped. In contrast to [InputStream.unpackZip] this properly parses the ZIP's central directory, see
 * https://commons.apache.org/proper/commons-compress/zip.html#ZipArchiveInputStream_vs_ZipFile.
 *
 * @throws IOException if the name of an entry would place it outside of [targetDirectory].
 */
private fun ZipFile.unpack(targetDirectory: File) =
    use { zipFile ->
        val canonicalTargetDirectory = targetDirectory.canonicalFile
        val entries = zipFile.entries

        while (entries.hasMoreElements()) {
            val entry = entries.nextElement()

            if (entry.isDirectory || entry.isUnixSymlink) continue

            val target = targetDirectory.resolve(entry.name)

            // Entry names come from the (potentially untrusted) archive, so refuse names like "../../file" or
            // absolute paths that would escape the target directory.
            if (!target.canonicalFile.startsWith(canonicalTargetDirectory)) {
                throw IOException("Archive entry '${entry.name}' would be unpacked outside of '$targetDirectory'.")
            }

            // There is no guarantee that directory entries appear before file entries, so ensure that the parent
            // directory for a file exists.
            target.parentFile.safeMkdirs()

            target.outputStream().use { output ->
                // Close the per-entry stream explicitly instead of relying on the archive being closed eventually.
                zipFile.getInputStream(entry).use { input -> input.copyTo(output) }
            }

            copyExecutableModeBit(target, entry.unixMode)
        }
    }
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright (C) 2017 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-License-Identifier: Apache-2.0
17+
* License-Filename: LICENSE
18+
*/
19+
20+
package org.ossreviewtoolkit.scanner
21+
22+
import java.util.ServiceLoader
23+
24+
import org.ossreviewtoolkit.model.config.ScannerConfiguration
25+
26+
/**
 * The service interface to be implemented by all [AbstractScannerFactory] classes so that they can be used with
 * [ServiceLoader].
 */
interface ScannerFactory {
    /**
     * The name by which the scanner is referred to.
     */
    val scannerName: String

    /**
     * Return a [Scanner] that is created using the specified [config].
     */
    fun create(config: ScannerConfiguration): Scanner
}
40+
41+
/**
 * A generic base class for factories that create a specific type [T] of [Scanner] which is referred to by
 * [scannerName].
 */
abstract class AbstractScannerFactory<out T : Scanner>(override val scannerName: String) : ScannerFactory {
    /**
     * Return a scanner of type [T] that is created using the specified [config].
     */
    abstract override fun create(config: ScannerConfiguration): T

    /**
     * Use the scanner's name as the string representation, so that Clikt displays something meaningful when listing
     * the scanners which are enabled by default via their factories.
     */
    override fun toString(): String = scannerName
}

0 commit comments

Comments
 (0)