Skip to content

Commit 33c8075

Browse files
committed
feat(scanner): Improve the behavior when storing conflicting results
If ORT is run in parallel, e.g. on different runners on CI, it may happen that a package which has no corresponding storage entry is scanned in parallel on mutliple nodes. Once the nodes attempt to store the scan results, only the writing of the first result succeeds. But the following nodes attempt to overwrite the result (with the same value), which results in the scan storage to write the stack trace to the log as error. Also it (re-)throws the exception which is caught again by the `Scanner` which again logs the stack trace as a warning. There is no problem with the functionality, but the logs look as if there was a serious issue. This is not true, because the attempt to write the scan results must have been to overwrite a value with the exact same value. This holds, because the key / unique constraints have been choosen such that it does. Adjust the interface and the implementations to not throw anymore on unique constraint violation, because overwriting data with identical data has no impact anyway. Still leave a message in the debug log, but without the stack trace in order to reduce the amount of spamming, as this behavior is expected. Furthermore, turn the return type of `write()` into `Boolean`, so that it is still able to examine / assert all previously implemented test cases. Note: According to [1] the `insertIgnore()` seems to target this use case. [1]: https://www.jetbrains.com/help/exposed/dsl-crud-operations.html#batch-insert Signed-off-by: Frank Viernau <[email protected]>
1 parent b71980e commit 33c8075

File tree

5 files changed

+46
-21
lines changed

5 files changed

+46
-21
lines changed

scanner/src/funTest/kotlin/storages/AbstractProvenanceBasedScanStorageFunTest.kt

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import io.kotest.core.spec.style.WordSpec
2525
import io.kotest.matchers.collections.containExactly
2626
import io.kotest.matchers.collections.containExactlyInAnyOrder
2727
import io.kotest.matchers.should
28+
import io.kotest.matchers.shouldBe
2829

2930
import org.ossreviewtoolkit.model.ArtifactProvenance
3031
import org.ossreviewtoolkit.model.Hash
@@ -61,22 +62,25 @@ abstract class AbstractProvenanceBasedScanStorageFunTest(vararg listeners: TestL
6162
"succeed for a valid scan result" {
6263
val scanResult = createScanResult()
6364

64-
storage.write(scanResult)
65+
val writeResult = storage.write(scanResult)
6566
val readResult = storage.read(scanResult.provenance as KnownProvenance)
6667

68+
writeResult shouldBe true
6769
readResult should containExactly(scanResult)
6870
}
6971

70-
"fail if provenance information is missing" {
71-
val scanResult = createScanResult(provenance = UnknownProvenance)
72+
"return false if a result for the same provenance and scanner is already stored" {
73+
val scanResult = createScanResult()
74+
storage.write(scanResult)
7275

73-
shouldThrow<ScanStorageException> { storage.write(scanResult) }
76+
val writeResult = storage.write(scanResult)
77+
78+
writeResult shouldBe false
7479
}
7580

76-
"fail if a result for the same provenance and scanner is already stored" {
77-
val scanResult = createScanResult()
81+
"fail if provenance information is missing" {
82+
val scanResult = createScanResult(provenance = UnknownProvenance)
7883

79-
storage.write(scanResult)
8084
shouldThrow<ScanStorageException> { storage.write(scanResult) }
8185
}
8286

@@ -97,9 +101,11 @@ abstract class AbstractProvenanceBasedScanStorageFunTest(vararg listeners: TestL
97101
vcsInfo = VcsInfo.valid().copy(revision = "another")
98102
)
99103
)
100-
101104
storage.write(scanResult1)
102-
shouldThrow<ScanStorageException> { storage.write(scanResult2) }
105+
106+
val writeResult = storage.write(scanResult2)
107+
108+
writeResult shouldBe false
103109
}
104110
}
105111

scanner/src/main/kotlin/ScanStorage.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,15 +97,15 @@ interface ProvenanceBasedScanStorageWriter : ScanStorageWriter {
9797
/**
9898
* Write the provided [scanResult] to the storage. The [scanResult] must have a [KnownProvenance], and if it has a
9999
* [RepositoryProvenance] the VCS path must be empty, because only scan results for complete repositories are
100-
* stored.
100+
* stored. If the storage already contains a result for the same storage key, the write operation is skipped
101+
* and false is returned.
101102
*
102103
* A [ScanStorageException] is thrown if:
103104
* * An error occurs while writing to the storage.
104-
* * The storage already contains a result for the same provenance and scanner.
105105
* * The provenance of the [scanResult] is [unknown][UnknownProvenance].
106106
* * The provenance of the [scanResult] is a [RepositoryProvenance] with a non-empty VCS path.
107107
*/
108-
fun write(scanResult: ScanResult)
108+
fun write(scanResult: ScanResult): Boolean
109109
}
110110

111111
/**

scanner/src/main/kotlin/storages/ProvenanceBasedFileStorage.kt

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ class ProvenanceBasedFileStorage(private val backend: FileStorage) : ProvenanceB
7575
}
7676
}
7777

78-
override fun write(scanResult: ScanResult) {
78+
override fun write(scanResult: ScanResult): Boolean {
7979
val provenance = scanResult.provenance
8080

8181
requireEmptyVcsPath(provenance)
@@ -87,9 +87,12 @@ class ProvenanceBasedFileStorage(private val backend: FileStorage) : ProvenanceB
8787
val existingScanResults = read(provenance)
8888

8989
if (existingScanResults.any { it.matches(scanResult) }) {
90-
throw ScanStorageException(
91-
"Storage already contains a result for ${scanResult.provenance} and ${scanResult.scanner}."
92-
)
90+
logger.debug {
91+
"Not writing scan result because storage already contains a result for ${scanResult.provenance} and " +
92+
"${scanResult.scanner}."
93+
}
94+
95+
return false
9396
}
9497

9598
val scanResults = existingScanResults + scanResult
@@ -101,6 +104,7 @@ class ProvenanceBasedFileStorage(private val backend: FileStorage) : ProvenanceB
101104
runCatching {
102105
backend.write(path, input)
103106
logger.debug { "Stored scan result for '$provenance' at path '$path'." }
107+
return true
104108
}.onFailure {
105109
when (it) {
106110
is IllegalArgumentException, is IOException -> {
@@ -114,6 +118,8 @@ class ProvenanceBasedFileStorage(private val backend: FileStorage) : ProvenanceB
114118
else -> throw it
115119
}
116120
}
121+
122+
return false
117123
}
118124
}
119125

scanner/src/main/kotlin/storages/ProvenanceBasedPostgresStorage.kt

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,14 @@ import javax.sql.DataSource
2525

2626
import org.apache.logging.log4j.kotlin.logger
2727

28+
import org.jetbrains.exposed.dao.id.EntityID
2829
import org.jetbrains.exposed.dao.id.IntIdTable
2930
import org.jetbrains.exposed.sql.Database
3031
import org.jetbrains.exposed.sql.SchemaUtils
3132
import org.jetbrains.exposed.sql.SchemaUtils.withDataBaseLock
3233
import org.jetbrains.exposed.sql.and
3334
import org.jetbrains.exposed.sql.andWhere
34-
import org.jetbrains.exposed.sql.insert
35+
import org.jetbrains.exposed.sql.insertIgnoreAndGetId
3536
import org.jetbrains.exposed.sql.selectAll
3637

3738
import org.ossreviewtoolkit.model.ArtifactProvenance
@@ -132,7 +133,7 @@ class ProvenanceBasedPostgresStorage(
132133
// TODO: Override read(provenance, scannerMatcher) to make it more efficient by matching the scanner details in the
133134
// query.
134135

135-
override fun write(scanResult: ScanResult) {
136+
override fun write(scanResult: ScanResult): Boolean {
136137
val provenance = scanResult.provenance
137138

138139
requireEmptyVcsPath(provenance)
@@ -141,9 +142,11 @@ class ProvenanceBasedPostgresStorage(
141142
throw ScanStorageException("Scan result must have a known provenance, but it is $provenance.")
142143
}
143144

145+
var rowId: EntityID<Int>? = null
146+
144147
try {
145148
database.transaction {
146-
table.insert {
149+
rowId = table.insertIgnoreAndGetId {
147150
when (provenance) {
148151
is ArtifactProvenance -> {
149152
it[artifactUrl] = provenance.sourceArtifact.url
@@ -170,6 +173,15 @@ class ProvenanceBasedPostgresStorage(
170173

171174
throw ScanStorageException(e)
172175
}
176+
177+
if (rowId == null) {
178+
logger.debug {
179+
"Not writing scan result because storage already contains a result for ${scanResult.provenance} and " +
180+
"${scanResult.scanner}."
181+
}
182+
}
183+
184+
return rowId != null
173185
}
174186
}
175187

scanner/src/test/kotlin/ScannerTest.kt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,9 @@ class ScannerTest : WordSpec({
196196

197197
val storedScanResults = mutableListOf<ScanResult>()
198198
val writer = object : ProvenanceBasedScanStorageWriter {
199-
override fun write(scanResult: ScanResult) {
199+
override fun write(scanResult: ScanResult): Boolean {
200200
storedScanResults.add(scanResult)
201+
return true
201202
}
202203
}
203204

@@ -1038,7 +1039,7 @@ private class FakePackageBasedStorageWriter : PackageBasedScanStorageWriter {
10381039
}
10391040

10401041
private class FakeProvenanceBasedStorageWriter : ProvenanceBasedScanStorageWriter {
1041-
override fun write(scanResult: ScanResult) = Unit
1042+
override fun write(scanResult: ScanResult) = true
10421043
}
10431044

10441045
private fun createContext(labels: Map<String, String> = emptyMap(), type: PackageType = PackageType.PACKAGE) =

0 commit comments

Comments
 (0)