Commit 042e00e

Merge pull request #533 from RADAR-base/release-2.3.1
Release 2.3.1
2 parents 6f17c8e + 5241864 commit 042e00e

File tree

12 files changed: +103 -88 lines changed


.github/workflows/scheduled_snyk.yaml

Lines changed: 1 addition & 1 deletion

@@ -11,7 +11,7 @@ jobs:
       - uses: actions/checkout@v3
       - uses: snyk/actions/setup@master
         with:
-          snyk-version: v1.931.0
+          snyk-version: v1.1032.0

       - uses: actions/setup-java@v3
         with:

.github/workflows/snyk.yaml

Lines changed: 2 additions & 2 deletions

@@ -2,15 +2,15 @@ name: Snyk test
 on:
   pull_request:
     branches:
-      - master
+      - main
 jobs:
   security:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
       - uses: snyk/actions/setup@master
         with:
-          snyk-version: v1.931.0
+          snyk-version: v1.1032.0

       - uses: actions/setup-java@v3
         with:

README.md

Lines changed: 2 additions & 2 deletions

@@ -70,7 +70,7 @@ When upgrading to version 0.6.0 from version 0.5.x or earlier, please follow the
 This package is available as docker image [`radarbase/radar-output-restructure`](https://hub.docker.com/r/radarbase/radar-output-restructure). The entrypoint of the image is the current application. So in all the commands listed in usage, replace `radar-output-restructure` with for example:

 ```shell
-docker run --rm -t --network s3 -v "$PWD/output:/output" radarbase/radar-output-restructure:2.3.0 -o /output /myTopic
+docker run --rm -t --network s3 -v "$PWD/output:/output" radarbase/radar-output-restructure:2.3.1 -o /output /myTopic
 ```

 ## Command line usage
@@ -228,7 +228,7 @@ This package requires at least Java JDK 8. Build the distribution with
 and install the package into `/usr/local` with for example
 ```shell
 sudo mkdir -p /usr/local
-sudo tar -xzf build/distributions/radar-output-restructure-2.3.0.tar.gz -C /usr/local --strip-components=1
+sudo tar -xzf build/distributions/radar-output-restructure-2.3.1.tar.gz -C /usr/local --strip-components=1
 ```

 Now the `radar-output-restructure` command should be available.

build.gradle.kts

Lines changed: 11 additions & 5 deletions

@@ -16,7 +16,7 @@ plugins {
 }

 group = "org.radarbase"
-version = "2.3.0"
+version = "2.3.1"

 repositories {
     mavenCentral()
@@ -57,7 +57,10 @@ dependencies {
     val jacksonVersion: String by project
     api(platform("com.fasterxml.jackson:jackson-bom:$jacksonVersion"))
     implementation("com.fasterxml.jackson.core:jackson-databind")
-    implementation("com.fasterxml.jackson.dataformat:jackson-dataformat-yaml")
+    implementation("com.fasterxml.jackson.dataformat:jackson-dataformat-yaml") {
+        val snakeYamlVersion: String by project
+        runtimeOnly("org.yaml:snakeyaml:$snakeYamlVersion")
+    }
     implementation("com.fasterxml.jackson.dataformat:jackson-dataformat-csv")
     implementation("com.fasterxml.jackson.module:jackson-module-kotlin")
     implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310")
@@ -86,14 +89,17 @@ dependencies {
         implementation(platform("io.netty:netty-bom:$nettyVersion"))
     }
     val opencsvVersion: String by project
-    implementation("com.opencsv:opencsv:$opencsvVersion")
+    implementation("com.opencsv:opencsv:$opencsvVersion") {
+        val apacheCommonsTextVersion: String by project
+        runtimeOnly("org.apache.commons:commons-text:$apacheCommonsTextVersion")
+    }

     val slf4jVersion: String by project
     implementation("org.slf4j:slf4j-api:$slf4jVersion")

     val log4jVersion: String by project
-    runtimeOnly("org.apache.logging.log4j:log4j-slf4j-impl:$log4jVersion")
-    runtimeOnly("org.apache.logging.log4j:log4j-api:$log4jVersion")
+    runtimeOnly("org.apache.logging.log4j:log4j-core:$log4jVersion")
+    runtimeOnly("org.apache.logging.log4j:log4j-slf4j2-impl:$log4jVersion")
     runtimeOnly("org.apache.logging.log4j:log4j-jul:$log4jVersion")

     val radarSchemasVersion: String by project
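
The two nested dependency blocks above pin newer runtime versions of SnakeYAML (pulled in by jackson-dataformat-yaml) and Apache Commons Text (pulled in by opencsv), using the version properties added in gradle.properties below. For comparison only, a sketch of how the same pinning could be expressed with Gradle's dependency constraints in the Kotlin DSL; this is an illustrative alternative, not what the commit does:

    dependencies {
        constraints {
            // Assumes the same snakeYamlVersion/apacheCommonsTextVersion properties from gradle.properties.
            val snakeYamlVersion: String by project
            implementation("org.yaml:snakeyaml:$snakeYamlVersion")

            val apacheCommonsTextVersion: String by project
            implementation("org.apache.commons:commons-text:$apacheCommonsTextVersion")
        }
    }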

gradle.properties

Lines changed: 13 additions & 11 deletions

@@ -1,28 +1,30 @@
 kotlin.code.style=official

-kotlinVersion=1.7.10
-dokkaVersion=1.7.10
+kotlinVersion=1.7.20
+dokkaVersion=1.7.20
 dockerComposeVersion=0.16.9
-dependencyUpdateVersion=0.42.0
+dependencyUpdateVersion=0.43.0
 nexusPublishVersion=1.1.0
 jsoupVersion=1.15.3

 coroutinesVersion=1.6.4
 avroVersion=1.11.1
 snappyVersion=1.1.8.4
-jacksonVersion=2.13.4
+jacksonVersion=2.13.4.20221013
 jCommanderVersion=1.82
 almworksVersion=1.1.2
-minioVersion=8.4.3
+minioVersion=8.4.5
 guavaVersion=31.1-jre
 opencsvVersion=5.7.0
 okhttpVersion=4.10.0
-jedisVersion=4.2.3
-slf4jVersion=1.7.36
-log4jVersion=2.18.0
-azureStorageVersion=12.19.0
-nettyVersion=4.1.80.Final
+jedisVersion=4.3.0
+slf4jVersion=2.0.3
+log4jVersion=2.19.0
+azureStorageVersion=12.20.0
+nettyVersion=4.1.84.Final
+snakeYamlVersion=1.32
+apacheCommonsTextVersion=1.10.0

-junitVersion=5.9.0
+junitVersion=5.9.1
 mockitoKotlinVersion=4.0.0
 radarSchemasVersion=0.7.9

src/main/java/org/radarbase/output/accounting/Accountant.kt

Lines changed: 4 additions & 4 deletions

@@ -34,13 +34,13 @@ interface Accountant : SuspendedCloseable {
         internal val offsets: OffsetRangeSet = OffsetRangeSet { DirectFunctionalValue(it) }

         fun add(transaction: Transaction) = time("accounting.add") {
-            offsets.add(transaction.topicPartition, transaction.offset, transaction.lastModified)
+            offsets.add(transaction)
         }
     }

-    class Transaction(
+    data class Transaction(
         val topicPartition: TopicPartition,
-        internal var offset: Long,
-        internal val lastModified: Instant,
+        val offset: Long,
+        val lastModified: Instant,
     )
 }
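
With `Transaction` now an immutable data class, `OffsetRangeSet.add` (see the next file) accepts the whole transaction instead of its three fields. A minimal call sketch, assuming the accounting classes and `java.time.Instant` are imported, and with a hypothetical `TopicPartition` value named `partition`:

    val offsets = OffsetRangeSet { DirectFunctionalValue(it) }   // same construction as the `offsets` field above
    val transaction = Accountant.Transaction(
        topicPartition = partition,   // hypothetical TopicPartition value
        offset = 42L,
        lastModified = Instant.now(),
    )
    offsets.add(transaction)          // previously offsets.add(topicPartition, offset, lastModified)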

src/main/java/org/radarbase/output/accounting/OffsetRangeSet.kt

Lines changed: 21 additions & 31 deletions

@@ -30,38 +30,29 @@ import java.util.concurrent.ConcurrentMap
 * when they overlap or are adjacent. When merging offsets, last processing time is taken as the
 * maximum of all the merged processing times.
 */
-class OffsetRangeSet {
-    private val factory: (OffsetIntervals) -> FunctionalValue<OffsetIntervals>
-    private val ranges: ConcurrentMap<TopicPartition, FunctionalValue<OffsetIntervals>>
+class OffsetRangeSet(
+    private val ranges: ConcurrentMap<TopicPartition, FunctionalValue<OffsetIntervals>>,
+    private val factory: (OffsetIntervals) -> FunctionalValue<OffsetIntervals>,
+) {

     /** Whether the stored offsets is empty. */
     val isEmpty: Boolean
         get() = ranges.isEmpty()

-    @JvmOverloads
     constructor(
         factory: (OffsetIntervals) -> FunctionalValue<OffsetIntervals> = { LockedFunctionalValue(it) },
-    ) {
-        this.ranges = ConcurrentHashMap()
-        this.factory = factory
-    }
-
-    private constructor(
-        ranges: ConcurrentMap<TopicPartition, FunctionalValue<OffsetIntervals>>,
-        factory: (OffsetIntervals) -> FunctionalValue<OffsetIntervals>,
-    ) {
-        this.ranges = ranges
-        this.factory = factory
-    }
+    ) : this(ConcurrentHashMap(), factory)

     /** Add given offset range to seen offsets. */
     fun add(range: TopicPartitionOffsetRange) {
         range.topicPartition.modifyIntervals { it.add(range.range) }
     }

     /** Add given single offset to seen offsets. */
-    fun add(topicPartition: TopicPartition, offset: Long, lastModified: Instant) {
-        topicPartition.modifyIntervals { it.add(offset, lastModified) }
+    fun add(transaction: Accountant.Transaction) {
+        transaction.run {
+            topicPartition.modifyIntervals { it.add(offset, lastModified) }
+        }
     }

     /** Add all offset stream of given set to the current set. */
@@ -81,23 +72,18 @@
     }

     /** Whether this range value completely contains the given range. */
-    operator fun contains(range: TopicPartitionOffsetRange): Boolean {
-        return range.topicPartition.readIntervals { it.contains(range.range) }
-    }
+    operator fun contains(range: TopicPartitionOffsetRange): Boolean =
+        range.topicPartition.readIntervals { it.contains(range.range) }

     /** Whether this range value completely contains the given range. */
-    fun contains(partition: TopicPartition, offset: Long, lastModified: Instant): Boolean {
-        return partition.readIntervals { it.contains(offset, lastModified) }
-    }
+    fun contains(partition: TopicPartition, offset: Long, lastModified: Instant): Boolean =
+        partition.readIntervals { it.contains(offset, lastModified) }

     /** Number of distinct offsets in given topic/partition. */
-    fun size(topicPartition: TopicPartition): Int {
-        return topicPartition.readIntervals { it.size() }
-    }
+    fun size(topicPartition: TopicPartition): Int = topicPartition.readIntervals { it.size() }

-    fun remove(range: TopicPartitionOffsetRange) {
-        return range.topicPartition.modifyIntervals { it.remove(range.range) }
-    }
+    fun remove(range: TopicPartitionOffsetRange) =
+        range.topicPartition.modifyIntervals { it.remove(range.range) }

     fun withFactory(
         factory: (OffsetIntervals) -> FunctionalValue<OffsetIntervals>,
@@ -168,7 +154,11 @@
             factory,
         )

-    data class Range(val from: Long, val to: Long?, val lastProcessed: Instant = Instant.now()) {
+    data class Range(
+        val from: Long,
+        val to: Long?,
+        val lastProcessed: Instant = Instant.now(),
+    ) {
         @JsonIgnore
         val size: Long? = to?.let { it - from + 1 }
         fun ensureToOffset(): Range = if (to == null) copy(to = from) else this
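
The two hand-written constructors above collapse into a primary constructor plus a delegating secondary constructor, and the single-offset `add` now takes an `Accountant.Transaction`; behaviour is otherwise unchanged. A short construction sketch, with the offset and `partition` values invented for illustration:

    val lockedSet = OffsetRangeSet()                              // secondary constructor, LockedFunctionalValue factory
    val directSet = OffsetRangeSet { DirectFunctionalValue(it) }  // explicit factory, as Accountant uses
    val seen = lockedSet.contains(partition, 42L, Instant.now())  // `partition` is a hypothetical TopicPartition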

src/main/java/org/radarbase/output/format/CsvAvroConverterFactory.kt

Lines changed: 6 additions & 16 deletions

@@ -4,6 +4,7 @@ import com.opencsv.CSVReader
 import com.opencsv.CSVWriter
 import org.apache.avro.generic.GenericRecord
 import org.radarbase.output.compression.Compression
+import org.radarbase.output.util.Md5Hasher
 import org.radarbase.output.util.ResourceContext.Companion.resourceContext
 import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended
 import org.radarbase.output.util.TimeUtil.parseDate
@@ -16,6 +17,7 @@ import java.io.InputStream
 import java.io.Reader
 import java.io.Writer
 import java.nio.file.Path
+import kotlin.collections.HashSet
 import kotlin.io.path.inputStream
 import kotlin.io.path.outputStream

@@ -38,10 +40,13 @@ class CsvAvroConverterFactory : RecordConverterFactory {
         if (header == null) return false
         val fields = fieldIndexes(header, distinctFields, ignoreFields)
         var count = 0
+
+        val md5 = Md5Hasher()
+
         val lineMap = buildMap {
             lines.forEachIndexed { idx, line ->
                 count += 1
-                put(ArrayWrapper(line.byIndex(fields)), idx)
+                put(md5.hash(line.byIndex(fields)), idx)
             }
         }

@@ -208,21 +213,6 @@ class CsvAvroConverterFactory : RecordConverterFactory {
         return null
     }

-    private class ArrayWrapper<T>(val values: Array<T>) {
-        private val hashCode = values.contentHashCode()
-
-        override fun equals(other: Any?): Boolean {
-            if (this === other) return true
-            if (javaClass != other?.javaClass) return false
-
-            other as ArrayWrapper<*>
-
-            return values.contentEquals(other.values)
-        }
-
-        override fun hashCode(): Int = hashCode
-    }
-
     @Throws(IndexOutOfBoundsException::class)
     inline fun <reified T> Array<T>.byIndex(
         indexes: IntArray?,
src/main/java/org/radarbase/output/util/Md5Hasher.kt (new file)

Lines changed: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
+package org.radarbase.output.util
+
+import java.security.MessageDigest
+import java.util.*
+
+class Md5Hasher {
+    private val md5 = MessageDigest.getInstance("MD5")
+    private val base64 = Base64.getEncoder().withoutPadding()
+
+    fun hash(value: Array<String>): String {
+        value.forEachIndexed { i, field ->
+            if (i > 0) {
+                md5.update(','.code.toByte())
+            }
+            md5.update(field.toByteArray())
+        }
+        val hash = base64.encodeToString(md5.digest())
+        md5.reset()
+        return hash
+    }
+}
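
`Md5Hasher` replaces the removed `ArrayWrapper` map key in CsvAvroConverterFactory with a compact, padding-free Base64-encoded MD5 digest per CSV line. A minimal usage sketch; the field values are invented for illustration:

    val md5 = Md5Hasher()
    val key = md5.hash(arrayOf("user-1", "2022-10-13T00:00:00Z", "42"))
    // `key` digests the comma-joined fields "user-1,2022-10-13T00:00:00Z,42"
    // and can serve as a map key for duplicate-line detection.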

src/main/java/org/radarbase/output/worker/FileCache.kt

Lines changed: 19 additions & 11 deletions

@@ -73,7 +73,7 @@ class FileCache(
     }

     suspend fun initialize(record: GenericRecord) {
-        val fileIsNew = targetStorage.status(path)?.takeIf { it.size > 0L } == null
+        var fileIsNew = targetStorage.status(path)?.takeIf { it.size > 0L } == null

         var outStream = compression.compress(fileName, tmpPath.outputStream().buffered())

@@ -82,13 +82,19 @@
             inputStream = ByteArrayInputStream(ByteArray(0))
         } else {
             inputStream = time("write.copyOriginal") {
-                if (!copy(path, outStream, compression)) {
-                    // restart output buffer
-                    outStream.close()
-                    // clear output file
-                    outStream = compression.compress(fileName, tmpPath.outputStream().buffered())
+                try {
+                    if (!copy(path, outStream, compression)) {
+                        // restart output buffer
+                        outStream.close()
+                        // clear output file
+                        outStream =
+                            compression.compress(fileName, tmpPath.outputStream().buffered())
+                    }
+                    compression.decompress(targetStorage.newInputStream(path))
+                } catch (ex: FileNotFoundException) {
+                    fileIsNew = true
+                    ByteArrayInputStream(ByteArray(0))
                 }
-                compression.decompress(targetStorage.newInputStream(path))
             }
         }

@@ -191,6 +197,8 @@
                     true
                 }
             }
+        } catch (ex: FileNotFoundException) {
+            throw ex
         } catch (ex: IOException) {
             var corruptPath: Path? = null
             var suffix = ""
@@ -205,17 +213,17 @@
             }
             if (corruptPath != null) {
                 logger.error(
-                    "Original file {} could not be read: {}." + " Moved to {}.",
+                    "Original file {} could not be read: {}. Moved to {}.",
                     source,
-                    ex,
+                    ex.toString(),
                     corruptPath,
                 )
                 targetStorage.move(source, corruptPath)
             } else {
                 logger.error(
-                    "Original file {} could not be read: {}." + " Too many corrupt backups stored, removing file.",
+                    "Original file {} could not be read: {}. Too many corrupt backups stored, removing file.",
                     source,
-                    ex,
+                    ex.toString(),
                 )
             }
             false