From 04b85114359d721b66897551c4d729568c7dfaa6 Mon Sep 17 00:00:00 2001 From: Joris Borgdorff Date: Tue, 24 Oct 2023 17:08:31 +0200 Subject: [PATCH 01/10] Properly support multiple sources and targets --- .../output/RestructureS3IntegrationTest.kt | 6 +- .../java/org/radarbase/output/Application.kt | 50 +++++-------- .../org/radarbase/output/FileStoreFactory.kt | 7 +- .../output/accounting/AccountantImpl.kt | 27 ------- .../output/cleaner/SourceDataCleaner.kt | 44 ++++++------ .../output/config/CommandLineArgs.kt | 10 --- .../org/radarbase/output/config/PathConfig.kt | 30 ++------ .../radarbase/output/config/ResourceConfig.kt | 13 +++- .../output/config/RestructureConfig.kt | 58 ++++++++++++--- .../output/config/StorageIndexConfig.kt | 11 ++- ...tterConfig.kt => TargetFormatterConfig.kt} | 4 +- .../output/path/FormattedPathFactory.kt | 35 +++++---- .../output/path/MPPathFormatterPlugin.kt | 43 ++++++----- .../output/path/RecordPathFactory.kt | 21 ++---- .../output/source/AzureSourceStorage.kt | 1 + .../source/InMemoryStorageIndexFactory.kt | 5 ++ .../output/source/S3SourceStorage.kt | 1 + .../radarbase/output/source/SourceStorage.kt | 30 +------- .../output/source/SourceStorageFactory.kt | 35 +++------ .../output/source/SourceStorageManager.kt | 49 +++++++++++++ .../output/source/StorageIndexFactory.kt | 5 ++ .../output/source/StorageIndexManager.kt | 47 ++++++------ .../output/target/AzureTargetStorage.kt | 18 ++--- .../output/target/CombinedTargetStorage.kt | 72 +++++++++++++++++++ .../output/target/LocalTargetStorage.kt | 32 +++++---- .../output/target/S3TargetStorage.kt | 68 +++++++----------- .../radarbase/output/target/TargetStorage.kt | 6 +- .../output/target/TargetStorageFactory.kt | 13 ++-- .../java/org/radarbase/output/util/Path.kt | 15 +--- .../java/org/radarbase/output/util/Timer.kt | 1 + .../org/radarbase/output/worker/FileCache.kt | 3 +- .../output/worker/RadarKafkaRestructure.kt | 56 ++++++++------- .../radarbase/output/OffsetRangeFileTest.kt | 8 ++- .../output/cleaner/TimestampFileCacheTest.kt | 4 +- .../output/data/FileCacheStoreTest.kt | 34 +++++---- .../radarbase/output/data/FileCacheTest.kt | 26 ++++--- .../output/path/FormattedPathFactoryTest.kt | 21 ++++++ 37 files changed, 499 insertions(+), 410 deletions(-) rename src/main/java/org/radarbase/output/config/{BucketFormatterConfig.kt => TargetFormatterConfig.kt} (87%) create mode 100644 src/main/java/org/radarbase/output/source/InMemoryStorageIndexFactory.kt create mode 100644 src/main/java/org/radarbase/output/source/SourceStorageManager.kt create mode 100644 src/main/java/org/radarbase/output/source/StorageIndexFactory.kt create mode 100644 src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt diff --git a/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt b/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt index 60fcb7b..d69bec7 100644 --- a/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt +++ b/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt @@ -16,7 +16,6 @@ import kotlinx.coroutines.test.runTest import kotlinx.coroutines.withContext import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Test -import org.radarbase.output.config.PathConfig import org.radarbase.output.config.PathFormatterConfig import org.radarbase.output.config.ResourceConfig import org.radarbase.output.config.RestructureConfig @@ -49,9 +48,8 @@ class RestructureS3IntegrationTest { ), ) val config 
= RestructureConfig( - source = ResourceConfig("s3", s3 = sourceConfig), - target = ResourceConfig("s3", s3 = targetConfig), - paths = PathConfig(inputs = listOf(Paths.get("in"))), + sources = listOf(ResourceConfig("s3", path = Paths.get("in"), s3 = sourceConfig)), + targets = mapOf("radar-output-storage" to ResourceConfig("s3", path = Paths.get("output"), s3 = targetConfig)), worker = WorkerConfig(minimumFileAge = 0L), topics = topicConfig, ) diff --git a/src/main/java/org/radarbase/output/Application.kt b/src/main/java/org/radarbase/output/Application.kt index b6baddb..861dcaa 100644 --- a/src/main/java/org/radarbase/output/Application.kt +++ b/src/main/java/org/radarbase/output/Application.kt @@ -35,9 +35,8 @@ import org.radarbase.output.config.RestructureConfig import org.radarbase.output.format.RecordConverterFactory import org.radarbase.output.path.RecordPathFactory import org.radarbase.output.source.InMemoryStorageIndex -import org.radarbase.output.source.SourceStorage import org.radarbase.output.source.SourceStorageFactory -import org.radarbase.output.source.StorageIndexManager +import org.radarbase.output.source.SourceStorageManager import org.radarbase.output.target.TargetStorage import org.radarbase.output.target.TargetStorageFactory import org.radarbase.output.util.Timer @@ -47,9 +46,7 @@ import org.radarbase.output.worker.RadarKafkaRestructure import org.slf4j.LoggerFactory import redis.clients.jedis.JedisPool import java.io.IOException -import java.nio.file.Path import java.text.NumberFormat -import java.time.Duration import java.time.LocalDateTime import java.time.format.DateTimeFormatter import java.util.concurrent.atomic.LongAdder @@ -64,18 +61,23 @@ class Application( override val config = config.apply { validate() } override val recordConverter: RecordConverterFactory = config.format.createConverter() override val compression: Compression = config.compression.createCompression() - override val pathFactory: RecordPathFactory = config.paths.createFactory( - config.target, - recordConverter.extension + compression.extension, - config.topics, - ) - private val sourceStorageFactory = SourceStorageFactory(config.source, config.paths.temp) - override val sourceStorage: SourceStorage - get() = sourceStorageFactory.createSourceStorage() + private val sourceStorageFactory = SourceStorageFactory(config.paths.temp) + override val sourceStorage: List = config.consolidatedSources + .map { sourceConfig -> + val storage = sourceStorageFactory.createSourceStorage(sourceConfig) + SourceStorageManager(storage, InMemoryStorageIndex(), sourceConfig.index) + } + + override val targetStorage: TargetStorage = TargetStorageFactory() + .createTargetStorage(config.paths.target.defaultName, config.consolidatedTargets) - override val targetStorage: TargetStorage = - TargetStorageFactory(config.target).createTargetStorage() + override val pathFactory: RecordPathFactory = + config.paths.createFactory( + targetStorage, + recordConverter.extension + compression.extension, + config.topics, + ) override val redisHolder: RedisHolder = RedisHolder(JedisPool(config.redis.uri)) override val remoteLockManager: RemoteLockManager = RedisRemoteLockManager( @@ -88,27 +90,9 @@ class Application( override val workerSemaphore = Semaphore(config.worker.numThreads * 2) - override val storageIndexManagers: Map - private val jobs: List init { - val indexConfig = config.source.index - val (fullScan, emptyScan) = if (indexConfig == null) { - listOf(3600L, 900L) - } else { - listOf(indexConfig.fullSyncInterval, 
indexConfig.emptyDirectorySyncInterval) - }.map { Duration.ofSeconds(it) } - - storageIndexManagers = config.paths.inputs.associateWith { input -> - StorageIndexManager( - InMemoryStorageIndex(), - sourceStorage, - input, - fullScan, - emptyScan, - ) - } val serviceMutex = Mutex() jobs = listOfNotNull( RadarKafkaRestructure.job(config, serviceMutex), @@ -137,7 +121,7 @@ class Application( } runBlocking { - launch { targetStorage.initialize() } + targetStorage.initialize() } if (config.service.enable) { diff --git a/src/main/java/org/radarbase/output/FileStoreFactory.kt b/src/main/java/org/radarbase/output/FileStoreFactory.kt index 9448ac3..aa04c16 100644 --- a/src/main/java/org/radarbase/output/FileStoreFactory.kt +++ b/src/main/java/org/radarbase/output/FileStoreFactory.kt @@ -25,16 +25,14 @@ import org.radarbase.output.compression.Compression import org.radarbase.output.config.RestructureConfig import org.radarbase.output.format.RecordConverterFactory import org.radarbase.output.path.RecordPathFactory -import org.radarbase.output.source.SourceStorage -import org.radarbase.output.source.StorageIndexManager +import org.radarbase.output.source.SourceStorageManager import org.radarbase.output.target.TargetStorage import org.radarbase.output.worker.FileCacheStore import java.io.IOException -import java.nio.file.Path /** Factory for all factory classes and settings. */ interface FileStoreFactory { - val sourceStorage: SourceStorage + val sourceStorage: List val targetStorage: TargetStorage val pathFactory: RecordPathFactory val compression: Compression @@ -44,7 +42,6 @@ interface FileStoreFactory { val redisHolder: RedisHolder val offsetPersistenceFactory: OffsetPersistenceFactory val workerSemaphore: Semaphore - val storageIndexManagers: Map @Throws(IOException::class) fun newFileCacheStore(accountant: Accountant): FileCacheStore diff --git a/src/main/java/org/radarbase/output/accounting/AccountantImpl.kt b/src/main/java/org/radarbase/output/accounting/AccountantImpl.kt index f59a951..592d3f7 100644 --- a/src/main/java/org/radarbase/output/accounting/AccountantImpl.kt +++ b/src/main/java/org/radarbase/output/accounting/AccountantImpl.kt @@ -2,14 +2,10 @@ package org.radarbase.output.accounting import kotlinx.coroutines.CoroutineScope import org.radarbase.output.FileStoreFactory -import org.radarbase.output.config.RestructureConfig -import org.radarbase.output.target.TargetStorage import org.radarbase.output.util.Timer import org.slf4j.LoggerFactory import java.io.IOException import java.nio.file.Paths -import kotlin.io.path.deleteExisting -import kotlin.io.path.exists open class AccountantImpl( private val factory: FileStoreFactory, @@ -27,29 +23,6 @@ open class AccountantImpl( val offsets = offsetPersistence.read(offsetsKey) offsetFile = offsetPersistence.writer(scope, offsetsKey, offsets) - readDeprecatedOffsets(factory.config, factory.targetStorage, topic) - ?.takeUnless { it.isEmpty } - ?.let { - offsetFile.addAll(it) - offsetFile.triggerWrite() - } - } - - private suspend fun readDeprecatedOffsets( - config: RestructureConfig, - targetStorage: TargetStorage, - topic: String, - ): OffsetRangeSet? 
{ - val offsetsPath = config.paths.output - .resolve(OFFSETS_FILE_NAME) - .resolve("$topic.csv") - - return if (offsetsPath.exists()) { - OffsetFilePersistence(targetStorage).read(offsetsPath) - .also { offsetsPath.deleteExisting() } - } else { - null - } } override suspend fun remove(range: TopicPartitionOffsetRange) = diff --git a/src/main/java/org/radarbase/output/cleaner/SourceDataCleaner.kt b/src/main/java/org/radarbase/output/cleaner/SourceDataCleaner.kt index c8ab192..5c997b4 100644 --- a/src/main/java/org/radarbase/output/cleaner/SourceDataCleaner.kt +++ b/src/main/java/org/radarbase/output/cleaner/SourceDataCleaner.kt @@ -6,13 +6,13 @@ import kotlinx.coroutines.launch import kotlinx.coroutines.sync.Mutex import kotlinx.coroutines.sync.withPermit import kotlinx.coroutines.withContext +import org.radarbase.kotlin.coroutines.launchJoin import org.radarbase.output.Application.Companion.format import org.radarbase.output.FileStoreFactory import org.radarbase.output.accounting.Accountant import org.radarbase.output.accounting.AccountantImpl import org.radarbase.output.config.RestructureConfig -import org.radarbase.output.source.StorageIndex -import org.radarbase.output.source.StorageNode +import org.radarbase.output.source.SourceStorageManager import org.radarbase.output.util.ResourceContext.Companion.resourceContext import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended import org.radarbase.output.util.Timer @@ -21,7 +21,6 @@ import org.slf4j.LoggerFactory import java.io.Closeable import java.io.IOException import java.nio.file.Path -import java.nio.file.Paths import java.time.Instant import java.time.temporal.ChronoUnit import java.util.concurrent.atomic.LongAdder @@ -29,9 +28,10 @@ import kotlin.coroutines.coroutineContext class SourceDataCleaner( private val fileStoreFactory: FileStoreFactory, + private val sourceStorageManager: SourceStorageManager, ) : Closeable { + private val sourceStorage = sourceStorageManager.sourceStorage private val lockManager = fileStoreFactory.remoteLockManager - private val sourceStorage = fileStoreFactory.sourceStorage private val excludeTopics: Set = fileStoreFactory.config.topics .mapNotNullTo(HashSet()) { (topic, conf) -> topic.takeIf { conf.excludeFromDelete } @@ -45,11 +45,9 @@ class SourceDataCleaner( private val supervisor = SupervisorJob() @Throws(IOException::class, InterruptedException::class) - suspend fun process(storageIndex: StorageIndex, directoryName: String) { + suspend fun process() { // Get files and directories - val absolutePath = Paths.get(directoryName) - - val paths = topicPaths(storageIndex, absolutePath) + val paths = topicPaths(sourceStorage.root) logger.info("{} topics found", paths.size) @@ -58,7 +56,7 @@ class SourceDataCleaner( launch { try { val deleteCount = fileStoreFactory.workerSemaphore.withPermit { - mapTopic(storageIndex, p) + mapTopic(p) } if (deleteCount > 0) { logger.info("Removed {} files in topic {}", deleteCount, p.fileName) @@ -72,7 +70,7 @@ class SourceDataCleaner( } } - private suspend fun mapTopic(storageIndex: StorageIndex, topicPath: Path): Long { + private suspend fun mapTopic(topicPath: Path): Long { val topic = topicPath.fileName.toString() return try { lockManager.tryWithLock(topic) { @@ -86,7 +84,7 @@ class SourceDataCleaner( fileStoreFactory, ) } - deleteOldFiles(storageIndex, accountant, extractionCheck, topic, topicPath).toLong() + deleteOldFiles(accountant, extractionCheck, topic, topicPath).toLong() } } } @@ -97,7 +95,6 @@ class SourceDataCleaner( } private suspend fun 
deleteOldFiles( - storageIndex: StorageIndex, accountant: Accountant, extractionCheck: ExtractionCheck, topic: String, @@ -105,7 +102,7 @@ class SourceDataCleaner( ): Int { val offsets = accountant.offsets.copyForTopic(topic) - val paths = sourceStorage.listTopicFiles(storageIndex, topic, topicPath, maxFilesPerTopic) { f -> + val paths = sourceStorageManager.listTopicFiles(topic, topicPath, maxFilesPerTopic) { f -> f.lastModified.isBefore(deleteThreshold) && // ensure that there is a file with a larger offset also // processed, so the largest offset is never removed. @@ -117,8 +114,7 @@ class SourceDataCleaner( if (extractionCheck.isExtracted(file)) { logger.info("Removing {}", file.path) Timer.time("cleaner.delete") { - sourceStorage.delete(file.path) - storageIndex.remove(StorageNode.StorageFile(file.path, Instant.MIN)) + sourceStorageManager.delete(file.path) } true } else { @@ -131,8 +127,8 @@ class SourceDataCleaner( } } - private suspend fun topicPaths(storageIndex: StorageIndex, path: Path): List = - sourceStorage.listTopics(storageIndex, path, excludeTopics) + private suspend fun topicPaths(path: Path): List = + sourceStorageManager.listTopics(path, excludeTopics) // different services start on different topics to decrease lock contention .shuffled() @@ -149,14 +145,14 @@ class SourceDataCleaner( null } - private suspend fun runCleaner(factory: FileStoreFactory) { - SourceDataCleaner(factory).useSuspended { cleaner -> - for ((input, indexManager) in factory.storageIndexManagers) { - indexManager.update() - logger.info("Cleaning {}", input) - cleaner.process(indexManager.storageIndex, input.toString()) + private suspend fun runCleaner(factory: FileStoreFactory) = coroutineScope { + factory.sourceStorage.launchJoin { sourceStorage -> + SourceDataCleaner(factory, sourceStorage).useSuspended { cleaner -> + sourceStorage.storageIndexManager.update() + logger.info("Cleaning {}", sourceStorage.sourceStorage.root) + cleaner.process() + logger.info("Cleaned up {} files", cleaner.deletedFileCount.format()) } - logger.info("Cleaned up {} files", cleaner.deletedFileCount.format()) } } } diff --git a/src/main/java/org/radarbase/output/config/CommandLineArgs.kt b/src/main/java/org/radarbase/output/config/CommandLineArgs.kt index e56a76e..670f0c0 100644 --- a/src/main/java/org/radarbase/output/config/CommandLineArgs.kt +++ b/src/main/java/org/radarbase/output/config/CommandLineArgs.kt @@ -21,9 +21,6 @@ import com.beust.jcommander.validators.PositiveInteger import org.radarbase.output.config.RestructureConfig.Companion.RESTRUCTURE_CONFIG_FILE_NAME class CommandLineArgs { - @Parameter(description = " [ ...]", variableArity = true) - var inputPaths: List? = null - @Parameter( names = ["-F", "--config-file"], description = "Config file. By default, $RESTRUCTURE_CONFIG_FILE_NAME is tried.", @@ -58,13 +55,6 @@ class CommandLineArgs { ) var deduplicate: Boolean? = null - @Parameter( - names = ["-o", "--output-directory"], - description = "The output folder where the files are to be extracted.", - validateWith = [NonEmptyValidator::class], - ) - var outputDirectory: String? 
= null - @Parameter( names = ["-h", "--help"], help = true, diff --git a/src/main/java/org/radarbase/output/config/PathConfig.kt b/src/main/java/org/radarbase/output/config/PathConfig.kt index 0755275..b9de1f8 100644 --- a/src/main/java/org/radarbase/output/config/PathConfig.kt +++ b/src/main/java/org/radarbase/output/config/PathConfig.kt @@ -2,52 +2,36 @@ package org.radarbase.output.config import org.radarbase.output.path.FormattedPathFactory import org.radarbase.output.path.RecordPathFactory +import org.radarbase.output.target.TargetStorage import java.nio.file.Path -import java.nio.file.Paths import kotlin.io.path.createTempDirectory import kotlin.reflect.jvm.jvmName data class PathConfig( override val factory: String = FormattedPathFactory::class.jvmName, override val properties: Map = emptyMap(), - /** Input paths referencing the source resource. */ - val inputs: List = emptyList(), /** Temporary directory for processing output files before uploading. */ val temp: Path = createTempDirectory("radar-output-restructure"), - /** Output path on the target resource. */ - val output: Path = Paths.get("output"), /** Path formatting rules. */ val path: PathFormatterConfig = PathFormatterConfig(), /** - * Bucket formatting rules for the target storage. If no configuration is provided, this - * will not format any bucket for local storage, and it will use the target bucket (s3) - * or container (azure) as the default target bucket. + * Formatting rules for the target storage. */ - val bucket: BucketFormatterConfig? = null, + val target: TargetFormatterConfig = TargetFormatterConfig(), ) : PluginConfig { fun createFactory( - target: ResourceConfig, + targetStorage: TargetStorage, extension: String, topics: Map, ): RecordPathFactory { val pathFactory = factory.constructClass() - val bucketConfig = bucket - ?: when (target.sourceType) { - ResourceType.AZURE -> { - val container = requireNotNull(target.azure?.container) { "Either target container or bucket formatter config needs to be configured." } - BucketFormatterConfig(format = container, plugins = "", defaultName = container) - } - ResourceType.S3 -> { - val bucket = requireNotNull(target.s3?.bucket) { "Either target container or bucket formatter config needs to be configured." } - BucketFormatterConfig(format = bucket, plugins = "", defaultName = bucket) - } - else -> null - } + require(targetStorage.allowsPrefix(target.defaultName)) { "Default bucket ${target.defaultName} is not specified as a target storage" } pathFactory.init( + targetStorage = targetStorage, extension = extension, - config = copy(bucket = bucketConfig), + config = this, topics = topics, ) diff --git a/src/main/java/org/radarbase/output/config/ResourceConfig.kt b/src/main/java/org/radarbase/output/config/ResourceConfig.kt index cb9781a..01673bb 100644 --- a/src/main/java/org/radarbase/output/config/ResourceConfig.kt +++ b/src/main/java/org/radarbase/output/config/ResourceConfig.kt @@ -3,20 +3,31 @@ package org.radarbase.output.config import com.fasterxml.jackson.annotation.JsonIgnore import org.radarbase.output.config.ResourceType.Companion.toResourceType import org.radarbase.output.config.RestructureConfig.Companion.copyOnChange +import java.nio.file.Path +import java.nio.file.Paths data class ResourceConfig( /** Resource type. One of s3, azure or local. */ val type: String, + val path: Path = Paths.get("/"), val s3: S3Config? = null, val local: LocalConfig? = null, val azure: AzureConfig? = null, - val index: StorageIndexConfig? 
= null, + val index: StorageIndexConfig = StorageIndexConfig(), ) { @get:JsonIgnore val sourceType: ResourceType by lazy { requireNotNull(type.toResourceType()) { "Unknown resource type $type, choose s3, azure or local" } } + @get:JsonIgnore + val name: String? + get() = when (sourceType) { + ResourceType.S3 -> checkNotNull(s3) { "No S3 configuration provided." }.bucket + ResourceType.AZURE -> checkNotNull(azure) { "No Azure configuration provided." }.container + else -> null + } + fun validate() { when (sourceType) { ResourceType.S3 -> checkNotNull(s3) { "No S3 configuration provided." } diff --git a/src/main/java/org/radarbase/output/config/RestructureConfig.kt b/src/main/java/org/radarbase/output/config/RestructureConfig.kt index 122a42a..68928e6 100644 --- a/src/main/java/org/radarbase/output/config/RestructureConfig.kt +++ b/src/main/java/org/radarbase/output/config/RestructureConfig.kt @@ -1,5 +1,6 @@ package org.radarbase.output.config +import com.fasterxml.jackson.annotation.JsonIgnore import org.slf4j.LoggerFactory import java.nio.file.Paths @@ -13,9 +14,13 @@ data class RestructureConfig( /** Topic exceptional handling. */ val topics: Map<String, TopicConfig> = emptyMap(), /** Source data resource configuration. */ - val source: ResourceConfig = ResourceConfig("s3"), + val source: ResourceConfig? = null, + /** Source data resource configurations. */ + val sources: List<ResourceConfig> = emptyList(), + /** Target data resource configuration. */ + val target: ResourceConfig? = null, /** Target data resource configuration. */ - val target: ResourceConfig = ResourceConfig("local", local = LocalConfig()), + val targets: Map<String, ResourceConfig> = emptyMap(), /** Redis configuration for synchronization and storing offsets. */ val redis: RedisConfig = RedisConfig(), /** Paths to use for processing. */ @@ -25,9 +30,36 @@ data class RestructureConfig( /** File format to use for output files. */ val format: FormatConfig = FormatConfig(), ) { + @get:JsonIgnore + val consolidatedTargets: Map<String, ResourceConfig> by lazy { + buildMap(targets.size + 1) { + putAll(targets) + + if (target != null) { + val name = target.name + if (name != null && name !in this) { + put(name, target) + } else { + val bucketConfig = paths.target + require(bucketConfig.defaultName !in this) { "Deprecated target storage does not have a proper name." } + put(bucketConfig.defaultName, target) + } + } + } + } + + @get:JsonIgnore + val consolidatedSources: List<ResourceConfig> by lazy { + if (source != null) { + sources + source + } else { + sources + } + } + fun validate() { - source.validate() - target.validate() + consolidatedSources.forEach(ResourceConfig::validate) + consolidatedTargets.values.forEach(ResourceConfig::validate) cleaner.validate() service.validate() check(worker.enable || cleaner.enable) { "Either restructuring or cleaning needs to be enabled."
} @@ -41,8 +73,6 @@ data class RestructureConfig( args.numThreads?.let { copy(worker = worker.copy(numThreads = it)) } args.maxFilesPerTopic?.let { copy(worker = worker.copy(maxFilesPerTopic = it)) } args.tmpDir?.let { copy(paths = paths.copy(temp = Paths.get(it))) } - args.inputPaths?.let { inputs -> copy(paths = paths.copy(inputs = inputs.map { Paths.get(it) })) } - args.outputDirectory?.let { copy(paths = paths.copy(output = Paths.get(it))) } args.format?.let { copy(format = format.copy(type = it)) } args.deduplicate?.let { copy(format = format.copy(deduplication = format.deduplication.copy(enable = it))) @@ -53,8 +83,20 @@ data class RestructureConfig( } fun withEnv(): RestructureConfig = this - .copyOnChange(source, { it.withEnv("SOURCE_") }) { copy(source = it) } - .copyOnChange(target, { it.withEnv("TARGET_") }) { copy(target = it) } + .copyOnChange(source, { it?.withEnv("SOURCE_") }) { copy(source = it) } + .copyOnChange(sources, { it.map { source -> source.withEnv("SOURCE_") } }) { copy(sources = it) } + .copyOnChange( + targets, + { + it.mapValues { (name, target) -> + val prefix = "TARGET_" + name.replace('-', '_').uppercase() + target + .withEnv("TARGET_") + .withEnv(prefix) + } + }, + ) { copy(targets = it) } + .copyOnChange(target, { it?.withEnv("TARGET_") }) { copy(target = it) } .copyOnChange(redis, { it.withEnv() }) { copy(redis = it) } companion object { diff --git a/src/main/java/org/radarbase/output/config/StorageIndexConfig.kt b/src/main/java/org/radarbase/output/config/StorageIndexConfig.kt index 05fda27..d541f9a 100644 --- a/src/main/java/org/radarbase/output/config/StorageIndexConfig.kt +++ b/src/main/java/org/radarbase/output/config/StorageIndexConfig.kt @@ -1,5 +1,8 @@ package org.radarbase.output.config +import kotlin.time.Duration +import kotlin.time.Duration.Companion.seconds + data class StorageIndexConfig( /** How often to fully sync the storage index, in seconds. */ val fullSyncInterval: Long = 3600L, @@ -9,4 +12,10 @@ data class StorageIndexConfig( * full sync. */ val emptyDirectorySyncInterval: Long = 900L, -) +) { + val fullSyncDuration: Duration + get() = fullSyncInterval.seconds + + val emptyDirectorySyncDuration: Duration + get() = emptyDirectorySyncInterval.seconds +} diff --git a/src/main/java/org/radarbase/output/config/BucketFormatterConfig.kt b/src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt similarity index 87% rename from src/main/java/org/radarbase/output/config/BucketFormatterConfig.kt rename to src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt index dc65ec3..7cb983f 100644 --- a/src/main/java/org/radarbase/output/config/BucketFormatterConfig.kt +++ b/src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt @@ -1,7 +1,7 @@ package org.radarbase.output.config -/** Configuration on how to format the target bucket name. */ -data class BucketFormatterConfig( +/** Configuration on how to format the target storage name to be used. */ +data class TargetFormatterConfig( /** Format string. May include any variables computed by the configured plugins. 
*/ val format: String = "radar-output-storage", /** diff --git a/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt b/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt index 9fc7fbe..31477cc 100644 --- a/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt +++ b/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt @@ -16,42 +16,39 @@ package org.radarbase.output.path -import org.radarbase.output.config.BucketFormatterConfig import org.radarbase.output.config.PathConfig import org.radarbase.output.config.PathFormatterConfig +import org.radarbase.output.config.TargetFormatterConfig import org.radarbase.output.config.TopicConfig +import org.radarbase.output.target.TargetStorage import org.slf4j.LoggerFactory open class FormattedPathFactory : RecordPathFactory() { private lateinit var pathFormatter: PathFormatter private var topicFormatters: Map = emptyMap() - private var bucketFormatter: PathFormatter? = null + private lateinit var targetFormatter: PathFormatter private lateinit var disabledBucketRegexes: List private lateinit var defaultBucketName: String override fun init( + targetStorage: TargetStorage, extension: String, config: PathConfig, topics: Map, ) { - super.init(extension, config, topics) + super.init(targetStorage, extension, config, topics) pathFormatter = pathConfig.path.toPathFormatter() - bucketFormatter = pathConfig.bucket?.toBucketFormatter() - disabledBucketRegexes = pathConfig.bucket - ?.disabledFormats - ?.map { it.toRegex(RegexOption.IGNORE_CASE) } - ?: emptyList() - defaultBucketName = pathConfig.bucket - ?.defaultName - ?: "radar-output-storage" + targetFormatter = pathConfig.target.toTargetFormatter() + disabledBucketRegexes = pathConfig.target + .disabledFormats + .map { it.toRegex(RegexOption.IGNORE_CASE) } + defaultBucketName = pathConfig.target.defaultName logger.info("Formatting path with {}", pathFormatter) } - override suspend fun bucket(pathParameters: PathFormatParameters?): String? 
{ - val formatter = bucketFormatter ?: return null - pathParameters ?: return pathConfig.bucket?.defaultName - val format = formatter.format(pathParameters) + override suspend fun target(pathParameters: PathFormatParameters): String { + val format = targetFormatter.format(pathParameters) return if (disabledBucketRegexes.any { it.matches(format) }) { defaultBucketName } else { @@ -74,8 +71,10 @@ open class FormattedPathFactory : RecordPathFactory() { override suspend fun relativePath( pathParameters: PathFormatParameters, - ): String = (topicFormatters[pathParameters.topic] ?: pathFormatter) - .format(pathParameters) + ): String { + val formatter = topicFormatters[pathParameters.topic] ?: pathFormatter + return formatter.format(pathParameters) + } companion object { private fun PathFormatterConfig.toPathFormatter(): PathFormatter = PathFormatter( @@ -83,7 +82,7 @@ open class FormattedPathFactory : RecordPathFactory() { plugins.toPathFormatterPlugins(properties), ) - private fun BucketFormatterConfig.toBucketFormatter(): PathFormatter = PathFormatter( + private fun TargetFormatterConfig.toTargetFormatter(): PathFormatter = PathFormatter( format, plugins.toPathFormatterPlugins(properties), checkMinimalDistinction = false, diff --git a/src/main/java/org/radarbase/output/path/MPPathFormatterPlugin.kt b/src/main/java/org/radarbase/output/path/MPPathFormatterPlugin.kt index 883bc80..90abe70 100644 --- a/src/main/java/org/radarbase/output/path/MPPathFormatterPlugin.kt +++ b/src/main/java/org/radarbase/output/path/MPPathFormatterPlugin.kt @@ -83,10 +83,13 @@ class MPPathFormatterPlugin : PathFormatterPlugin.Factory { } pluginScope.launch { + val staleDuration = 20.minutes + val delayDuration = staleDuration * 3 / 2 + while (isActive) { - delay(30.minutes) + delay(delayDuration) subjectCache - .filter { it.value.isStale(20.minutes) } + .filter { it.value.isStale(staleDuration) } .forEach { (key, value) -> subjectCache.remove(key, value) } @@ -106,13 +109,15 @@ class MPPathFormatterPlugin : PathFormatterPlugin.Factory { "group" -> subjectProperty("default") { group } "externalId" -> subjectProperty("unknown-user") { externalId ?: id } "userId", "login", "id" -> subjectProperty("unknown-user") { id } - else -> if (parameterContents.startsWith("project:")) { - projectProperty("unknown-$parameterContents") { - attributes[parameterContents.removePrefix("project:")] - } - } else { - subjectProperty("unknown-$parameterContents") { - attributes[parameterContents] + else -> { + if (parameterContents.startsWith("project:")) { + projectProperty("unknown-$parameterContents") { + attributes[parameterContents.removePrefix("project:")] + } + } else { + subjectProperty("unknown-$parameterContents") { + attributes[parameterContents] + } } } } @@ -128,18 +133,18 @@ class MPPathFormatterPlugin : PathFormatterPlugin.Factory { val projectId = key.getOrNull("projectId") ?: return null val userId = key.getOrNull("userId") ?: return null - val cache = subjectCache.computeIfAbsent(projectId.toString()) { projectIdString -> - CachedMap(cacheConfig) { - val subjects = mpClient.requestSubjects(projectIdString) - buildMap(subjects.size) { - subjects.forEach { subject -> - val subjectId = subject.id ?: return@forEach - put(subjectId, subject) - } - } + val cache = subjectCache.computeIfAbsent(projectId.toString(), ::createCache) + return cache.get(userId.toString()) + } + + private fun createCache(projectId: String) = CachedMap(cacheConfig) { + val subjects = mpClient.requestSubjects(projectId) + buildMap(subjects.size) { + 
subjects.forEach { subject -> + val subjectId = subject.id ?: return@forEach + put(subjectId, subject) } } - return cache.get(userId.toString()) } private inline fun projectProperty( diff --git a/src/main/java/org/radarbase/output/path/RecordPathFactory.kt b/src/main/java/org/radarbase/output/path/RecordPathFactory.kt index 1aafbc4..9253731 100644 --- a/src/main/java/org/radarbase/output/path/RecordPathFactory.kt +++ b/src/main/java/org/radarbase/output/path/RecordPathFactory.kt @@ -23,6 +23,7 @@ import org.apache.avro.generic.GenericRecord import org.apache.avro.generic.GenericRecordBuilder import org.radarbase.output.config.PathConfig import org.radarbase.output.config.TopicConfig +import org.radarbase.output.target.TargetStorage import org.radarbase.output.util.TimeUtil import java.nio.file.Path import java.nio.file.Paths @@ -33,16 +34,12 @@ abstract class RecordPathFactory { private set open fun init( + targetStorage: TargetStorage, extension: String, config: PathConfig, topics: Map = emptyMap(), ) { this.pathConfig = config.copy( - output = if (config.output.isAbsolute) { - rootPath.relativize(config.output) - } else { - config.output - }, path = config.path.copy( properties = buildMap(config.path.properties.size + 1) { putAll(config.path.properties) @@ -88,20 +85,15 @@ abstract class RecordPathFactory { ) return coroutineScope { - val bucketJob = async { bucket(params) } + val targetJob = async { target(params) } val pathJob = async { relativePath(params) } - val path = pathConfig.output.resolve(pathJob.await()) - val bucket = bucketJob.await() - if (bucket != null) { - Paths.get(bucket).resolve(path) - } else { - path - } + Paths.get(targetJob.await()) + .resolve(pathJob.await()) } } - abstract suspend fun bucket(pathParameters: PathFormatParameters?): String? + abstract suspend fun target(pathParameters: PathFormatParameters): String /** * Get the relative path corresponding to given record on given topic. 
@@ -114,7 +106,6 @@ abstract class RecordPathFactory { companion object { private val ILLEGAL_CHARACTER_PATTERN = Pattern.compile("[^a-zA-Z0-9_-]+") - private val rootPath = Paths.get("/") fun sanitizeId(id: Any?, defaultValue: String): String = id ?.let { ILLEGAL_CHARACTER_PATTERN.matcher(it.toString()).replaceAll("") } diff --git a/src/main/java/org/radarbase/output/source/AzureSourceStorage.kt b/src/main/java/org/radarbase/output/source/AzureSourceStorage.kt index 2ca026d..cbf2ab5 100644 --- a/src/main/java/org/radarbase/output/source/AzureSourceStorage.kt +++ b/src/main/java/org/radarbase/output/source/AzureSourceStorage.kt @@ -16,6 +16,7 @@ import kotlin.io.path.createTempFile import kotlin.io.path.deleteIfExists class AzureSourceStorage( + override val root: Path, client: BlobServiceClient, config: AzureConfig, private val tempPath: Path, diff --git a/src/main/java/org/radarbase/output/source/InMemoryStorageIndexFactory.kt b/src/main/java/org/radarbase/output/source/InMemoryStorageIndexFactory.kt new file mode 100644 index 0000000..f86bd71 --- /dev/null +++ b/src/main/java/org/radarbase/output/source/InMemoryStorageIndexFactory.kt @@ -0,0 +1,5 @@ +package org.radarbase.output.source + +class InMemoryStorageIndexFactory : StorageIndexFactory { + override fun get(): StorageIndex = InMemoryStorageIndex() +} diff --git a/src/main/java/org/radarbase/output/source/S3SourceStorage.kt b/src/main/java/org/radarbase/output/source/S3SourceStorage.kt index 8a33444..1b9088b 100644 --- a/src/main/java/org/radarbase/output/source/S3SourceStorage.kt +++ b/src/main/java/org/radarbase/output/source/S3SourceStorage.kt @@ -23,6 +23,7 @@ import kotlin.io.path.pathString import kotlin.time.Duration.Companion.seconds class S3SourceStorage( + override val root: Path, private val s3Client: MinioClient, config: S3Config, private val tempPath: Path, diff --git a/src/main/java/org/radarbase/output/source/SourceStorage.kt b/src/main/java/org/radarbase/output/source/SourceStorage.kt index d07cf23..e4b7838 100644 --- a/src/main/java/org/radarbase/output/source/SourceStorage.kt +++ b/src/main/java/org/radarbase/output/source/SourceStorage.kt @@ -1,15 +1,14 @@ package org.radarbase.output.source import org.apache.avro.file.SeekableInput -import org.radarbase.output.util.AvroFileLister.Companion.avroFileTreeLister -import org.radarbase.output.util.AvroTopicLister.Companion.avroTopicTreeLister import org.radarbase.output.util.SuspendedCloseable -import org.radarbase.output.util.TopicPath import java.nio.file.Path import java.time.Instant /** Source storage type. */ interface SourceStorage { + val root: Path + /** Create a reader for the storage medium. It should be closed by the caller. */ fun createReader(): SourceStorageReader @@ -28,31 +27,6 @@ interface SourceStorage { lastModified = if (status is StorageNode.StorageFile) status.lastModified else Instant.now(), ) - /** - * Recursively returns all record files in a sequence of a given topic with path. - * The path must only contain records of a single topic, this is not verified. - */ - suspend fun listTopicFiles( - storageIndex: StorageIndex, - topic: String, - topicPath: Path, - limit: Int, - predicate: (TopicFile) -> Boolean, - ): List = storageIndex.avroFileTreeLister(this) - .list(TopicPath(topic, topicPath), limit, predicate) - - /** - * Recursively find all topic root paths of records in the given path. - * Exclude paths belonging to the set of given excluded topics. 
- */ - suspend fun listTopics( - storageIndex: StorageIndex, - root: Path, - exclude: Set, - ): List = storageIndex.avroTopicTreeLister() - .listTo(LinkedHashSet(), root) - .filter { it.fileName.toString() !in exclude } - /** * File reader for the storage medium. * All inputs opened by this reader should be closed before closing the reader itself. diff --git a/src/main/java/org/radarbase/output/source/SourceStorageFactory.kt b/src/main/java/org/radarbase/output/source/SourceStorageFactory.kt index 6396a49..5ecbd52 100644 --- a/src/main/java/org/radarbase/output/source/SourceStorageFactory.kt +++ b/src/main/java/org/radarbase/output/source/SourceStorageFactory.kt @@ -1,41 +1,28 @@ package org.radarbase.output.source import com.azure.storage.blob.BlobServiceClient -import io.minio.MinioClient import org.radarbase.output.config.ResourceConfig import org.radarbase.output.config.ResourceType import java.nio.file.Path class SourceStorageFactory( - private val resourceConfig: ResourceConfig, private val tempPath: Path, ) { - private val s3SourceClient: MinioClient? = if (resourceConfig.sourceType == ResourceType.S3) { - requireNotNull(resourceConfig.s3) { "Missing S3 configuration" } - .createS3Client() - } else { - null - } - - private val azureSourceClient: BlobServiceClient? = - if (resourceConfig.sourceType == ResourceType.AZURE) { - requireNotNull(resourceConfig.azure) { "Missing Azure configuration" } - .createAzureClient() - } else { - null - } + fun createSourceStorage(consolidatedSources: List) = consolidatedSources.map { createSourceStorage(it) } - fun createSourceStorage() = when (resourceConfig.sourceType) { + fun createSourceStorage(sourceConfig: ResourceConfig) = when (sourceConfig.sourceType) { ResourceType.S3 -> { - val s3Config = requireNotNull(resourceConfig.s3) { "Missing S3 configuration for source storage" } - val minioClient = requireNotNull(s3SourceClient) { "Missing S3 client configuration for source storage" } - S3SourceStorage(minioClient, s3Config, tempPath) + val s3Config = + requireNotNull(sourceConfig.s3) { "Missing S3 configuration for source storage" } + val minioClient = s3Config.createS3Client() + S3SourceStorage(sourceConfig.path, minioClient, s3Config, tempPath) } ResourceType.AZURE -> { - val azureClient = requireNotNull(azureSourceClient) { "Missing Azure client configuration for source storage" } - val azureConfig = requireNotNull(resourceConfig.azure) { "Missing Azure configuration for source storage" } - AzureSourceStorage(azureClient, azureConfig, tempPath) + val azureConfig = requireNotNull(sourceConfig.azure) { "Missing Azure configuration for source storage" } + val azureSourceClient: BlobServiceClient = azureConfig.createAzureClient() + + AzureSourceStorage(sourceConfig.path, azureSourceClient, azureConfig, tempPath) } - else -> throw IllegalStateException("Cannot create kafka storage for type ${resourceConfig.sourceType}") + else -> throw IllegalStateException("Cannot create kafka storage for type ${sourceConfig.sourceType}") } } diff --git a/src/main/java/org/radarbase/output/source/SourceStorageManager.kt b/src/main/java/org/radarbase/output/source/SourceStorageManager.kt new file mode 100644 index 0000000..dd7646e --- /dev/null +++ b/src/main/java/org/radarbase/output/source/SourceStorageManager.kt @@ -0,0 +1,49 @@ +package org.radarbase.output.source + +import org.radarbase.output.config.StorageIndexConfig +import org.radarbase.output.util.AvroFileLister.Companion.avroFileTreeLister +import 
org.radarbase.output.util.AvroTopicLister.Companion.avroTopicTreeLister +import org.radarbase.output.util.TopicPath +import java.nio.file.Path +import java.time.Instant + +class SourceStorageManager( + val sourceStorage: SourceStorage, + val storageIndex: StorageIndex, + storageIndexConfig: StorageIndexConfig, +) { + val storageIndexManager: StorageIndexManager = StorageIndexManager( + storageIndex, + sourceStorage, + sourceStorage.root, + storageIndexConfig, + ) + + suspend fun delete(path: Path) { + sourceStorage.delete(path) + storageIndex.remove(StorageNode.StorageFile(path, Instant.MIN)) + } + + /** + * Recursively returns all record files in a sequence of a given topic with path. + * The path must only contain records of a single topic, this is not verified. + */ + suspend fun listTopicFiles( + topic: String, + topicPath: Path, + limit: Int, + predicate: (TopicFile) -> Boolean, + ): List = storageIndex.avroFileTreeLister(sourceStorage) + .list(TopicPath(topic, topicPath), limit, predicate) + + /** + * Recursively find all topic root paths of records in the given path. + * Exclude paths belonging to the set of given excluded topics. + */ + suspend fun listTopics( + root: Path, + exclude: Set, + ): List = storageIndex.avroTopicTreeLister() + .listTo(LinkedHashSet(), root) + .filter { it.fileName.toString() !in exclude } +} diff --git a/src/main/java/org/radarbase/output/source/StorageIndexFactory.kt b/src/main/java/org/radarbase/output/source/StorageIndexFactory.kt new file mode 100644 index 0000000..e953281 --- /dev/null +++ b/src/main/java/org/radarbase/output/source/StorageIndexFactory.kt @@ -0,0 +1,5 @@ +package org.radarbase.output.source + +interface StorageIndexFactory { + fun get(): StorageIndex +} diff --git a/src/main/java/org/radarbase/output/source/StorageIndexManager.kt b/src/main/java/org/radarbase/output/source/StorageIndexManager.kt index c9ed4ed..352816d 100644 --- a/src/main/java/org/radarbase/output/source/StorageIndexManager.kt +++ b/src/main/java/org/radarbase/output/source/StorageIndexManager.kt @@ -1,10 +1,10 @@ package org.radarbase.output.source import org.radarbase.kotlin.coroutines.forkJoin +import org.radarbase.output.config.StorageIndexConfig import org.slf4j.LoggerFactory import java.nio.file.Path -import java.time.Duration -import java.time.Instant +import kotlin.time.TimeSource.Monotonic.markNow /** Manager to manage a storage index. */ class StorageIndexManager( @@ -14,43 +14,41 @@ class StorageIndexManager( private val sourceStorage: SourceStorage, /** Root directory in source storage to start scanning. */ root: Path, - /** How often to rescan the full directory structure. */ - private val rescanDirectoryDuration: Duration, - /** How often to rescan empty directories. */ - private val rescanEmptyDuration: Duration, + config: StorageIndexConfig, ) { private val root = StorageNode.StorageDirectory(root) + private val rescanEmptyDuration = config.emptyDirectorySyncDuration + private val rescanDirectoryDuration = config.fullSyncDuration - private var nextSync = Instant.MIN + private var nextSync = markNow() - private var nextEmptySync = Instant.MIN + private var nextEmptySync = markNow() /** Update the storage index, taking into account caching times. 
*/ suspend fun update() { if (storageIndex !is MutableStorageIndex) return - if (nextSync < Instant.now()) { - sync() - } else { - val rescanEmpty = nextEmptySync < Instant.now() - if (rescanEmpty) { + when { + nextSync.hasPassedNow() -> { + sync() + } + nextEmptySync.hasPassedNow() -> { logger.info("Updating source {} index (including empty directories)...", root) - nextEmptySync = Instant.now() + rescanEmptyDuration - } else { + nextEmptySync = markNow() + rescanEmptyDuration + val listOperations = storageIndex.updateLevel(root, true) + logger.debug("Updated source {} with {} list operations...", root, listOperations) + } + else -> { logger.info("Updating source {} index (excluding empty directories)...", root) + val listOperations = storageIndex.updateLevel(root, false) + logger.debug("Updated source {} with {} list operations...", root, listOperations) } - val listOperations = storageIndex.updateLevel(root, rescanEmpty) - logger.debug("Updated source {} with {} list operations...", root, listOperations) } } private suspend fun MutableStorageIndex.updateLevel(node: StorageNode.StorageDirectory, rescanEmpty: Boolean): Long { val list = list(node) if (list.isEmpty()) { - return if (rescanEmpty) { - syncLevel(node) - } else { - 0L - } + return if (rescanEmpty) syncLevel(node) else 0L } val lastFile = list.asSequence() .filterIsInstance() @@ -78,8 +76,9 @@ class StorageIndexManager( logger.info("Syncing source {} index...", root) val listOperations = storageIndex.syncLevel(root) logger.debug("Synced source {} index with {} list operations...", root, listOperations) - nextSync = Instant.now() + rescanDirectoryDuration - nextEmptySync = Instant.now() + rescanEmptyDuration + val now = markNow() + nextSync = now + rescanDirectoryDuration + nextEmptySync = now + rescanEmptyDuration } private suspend fun MutableStorageIndex.syncLevel(node: StorageNode.StorageDirectory): Long { diff --git a/src/main/java/org/radarbase/output/target/AzureTargetStorage.kt b/src/main/java/org/radarbase/output/target/AzureTargetStorage.kt index dff4eb1..094ed64 100644 --- a/src/main/java/org/radarbase/output/target/AzureTargetStorage.kt +++ b/src/main/java/org/radarbase/output/target/AzureTargetStorage.kt @@ -25,8 +25,6 @@ import kotlinx.coroutines.withContext import org.radarbase.kotlin.coroutines.CacheConfig import org.radarbase.kotlin.coroutines.CachedValue import org.radarbase.output.config.AzureConfig -import org.radarbase.output.util.firstSegment -import org.radarbase.output.util.splitFirstSegment import org.slf4j.LoggerFactory import java.io.IOException import java.io.InputStream @@ -38,7 +36,10 @@ import kotlin.time.Duration.Companion.days import kotlin.time.Duration.Companion.hours import kotlin.time.Duration.Companion.minutes -class AzureTargetStorage(private val config: AzureConfig) : TargetStorage { +class AzureTargetStorage( + private val root: Path, + private val config: AzureConfig, +) : TargetStorage { private lateinit var serviceClient: BlobServiceClient private val containerClient: ConcurrentMap> = ConcurrentHashMap() private val cacheConfig = CacheConfig( @@ -46,6 +47,7 @@ class AzureTargetStorage(private val config: AzureConfig) : TargetStorage { retryDuration = 1.hours, exceptionCacheDuration = 1.minutes, ) + private val container = requireNotNull(config.container) { "Missing Azure Blob Storage container setting" } init { logger.info( @@ -63,8 +65,6 @@ class AzureTargetStorage(private val config: AzureConfig) : TargetStorage { } } - private suspend fun client(path: Path) = 
client(path.firstSegment()) - private suspend fun client(container: String) = containerClient.computeIfAbsent(container) { CachedValue( @@ -129,14 +129,10 @@ class AzureTargetStorage(private val config: AzureConfig) : TargetStorage { blob(path).delete() } - override suspend fun createDirectories(directory: Path) { - // ensure bucket exists - client(directory) - } + override suspend fun createDirectories(directory: Path?) = Unit private suspend fun blob(path: Path): BlobClient { - val (container, key) = path.splitFirstSegment() - return client(container).getBlobClient(key) + return client(container).getBlobClient(root.resolve(path).toString()) } companion object { diff --git a/src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt b/src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt new file mode 100644 index 0000000..7b80860 --- /dev/null +++ b/src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt @@ -0,0 +1,72 @@ +package org.radarbase.output.target + +import kotlinx.coroutines.coroutineScope +import org.radarbase.kotlin.coroutines.launchJoin +import org.radarbase.output.target.TargetStorage.PathStatus +import java.io.InputStream +import java.nio.file.Path + +class CombinedTargetStorage( + private val delegates: Map<String, TargetStorage>, + defaultName: String? = null, +) : TargetStorage { + private val defaultDelegate = if (defaultName != null) { + requireNotNull(delegates[defaultName]) { "Default target storage $defaultName not found in ${delegates.keys}" } + } else { + require(delegates.size == 1) { "Must provide a default target storage if more than one target storage is defined: ${delegates.keys}" } + delegates.values.first() + } + + override fun allowsPrefix(prefix: String): Boolean = prefix in delegates + + override suspend fun initialize() = coroutineScope { + delegates.values.launchJoin { it.initialize() } + } + + override suspend fun status(path: Path): PathStatus? = withDelegate(path) { status(it) } + + override suspend fun newInputStream(path: Path): InputStream = withDelegate(path) { newInputStream(it) } + + override suspend fun move(oldPath: Path, newPath: Path) { + val (oldDelegate, oldDelegatePath) = delegate(oldPath) + val (newDelegate, newDelegatePath) = delegate(newPath) + + require(oldDelegate == newDelegate) { "Cannot move files between storage systems ($oldPath to $newPath)" } + return oldDelegate.move(oldDelegatePath, newDelegatePath) + } + + override suspend fun store(localPath: Path, newPath: Path) = withDelegate(newPath) { + store(localPath, it) + } + + override suspend fun delete(path: Path) = withDelegate(path) { delete(it) } + + override suspend fun createDirectories(directory: Path?)
{ + if (directory != null) { + val delegateName = directory.firstOrNull() ?: return + val delegate = delegates[delegateName.toString()] ?: return + + if (directory.count() == 1) { + delegate.createDirectories(null) + } else { + delegate.createDirectories(delegateName.relativize(directory)) + } + } + } + + private fun delegate(path: Path): Pair<TargetStorage, Path> { + val targetName = requireNotNull(path.firstOrNull()) { "Target storage not found in path '$path'" } + val delegate = delegates[targetName.toString()] ?: defaultDelegate + val delegatePath = try { + targetName.relativize(path) + } catch (ex: IllegalArgumentException) { + throw IllegalArgumentException("Failed to split path $path into a relative path", ex) + } + return Pair(delegate, delegatePath) + } + + private inline fun <T> withDelegate(path: Path, block: TargetStorage.(Path) -> T): T { + val (delegate, delegatePath) = delegate(path) + return delegate.block(delegatePath) + } +} diff --git a/src/main/java/org/radarbase/output/target/LocalTargetStorage.kt b/src/main/java/org/radarbase/output/target/LocalTargetStorage.kt index be1aafb..d01d615 100644 --- a/src/main/java/org/radarbase/output/target/LocalTargetStorage.kt +++ b/src/main/java/org/radarbase/output/target/LocalTargetStorage.kt @@ -36,7 +36,10 @@ import kotlin.io.path.moveTo import kotlin.io.path.setAttribute import kotlin.io.path.setPosixFilePermissions -class LocalTargetStorage(private val config: LocalConfig) : TargetStorage { +class LocalTargetStorage( + private val root: Path, + private val config: LocalConfig, +) : TargetStorage { init { logger.info( "Local storage configured with user id {}:{} (-1 if not configured)", @@ -48,23 +51,25 @@ class LocalTargetStorage(private val config: LocalConfig) : TargetStorage { override suspend fun initialize() = Unit @Throws(IOException::class) - override suspend fun status(path: Path): TargetStorage.PathStatus? = - withContext(Dispatchers.IO) { - if (path.exists()) { - TargetStorage.PathStatus(path.fileSize()) + override suspend fun status(path: Path): TargetStorage.PathStatus? { + val rootedPath = path.withRoot() + return withContext(Dispatchers.IO) { + if (rootedPath.exists()) { + TargetStorage.PathStatus(rootedPath.fileSize()) } else { null } } + } @Throws(IOException::class) override suspend fun newInputStream(path: Path): InputStream = withContext(Dispatchers.IO) { - path.inputStream() + path.withRoot().inputStream() } @Throws(IOException::class) override suspend fun move(oldPath: Path, newPath: Path) = withContext(Dispatchers.IO) { - doMove(oldPath, newPath) + doMove(oldPath.withRoot(), newPath.withRoot()) } private fun doMove(oldPath: Path, newPath: Path) { @@ -79,16 +84,17 @@ class LocalTargetStorage(private val config: LocalConfig) : TargetStorage { override suspend fun store(localPath: Path, newPath: Path) = withContext(Dispatchers.IO) { localPath.updateUser() localPath.setPosixFilePermissions(PosixFilePermissions.fromString("rw-r--r--")) - doMove(localPath, newPath) + doMove(localPath, newPath.withRoot()) } - override suspend fun createDirectories(directory: Path) = withContext(Dispatchers.IO) { - directory.createDirectories( + override suspend fun createDirectories(directory: Path?)
= withContext(Dispatchers.IO) { + val dir = directory?.withRoot() ?: root + dir.createDirectories( PosixFilePermissions.asFileAttribute( PosixFilePermissions.fromString("rwxr-xr-x"), ), ) - directory.updateUser() + dir.updateUser() } private fun Path.updateUser() { @@ -102,9 +108,11 @@ class LocalTargetStorage(private val config: LocalConfig) : TargetStorage { @Throws(IOException::class) override suspend fun delete(path: Path) = withContext(Dispatchers.IO) { - path.deleteExisting() + path.withRoot().deleteExisting() } + private fun Path.withRoot() = this@LocalTargetStorage.root.resolve(this) + companion object { private val logger = LoggerFactory.getLogger(LocalTargetStorage::class.java) } diff --git a/src/main/java/org/radarbase/output/target/S3TargetStorage.kt b/src/main/java/org/radarbase/output/target/S3TargetStorage.kt index 6eba54a..2183298 100644 --- a/src/main/java/org/radarbase/output/target/S3TargetStorage.kt +++ b/src/main/java/org/radarbase/output/target/S3TargetStorage.kt @@ -16,7 +16,6 @@ package org.radarbase.output.target -import io.minio.BucketArgs import io.minio.BucketExistsArgs import io.minio.CopyObjectArgs import io.minio.CopySource @@ -26,25 +25,19 @@ import io.minio.MinioClient import io.minio.RemoveObjectArgs import io.minio.StatObjectArgs import io.minio.UploadObjectArgs -import org.radarbase.kotlin.coroutines.CacheConfig -import org.radarbase.kotlin.coroutines.CachedValue import org.radarbase.output.config.S3Config import org.radarbase.output.source.S3SourceStorage.Companion.faultTolerant import org.radarbase.output.util.bucketBuild -import org.radarbase.output.util.firstSegment import org.radarbase.output.util.objectBuild import org.slf4j.LoggerFactory import java.io.FileNotFoundException import java.io.IOException import java.io.InputStream import java.nio.file.Path -import java.util.concurrent.ConcurrentHashMap import kotlin.io.path.deleteExisting -import kotlin.time.Duration.Companion.days -import kotlin.time.Duration.Companion.hours -import kotlin.time.Duration.Companion.minutes class S3TargetStorage( + private val root: Path, config: S3Config, ) : TargetStorage { private val s3Client: MinioClient = try { @@ -54,12 +47,7 @@ class S3TargetStorage( throw ex } - private val buckets = ConcurrentHashMap>() - private val cacheConfig = CacheConfig( - refreshDuration = 1.days, - retryDuration = 1.hours, - exceptionCacheDuration = 1.minutes, - ) + private val bucket = requireNotNull(config.bucket) { "Missing bucket configuration" } init { logger.info( @@ -68,11 +56,12 @@ class S3TargetStorage( ) } - override suspend fun initialize() {} + override suspend fun initialize() { + ensureBucket() + } override suspend fun status(path: Path): TargetStorage.PathStatus? 
{ - val statRequest = StatObjectArgs.builder().objectBuild(path) - .also { it.ensureBucket() } + val statRequest = StatObjectArgs.builder().objectBuild(bucket, path.withRoot()) return try { faultTolerant { s3Client.statObject(statRequest) @@ -83,23 +72,17 @@ class S3TargetStorage( } } - private suspend fun BucketArgs.ensureBucket() = ensureBucket(bucket()) - - private suspend fun ensureBucket(bucket: String) { + private suspend fun ensureBucket() { try { - buckets.computeIfAbsent(bucket) { - CachedValue(cacheConfig) { - val bucketExistsRequest = BucketExistsArgs.builder().bucketBuild(bucket) - val isExist: Boolean = faultTolerant { s3Client.bucketExists(bucketExistsRequest) } - if (isExist) { - logger.info("Bucket $bucket already exists.") - } else { - val makeBucketRequest = MakeBucketArgs.builder().bucketBuild(bucket) - faultTolerant { s3Client.makeBucket(makeBucketRequest) } - logger.info("Bucket $bucket was created.") - } - } - }.get() + val bucketExistsRequest = BucketExistsArgs.builder().bucketBuild(bucket) + val isExist: Boolean = faultTolerant { s3Client.bucketExists(bucketExistsRequest) } + if (isExist) { + logger.info("Bucket $bucket already exists.") + } else { + val makeBucketRequest = MakeBucketArgs.builder().bucketBuild(bucket) + faultTolerant { s3Client.makeBucket(makeBucketRequest) } + logger.info("Bucket $bucket was created.") + } } catch (ex: Exception) { logger.error( "Failed to create bucket {}: {}", @@ -110,17 +93,18 @@ class S3TargetStorage( } } + private fun Path.withRoot(): Path = this@S3TargetStorage.root.resolve(this) + @Throws(IOException::class) override suspend fun newInputStream(path: Path): InputStream { - val getRequest = GetObjectArgs.builder().objectBuild(path) - .also { it.ensureBucket() } + val getRequest = GetObjectArgs.builder().objectBuild(bucket, path.withRoot()) return faultTolerant { s3Client.getObject(getRequest) } } @Throws(IOException::class) override suspend fun move(oldPath: Path, newPath: Path) { - val copyRequest = CopyObjectArgs.builder().objectBuild(newPath) { - source(CopySource.Builder().objectBuild(oldPath)) + val copyRequest = CopyObjectArgs.builder().objectBuild(bucket, newPath.withRoot()) { + source(CopySource.Builder().objectBuild(bucket, oldPath.withRoot())) } faultTolerant { s3Client.copyObject(copyRequest) } delete(oldPath) @@ -128,10 +112,9 @@ class S3TargetStorage( @Throws(IOException::class) override suspend fun store(localPath: Path, newPath: Path) { - val uploadRequest = UploadObjectArgs.builder().objectBuild(newPath) { + val uploadRequest = UploadObjectArgs.builder().objectBuild(bucket, newPath.withRoot()) { filename(localPath.toAbsolutePath().toString()) } - .also { it.ensureBucket() } faultTolerant { s3Client.uploadObject(uploadRequest) } localPath.deleteExisting() @@ -139,14 +122,11 @@ class S3TargetStorage( @Throws(IOException::class) override suspend fun delete(path: Path) { - val removeRequest = RemoveObjectArgs.builder().objectBuild(path) - .also { it.ensureBucket() } + val removeRequest = RemoveObjectArgs.builder().objectBuild(bucket, path.withRoot()) faultTolerant { s3Client.removeObject(removeRequest) } } - override suspend fun createDirectories(directory: Path) { - ensureBucket(directory.firstSegment()) - } + override suspend fun createDirectories(directory: Path?) 
= Unit companion object { private val logger = LoggerFactory.getLogger(S3TargetStorage::class.java) diff --git a/src/main/java/org/radarbase/output/target/TargetStorage.kt b/src/main/java/org/radarbase/output/target/TargetStorage.kt index d1bf212..8f57df0 100644 --- a/src/main/java/org/radarbase/output/target/TargetStorage.kt +++ b/src/main/java/org/radarbase/output/target/TargetStorage.kt @@ -59,10 +59,14 @@ interface TargetStorage { /** Create given directory, by recursively creating all parent directories. */ @Throws(IOException::class) - suspend fun createDirectories(directory: Path) + suspend fun createDirectories(directory: Path?) data class PathStatus( /** Size in bytes */ val size: Long, ) + + fun allowsPrefix(prefix: String): Boolean { + return true + } } diff --git a/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt b/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt index aa9b50b..51f7a09 100644 --- a/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt +++ b/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt @@ -3,10 +3,13 @@ package org.radarbase.output.target import org.radarbase.output.config.ResourceConfig import org.radarbase.output.config.ResourceType -class TargetStorageFactory(private val config: ResourceConfig) { - fun createTargetStorage(): TargetStorage = when (config.sourceType) { - ResourceType.S3 -> S3TargetStorage(config.s3!!) - ResourceType.LOCAL -> LocalTargetStorage(config.local!!) - ResourceType.AZURE -> AzureTargetStorage(config.azure!!) +class TargetStorageFactory { + fun createTargetStorage(defaultName: String, configs: Map): TargetStorage = + CombinedTargetStorage(configs.mapValues { (_, config) -> createTargetStorage(config) }, defaultName) + + private fun createTargetStorage(config: ResourceConfig) = when (config.sourceType) { + ResourceType.S3 -> S3TargetStorage(config.path, config.s3!!) + ResourceType.LOCAL -> LocalTargetStorage(config.path, config.local!!) + ResourceType.AZURE -> AzureTargetStorage(config.path, config.azure!!) 
} } diff --git a/src/main/java/org/radarbase/output/util/Path.kt b/src/main/java/org/radarbase/output/util/Path.kt index 7fdd8fe..3704984 100644 --- a/src/main/java/org/radarbase/output/util/Path.kt +++ b/src/main/java/org/radarbase/output/util/Path.kt @@ -9,11 +9,11 @@ fun Path.withoutFirstSegment(): String { return first().relativize(this).toString() } -fun Path.splitFirstSegment(): Pair { +fun Path.splitFirstSegment(): Pair { val bucketPath = first() return Pair( bucketPath.toString(), - bucketPath.relativize(this).toString(), + bucketPath.relativize(this), ) } @@ -28,17 +28,6 @@ inline fun > T.bucketBuild( return build() } -inline fun > T.objectBuild( - path: Path, - configure: T.() -> T = { this }, -): S { - val (bucket, key) = path.splitFirstSegment() - return bucketBuild(bucket) { - `object`(key) - configure() - } -} - inline fun > T.objectBuild( bucket: String, key: Path, diff --git a/src/main/java/org/radarbase/output/util/Timer.kt b/src/main/java/org/radarbase/output/util/Timer.kt index 0e8ce9e..4c1a200 100644 --- a/src/main/java/org/radarbase/output/util/Timer.kt +++ b/src/main/java/org/radarbase/output/util/Timer.kt @@ -104,6 +104,7 @@ object Timer { fun add(nanoTime: Long) { invocations.increment() totalTime.add(nanoTime) + @Suppress("DEPRECATION", "KotlinRedundantDiagnosticSuppress") val threadId = Thread.currentThread().id threads[threadId] = threadId } diff --git a/src/main/java/org/radarbase/output/worker/FileCache.kt b/src/main/java/org/radarbase/output/worker/FileCache.kt index a7602e6..36ac711 100644 --- a/src/main/java/org/radarbase/output/worker/FileCache.kt +++ b/src/main/java/org/radarbase/output/worker/FileCache.kt @@ -111,7 +111,8 @@ class FileCache( inputStream.reader().useSuspended { reader -> converterFactory.converterFor(writer, record, fileIsNew, reader, excludeFields) } - } catch (ex: IOException) { + } catch (ex: Exception) { + logger.error("Failed to initialize record converter for {}: {}", path, ex.toString()) withContext(Dispatchers.IO) { try { writer.close() diff --git a/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt b/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt index 4fec805..060df95 100644 --- a/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt +++ b/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt @@ -22,13 +22,14 @@ import kotlinx.coroutines.launch import kotlinx.coroutines.sync.Mutex import kotlinx.coroutines.sync.withPermit import kotlinx.coroutines.withContext +import org.radarbase.kotlin.coroutines.launchJoin import org.radarbase.output.Application.Companion.format import org.radarbase.output.FileStoreFactory import org.radarbase.output.accounting.Accountant import org.radarbase.output.accounting.AccountantImpl import org.radarbase.output.accounting.OffsetRangeSet import org.radarbase.output.config.RestructureConfig -import org.radarbase.output.source.StorageIndex +import org.radarbase.output.source.SourceStorageManager import org.radarbase.output.source.TopicFileList import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended import org.radarbase.output.util.TimeUtil.durationSince @@ -36,7 +37,6 @@ import org.slf4j.LoggerFactory import java.io.Closeable import java.io.IOException import java.nio.file.Path -import java.nio.file.Paths import java.time.Duration import java.util.concurrent.atomic.LongAdder import kotlin.coroutines.coroutineContext @@ -51,8 +51,9 @@ import kotlin.coroutines.coroutineContext */ class RadarKafkaRestructure( private val 
fileStoreFactory: FileStoreFactory, + private val sourceStorageManager: SourceStorageManager, ) : Closeable { - private val sourceStorage = fileStoreFactory.sourceStorage + private val sourceStorage = sourceStorageManager.sourceStorage private val lockManager = fileStoreFactory.remoteLockManager @@ -78,13 +79,13 @@ class RadarKafkaRestructure( val processedRecordsCount = LongAdder() @Throws(IOException::class, InterruptedException::class) - suspend fun process(directoryName: String, storageIndex: StorageIndex) { + suspend fun process() { // Get files and directories - val absolutePath = Paths.get(directoryName) + val absolutePath = sourceStorage.root logger.info("Scanning topics...") - val paths = topicPaths(storageIndex, absolutePath) + val paths = topicPaths(absolutePath) logger.info("{} topics found", paths.size) @@ -93,7 +94,7 @@ class RadarKafkaRestructure( launch { try { val (fileCount, recordCount) = fileStoreFactory.workerSemaphore.withPermit { - mapTopic(storageIndex, p) + mapTopic(p) } processedFileCount.add(fileCount) processedRecordsCount.add(recordCount) @@ -105,7 +106,7 @@ class RadarKafkaRestructure( } } - private suspend fun mapTopic(storageIndex: StorageIndex, topicPath: Path): ProcessingStatistics { + private suspend fun mapTopic(topicPath: Path): ProcessingStatistics { val topic = topicPath.fileName.toString() return try { @@ -113,7 +114,7 @@ class RadarKafkaRestructure( coroutineScope { AccountantImpl(fileStoreFactory, topic).useSuspended { accountant -> accountant.initialize(this) - startWorker(storageIndex, topic, topicPath, accountant, accountant.offsets) + startWorker(topic, topicPath, accountant, accountant.offsets) } } } @@ -128,7 +129,6 @@ class RadarKafkaRestructure( } private suspend fun startWorker( - storageIndex: StorageIndex, topic: String, topicPath: Path, accountant: Accountant, @@ -142,7 +142,7 @@ class RadarKafkaRestructure( try { val topicPaths = TopicFileList( topic, - sourceStorage.listTopicFiles(storageIndex, topic, topicPath, maxFilesPerTopic) { f -> + sourceStorageManager.listTopicFiles(topic, topicPath, maxFilesPerTopic) { f -> !seenFiles.contains(f.range) && f.lastModified.durationSince() >= minimumFileAge }, @@ -163,8 +163,8 @@ class RadarKafkaRestructure( supervisor.cancel() } - private suspend fun topicPaths(storageIndex: StorageIndex, root: Path): List = - sourceStorage.listTopics(storageIndex, root, excludeTopics) + private suspend fun topicPaths(root: Path): List = + sourceStorageManager.listTopics(root, excludeTopics) // different services start on different topics to decrease lock contention .shuffled() @@ -183,24 +183,26 @@ class RadarKafkaRestructure( } private suspend fun runRestructure(factory: FileStoreFactory) { - RadarKafkaRestructure(factory).useSuspended { restructure -> - for ((input, index) in factory.storageIndexManagers) { - index.update() - logger.info("In: {}", input) + val pathConfig = factory.pathFactory.pathConfig + + factory.sourceStorage.launchJoin { sourceStorage -> + RadarKafkaRestructure(factory, sourceStorage).useSuspended { restructure -> + sourceStorage.storageIndexManager.update() + logger.info("In: {}", sourceStorage.sourceStorage.root) logger.info( "Out: bucket {} (default {}) - path {}", - factory.pathFactory.pathConfig.bucket?.format, - factory.pathFactory.pathConfig.bucket?.defaultName, - factory.pathFactory.pathConfig.path.format, + pathConfig.target.format, + pathConfig.target.defaultName, + pathConfig.path.format, ) - restructure.process(input.toString(), index.storageIndex) - } + restructure.process() 
- logger.info( - "Processed {} files and {} records", - restructure.processedFileCount.format(), - restructure.processedRecordsCount.format(), - ) + logger.info( + "Processed {} files and {} records", + restructure.processedFileCount.format(), + restructure.processedRecordsCount.format(), + ) + } } } } diff --git a/src/test/java/org/radarbase/output/OffsetRangeFileTest.kt b/src/test/java/org/radarbase/output/OffsetRangeFileTest.kt index bc51430..e97a350 100644 --- a/src/test/java/org/radarbase/output/OffsetRangeFileTest.kt +++ b/src/test/java/org/radarbase/output/OffsetRangeFileTest.kt @@ -35,6 +35,7 @@ import org.radarbase.output.target.TargetStorage import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended import java.io.IOException import java.nio.file.Path +import java.nio.file.Paths import java.time.Instant import kotlin.io.path.createFile @@ -47,9 +48,10 @@ class OffsetRangeFileTest { @BeforeEach @Throws(IOException::class) fun setUp(@TempDir dir: Path) { - testFile = dir.resolve("test") - testFile.createFile() - targetStorage = LocalTargetStorage(LocalConfig()) + testFile = Paths.get("test") + dir.resolve(testFile).createFile() + + targetStorage = LocalTargetStorage(dir, LocalConfig()) offsetPersistence = OffsetFilePersistence(targetStorage) } diff --git a/src/test/java/org/radarbase/output/cleaner/TimestampFileCacheTest.kt b/src/test/java/org/radarbase/output/cleaner/TimestampFileCacheTest.kt index 66e4794..7431705 100644 --- a/src/test/java/org/radarbase/output/cleaner/TimestampFileCacheTest.kt +++ b/src/test/java/org/radarbase/output/cleaner/TimestampFileCacheTest.kt @@ -33,11 +33,11 @@ internal class TimestampFileCacheTest { private lateinit var csvConverter: CsvAvroConverterFactory @BeforeEach - fun setUp() { + fun setUp(@TempDir dir: Path) { csvConverter = CsvAvroConverterFactory() factory = mock { on { recordConverter } doReturn csvConverter - on { targetStorage } doReturn LocalTargetStorage(LocalConfig()) + on { targetStorage } doReturn LocalTargetStorage(dir, LocalConfig()) on { compression } doReturn IdentityCompression() } schema = Schema.Parser().parse(javaClass.resourceStream("android_phone_light.avsc")) diff --git a/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt b/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt index 6a16eab..bb67f83 100644 --- a/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt +++ b/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt @@ -35,6 +35,7 @@ import org.radarbase.output.accounting.Accountant import org.radarbase.output.accounting.OffsetRangeSet import org.radarbase.output.accounting.TopicPartition import org.radarbase.output.accounting.TopicPartitionOffsetRange +import org.radarbase.output.config.LocalConfig import org.radarbase.output.config.PathConfig import org.radarbase.output.config.ResourceConfig import org.radarbase.output.config.RestructureConfig @@ -43,10 +44,11 @@ import org.radarbase.output.config.WorkerConfig import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended import org.radarbase.output.worker.FileCacheStore import java.io.IOException -import java.nio.file.Files import java.nio.file.Path +import java.nio.file.Paths import java.time.Instant import kotlin.io.path.createDirectories +import kotlin.io.path.readBytes class FileCacheStoreTest { private val lastModified = Instant.now() @@ -54,13 +56,17 @@ class FileCacheStoreTest { @Test @Throws(IOException::class) fun appendLine(@TempDir root: Path, @TempDir tmpDir: Path) = runTest { - val f1 = 
root.resolve("f1") - val f2 = root.resolve("f2") - val f3 = root.resolve("f3") - val d4 = root.resolve("d4") - d4.createDirectories() + val bucketName = Paths.get("radar-output-storage") + + fun Path.toLocalPath() = root.resolve(bucketName.relativize(this)) + + val f1 = bucketName.resolve("f1") + val f2 = bucketName.resolve("f2") + val f3 = bucketName.resolve("f3") + val d4 = bucketName.resolve("d4") + d4.toLocalPath().createDirectories() val f4 = d4.resolve("f4.txt") - val newFile = root.resolve("newFile") + val newFile = bucketName.resolve("newFile") val simpleSchema = SchemaBuilder.record("simple").fields() .name("a").type("string").noDefault() @@ -84,11 +90,11 @@ class FileCacheStoreTest { val factory = Application( RestructureConfig( paths = PathConfig( - output = root, temp = tmpDir, ), worker = WorkerConfig(cacheSize = 2), - source = ResourceConfig(type = "s3", s3 = S3Config("endpoint", null, null)), + sources = listOf(ResourceConfig("s3", tmpDir, s3 = S3Config("http://ep", "null", "null", bucket = "Test"))), + targets = mapOf("radar-output-storage" to ResourceConfig("local", path = root, local = LocalConfig())), ), ) @@ -188,19 +194,19 @@ class FileCacheStoreTest { assertTrue(offsets.contains(offsetRange1)) launch(Dispatchers.IO) { - assertEquals("a\nsomething\nsomethingElse\nthird\n", String(Files.readAllBytes(f1))) + assertEquals("a\nsomething\nsomethingElse\nthird\n", String(f1.toLocalPath().readBytes())) } launch(Dispatchers.IO) { - assertEquals("a\nsomething\nf2\n", String(Files.readAllBytes(f2))) + assertEquals("a\nsomething\nf2\n", String(f2.toLocalPath().readBytes())) } launch(Dispatchers.IO) { - assertEquals("a\nf3\nf3\nf3\n", String(Files.readAllBytes(f3))) + assertEquals("a\nf3\nf3\nf3\n", String(f3.toLocalPath().readBytes())) } launch(Dispatchers.IO) { - assertEquals("a\nf4\n", String(Files.readAllBytes(f4))) + assertEquals("a\nf4\n", String(f4.toLocalPath().readBytes())) } launch(Dispatchers.IO) { - assertEquals("a,b\nf1,conflict\n", String(Files.readAllBytes(newFile))) + assertEquals("a,b\nf1,conflict\n", String(newFile.toLocalPath().readBytes())) } } } diff --git a/src/test/java/org/radarbase/output/data/FileCacheTest.kt b/src/test/java/org/radarbase/output/data/FileCacheTest.kt index 111fe20..0f35b4e 100644 --- a/src/test/java/org/radarbase/output/data/FileCacheTest.kt +++ b/src/test/java/org/radarbase/output/data/FileCacheTest.kt @@ -32,6 +32,7 @@ import org.mockito.kotlin.mock import org.radarbase.output.Application import org.radarbase.output.accounting.Accountant import org.radarbase.output.accounting.TopicPartition +import org.radarbase.output.config.LocalConfig import org.radarbase.output.config.PathConfig import org.radarbase.output.config.ResourceConfig import org.radarbase.output.config.RestructureConfig @@ -41,6 +42,7 @@ import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended import org.radarbase.output.worker.FileCache import java.io.IOException import java.nio.file.Path +import java.nio.file.Paths import java.time.Instant import java.util.zip.GZIPInputStream import kotlin.io.path.bufferedReader @@ -51,6 +53,7 @@ import kotlin.io.path.inputStream * Created by joris on 03/07/2017. 
*/ class FileCacheTest { + private lateinit var localPath: Path private lateinit var path: Path private lateinit var exampleRecord: Record private lateinit var tmpDir: Path @@ -65,7 +68,8 @@ class FileCacheTest { @BeforeEach @Throws(IOException::class) fun setUp(@TempDir path: Path, @TempDir tmpPath: Path) { - this.path = path.resolve("f") + this.path = Paths.get("radar-output-storage/f") + this.localPath = path.resolve("f") this.tmpDir = tmpPath val schema = SchemaBuilder.record("simple").fields() @@ -75,10 +79,10 @@ class FileCacheTest { config = RestructureConfig( paths = PathConfig( - output = path.parent, temp = tmpPath, ), - source = ResourceConfig("s3", S3Config("endpoint", null, null)), + source = ResourceConfig("s3", path = Paths.get("in"), s3 = S3Config("http://ep", "null", "null", "test")), + targets = mapOf("radar-output-storage" to ResourceConfig("local", path = path, local = LocalConfig())), ) setUp(config) @@ -105,10 +109,10 @@ class FileCacheTest { ) } - println("Gzip: " + path.fileSize()) + println("Gzip: " + localPath.fileSize()) val lines = resourceContext { - resourceChain { path.inputStream() } + resourceChain { localPath.inputStream() } .chain { GZIPInputStream(it) } .chain { it.reader() } .result @@ -139,9 +143,9 @@ class FileCacheTest { ) } - println("Gzip appended: " + path.fileSize()) + println("Gzip appended: " + localPath.fileSize()) val lines = resourceContext { - resourceChain { path.inputStream() } + resourceChain { localPath.inputStream() } .chain { GZIPInputStream(it) } .chain { it.reader() } .result @@ -161,9 +165,9 @@ class FileCacheTest { ) } - println("Plain: " + path.fileSize()) + println("Plain: " + localPath.fileSize()) - val lines = path.bufferedReader().use { it.readLines() } + val lines = localPath.bufferedReader().use { it.readLines() } assertEquals(listOf("a", "something"), lines) } @@ -186,9 +190,9 @@ class FileCacheTest { ) } - println("Plain appended: " + path.fileSize()) + println("Plain appended: " + localPath.fileSize()) - val lines = path.bufferedReader().use { it.readLines() } + val lines = localPath.bufferedReader().use { it.readLines() } assertEquals(listOf("a", "something", "something"), lines) } diff --git a/src/test/java/org/radarbase/output/path/FormattedPathFactoryTest.kt b/src/test/java/org/radarbase/output/path/FormattedPathFactoryTest.kt index 2a67819..1d3cd23 100644 --- a/src/test/java/org/radarbase/output/path/FormattedPathFactoryTest.kt +++ b/src/test/java/org/radarbase/output/path/FormattedPathFactoryTest.kt @@ -5,16 +5,35 @@ import org.hamcrest.MatcherAssert.assertThat import org.hamcrest.Matchers.instanceOf import org.hamcrest.Matchers.nullValue import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows +import org.radarbase.output.config.LocalConfig import org.radarbase.output.config.PathConfig import org.radarbase.output.config.PathFormatterConfig +import org.radarbase.output.target.CombinedTargetStorage +import org.radarbase.output.target.LocalTargetStorage import org.radarcns.kafka.ObservationKey import org.radarcns.passive.phone.PhoneLight +import java.nio.file.Paths import java.time.Instant import kotlin.reflect.jvm.jvmName internal class FormattedPathFactoryTest { + private lateinit var targetStorage: CombinedTargetStorage + + @BeforeEach + fun setUp() { + targetStorage = CombinedTargetStorage( + mapOf( + Pair( + "radar-output-storage", + LocalTargetStorage(Paths.get("/test"), LocalConfig()), + ), + ), + ) 
+ } + @Test fun testFormat() = runBlocking { val factory = createFactory( @@ -48,6 +67,7 @@ internal class FormattedPathFactoryTest { fun unparameterized() = runBlocking { val factory = FormattedPathFactory().apply { init( + targetStorage = targetStorage, extension = ".csv.gz", config = PathConfig(), ) @@ -107,6 +127,7 @@ internal class FormattedPathFactoryTest { private fun createFactory(format: String): FormattedPathFactory = FormattedPathFactory().apply { init( + targetStorage = targetStorage, extension = ".csv.gz", config = PathConfig( path = PathFormatterConfig( From b9e05c1f51c3c3e12956bc3ed04ecdea69ab0664 Mon Sep 17 00:00:00 2001 From: Joris Borgdorff Date: Tue, 24 Oct 2023 17:17:45 +0200 Subject: [PATCH 02/10] Fix integration tests --- .../output/RestructureS3IntegrationTest.kt | 13 ++++++++++--- .../org/radarbase/output/target/S3TargetStorage.kt | 13 ++++++++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt b/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt index d69bec7..33d46a3 100644 --- a/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt +++ b/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt @@ -16,10 +16,12 @@ import kotlinx.coroutines.test.runTest import kotlinx.coroutines.withContext import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Test +import org.radarbase.output.config.PathConfig import org.radarbase.output.config.PathFormatterConfig import org.radarbase.output.config.ResourceConfig import org.radarbase.output.config.RestructureConfig import org.radarbase.output.config.S3Config +import org.radarbase.output.config.TargetFormatterConfig import org.radarbase.output.config.TopicConfig import org.radarbase.output.config.WorkerConfig import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended @@ -49,9 +51,15 @@ class RestructureS3IntegrationTest { ) val config = RestructureConfig( sources = listOf(ResourceConfig("s3", path = Paths.get("in"), s3 = sourceConfig)), - targets = mapOf("radar-output-storage" to ResourceConfig("s3", path = Paths.get("output"), s3 = targetConfig)), + targets = mapOf( + "radar-output-storage" to ResourceConfig("s3", path = Paths.get("output"), s3 = targetConfig), + "radar-test-root" to ResourceConfig("s3", path = Paths.get("otherOutput"), s3 = targetConfig), + ), worker = WorkerConfig(minimumFileAge = 0L), topics = topicConfig, + paths = PathConfig( + target = TargetFormatterConfig("\${projectId}"), + ), ) val application = Application(config) val sourceClient = sourceConfig.createS3Client() @@ -92,7 +100,7 @@ class RestructureS3IntegrationTest { val firstParticipantOutput = "output/STAGING_PROJECT/1543bc93-3c17-4381-89a5-c5d6272b827c/application_server_status/CONNECTED" val secondParticipantOutput = - "output/radar-test-root/4ab9b985-6eec-4e51-9a29-f4c571c89f99/android_phone_acceleration" + "otherOutput/radar-test-root/4ab9b985-6eec-4e51-9a29-f4c571c89f99/android_phone_acceleration" val targetBucket = requireNotNull(targetConfig.bucket) @@ -119,7 +127,6 @@ class RestructureS3IntegrationTest { return@coroutineScope withContext(Dispatchers.IO) { targetClient.listObjects( ListObjectsArgs.Builder().bucketBuild(targetBucket) { - prefix("output") recursive(true) useUrlEncodingType(false) }, diff --git a/src/main/java/org/radarbase/output/target/S3TargetStorage.kt b/src/main/java/org/radarbase/output/target/S3TargetStorage.kt index 
2183298..8366dc7 100644 --- a/src/main/java/org/radarbase/output/target/S3TargetStorage.kt +++ b/src/main/java/org/radarbase/output/target/S3TargetStorage.kt @@ -25,6 +25,7 @@ import io.minio.MinioClient import io.minio.RemoveObjectArgs import io.minio.StatObjectArgs import io.minio.UploadObjectArgs +import io.minio.errors.ErrorResponseException import org.radarbase.output.config.S3Config import org.radarbase.output.source.S3SourceStorage.Companion.faultTolerant import org.radarbase.output.util.bucketBuild @@ -80,7 +81,17 @@ class S3TargetStorage( logger.info("Bucket $bucket already exists.") } else { val makeBucketRequest = MakeBucketArgs.builder().bucketBuild(bucket) - faultTolerant { s3Client.makeBucket(makeBucketRequest) } + faultTolerant { + try { + s3Client.makeBucket(makeBucketRequest) + } catch (ex: ErrorResponseException) { + if (ex.errorResponse().code() == "BucketAlreadyOwnedByYou") { + logger.warn("Bucket {} was already created while the request was busy", bucket) + } else { + throw ex + } + } + } logger.info("Bucket $bucket was created.") } } catch (ex: Exception) { From 60a4280f05f72f8e72e0782d392e1f2dfb68a6d9 Mon Sep 17 00:00:00 2001 From: Joris Borgdorff Date: Wed, 25 Oct 2023 13:48:10 +0200 Subject: [PATCH 03/10] Update sane defaults --- .../output/config/TargetFormatterConfig.kt | 2 +- .../output/path/FormattedPathFactory.kt | 23 +++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt b/src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt index 7cb983f..546d055 100644 --- a/src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt +++ b/src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt @@ -3,7 +3,7 @@ package org.radarbase.output.config /** Configuration on how to format the target storage name to be used. */ data class TargetFormatterConfig( /** Format string. May include any variables computed by the configured plugins. */ - val format: String = "radar-output-storage", + val format: String? = null, /** * Spaces separated list of plugins to use for formatting the format string. May include * custom class names. diff --git a/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt b/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt index 31477cc..ac5c2c3 100644 --- a/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt +++ b/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt @@ -26,7 +26,7 @@ import org.slf4j.LoggerFactory open class FormattedPathFactory : RecordPathFactory() { private lateinit var pathFormatter: PathFormatter private var topicFormatters: Map = emptyMap() - private lateinit var targetFormatter: PathFormatter + private var targetFormatter: PathFormatter? 
= null private lateinit var disabledBucketRegexes: List private lateinit var defaultBucketName: String @@ -48,11 +48,11 @@ open class FormattedPathFactory : RecordPathFactory() { } override suspend fun target(pathParameters: PathFormatParameters): String { - val format = targetFormatter.format(pathParameters) - return if (disabledBucketRegexes.any { it.matches(format) }) { - defaultBucketName - } else { + val format = targetFormatter?.format(pathParameters) + return if (format != null && disabledBucketRegexes.none { it.matches(format) }) { format + } else { + defaultBucketName } } @@ -82,11 +82,14 @@ open class FormattedPathFactory : RecordPathFactory() { plugins.toPathFormatterPlugins(properties), ) - private fun TargetFormatterConfig.toTargetFormatter(): PathFormatter = PathFormatter( - format, - plugins.toPathFormatterPlugins(properties), - checkMinimalDistinction = false, - ) + private fun TargetFormatterConfig.toTargetFormatter(): PathFormatter? { + format ?: return null + return PathFormatter( + format, + plugins.toPathFormatterPlugins(properties), + checkMinimalDistinction = false, + ) + } private val logger = LoggerFactory.getLogger(FormattedPathFactory::class.java) } From 85fa4e22ad988e312a9cce842cf27c86d51b9c85 Mon Sep 17 00:00:00 2001 From: Joris Borgdorff Date: Wed, 25 Oct 2023 13:51:05 +0200 Subject: [PATCH 04/10] Small naming changes --- src/main/java/org/radarbase/output/Application.kt | 2 +- src/main/java/org/radarbase/output/config/PathConfig.kt | 2 +- .../java/org/radarbase/output/config/RestructureConfig.kt | 4 ++-- .../org/radarbase/output/config/TargetFormatterConfig.kt | 4 ++-- .../org/radarbase/output/path/FormattedPathFactory.kt | 2 +- .../org/radarbase/output/target/CombinedTargetStorage.kt | 8 ++++---- .../org/radarbase/output/target/TargetStorageFactory.kt | 4 ++-- .../org/radarbase/output/worker/RadarKafkaRestructure.kt | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/radarbase/output/Application.kt b/src/main/java/org/radarbase/output/Application.kt index 861dcaa..9bfafac 100644 --- a/src/main/java/org/radarbase/output/Application.kt +++ b/src/main/java/org/radarbase/output/Application.kt @@ -70,7 +70,7 @@ class Application( } override val targetStorage: TargetStorage = TargetStorageFactory() - .createTargetStorage(config.paths.target.defaultName, config.consolidatedTargets) + .createTargetStorage(config.paths.target.default, config.consolidatedTargets) override val pathFactory: RecordPathFactory = config.paths.createFactory( diff --git a/src/main/java/org/radarbase/output/config/PathConfig.kt b/src/main/java/org/radarbase/output/config/PathConfig.kt index b9de1f8..271fa4b 100644 --- a/src/main/java/org/radarbase/output/config/PathConfig.kt +++ b/src/main/java/org/radarbase/output/config/PathConfig.kt @@ -26,7 +26,7 @@ data class PathConfig( ): RecordPathFactory { val pathFactory = factory.constructClass() - require(targetStorage.allowsPrefix(target.defaultName)) { "Default bucket ${target.defaultName} is not specified as a target storage" } + require(targetStorage.allowsPrefix(target.default)) { "Default bucket ${target.default} is not specified as a target storage" } pathFactory.init( targetStorage = targetStorage, diff --git a/src/main/java/org/radarbase/output/config/RestructureConfig.kt b/src/main/java/org/radarbase/output/config/RestructureConfig.kt index 68928e6..de326a1 100644 --- a/src/main/java/org/radarbase/output/config/RestructureConfig.kt +++ b/src/main/java/org/radarbase/output/config/RestructureConfig.kt 
@@ -41,8 +41,8 @@ data class RestructureConfig( put(name, target) } else { val bucketConfig = paths.target - require(bucketConfig.defaultName !in this) { "Deprecated target storage does not have a proper name." } - put(bucketConfig.defaultName, target) + require(bucketConfig.default !in this) { "Deprecated target storage does not have a proper name." } + put(bucketConfig.default, target) } } } diff --git a/src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt b/src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt index 546d055..6aa620a 100644 --- a/src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt +++ b/src/main/java/org/radarbase/output/config/TargetFormatterConfig.kt @@ -9,13 +9,13 @@ data class TargetFormatterConfig( * custom class names. */ val plugins: String = "fixed time key value", - /** List of regexes to disable the formatted string for and use [defaultName] instead. */ + /** List of regexes to disable the formatted string for and use [default] instead. */ val disabledFormats: List = emptyList(), /** * Default name to use for the output storage if the output format is disabled via * [disabledFormats]. */ - val defaultName: String = "radar-output-storage", + val default: String = "radar-output-storage", /** Additional plugin properties. */ val properties: Map = emptyMap(), ) diff --git a/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt b/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt index ac5c2c3..6e3529c 100644 --- a/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt +++ b/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt @@ -42,7 +42,7 @@ open class FormattedPathFactory : RecordPathFactory() { disabledBucketRegexes = pathConfig.target .disabledFormats .map { it.toRegex(RegexOption.IGNORE_CASE) } - defaultBucketName = pathConfig.target.defaultName + defaultBucketName = pathConfig.target.default logger.info("Formatting path with {}", pathFormatter) } diff --git a/src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt b/src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt index 7b80860..87ee433 100644 --- a/src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt +++ b/src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt @@ -8,12 +8,12 @@ import java.nio.file.Path class CombinedTargetStorage( private val delegates: Map, - defaultName: String? = null, + default: String? 
= null, ) : TargetStorage { - private val defaultDelegate = if (defaultName != null) { - requireNotNull(delegates[defaultName]) { "Default target storage $defaultName not found in ${delegates.keys}" } + private val defaultDelegate = if (default != null) { + requireNotNull(delegates[default]) { "Default target storage $default not found in ${delegates.keys}" } } else { - require(delegates.size == 1) { "Must provide a default taret storage if more than one target storage is defined: ${delegates.keys}" } + require(delegates.size == 1) { "Must provide a default target storage if more than one target storage is defined: ${delegates.keys}" } delegates.values.first() } diff --git a/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt b/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt index 51f7a09..00d891e 100644 --- a/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt +++ b/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt @@ -4,8 +4,8 @@ import org.radarbase.output.config.ResourceConfig import org.radarbase.output.config.ResourceType class TargetStorageFactory { - fun createTargetStorage(defaultName: String, configs: Map): TargetStorage = - CombinedTargetStorage(configs.mapValues { (_, config) -> createTargetStorage(config) }, defaultName) + fun createTargetStorage(default: String, configs: Map): TargetStorage = + CombinedTargetStorage(configs.mapValues { (_, config) -> createTargetStorage(config) }, default) private fun createTargetStorage(config: ResourceConfig) = when (config.sourceType) { ResourceType.S3 -> S3TargetStorage(config.path, config.s3!!) diff --git a/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt b/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt index 060df95..f054183 100644 --- a/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt +++ b/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt @@ -192,7 +192,7 @@ class RadarKafkaRestructure( logger.info( "Out: bucket {} (default {}) - path {}", pathConfig.target.format, - pathConfig.target.defaultName, + pathConfig.target.default, pathConfig.path.format, ) restructure.process() From 4329b4a4932fbc032a36c07877bb4f39121a76bc Mon Sep 17 00:00:00 2001 From: Joris Borgdorff Date: Thu, 26 Oct 2023 11:42:43 +0200 Subject: [PATCH 05/10] Explicitly map TargetPath instead of embedding it in the path --- .../java/org/radarbase/output/Application.kt | 8 +-- .../org/radarbase/output/FileStoreFactory.kt | 4 +- .../cleaner/TimestampExtractionCheck.kt | 12 ++-- .../output/cleaner/TimestampFileCache.kt | 11 +-- .../output/cleaner/TimestampFileCacheStore.kt | 13 ++-- .../org/radarbase/output/config/PathConfig.kt | 8 +-- .../output/path/FormattedPathFactory.kt | 6 +- .../output/path/RecordPathFactory.kt | 12 ++-- .../org/radarbase/output/path/TargetPath.kt | 26 +++++++ .../output/target/AzureTargetStorage.kt | 9 ++- .../output/target/CombinedTargetStorage.kt | 72 ------------------- .../output/target/LocalTargetStorage.kt | 10 +-- .../output/target/S3TargetStorage.kt | 8 ++- .../radarbase/output/target/TargetManager.kt | 23 ++++++ .../radarbase/output/target/TargetStorage.kt | 8 +-- .../output/target/TargetStorageFactory.kt | 4 +- .../org/radarbase/output/worker/FileCache.kt | 6 +- .../radarbase/output/worker/FileCacheStore.kt | 43 +++++------ .../output/cleaner/TimestampFileCacheTest.kt | 29 ++++---- .../output/data/FileCacheStoreTest.kt | 19 +++-- .../radarbase/output/data/FileCacheTest.kt | 10 +-- 
.../output/path/FormattedPathFactoryTest.kt | 23 +++--- 22 files changed, 177 insertions(+), 187 deletions(-) create mode 100644 src/main/java/org/radarbase/output/path/TargetPath.kt delete mode 100644 src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt create mode 100644 src/main/java/org/radarbase/output/target/TargetManager.kt diff --git a/src/main/java/org/radarbase/output/Application.kt b/src/main/java/org/radarbase/output/Application.kt index 9bfafac..93a9ddd 100644 --- a/src/main/java/org/radarbase/output/Application.kt +++ b/src/main/java/org/radarbase/output/Application.kt @@ -37,7 +37,7 @@ import org.radarbase.output.path.RecordPathFactory import org.radarbase.output.source.InMemoryStorageIndex import org.radarbase.output.source.SourceStorageFactory import org.radarbase.output.source.SourceStorageManager -import org.radarbase.output.target.TargetStorage +import org.radarbase.output.target.TargetManager import org.radarbase.output.target.TargetStorageFactory import org.radarbase.output.util.Timer import org.radarbase.output.worker.FileCacheStore @@ -69,12 +69,12 @@ class Application( SourceStorageManager(storage, InMemoryStorageIndex(), sourceConfig.index) } - override val targetStorage: TargetStorage = TargetStorageFactory() + override val targetManager: TargetManager = TargetStorageFactory() .createTargetStorage(config.paths.target.default, config.consolidatedTargets) override val pathFactory: RecordPathFactory = config.paths.createFactory( - targetStorage, + targetManager, recordConverter.extension + compression.extension, config.topics, ) @@ -121,7 +121,7 @@ class Application( } runBlocking { - targetStorage.initialize() + targetManager.initialize() } if (config.service.enable) { diff --git a/src/main/java/org/radarbase/output/FileStoreFactory.kt b/src/main/java/org/radarbase/output/FileStoreFactory.kt index aa04c16..7deec1f 100644 --- a/src/main/java/org/radarbase/output/FileStoreFactory.kt +++ b/src/main/java/org/radarbase/output/FileStoreFactory.kt @@ -26,14 +26,14 @@ import org.radarbase.output.config.RestructureConfig import org.radarbase.output.format.RecordConverterFactory import org.radarbase.output.path.RecordPathFactory import org.radarbase.output.source.SourceStorageManager -import org.radarbase.output.target.TargetStorage +import org.radarbase.output.target.TargetManager import org.radarbase.output.worker.FileCacheStore import java.io.IOException /** Factory for all factory classes and settings. 
*/ interface FileStoreFactory { val sourceStorage: List - val targetStorage: TargetStorage + val targetManager: TargetManager val pathFactory: RecordPathFactory val compression: Compression val recordConverter: RecordConverterFactory diff --git a/src/main/java/org/radarbase/output/cleaner/TimestampExtractionCheck.kt b/src/main/java/org/radarbase/output/cleaner/TimestampExtractionCheck.kt index 7e0c4d6..97918df 100644 --- a/src/main/java/org/radarbase/output/cleaner/TimestampExtractionCheck.kt +++ b/src/main/java/org/radarbase/output/cleaner/TimestampExtractionCheck.kt @@ -61,18 +61,18 @@ class TimestampExtractionCheck( var suffix = 0 do { - val path = pathFactory.getRecordPath( + val targetPath = pathFactory.getRecordPath( topicFile.topic, record, suffix, ) try { - when (cacheStore.contains(path, record)) { + when (cacheStore.contains(targetPath, record)) { TimestampFileCacheStore.FindResult.FILE_NOT_FOUND -> { logger.warn( "Target {} for record of {} (offset {}) has not been created yet.", - path, + targetPath, topicFile.path, offset, ) @@ -81,7 +81,7 @@ class TimestampExtractionCheck( TimestampFileCacheStore.FindResult.NOT_FOUND -> { logger.warn( "Target {} does not contain record of {} (offset {})", - path, + targetPath, topicFile.path, offset, ) @@ -91,7 +91,7 @@ class TimestampExtractionCheck( TimestampFileCacheStore.FindResult.BAD_SCHEMA -> { logger.debug( "Schema of {} does not match schema of {} (offset {})", - path, + targetPath, topicFile.path, offset, ) @@ -99,7 +99,7 @@ class TimestampExtractionCheck( } } } catch (ex: IOException) { - logger.error("Failed to read target file {} for checking data integrity", path, ex) + logger.error("Failed to read target file {} for checking data integrity", targetPath, ex) return false } } while (true) diff --git a/src/main/java/org/radarbase/output/cleaner/TimestampFileCache.kt b/src/main/java/org/radarbase/output/cleaner/TimestampFileCache.kt index b3e8f46..d2de6c3 100644 --- a/src/main/java/org/radarbase/output/cleaner/TimestampFileCache.kt +++ b/src/main/java/org/radarbase/output/cleaner/TimestampFileCache.kt @@ -19,28 +19,29 @@ package org.radarbase.output.cleaner import org.apache.avro.generic.GenericRecord import org.radarbase.output.FileStoreFactory import org.radarbase.output.format.RecordConverterFactory +import org.radarbase.output.path.TargetPath import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended import org.radarbase.output.util.TimeUtil.getDate import org.radarbase.output.util.TimeUtil.toDouble import java.io.FileNotFoundException -import java.nio.file.Path /** Keeps path handles of a path. */ class TimestampFileCache( private val factory: FileStoreFactory, /** File that the cache is maintaining. */ - val path: Path, + val targetPath: TargetPath, ) : Comparable { + private val targetStorage = factory.targetManager[targetPath] + private val path = targetPath.path private val converterFactory: RecordConverterFactory = factory.recordConverter private var lastUse: Long = 0 private var header: Array? 
= null private lateinit var times: Set suspend fun initialize() { - val targetStorage = factory.targetStorage targetStorage.status(path) ?.takeIf { it.size > 0 } - ?: throw FileNotFoundException() + ?: throw FileNotFoundException("Path $path not found on target $targetStorage") val readDates = targetStorage.newInputStream(path).useSuspended { converterFactory.readTimeSeconds(it, factory.compression) @@ -75,6 +76,6 @@ class TimestampFileCache( override fun compareTo(other: TimestampFileCache): Int = comparator.compare(this, other) companion object { - val comparator = compareBy(TimestampFileCache::lastUse, TimestampFileCache::path) + val comparator = compareBy(TimestampFileCache::lastUse, TimestampFileCache::targetPath) } } diff --git a/src/main/java/org/radarbase/output/cleaner/TimestampFileCacheStore.kt b/src/main/java/org/radarbase/output/cleaner/TimestampFileCacheStore.kt index 9674e01..9acddde 100644 --- a/src/main/java/org/radarbase/output/cleaner/TimestampFileCacheStore.kt +++ b/src/main/java/org/radarbase/output/cleaner/TimestampFileCacheStore.kt @@ -18,6 +18,7 @@ package org.radarbase.output.cleaner import org.apache.avro.generic.GenericRecord import org.radarbase.output.FileStoreFactory +import org.radarbase.output.path.TargetPath import org.radarbase.output.util.Timer.time import java.io.FileNotFoundException import java.io.IOException @@ -29,7 +30,7 @@ import java.util.concurrent.ConcurrentHashMap * the longest ago cache are evicted from cache. */ class TimestampFileCacheStore(private val factory: FileStoreFactory) { - private val caches: MutableMap + private val caches: MutableMap private val maxCacheSize: Int private val schemasAdded: MutableMap @@ -50,14 +51,14 @@ class TimestampFileCacheStore(private val factory: FileStoreFactory) { * @throws IOException when failing to open a file or writing to it. 
*/ @Throws(IOException::class) - suspend fun contains(path: Path, record: GenericRecord): FindResult { + suspend fun contains(targetPath: TargetPath, record: GenericRecord): FindResult { return try { - val fileCache = caches[path] + val fileCache = caches[targetPath] ?: time("cleaner.cache") { ensureCapacity() - TimestampFileCache(factory, path).apply { + TimestampFileCache(factory, targetPath).apply { initialize() - caches[path] = this + caches[targetPath] = this } } @@ -82,7 +83,7 @@ class TimestampFileCacheStore(private val factory: FileStoreFactory) { val cacheList = ArrayList(caches.values) .sorted() for (i in 0 until cacheList.size / 2) { - caches.remove(cacheList[i].path) + caches.remove(cacheList[i].targetPath) } } } diff --git a/src/main/java/org/radarbase/output/config/PathConfig.kt b/src/main/java/org/radarbase/output/config/PathConfig.kt index 271fa4b..e853387 100644 --- a/src/main/java/org/radarbase/output/config/PathConfig.kt +++ b/src/main/java/org/radarbase/output/config/PathConfig.kt @@ -2,7 +2,7 @@ package org.radarbase.output.config import org.radarbase.output.path.FormattedPathFactory import org.radarbase.output.path.RecordPathFactory -import org.radarbase.output.target.TargetStorage +import org.radarbase.output.target.TargetManager import java.nio.file.Path import kotlin.io.path.createTempDirectory import kotlin.reflect.jvm.jvmName @@ -20,16 +20,16 @@ data class PathConfig( val target: TargetFormatterConfig = TargetFormatterConfig(), ) : PluginConfig { fun createFactory( - targetStorage: TargetStorage, + targetStorage: TargetManager, extension: String, topics: Map, ): RecordPathFactory { val pathFactory = factory.constructClass() - require(targetStorage.allowsPrefix(target.default)) { "Default bucket ${target.default} is not specified as a target storage" } + require(target.default in targetStorage) { "Default bucket ${target.default} is not specified as a target storage" } pathFactory.init( - targetStorage = targetStorage, + targetManager = targetStorage, extension = extension, config = this, topics = topics, diff --git a/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt b/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt index 6e3529c..db0d030 100644 --- a/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt +++ b/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt @@ -20,7 +20,7 @@ import org.radarbase.output.config.PathConfig import org.radarbase.output.config.PathFormatterConfig import org.radarbase.output.config.TargetFormatterConfig import org.radarbase.output.config.TopicConfig -import org.radarbase.output.target.TargetStorage +import org.radarbase.output.target.TargetManager import org.slf4j.LoggerFactory open class FormattedPathFactory : RecordPathFactory() { @@ -31,12 +31,12 @@ open class FormattedPathFactory : RecordPathFactory() { private lateinit var defaultBucketName: String override fun init( - targetStorage: TargetStorage, + targetManager: TargetManager, extension: String, config: PathConfig, topics: Map, ) { - super.init(targetStorage, extension, config, topics) + super.init(targetManager, extension, config, topics) pathFormatter = pathConfig.path.toPathFormatter() targetFormatter = pathConfig.target.toTargetFormatter() disabledBucketRegexes = pathConfig.target diff --git a/src/main/java/org/radarbase/output/path/RecordPathFactory.kt b/src/main/java/org/radarbase/output/path/RecordPathFactory.kt index 9253731..6db8bdd 100644 --- a/src/main/java/org/radarbase/output/path/RecordPathFactory.kt +++ 
b/src/main/java/org/radarbase/output/path/RecordPathFactory.kt @@ -23,9 +23,8 @@ import org.apache.avro.generic.GenericRecord import org.apache.avro.generic.GenericRecordBuilder import org.radarbase.output.config.PathConfig import org.radarbase.output.config.TopicConfig -import org.radarbase.output.target.TargetStorage +import org.radarbase.output.target.TargetManager import org.radarbase.output.util.TimeUtil -import java.nio.file.Path import java.nio.file.Paths import java.util.regex.Pattern @@ -34,7 +33,7 @@ abstract class RecordPathFactory { private set open fun init( - targetStorage: TargetStorage, + targetManager: TargetManager, extension: String, config: PathConfig, topics: Map = emptyMap(), @@ -62,7 +61,7 @@ abstract class RecordPathFactory { topic: String, record: GenericRecord, attempt: Int, - ): Path { + ): TargetPath { val keyField = requireNotNull(record.get("key")) { "Failed to process $record; no key present" } val valueField = requireNotNull(record.get("value") as? GenericRecord) { "Failed to process $record; no value present" } @@ -86,10 +85,9 @@ abstract class RecordPathFactory { return coroutineScope { val targetJob = async { target(params) } - val pathJob = async { relativePath(params) } + val pathJob = async { Paths.get(relativePath(params)) } - Paths.get(targetJob.await()) - .resolve(pathJob.await()) + TargetPath(targetJob.await(), pathJob.await()) } } diff --git a/src/main/java/org/radarbase/output/path/TargetPath.kt b/src/main/java/org/radarbase/output/path/TargetPath.kt new file mode 100644 index 0000000..9dec383 --- /dev/null +++ b/src/main/java/org/radarbase/output/path/TargetPath.kt @@ -0,0 +1,26 @@ +package org.radarbase.output.path + +import java.nio.file.Path +import java.nio.file.Paths + +data class TargetPath( + val target: String, + val path: Path, +) : Comparable { + + override fun compareTo(other: TargetPath): Int = comparator.compare(this, other) + + override fun toString(): String = "$target:$path" + + fun navigate(block: (Path) -> Path): TargetPath = copy(path = block(path)) + + fun toLocalPath(root: Path): Path = root.resolve(path) + + companion object { + private val comparator = compareBy(TargetPath::target, TargetPath::path) + } +} + +fun Path.toTargetPath(target: String) = TargetPath(target, this) + +fun String.toTargetPath(target: String) = Paths.get(this).toTargetPath(target) diff --git a/src/main/java/org/radarbase/output/target/AzureTargetStorage.kt b/src/main/java/org/radarbase/output/target/AzureTargetStorage.kt index 094ed64..8b7a3fb 100644 --- a/src/main/java/org/radarbase/output/target/AzureTargetStorage.kt +++ b/src/main/java/org/radarbase/output/target/AzureTargetStorage.kt @@ -37,7 +37,7 @@ import kotlin.time.Duration.Companion.hours import kotlin.time.Duration.Companion.minutes class AzureTargetStorage( - private val root: Path, + override val baseDir: Path, private val config: AzureConfig, ) : TargetStorage { private lateinit var serviceClient: BlobServiceClient @@ -129,12 +129,15 @@ class AzureTargetStorage( blob(path).delete() } - override suspend fun createDirectories(directory: Path?) 
= Unit + override suspend fun createDirectories(directory: Path) = Unit private suspend fun blob(path: Path): BlobClient { - return client(container).getBlobClient(root.resolve(path).toString()) + return client(container).getBlobClient(baseDir.resolve(path).toString()) } + override fun toString(): String = + "AzureTargetStorage(baseDir=$baseDir, container='$container')" + companion object { private val logger = LoggerFactory.getLogger(AzureTargetStorage::class.java) } diff --git a/src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt b/src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt deleted file mode 100644 index 87ee433..0000000 --- a/src/main/java/org/radarbase/output/target/CombinedTargetStorage.kt +++ /dev/null @@ -1,72 +0,0 @@ -package org.radarbase.output.target - -import kotlinx.coroutines.coroutineScope -import org.radarbase.kotlin.coroutines.launchJoin -import org.radarbase.output.target.TargetStorage.PathStatus -import java.io.InputStream -import java.nio.file.Path - -class CombinedTargetStorage( - private val delegates: Map, - default: String? = null, -) : TargetStorage { - private val defaultDelegate = if (default != null) { - requireNotNull(delegates[default]) { "Default target storage $default not found in ${delegates.keys}" } - } else { - require(delegates.size == 1) { "Must provide a default target storage if more than one target storage is defined: ${delegates.keys}" } - delegates.values.first() - } - - override fun allowsPrefix(prefix: String): Boolean = prefix in delegates - - override suspend fun initialize() = coroutineScope { - delegates.values.launchJoin { it.initialize() } - } - - override suspend fun status(path: Path): PathStatus? = withDelegate(path) { status(it) } - - override suspend fun newInputStream(path: Path): InputStream = withDelegate(path) { newInputStream(it) } - - override suspend fun move(oldPath: Path, newPath: Path) { - val (oldDelegate, oldDelegatePath) = delegate(oldPath) - val (newDelegate, newDelegatePath) = delegate(newPath) - - require(oldDelegate == newDelegate) { "Cannot move files between storage systems ($oldPath to $newPath)" } - return oldDelegate.move(oldDelegatePath, newDelegatePath) - } - - override suspend fun store(localPath: Path, newPath: Path) = withDelegate(newPath) { - store(localPath, it) - } - - override suspend fun delete(path: Path) = withDelegate(path) { delete(it) } - - override suspend fun createDirectories(directory: Path?) 
{ - if (directory != null) { - val delegateName = directory.firstOrNull() ?: return - val delegate = delegates[delegateName.toString()] ?: return - - if (directory.count() == 1) { - delegate.createDirectories(null) - } else { - delegate.createDirectories(delegateName.relativize(directory)) - } - } - } - - private fun delegate(path: Path): Pair { - val targetName = requireNotNull(path.firstOrNull()) { "Target storage not found in path '$this'" } - val delegate = delegates[targetName.toString()] ?: defaultDelegate - val delegatePath = try { - targetName.relativize(path) - } catch (ex: IllegalArgumentException) { - throw IllegalArgumentException("Failed to split path $path into a relative path", ex) - } - return Pair(delegate, delegatePath) - } - - private inline fun withDelegate(path: Path, block: TargetStorage.(Path) -> T): T { - val (delegate, delegatePath) = delegate(path) - return delegate.block(delegatePath) - } -} diff --git a/src/main/java/org/radarbase/output/target/LocalTargetStorage.kt b/src/main/java/org/radarbase/output/target/LocalTargetStorage.kt index d01d615..933e102 100644 --- a/src/main/java/org/radarbase/output/target/LocalTargetStorage.kt +++ b/src/main/java/org/radarbase/output/target/LocalTargetStorage.kt @@ -37,7 +37,7 @@ import kotlin.io.path.setAttribute import kotlin.io.path.setPosixFilePermissions class LocalTargetStorage( - private val root: Path, + override val baseDir: Path, private val config: LocalConfig, ) : TargetStorage { init { @@ -87,8 +87,8 @@ class LocalTargetStorage( doMove(localPath, newPath.withRoot()) } - override suspend fun createDirectories(directory: Path?) = withContext(Dispatchers.IO) { - val dir = directory?.withRoot() ?: root + override suspend fun createDirectories(directory: Path) = withContext(Dispatchers.IO) { + val dir = directory.withRoot() dir.createDirectories( PosixFilePermissions.asFileAttribute( PosixFilePermissions.fromString("rwxr-xr-x"), @@ -111,7 +111,9 @@ class LocalTargetStorage( path.withRoot().deleteExisting() } - private fun Path.withRoot() = this@LocalTargetStorage.root.resolve(this) + private fun Path.withRoot() = this@LocalTargetStorage.baseDir.resolve(this) + + override fun toString(): String = "LocalTargetStorage(baseDir=$baseDir)" companion object { private val logger = LoggerFactory.getLogger(LocalTargetStorage::class.java) diff --git a/src/main/java/org/radarbase/output/target/S3TargetStorage.kt b/src/main/java/org/radarbase/output/target/S3TargetStorage.kt index 8366dc7..52e480a 100644 --- a/src/main/java/org/radarbase/output/target/S3TargetStorage.kt +++ b/src/main/java/org/radarbase/output/target/S3TargetStorage.kt @@ -38,7 +38,7 @@ import java.nio.file.Path import kotlin.io.path.deleteExisting class S3TargetStorage( - private val root: Path, + override val baseDir: Path, config: S3Config, ) : TargetStorage { private val s3Client: MinioClient = try { @@ -104,7 +104,7 @@ class S3TargetStorage( } } - private fun Path.withRoot(): Path = this@S3TargetStorage.root.resolve(this) + private fun Path.withRoot(): Path = this@S3TargetStorage.baseDir.resolve(this) @Throws(IOException::class) override suspend fun newInputStream(path: Path): InputStream { @@ -137,7 +137,9 @@ class S3TargetStorage( faultTolerant { s3Client.removeObject(removeRequest) } } - override suspend fun createDirectories(directory: Path?) 
= Unit + override suspend fun createDirectories(directory: Path) = Unit + + override fun toString(): String = "S3TargetStorage(baseDir=$baseDir, bucket='$bucket')" companion object { private val logger = LoggerFactory.getLogger(S3TargetStorage::class.java) diff --git a/src/main/java/org/radarbase/output/target/TargetManager.kt b/src/main/java/org/radarbase/output/target/TargetManager.kt new file mode 100644 index 0000000..32f49bd --- /dev/null +++ b/src/main/java/org/radarbase/output/target/TargetManager.kt @@ -0,0 +1,23 @@ +package org.radarbase.output.target + +import kotlinx.coroutines.coroutineScope +import org.radarbase.kotlin.coroutines.launchJoin +import org.radarbase.output.path.TargetPath + +class TargetManager( + private val delegates: Map<String, TargetStorage>, + default: String, +) { + constructor(name: String, targetStorage: TargetStorage) : this(mapOf(name to targetStorage), name) + + private val defaultDelegate = + requireNotNull(delegates[default]) { "Default target storage $default not found in ${delegates.keys}" } + + operator fun contains(target: String) = target in delegates + + operator fun get(targetPath: TargetPath) = delegates[targetPath.target] ?: defaultDelegate + + suspend fun initialize() = coroutineScope { + delegates.values.launchJoin { it.initialize() } + } +} diff --git a/src/main/java/org/radarbase/output/target/TargetStorage.kt b/src/main/java/org/radarbase/output/target/TargetStorage.kt index 8f57df0..2622482 100644 --- a/src/main/java/org/radarbase/output/target/TargetStorage.kt +++ b/src/main/java/org/radarbase/output/target/TargetStorage.kt @@ -22,6 +22,8 @@ import java.io.InputStream import java.nio.file.Path interface TargetStorage { + val baseDir: Path + suspend fun initialize() /** @@ -59,14 +61,10 @@ interface TargetStorage { /** Create given directory, by recursively creating all parent directories. */ @Throws(IOException::class) - suspend fun createDirectories(directory: Path?) + suspend fun createDirectories(directory: Path) data class PathStatus( /** Size in bytes */ val size: Long, ) - - fun allowsPrefix(prefix: String): Boolean { - return true - } } diff --git a/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt b/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt index 00d891e..d3c5416 100644 --- a/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt +++ b/src/main/java/org/radarbase/output/target/TargetStorageFactory.kt @@ -4,8 +4,8 @@ import org.radarbase.output.config.ResourceConfig import org.radarbase.output.config.ResourceType class TargetStorageFactory { - fun createTargetStorage(default: String, configs: Map<String, ResourceConfig>): TargetStorage = - CombinedTargetStorage(configs.mapValues { (_, config) -> createTargetStorage(config) }, default) + fun createTargetStorage(default: String, configs: Map<String, ResourceConfig>): TargetManager = + TargetManager(configs.mapValues { (_, config) -> createTargetStorage(config) }, default) private fun createTargetStorage(config: ResourceConfig) = when (config.sourceType) { ResourceType.S3 -> S3TargetStorage(config.path, config.s3!!)
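For reference, a minimal usage sketch of the TargetManager and TargetPath types introduced in this patch, showing how a named target storage is resolved for a record path. It only relies on the constructors and operators visible in the hunks above; the storage name "radar-output-storage", the base directory, and the file paths are illustrative placeholders rather than values taken from any real configuration.

import org.radarbase.output.config.LocalConfig
import org.radarbase.output.path.toTargetPath
import org.radarbase.output.target.LocalTargetStorage
import org.radarbase.output.target.TargetManager
import java.nio.file.Paths

suspend fun storeExample() {
    // Single named delegate; with multiple delegates, the default name acts as the fallback.
    val manager = TargetManager(
        "radar-output-storage",
        LocalTargetStorage(Paths.get("/data/output"), LocalConfig()),
    )
    manager.initialize()

    // A TargetPath couples the storage name with a path relative to that storage's baseDir.
    val targetPath = "project/user/topic/topic+0+1+100.csv.gz".toTargetPath("radar-output-storage")

    // Resolve the delegate for this target and write a locally prepared file to it.
    val storage = manager[targetPath]
    targetPath.path.parent?.let { storage.createDirectories(it) }
    storage.store(Paths.get("/tmp/topic+0+1+100.csv.gz"), targetPath.path)
}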
diff --git a/src/main/java/org/radarbase/output/worker/FileCache.kt b/src/main/java/org/radarbase/output/worker/FileCache.kt index 36ac711..b2c1b88 100644 --- a/src/main/java/org/radarbase/output/worker/FileCache.kt +++ b/src/main/java/org/radarbase/output/worker/FileCache.kt @@ -25,6 +25,7 @@ import org.radarbase.output.compression.Compression import org.radarbase.output.config.DeduplicationConfig import org.radarbase.output.format.RecordConverter import org.radarbase.output.format.RecordConverterFactory +import org.radarbase.output.path.TargetPath import org.radarbase.output.target.TargetStorage import org.radarbase.output.util.SuspendedCloseable import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended @@ -49,7 +50,7 @@ class FileCache( factory: FileStoreFactory, topic: String, /** File that the cache is maintaining. */ - val path: Path, + val targetPath: TargetPath, /** Local temporary directory to store files in. */ tmpDir: Path, private val accountant: Accountant, @@ -57,7 +58,8 @@ class FileCache( private lateinit var writer: Writer private lateinit var recordConverter: RecordConverter - private val targetStorage: TargetStorage = factory.targetStorage + private val targetStorage: TargetStorage = factory.targetManager[targetPath] + private val path = targetPath.path private val tmpPath: Path private val compression: Compression = factory.compression private val converterFactory: RecordConverterFactory = factory.recordConverter diff --git a/src/main/java/org/radarbase/output/worker/FileCacheStore.kt b/src/main/java/org/radarbase/output/worker/FileCacheStore.kt index de6d41f..a6ecc7f 100644 --- a/src/main/java/org/radarbase/output/worker/FileCacheStore.kt +++ b/src/main/java/org/radarbase/output/worker/FileCacheStore.kt @@ -24,13 +24,13 @@ import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.radarbase.output.FileStoreFactory import org.radarbase.output.accounting.Accountant +import org.radarbase.output.path.TargetPath import org.radarbase.output.util.SuspendedCloseable import org.radarbase.output.util.TemporaryDirectory import org.radarbase.output.util.Timer.time import org.radarbase.output.worker.FileCacheStore.WriteResponse.NO_CACHE_AND_NO_WRITE import org.slf4j.LoggerFactory import java.io.IOException -import java.nio.file.Path import kotlin.io.path.createTempFile import kotlin.io.path.outputStream @@ -44,9 +44,9 @@ class FileCacheStore( ) : SuspendedCloseable { private val tmpDir: TemporaryDirectory - private val caches: MutableMap + private val caches: MutableMap private val maxCacheSize: Int - private val schemasAdded: MutableMap + private val schemasAdded: MutableMap init { val config = factory.config @@ -67,7 +67,7 @@ class FileCacheStore( */ @Throws(IOException::class) suspend fun writeRecord( - path: Path, + path: TargetPath, record: GenericRecord, transaction: Accountant.Transaction, ): WriteResponse { @@ -84,27 +84,29 @@ class FileCacheStore( } private suspend fun createCache( - path: Path, + targetPath: TargetPath, record: GenericRecord, transaction: Accountant.Transaction, ): FileCache { ensureCapacity() - val dir = path.parent - factory.targetStorage.createDirectories(dir) + val dir = targetPath.path.parent + if (dir != null) { + factory.targetManager[targetPath].createDirectories(dir) + } val cache = time("write.open") { FileCache( factory, transaction.topicPartition.topic, - path, + targetPath, tmpDir.path, accountant, ) } cache.initialize(record) - writeSchema(transaction.topicPartition.topic, path, record.schema) - 
caches[path] = cache + writeSchema(transaction.topicPartition.topic, targetPath, record.schema) + caches[targetPath] = cache return cache } @@ -120,31 +122,32 @@ class FileCacheStore( isSuccessful = fileCache.writeRecord(record, transaction), ) } catch (ex: IOException) { - logger.error("Failed to write record. Closing cache {}.", fileCache.path, ex) + logger.error("Failed to write record. Closing cache {}.", fileCache.targetPath, ex) fileCache.markError() - caches.remove(fileCache.path) + caches.remove(fileCache.targetPath) fileCache.closeAndJoin() NO_CACHE_AND_NO_WRITE } } @Throws(IOException::class) - private suspend fun writeSchema(topic: String, path: Path, schema: Schema) = + private suspend fun writeSchema(topic: String, targetPath: TargetPath, schema: Schema) = time("write.schema") { // Write was successful, finalize the write operation - val schemaPath = path.resolveSibling("schema-$topic.json") + val schemaPath = targetPath.navigate { it.resolveSibling("schema-$topic.json") } // First check if we already checked this path, because otherwise the storage.exists call // will take too much time. - if (schemasAdded.putIfAbsent(schemaPath, schemaPath) == null) { - withContext(Dispatchers.IO) { - val storage = factory.targetStorage + if (schemasAdded.putIfAbsent(schemaPath, Unit) == null) { + val storage = factory.targetManager[schemaPath] + val path = schemaPath.path - if (storage.status(schemaPath) == null) { + withContext(Dispatchers.IO) { + if (storage.status(path) == null) { val tmpSchemaPath = createTempFile(tmpDir.path, "schema-$topic", ".json") tmpSchemaPath.outputStream().use { out -> out.write(schema.toString(true).toByteArray()) } - storage.store(tmpSchemaPath, schemaPath) + storage.store(tmpSchemaPath, path) } } } @@ -160,7 +163,7 @@ class FileCacheStore( .sorted() for (i in 0 until cacheList.size / 2) { val rmCache = cacheList[i] - caches.remove(rmCache.path) + caches.remove(rmCache.targetPath) rmCache.closeAndJoin() } accountant.flush() diff --git a/src/test/java/org/radarbase/output/cleaner/TimestampFileCacheTest.kt b/src/test/java/org/radarbase/output/cleaner/TimestampFileCacheTest.kt index 7431705..428a2f4 100644 --- a/src/test/java/org/radarbase/output/cleaner/TimestampFileCacheTest.kt +++ b/src/test/java/org/radarbase/output/cleaner/TimestampFileCacheTest.kt @@ -18,7 +18,10 @@ import org.radarbase.output.compression.IdentityCompression import org.radarbase.output.config.LocalConfig import org.radarbase.output.data.JsonAvroConverterTest.Companion.resourceStream import org.radarbase.output.format.CsvAvroConverterFactory +import org.radarbase.output.path.TargetPath +import org.radarbase.output.path.toTargetPath import org.radarbase.output.target.LocalTargetStorage +import org.radarbase.output.target.TargetManager import org.radarbase.output.util.ResourceContext.Companion.resourceContext import java.io.ByteArrayInputStream import java.io.FileNotFoundException @@ -31,13 +34,15 @@ internal class TimestampFileCacheTest { private lateinit var schema: Schema private lateinit var factory: FileStoreFactory private lateinit var csvConverter: CsvAvroConverterFactory + private lateinit var dir: Path @BeforeEach fun setUp(@TempDir dir: Path) { + this.dir = dir csvConverter = CsvAvroConverterFactory() factory = mock { on { recordConverter } doReturn csvConverter - on { targetStorage } doReturn LocalTargetStorage(dir, LocalConfig()) + on { targetManager } doReturn TargetManager("radar-output-storage", LocalTargetStorage(dir, LocalConfig())) on { compression } doReturn 
IdentityCompression() } schema = Schema.Parser().parse(javaClass.resourceStream("android_phone_light.avsc")) @@ -63,8 +68,8 @@ internal class TimestampFileCacheTest { } @Test - fun testFileCacheFound(@TempDir path: Path) = runTest { - val targetPath = path.resolve("test.avro") + fun testFileCacheFound() = runTest { + val targetPath = "test.avro".toTargetPath("radar-output-storage") writeRecord(targetPath, record) val timestampFileCache = TimestampFileCache(factory, targetPath).apply { initialize() @@ -72,9 +77,9 @@ internal class TimestampFileCacheTest { assertThat(timestampFileCache.contains(record), `is`(true)) } - private suspend fun writeRecord(path: Path, record: GenericRecord) { + private suspend fun writeRecord(targetPath: TargetPath, record: GenericRecord) { resourceContext { - val wr = this.createResource { path.bufferedWriter() } + val wr = createResource { targetPath.toLocalPath(dir).bufferedWriter() } val emptyReader = resourceChain { ByteArrayInputStream(ByteArray(0)) } .conclude { it.reader() } @@ -85,8 +90,8 @@ internal class TimestampFileCacheTest { } @Test - fun testFileCacheNotFound(@TempDir path: Path) = runTest { - val targetPath = path.resolve("test.avro") + fun testFileCacheNotFound() = runTest { + val targetPath = "test.avro".toTargetPath("radar-output-storage") assertThrows { TimestampFileCache(factory, targetPath) .initialize() @@ -94,9 +99,9 @@ internal class TimestampFileCacheTest { } @Test - fun testHeaderMismatch(@TempDir path: Path) = runTest { - val targetPath = path.resolve("test.avro") - targetPath.bufferedWriter().use { writer -> + fun testHeaderMismatch() = runTest { + val targetPath = "test.avro".toTargetPath("radar-output-storage") + targetPath.toLocalPath(dir).bufferedWriter().use { writer -> writer.write("key.projectId,key.userId,key.sourceId,value.time,value.timeReceived,value.luminance") } val cache = TimestampFileCache(factory, targetPath).apply { initialize() } @@ -104,8 +109,8 @@ internal class TimestampFileCacheTest { } @Test - fun testNotFound(@TempDir path: Path) = runTest { - val targetPath = path.resolve("test.avro") + fun testNotFound() = runTest { + val targetPath = "test.avro".toTargetPath("radar-output-storage") val otherRecord = GenericRecordBuilder(record) .set( diff --git a/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt b/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt index bb67f83..4fd77c8 100644 --- a/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt +++ b/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt @@ -41,11 +41,12 @@ import org.radarbase.output.config.ResourceConfig import org.radarbase.output.config.RestructureConfig import org.radarbase.output.config.S3Config import org.radarbase.output.config.WorkerConfig +import org.radarbase.output.path.TargetPath +import org.radarbase.output.path.toTargetPath import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended import org.radarbase.output.worker.FileCacheStore import java.io.IOException import java.nio.file.Path -import java.nio.file.Paths import java.time.Instant import kotlin.io.path.createDirectories import kotlin.io.path.readBytes @@ -56,17 +57,15 @@ class FileCacheStoreTest { @Test @Throws(IOException::class) fun appendLine(@TempDir root: Path, @TempDir tmpDir: Path) = runTest { - val bucketName = Paths.get("radar-output-storage") + fun TargetPath.toLocalPath(): Path = toLocalPath(root) - fun Path.toLocalPath() = root.resolve(bucketName.relativize(this)) - - val f1 = bucketName.resolve("f1") - val f2 = 
bucketName.resolve("f2") - val f3 = bucketName.resolve("f3") - val d4 = bucketName.resolve("d4") + val f1 = "f1".toTargetPath("radar-output-storage") + val f2 = "f2".toTargetPath("radar-output-storage") + val f3 = "f3".toTargetPath("radar-output-storage") + val d4 = "d4".toTargetPath("radar-output-storage") d4.toLocalPath().createDirectories() - val f4 = d4.resolve("f4.txt") - val newFile = bucketName.resolve("newFile") + val f4 = d4.navigate { it.resolve("f4.txt") } + val newFile = "newFile".toTargetPath("radar-output-storage") val simpleSchema = SchemaBuilder.record("simple").fields() .name("a").type("string").noDefault() diff --git a/src/test/java/org/radarbase/output/data/FileCacheTest.kt b/src/test/java/org/radarbase/output/data/FileCacheTest.kt index 0f35b4e..95bd04a 100644 --- a/src/test/java/org/radarbase/output/data/FileCacheTest.kt +++ b/src/test/java/org/radarbase/output/data/FileCacheTest.kt @@ -37,6 +37,8 @@ import org.radarbase.output.config.PathConfig import org.radarbase.output.config.ResourceConfig import org.radarbase.output.config.RestructureConfig import org.radarbase.output.config.S3Config +import org.radarbase.output.path.TargetPath +import org.radarbase.output.path.toTargetPath import org.radarbase.output.util.ResourceContext.Companion.resourceContext import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended import org.radarbase.output.worker.FileCache @@ -54,7 +56,7 @@ import kotlin.io.path.inputStream */ class FileCacheTest { private lateinit var localPath: Path - private lateinit var path: Path + private lateinit var path: TargetPath private lateinit var exampleRecord: Record private lateinit var tmpDir: Path private lateinit var factory: Application @@ -68,8 +70,8 @@ class FileCacheTest { @BeforeEach @Throws(IOException::class) fun setUp(@TempDir path: Path, @TempDir tmpPath: Path) { - this.path = Paths.get("radar-output-storage/f") - this.localPath = path.resolve("f") + this.path = "f".toTargetPath("radar-output-storage") + this.localPath = this.path.toLocalPath(path) this.tmpDir = tmpPath val schema = SchemaBuilder.record("simple").fields() @@ -199,7 +201,7 @@ class FileCacheTest { @Test @Throws(IOException::class) fun compareTo() = runTest { - val file3 = path.parent.resolve("g") + val file3 = path.navigate { it.resolveSibling("g") } resourceContext { val cache1 = createResource { FileCache(factory, "topic", path, tmpDir, accountant) } diff --git a/src/test/java/org/radarbase/output/path/FormattedPathFactoryTest.kt b/src/test/java/org/radarbase/output/path/FormattedPathFactoryTest.kt index 1d3cd23..d9764d0 100644 --- a/src/test/java/org/radarbase/output/path/FormattedPathFactoryTest.kt +++ b/src/test/java/org/radarbase/output/path/FormattedPathFactoryTest.kt @@ -8,29 +8,26 @@ import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.api.io.TempDir import org.radarbase.output.config.LocalConfig import org.radarbase.output.config.PathConfig import org.radarbase.output.config.PathFormatterConfig -import org.radarbase.output.target.CombinedTargetStorage import org.radarbase.output.target.LocalTargetStorage +import org.radarbase.output.target.TargetManager import org.radarcns.kafka.ObservationKey import org.radarcns.passive.phone.PhoneLight -import java.nio.file.Paths +import java.nio.file.Path import java.time.Instant import kotlin.reflect.jvm.jvmName internal class FormattedPathFactoryTest { - private lateinit 
var targetStorage: CombinedTargetStorage + private lateinit var targetStorage: TargetManager @BeforeEach - fun setUp() { - targetStorage = CombinedTargetStorage( - mapOf( - Pair( - "radar-output-storage", - LocalTargetStorage(Paths.get("/test"), LocalConfig()), - ), - ), + fun setUp(@TempDir dir: Path) { + targetStorage = TargetManager( + "radar-output-storage", + LocalTargetStorage(dir, LocalConfig()), ) } @@ -67,7 +64,7 @@ internal class FormattedPathFactoryTest { fun unparameterized() = runBlocking { val factory = FormattedPathFactory().apply { init( - targetStorage = targetStorage, + targetManager = targetStorage, extension = ".csv.gz", config = PathConfig(), ) @@ -127,7 +124,7 @@ internal class FormattedPathFactoryTest { private fun createFactory(format: String): FormattedPathFactory = FormattedPathFactory().apply { init( - targetStorage = targetStorage, + targetManager = targetStorage, extension = ".csv.gz", config = PathConfig( path = PathFormatterConfig( From 742143a5bd552a27899f92ad2ba1e763b6b401a3 Mon Sep 17 00:00:00 2001 From: Joris Borgdorff Date: Thu, 26 Oct 2023 11:57:56 +0200 Subject: [PATCH 06/10] Proper env substitution --- .../output/config/RestructureConfig.kt | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/radarbase/output/config/RestructureConfig.kt b/src/main/java/org/radarbase/output/config/RestructureConfig.kt index de326a1..a96f5a0 100644 --- a/src/main/java/org/radarbase/output/config/RestructureConfig.kt +++ b/src/main/java/org/radarbase/output/config/RestructureConfig.kt @@ -83,20 +83,15 @@ data class RestructureConfig( } fun withEnv(): RestructureConfig = this - .copyOnChange(source, { it?.withEnv("SOURCE_") }) { copy(source = it) } - .copyOnChange(sources, { it.map { source -> source.withEnv("SOURCE_") } }) { copy(sources = it) } + .copyOnChange(source, { it?.withNamedEnv("SOURCE_") }) { copy(source = it) } + .copyOnChange(sources, { it.map { source -> source.withNamedEnv("SOURCE_") } }) { copy(sources = it) } .copyOnChange( targets, { - it.mapValues { (name, target) -> - val prefix = "TARGET_" + name.replace('-', '_').uppercase() - target - .withEnv("TARGET_") - .withEnv(prefix) - } + it.mapValues { (name, target) -> target.withNamedEnv("TARGET_", name) } }, ) { copy(targets = it) } - .copyOnChange(target, { it?.withEnv("TARGET_") }) { copy(target = it) } + .copyOnChange(target, { it?.withNamedEnv("TARGET_") }) { copy(target = it) } .copyOnChange(redis, { it.withEnv() }) { copy(redis = it) } companion object { @@ -109,6 +104,18 @@ data class RestructureConfig( private val logger = LoggerFactory.getLogger(RestructureConfig::class.java) internal const val RESTRUCTURE_CONFIG_FILE_NAME = "restructure.yml" + private val illegalEnvSymbols = "[^A-Za-z0-9]+".toRegex() + + private fun ResourceConfig.withNamedEnv(prefix: String, targetName: String? 
= null): ResourceConfig { + val withFixedPrefix = withEnv(prefix) + val useName = targetName + ?: this.name + ?: return withFixedPrefix + return withFixedPrefix.withEnv( + prefix + useName.replace(illegalEnvSymbols, "_").uppercase(), + ) + } + inline fun T.copyEnv(key: String, doCopy: T.(String) -> T): T = copyOnChange( null, From 36cfa2c513e2c09abb1b8ff6bdde73500ac6d1a3 Mon Sep 17 00:00:00 2001 From: Joris Borgdorff Date: Thu, 26 Oct 2023 11:59:43 +0200 Subject: [PATCH 07/10] Rename bucket -> target in FormattedPathFactory --- .../java/org/radarbase/output/path/FormattedPathFactory.kt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt b/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt index db0d030..742d69e 100644 --- a/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt +++ b/src/main/java/org/radarbase/output/path/FormattedPathFactory.kt @@ -28,7 +28,7 @@ open class FormattedPathFactory : RecordPathFactory() { private var topicFormatters: Map = emptyMap() private var targetFormatter: PathFormatter? = null private lateinit var disabledBucketRegexes: List - private lateinit var defaultBucketName: String + private lateinit var defaultTarget: String override fun init( targetManager: TargetManager, @@ -42,7 +42,7 @@ open class FormattedPathFactory : RecordPathFactory() { disabledBucketRegexes = pathConfig.target .disabledFormats .map { it.toRegex(RegexOption.IGNORE_CASE) } - defaultBucketName = pathConfig.target.default + defaultTarget = pathConfig.target.default logger.info("Formatting path with {}", pathFormatter) } @@ -52,7 +52,7 @@ open class FormattedPathFactory : RecordPathFactory() { return if (format != null && disabledBucketRegexes.none { it.matches(format) }) { format } else { - defaultBucketName + defaultTarget } } From d9137707013738fd15a0fd8382d60ed9c519310c Mon Sep 17 00:00:00 2001 From: Joris Borgdorff Date: Thu, 26 Oct 2023 15:16:50 +0200 Subject: [PATCH 08/10] Properly separate path handling in SourceStorage --- .../output/RestructureS3IntegrationTest.kt | 56 +++++++++---------- .../output/cleaner/SourceDataCleaner.kt | 14 ++--- .../org/radarbase/output/path/TargetPath.kt | 2 +- .../output/source/AzureSourceStorage.kt | 22 +++++--- .../output/source/InMemoryStorageIndex.kt | 51 +++++++++-------- .../output/source/S3SourceStorage.kt | 15 +++-- .../radarbase/output/source/SourceStorage.kt | 6 +- .../output/source/SourceStorageManager.kt | 6 +- .../radarbase/output/source/StorageIndex.kt | 2 +- .../output/source/StorageIndexManager.kt | 23 ++++---- .../radarbase/output/util/AvroFileLister.kt | 1 - .../output/util/TemporaryDirectory.kt | 6 +- .../output/worker/RadarKafkaRestructure.kt | 18 ++---- .../output/data/FileCacheStoreTest.kt | 6 +- .../radarbase/output/data/FileCacheTest.kt | 9 ++- .../output/util/TemporaryDirectoryTest.kt | 10 ++-- 16 files changed, 117 insertions(+), 130 deletions(-) diff --git a/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt b/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt index 33d46a3..5d2b1cd 100644 --- a/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt +++ b/src/integrationTest/java/org/radarbase/output/RestructureS3IntegrationTest.kt @@ -10,12 +10,12 @@ import io.minio.RemoveBucketArgs import io.minio.RemoveObjectArgs import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.coroutineScope -import kotlinx.coroutines.joinAll import 
kotlinx.coroutines.launch import kotlinx.coroutines.test.runTest import kotlinx.coroutines.withContext import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Test +import org.radarbase.kotlin.coroutines.launchJoin import org.radarbase.output.config.PathConfig import org.radarbase.output.config.PathFormatterConfig import org.radarbase.output.config.ResourceConfig @@ -73,20 +73,18 @@ class RestructureS3IntegrationTest { "application_server_status/partition=1/application_server_status+1+0000000021.avro", "android_phone_acceleration/partition=0/android_phone_acceleration+0+0003018784.avro", ) - val targetFiles = resourceFiles.map { Paths.get("in/$it") } - resourceFiles.mapIndexed { i, resourceFile -> - launch(Dispatchers.IO) { - this@RestructureS3IntegrationTest.javaClass.getResourceAsStream("/$resourceFile") - .useSuspended { statusFile -> - sourceClient.putObject( - PutObjectArgs.Builder() - .objectBuild(sourceBucket, targetFiles[i]) { - stream(statusFile, -1, MAX_PART_SIZE) - }, - ) - } - } - }.joinAll() + val targetFiles = resourceFiles.associateWith { Paths.get("in/$it") } + targetFiles.entries.launchJoin(Dispatchers.IO) { (resourceFile, targetFile) -> + this@RestructureS3IntegrationTest.javaClass.getResourceAsStream("/$resourceFile") + .useSuspended { statusFile -> + sourceClient.putObject( + PutObjectArgs.Builder() + .objectBuild(sourceBucket, targetFile) { + stream(statusFile, -1, MAX_PART_SIZE) + }, + ) + } + } application.start() @@ -149,13 +147,11 @@ class RestructureS3IntegrationTest { coroutineScope { // delete source files launch { - targetFiles.map { - launch(Dispatchers.IO) { - sourceClient.removeObject( - RemoveObjectArgs.Builder().objectBuild(sourceBucket, it), - ) - } - }.joinAll() + targetFiles.values.launchJoin(Dispatchers.IO) { + sourceClient.removeObject( + RemoveObjectArgs.Builder().objectBuild(sourceBucket, it), + ) + } launch(Dispatchers.IO) { sourceClient.removeBucket( @@ -166,15 +162,13 @@ class RestructureS3IntegrationTest { // delete target files launch { - files.map { - launch(Dispatchers.IO) { - targetClient.removeObject( - RemoveObjectArgs.Builder().bucketBuild(targetBucket) { - `object`(it) - }, - ) - } - }.joinAll() + files.launchJoin(Dispatchers.IO) { file -> + targetClient.removeObject( + RemoveObjectArgs.Builder().bucketBuild(targetBucket) { + `object`(file) + }, + ) + } launch(Dispatchers.IO) { targetClient.removeBucket( RemoveBucketArgs.Builder().bucketBuild(targetBucket), diff --git a/src/main/java/org/radarbase/output/cleaner/SourceDataCleaner.kt b/src/main/java/org/radarbase/output/cleaner/SourceDataCleaner.kt index 5c997b4..b030f5e 100644 --- a/src/main/java/org/radarbase/output/cleaner/SourceDataCleaner.kt +++ b/src/main/java/org/radarbase/output/cleaner/SourceDataCleaner.kt @@ -24,7 +24,6 @@ import java.nio.file.Path import java.time.Instant import java.time.temporal.ChronoUnit import java.util.concurrent.atomic.LongAdder -import kotlin.coroutines.coroutineContext class SourceDataCleaner( private val fileStoreFactory: FileStoreFactory, @@ -47,11 +46,13 @@ class SourceDataCleaner( @Throws(IOException::class, InterruptedException::class) suspend fun process() { // Get files and directories - val paths = topicPaths(sourceStorage.root) + val paths = sourceStorageManager.listTopics(excludeTopics) + // different services start on different topics to decrease lock contention + .shuffled() logger.info("{} topics found", paths.size) - withContext(coroutineContext + supervisor) { + withContext(supervisor) { paths.forEach { p -> 
launch { try { @@ -127,11 +128,6 @@ class SourceDataCleaner( } } - private suspend fun topicPaths(path: Path): List = - sourceStorageManager.listTopics(path, excludeTopics) - // different services start on different topics to decrease lock contention - .shuffled() - override fun close() { supervisor.cancel() } @@ -149,7 +145,7 @@ class SourceDataCleaner( factory.sourceStorage.launchJoin { sourceStorage -> SourceDataCleaner(factory, sourceStorage).useSuspended { cleaner -> sourceStorage.storageIndexManager.update() - logger.info("Cleaning {}", sourceStorage.sourceStorage.root) + logger.info("Cleaning {}", sourceStorage.sourceStorage.baseDir) cleaner.process() logger.info("Cleaned up {} files", cleaner.deletedFileCount.format()) } diff --git a/src/main/java/org/radarbase/output/path/TargetPath.kt b/src/main/java/org/radarbase/output/path/TargetPath.kt index 9dec383..ee85d3f 100644 --- a/src/main/java/org/radarbase/output/path/TargetPath.kt +++ b/src/main/java/org/radarbase/output/path/TargetPath.kt @@ -14,7 +14,7 @@ data class TargetPath( fun navigate(block: (Path) -> Path): TargetPath = copy(path = block(path)) - fun toLocalPath(root: Path): Path = root.resolve(path) + fun toLocalPath(baseDir: Path): Path = baseDir.resolve(path) companion object { private val comparator = compareBy(TargetPath::target, TargetPath::path) diff --git a/src/main/java/org/radarbase/output/source/AzureSourceStorage.kt b/src/main/java/org/radarbase/output/source/AzureSourceStorage.kt index cbf2ab5..98de0bc 100644 --- a/src/main/java/org/radarbase/output/source/AzureSourceStorage.kt +++ b/src/main/java/org/radarbase/output/source/AzureSourceStorage.kt @@ -8,7 +8,6 @@ import org.apache.avro.file.SeekableFileInput import org.apache.avro.file.SeekableInput import org.radarbase.output.config.AzureConfig import org.radarbase.output.util.TemporaryDirectory -import org.radarbase.output.util.withoutFirstSegment import java.nio.file.Path import java.nio.file.Paths import java.time.Instant @@ -16,7 +15,7 @@ import kotlin.io.path.createTempFile import kotlin.io.path.deleteIfExists class AzureSourceStorage( - override val root: Path, + override val baseDir: Path, client: BlobServiceClient, config: AzureConfig, private val tempPath: Path, @@ -24,25 +23,27 @@ class AzureSourceStorage( private val blobContainerClient = client.getBlobContainerClient(config.container) private val readOffsetFromMetadata = config.endOffsetFromMetadata - private fun blobClient(path: Path) = blobContainerClient.getBlobClient(path.withoutFirstSegment()) + private fun blobClient(path: Path) = blobContainerClient.getBlobClient(path.toSourcePath().toString()) override suspend fun list(path: Path, startAfter: Path?, maxKeys: Int?): List = withContext(Dispatchers.IO) { - var iterable: Iterable = blobContainerClient.listBlobsByHierarchy("$path/") + var iterable: Iterable = blobContainerClient.listBlobsByHierarchy("${path.toSourcePath()}/") if (startAfter != null) { iterable = iterable.filter { Paths.get(it.name) > startAfter } } if (maxKeys != null) { iterable = iterable.take(maxKeys) } - iterable.map { - if (it.isPrefix == true) { + val baseDirPrefix = "$baseDir/" + iterable.map { item -> + val itemPath = Paths.get(item.name.removePrefix(baseDirPrefix)) + if (item.isPrefix == true) { StorageNode.StorageFile( - Paths.get(it.name), - it.properties?.lastModified?.toInstant() ?: Instant.now(), + itemPath, + item.properties?.lastModified?.toInstant() ?: Instant.now(), ) } else { - StorageNode.StorageDirectory(Paths.get(it.name)) + 
StorageNode.StorageDirectory(itemPath) } } } @@ -77,6 +78,9 @@ class AzureSourceStorage( override fun createReader(): SourceStorage.SourceStorageReader = AzureSourceStorageReader() + override fun toString(): String = + "AzureSourceStorage(container=${blobContainerClient.blobContainerName}, baseDir=$baseDir)" + private inner class AzureSourceStorageReader : SourceStorage.SourceStorageReader { private val tempDir = TemporaryDirectory(tempPath, "worker-") diff --git a/src/main/java/org/radarbase/output/source/InMemoryStorageIndex.kt b/src/main/java/org/radarbase/output/source/InMemoryStorageIndex.kt index e167624..c423397 100644 --- a/src/main/java/org/radarbase/output/source/InMemoryStorageIndex.kt +++ b/src/main/java/org/radarbase/output/source/InMemoryStorageIndex.kt @@ -12,18 +12,13 @@ import java.util.concurrent.ConcurrentMap */ class InMemoryStorageIndex : MutableStorageIndex { private val fileIndex: ConcurrentMap> = ConcurrentHashMap() - private val rootSet = ConcurrentHashMap() init { - fileIndex[ROOT] = rootSet + fileIndex[ROOT] = mapOf() } override suspend fun list(dir: StorageNode.StorageDirectory, maxKeys: Int?): List { - val listing = if (dir === ROOT) { - rootSet - } else { - fileIndex[dir] ?: return listOf() - } + val listing = fileIndex[dir] ?: return listOf() return if (maxKeys != null) { listing.values.take(maxKeys) @@ -34,23 +29,28 @@ class InMemoryStorageIndex : MutableStorageIndex { /** Adds a node and all its parents to the file hierarchy. */ private fun add(dir: StorageNode.StorageDirectory) { + if (dir == ROOT) return var currentDir = dir var parentDir = currentDir.parent() while (parentDir != null) { - fileIndex.compute(parentDir) { _, map -> - if (map == null) { - mapOf(currentDir.path to currentDir) - } else { - buildMap(map.size + 1) { - putAll(map) - put(currentDir.path, currentDir) - } - } - } + addNode(parentDir, currentDir) currentDir = parentDir parentDir = currentDir.parent() } - rootSet[currentDir.path] = currentDir + addNode(ROOT, currentDir) + } + + private fun addNode(parent: StorageNode.StorageDirectory, dir: StorageNode.StorageDirectory) { + fileIndex.compute(parent) { _, map -> + if (map == null) { + mapOf(dir.path to dir) + } else { + buildMap(map.size + 1) { + putAll(map) + put(dir.path, dir) + } + } + } } override suspend fun addAll(parent: StorageNode.StorageDirectory, nodes: List): Collection { @@ -89,7 +89,6 @@ class InMemoryStorageIndex : MutableStorageIndex { val newMap = buildMap(nodes.size) { nodes.forEach { put(it.path, it) } } - fileIndex[parent] = newMap nodes.asSequence() @@ -99,14 +98,10 @@ class InMemoryStorageIndex : MutableStorageIndex { } override suspend fun remove(file: StorageNode.StorageFile) { - val parent = file.parent() + val parent = file.parent() ?: ROOT - if (parent != null) { - fileIndex.computeIfPresent(parent) { _, map -> - (map - file.path).takeIf { it.isNotEmpty() } - } - } else { - rootSet.remove(file.path) + fileIndex.computeIfPresent(parent) { _, map -> + (map - file.path).takeIf { it.isNotEmpty() } } } @@ -118,4 +113,8 @@ class InMemoryStorageIndex : MutableStorageIndex { fileIndex.remove(first)?.values?.filterIsInstanceTo(directoriesToRemove) } } + + override fun toString(): String { + return "InMemoryStorageIndex(size=${fileIndex.size})" + } } diff --git a/src/main/java/org/radarbase/output/source/S3SourceStorage.kt b/src/main/java/org/radarbase/output/source/S3SourceStorage.kt index 1b9088b..5098a5d 100644 --- a/src/main/java/org/radarbase/output/source/S3SourceStorage.kt +++ 
b/src/main/java/org/radarbase/output/source/S3SourceStorage.kt @@ -23,7 +23,7 @@ import kotlin.io.path.pathString import kotlin.time.Duration.Companion.seconds class S3SourceStorage( - override val root: Path, + override val baseDir: Path, private val s3Client: MinioClient, config: S3Config, private val tempPath: Path, @@ -40,7 +40,7 @@ class S3SourceStorage( if (maxKeys != null) { maxKeys(maxKeys.coerceAtMost(1000)) } - prefix("$path/") + prefix("${path.toSourcePath()}/") recursive(false) if (startAfter != null) { startAfter(startAfter.pathString) @@ -51,10 +51,11 @@ class S3SourceStorage( if (maxKeys != null) { iterable = iterable.take(maxKeys) } + val baseDirPrefix = "$baseDir/" return iterable .map { val item = it.get() - val itemPath = Paths.get(item.objectName()) + val itemPath = Paths.get(item.objectName().removePrefix(baseDirPrefix)) if (item.isDir) { StorageNode.StorageDirectory(itemPath) } else { @@ -86,17 +87,21 @@ class S3SourceStorage( } private suspend fun getObjectTags(path: Path): Tags { - val tagRequest = GetObjectTagsArgs.Builder().objectBuild(bucket, path) + val tagRequest = GetObjectTagsArgs.Builder().objectBuild(bucket, path.toSourcePath()) return faultTolerant { s3Client.getObjectTags(tagRequest) } } override suspend fun delete(path: Path) { - val removeRequest = RemoveObjectArgs.Builder().objectBuild(bucket, path) + val removeRequest = RemoveObjectArgs.Builder().objectBuild(bucket, path.toSourcePath()) faultTolerant { s3Client.removeObject(removeRequest) } } override fun createReader(): SourceStorage.SourceStorageReader = S3SourceStorageReader(tempPath, s3Client, bucket) + override fun toString(): String { + return "S3SourceStorage(bucket=$bucket, baseDir=$baseDir)" + } + companion object { private val logger = LoggerFactory.getLogger(S3SourceStorage::class.java) diff --git a/src/main/java/org/radarbase/output/source/SourceStorage.kt b/src/main/java/org/radarbase/output/source/SourceStorage.kt index e4b7838..87080e8 100644 --- a/src/main/java/org/radarbase/output/source/SourceStorage.kt +++ b/src/main/java/org/radarbase/output/source/SourceStorage.kt @@ -7,7 +7,7 @@ import java.time.Instant /** Source storage type. */ interface SourceStorage { - val root: Path + val baseDir: Path /** Create a reader for the storage medium. It should be closed by the caller. */ fun createReader(): SourceStorageReader @@ -23,10 +23,12 @@ interface SourceStorage { suspend fun delete(path: Path) suspend fun createTopicFile(topic: String, status: StorageNode): TopicFile = TopicFile( topic = topic, - path = status.path, + path = status.path.toSourcePath(), lastModified = if (status is StorageNode.StorageFile) status.lastModified else Instant.now(), ) + fun Path.toSourcePath(): Path = baseDir.resolve(this).normalize() + /** * File reader for the storage medium. * All inputs opened by this reader should be closed before closing the reader itself. 
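For clarity, a small sketch of the path convention this change standardizes: the storage index and all callers work with paths relative to the source baseDir, while each SourceStorage resolves them back (toSourcePath) before querying S3/Azure and strips the prefix again on listings. The values below are examples only:

    import java.nio.file.Paths

    fun main() {
        val baseDir = Paths.get("in") // example source prefix inside the bucket/container
        val relative = Paths.get("android_phone_light/partition=0/android_phone_light+0+0000000021.avro")

        // What toSourcePath() yields for the actual storage request:
        val absolute = baseDir.resolve(relative).normalize()

        // Listings go the other way: the baseDir prefix is removed before entries
        // reach the StorageIndex, so index paths stay relative to ROOT (".").
        val stripped = Paths.get(absolute.toString().removePrefix("$baseDir/"))
        check(stripped == relative)
    }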
diff --git a/src/main/java/org/radarbase/output/source/SourceStorageManager.kt b/src/main/java/org/radarbase/output/source/SourceStorageManager.kt index dd7646e..958c1b6 100644 --- a/src/main/java/org/radarbase/output/source/SourceStorageManager.kt +++ b/src/main/java/org/radarbase/output/source/SourceStorageManager.kt @@ -9,13 +9,12 @@ import java.time.Instant class SourceStorageManager( val sourceStorage: SourceStorage, - val storageIndex: StorageIndex, + private val storageIndex: StorageIndex, storageIndexConfig: StorageIndexConfig, ) { val storageIndexManager: StorageIndexManager = StorageIndexManager( storageIndex, sourceStorage, - sourceStorage.root, storageIndexConfig, ) @@ -41,9 +40,8 @@ class SourceStorageManager( * Exclude paths belonging to the set of given excluded topics. */ suspend fun listTopics( - root: Path, exclude: Set, ): List = storageIndex.avroTopicTreeLister() - .listTo(LinkedHashSet(), root) + .listTo(LinkedHashSet(), StorageIndex.ROOT.path) .filter { it.fileName.toString() !in exclude } } diff --git a/src/main/java/org/radarbase/output/source/StorageIndex.kt b/src/main/java/org/radarbase/output/source/StorageIndex.kt index 51dd258..df71c68 100644 --- a/src/main/java/org/radarbase/output/source/StorageIndex.kt +++ b/src/main/java/org/radarbase/output/source/StorageIndex.kt @@ -24,6 +24,6 @@ interface StorageIndex { * Root directory. All files that are in the index can be found by traversing the index * starting at this root. */ - val ROOT = StorageNode.StorageDirectory(Paths.get("/")) + val ROOT = StorageNode.StorageDirectory(Paths.get(".")) } } diff --git a/src/main/java/org/radarbase/output/source/StorageIndexManager.kt b/src/main/java/org/radarbase/output/source/StorageIndexManager.kt index 352816d..41b0973 100644 --- a/src/main/java/org/radarbase/output/source/StorageIndexManager.kt +++ b/src/main/java/org/radarbase/output/source/StorageIndexManager.kt @@ -3,20 +3,17 @@ package org.radarbase.output.source import org.radarbase.kotlin.coroutines.forkJoin import org.radarbase.output.config.StorageIndexConfig import org.slf4j.LoggerFactory -import java.nio.file.Path import kotlin.time.TimeSource.Monotonic.markNow /** Manager to manage a storage index. */ class StorageIndexManager( /** Storage index to manage. */ - val storageIndex: StorageIndex, + private val storageIndex: StorageIndex, /** Source storage to index. */ private val sourceStorage: SourceStorage, /** Root directory in source storage to start scanning. 
*/ - root: Path, config: StorageIndexConfig, ) { - private val root = StorageNode.StorageDirectory(root) private val rescanEmptyDuration = config.emptyDirectorySyncDuration private val rescanDirectoryDuration = config.fullSyncDuration @@ -32,15 +29,15 @@ class StorageIndexManager( sync() } nextEmptySync.hasPassedNow() -> { - logger.info("Updating source {} index (including empty directories)...", root) + logger.info("Updating source {} index (including empty directories)...", sourceStorage) nextEmptySync = markNow() + rescanEmptyDuration - val listOperations = storageIndex.updateLevel(root, true) - logger.debug("Updated source {} with {} list operations...", root, listOperations) + val listOperations = storageIndex.updateLevel(StorageIndex.ROOT, true) + logger.debug("Updated source {} with {} list operations...", sourceStorage, listOperations) } else -> { - logger.info("Updating source {} index (excluding empty directories)...", root) - val listOperations = storageIndex.updateLevel(root, false) - logger.debug("Updated source {} with {} list operations...", root, listOperations) + logger.info("Updating source {} index (excluding empty directories)...", sourceStorage) + val listOperations = storageIndex.updateLevel(StorageIndex.ROOT, false) + logger.debug("Updated source {} with {} list operations...", sourceStorage, listOperations) } } } @@ -73,9 +70,9 @@ class StorageIndexManager( /** Fully synchronize the storage index with the source storage. */ suspend fun sync() { if (storageIndex !is MutableStorageIndex) return - logger.info("Syncing source {} index...", root) - val listOperations = storageIndex.syncLevel(root) - logger.debug("Synced source {} index with {} list operations...", root, listOperations) + logger.info("Syncing source {} index...", sourceStorage) + val listOperations = storageIndex.syncLevel(StorageIndex.ROOT) + logger.info("Synced source {} index with {} list operations", sourceStorage, listOperations) val now = markNow() nextSync = now + rescanDirectoryDuration nextEmptySync = now + rescanEmptyDuration diff --git a/src/main/java/org/radarbase/output/util/AvroFileLister.kt b/src/main/java/org/radarbase/output/util/AvroFileLister.kt index a4267aa..fa881ca 100644 --- a/src/main/java/org/radarbase/output/util/AvroFileLister.kt +++ b/src/main/java/org/radarbase/output/util/AvroFileLister.kt @@ -1,6 +1,5 @@ package org.radarbase.output.util -import kotlinx.coroutines.flow.toList import org.radarbase.output.source.SourceStorage import org.radarbase.output.source.StorageIndex import org.radarbase.output.source.StorageNode diff --git a/src/main/java/org/radarbase/output/util/TemporaryDirectory.kt b/src/main/java/org/radarbase/output/util/TemporaryDirectory.kt index 2235bdc..c0af719 100644 --- a/src/main/java/org/radarbase/output/util/TemporaryDirectory.kt +++ b/src/main/java/org/radarbase/output/util/TemporaryDirectory.kt @@ -25,14 +25,14 @@ import kotlin.io.path.createTempDirectory import kotlin.io.path.exists /** Temporary directory that will be removed on close or shutdown. 
*/ -class TemporaryDirectory(root: Path, prefix: String) : Closeable { +class TemporaryDirectory(baseDir: Path, prefix: String) : Closeable { private val shutdownHook: Thread val path: Path init { - root.createDirectories() - path = createTempDirectory(root, prefix) + baseDir.createDirectories() + path = createTempDirectory(baseDir, prefix) shutdownHook = Thread( { this.doClose() }, "remove-" + path.toString().replace("/".toRegex(), "-"), diff --git a/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt b/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt index f054183..9f18c79 100644 --- a/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt +++ b/src/main/java/org/radarbase/output/worker/RadarKafkaRestructure.kt @@ -39,7 +39,6 @@ import java.io.IOException import java.nio.file.Path import java.time.Duration import java.util.concurrent.atomic.LongAdder -import kotlin.coroutines.coroutineContext /** * Performs the following actions @@ -81,15 +80,15 @@ class RadarKafkaRestructure( @Throws(IOException::class, InterruptedException::class) suspend fun process() { // Get files and directories - val absolutePath = sourceStorage.root - logger.info("Scanning topics...") - val paths = topicPaths(absolutePath) + val paths = sourceStorageManager.listTopics(excludeTopics) + // different services start on different topics to decrease lock contention + .shuffled() logger.info("{} topics found", paths.size) - withContext(coroutineContext + supervisor) { + withContext(supervisor) { paths.forEach { p -> launch { try { @@ -163,11 +162,6 @@ class RadarKafkaRestructure( supervisor.cancel() } - private suspend fun topicPaths(root: Path): List = - sourceStorageManager.listTopics(root, excludeTopics) - // different services start on different topics to decrease lock contention - .shuffled() - private data class ProcessingStatistics( val fileCount: Long, val recordCount: Long, @@ -188,9 +182,9 @@ class RadarKafkaRestructure( factory.sourceStorage.launchJoin { sourceStorage -> RadarKafkaRestructure(factory, sourceStorage).useSuspended { restructure -> sourceStorage.storageIndexManager.update() - logger.info("In: {}", sourceStorage.sourceStorage.root) + logger.info("In: {}", sourceStorage.sourceStorage) logger.info( - "Out: bucket {} (default {}) - path {}", + "Out: target format '{}' (default {}) - path format '{}'", pathConfig.target.format, pathConfig.target.default, pathConfig.path.format, diff --git a/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt b/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt index 4fd77c8..2c368da 100644 --- a/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt +++ b/src/test/java/org/radarbase/output/data/FileCacheStoreTest.kt @@ -56,8 +56,8 @@ class FileCacheStoreTest { @Test @Throws(IOException::class) - fun appendLine(@TempDir root: Path, @TempDir tmpDir: Path) = runTest { - fun TargetPath.toLocalPath(): Path = toLocalPath(root) + fun appendLine(@TempDir baseDir: Path, @TempDir tmpDir: Path) = runTest { + fun TargetPath.toLocalPath(): Path = toLocalPath(baseDir) val f1 = "f1".toTargetPath("radar-output-storage") val f2 = "f2".toTargetPath("radar-output-storage") @@ -93,7 +93,7 @@ class FileCacheStoreTest { ), worker = WorkerConfig(cacheSize = 2), sources = listOf(ResourceConfig("s3", tmpDir, s3 = S3Config("http://ep", "null", "null", bucket = "Test"))), - targets = mapOf("radar-output-storage" to ResourceConfig("local", path = root, local = LocalConfig())), + targets = mapOf("radar-output-storage" to 
ResourceConfig("local", path = baseDir, local = LocalConfig())), ), ) diff --git a/src/test/java/org/radarbase/output/data/FileCacheTest.kt b/src/test/java/org/radarbase/output/data/FileCacheTest.kt index 95bd04a..e786a2c 100644 --- a/src/test/java/org/radarbase/output/data/FileCacheTest.kt +++ b/src/test/java/org/radarbase/output/data/FileCacheTest.kt @@ -17,8 +17,6 @@ package org.radarbase.output.data import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.joinAll -import kotlinx.coroutines.launch import kotlinx.coroutines.test.runTest import org.apache.avro.SchemaBuilder import org.apache.avro.generic.GenericData.Record @@ -29,6 +27,7 @@ import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test import org.junit.jupiter.api.io.TempDir import org.mockito.kotlin.mock +import org.radarbase.kotlin.coroutines.launchJoin import org.radarbase.output.Application import org.radarbase.output.accounting.Accountant import org.radarbase.output.accounting.TopicPartition @@ -208,9 +207,9 @@ class FileCacheTest { val cache2 = createResource { FileCache(factory, "topic", path, tmpDir, accountant) } val cache3 = createResource { FileCache(factory, "topic", file3, tmpDir, accountant) } - listOf(cache1, cache2, cache3) - .map { cache -> launch(Dispatchers.IO) { cache.initialize(exampleRecord) } } - .joinAll() + listOf(cache1, cache2, cache3).launchJoin(Dispatchers.IO) { cache -> + cache.initialize(exampleRecord) + } val transaction = Accountant.Transaction(topicPartition, 0, lastModified) assertEquals(0, cache1.compareTo(cache2)) diff --git a/src/test/java/org/radarbase/output/util/TemporaryDirectoryTest.kt b/src/test/java/org/radarbase/output/util/TemporaryDirectoryTest.kt index 265047f..9859f7e 100644 --- a/src/test/java/org/radarbase/output/util/TemporaryDirectoryTest.kt +++ b/src/test/java/org/radarbase/output/util/TemporaryDirectoryTest.kt @@ -10,16 +10,16 @@ import kotlin.io.path.listDirectoryEntries internal class TemporaryDirectoryTest { @Test - fun createAndDelete(@TempDir root: Path) { - TemporaryDirectory(root, "worker-").use { - assertThat(root.listDirectoryEntries().size, `is`(1)) + fun createAndDelete(@TempDir baseDir: Path) { + TemporaryDirectory(baseDir, "worker-").use { + assertThat(baseDir.listDirectoryEntries().size, `is`(1)) createTempFile(it.path, "test", "txt") createTempFile(it.path, "test", "txt") assertThat(it.path.listDirectoryEntries().size, `is`(2)) - assertThat(root.listDirectoryEntries().size, `is`(1)) + assertThat(baseDir.listDirectoryEntries().size, `is`(1)) } - assertThat(root.listDirectoryEntries().size, `is`(0)) + assertThat(baseDir.listDirectoryEntries().size, `is`(0)) } } From 3e42fd455249b1f2b4b0964b05e092dd72a22acc Mon Sep 17 00:00:00 2001 From: Joris Borgdorff Date: Thu, 26 Oct 2023 15:21:35 +0200 Subject: [PATCH 09/10] Removed unused factory classes --- .../radarbase/output/source/InMemoryStorageIndexFactory.kt | 5 ----- .../java/org/radarbase/output/source/StorageIndexFactory.kt | 5 ----- 2 files changed, 10 deletions(-) delete mode 100644 src/main/java/org/radarbase/output/source/InMemoryStorageIndexFactory.kt delete mode 100644 src/main/java/org/radarbase/output/source/StorageIndexFactory.kt diff --git a/src/main/java/org/radarbase/output/source/InMemoryStorageIndexFactory.kt b/src/main/java/org/radarbase/output/source/InMemoryStorageIndexFactory.kt deleted file mode 100644 index f86bd71..0000000 --- a/src/main/java/org/radarbase/output/source/InMemoryStorageIndexFactory.kt +++ /dev/null @@ -1,5 +0,0 @@ -package org.radarbase.output.source 
- -class InMemoryStorageIndexFactory : StorageIndexFactory { - override fun get(): StorageIndex = InMemoryStorageIndex() -} diff --git a/src/main/java/org/radarbase/output/source/StorageIndexFactory.kt b/src/main/java/org/radarbase/output/source/StorageIndexFactory.kt deleted file mode 100644 index e953281..0000000 --- a/src/main/java/org/radarbase/output/source/StorageIndexFactory.kt +++ /dev/null @@ -1,5 +0,0 @@ -package org.radarbase.output.source - -interface StorageIndexFactory { - fun get(): StorageIndex -} From 054282f3bfc2e62e31874cdc19b7a15a763cd675 Mon Sep 17 00:00:00 2001 From: Joris Borgdorff Date: Thu, 26 Oct 2023 15:23:48 +0200 Subject: [PATCH 10/10] Removed unused code --- src/main/java/org/radarbase/output/util/Path.kt | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/main/java/org/radarbase/output/util/Path.kt b/src/main/java/org/radarbase/output/util/Path.kt index 3704984..7b445a0 100644 --- a/src/main/java/org/radarbase/output/util/Path.kt +++ b/src/main/java/org/radarbase/output/util/Path.kt @@ -4,21 +4,6 @@ import io.minio.BucketArgs import io.minio.ObjectArgs import java.nio.file.Path -fun Path.withoutFirstSegment(): String { - // remove bucket prefix - return first().relativize(this).toString() -} - -fun Path.splitFirstSegment(): Pair { - val bucketPath = first() - return Pair( - bucketPath.toString(), - bucketPath.relativize(this), - ) -} - -fun Path.firstSegment(): String = first().toString() - inline fun > T.bucketBuild( bucket: String, configure: T.() -> T = { this },