@@ -1,13 +1,18 @@
 package org.radarbase.output.cleaner
 
 import kotlinx.coroutines.*
-import kotlinx.coroutines.flow.*
+import kotlinx.coroutines.sync.Mutex
+import kotlinx.coroutines.sync.withLock
 import kotlinx.coroutines.sync.withPermit
+import org.radarbase.output.Application.Companion.format
 import org.radarbase.output.FileStoreFactory
 import org.radarbase.output.accounting.Accountant
 import org.radarbase.output.accounting.AccountantImpl
+import org.radarbase.output.config.RestructureConfig
 import org.radarbase.output.util.ResourceContext.Companion.resourceContext
+import org.radarbase.output.util.SuspendedCloseable.Companion.useSuspended
 import org.radarbase.output.util.Timer
+import org.radarbase.output.worker.Job
 import org.slf4j.LoggerFactory
 import java.io.Closeable
 import java.io.IOException
@@ -33,6 +38,7 @@ class SourceDataCleaner(
         .minus(fileStoreFactory.config.cleaner.age.toLong(), ChronoUnit.DAYS)
 
     val deletedFileCount = LongAdder()
+    private val scope = CoroutineScope(Dispatchers.Default)
 
     @Throws(IOException::class, InterruptedException::class)
     suspend fun process(directoryName: String) {
@@ -43,23 +49,21 @@ class SourceDataCleaner(
 
         logger.info("{} topics found", paths.size)
 
-        coroutineScope {
-            paths.forEach { p ->
-                launch {
-                    try {
-                        val deleteCount = fileStoreFactory.workerSemaphore.withPermit {
-                            mapTopic(p)
-                        }
-                        if (deleteCount > 0) {
-                            logger.info("Removed {} files in topic {}", deleteCount, p.fileName)
-                            deletedFileCount.add(deleteCount)
-                        }
-                    } catch (ex: Exception) {
-                        logger.warn("Failed to map topic", ex)
+        paths.map { p ->
+            scope.launch {
+                try {
+                    val deleteCount = fileStoreFactory.workerSemaphore.withPermit {
+                        mapTopic(p)
                     }
+                    if (deleteCount > 0) {
+                        logger.info("Removed {} files in topic {}", deleteCount, p.fileName)
+                        deletedFileCount.add(deleteCount)
+                    }
+                } catch (ex: Exception) {
+                    logger.warn("Failed to map topic", ex)
                 }
             }
-        }
+        }.joinAll()
     }
 
     private suspend fun mapTopic(topicPath: Path): Long {
@@ -93,41 +97,65 @@ class SourceDataCleaner(
     ): Int {
         val offsets = accountant.offsets.copyForTopic(topic)
 
-        return sourceStorage.walker.walkRecords(topic, topicPath).consumeAsFlow()
-            .filter { f ->
-                f.lastModified.isBefore(deleteThreshold) &&
-                    // ensure that there is a file with a larger offset also
-                    // processed, so the largest offset is never removed.
-                    offsets.contains(f.range.mapRange { r -> r.incrementTo() })
-            }
-            .take(maxFilesPerTopic)
-            .takeWhile { !isClosed.get() }
-            .count { file ->
-                if (extractionCheck.isExtracted(file)) {
-                    logger.info("Removing {}", file.path)
-                    Timer.time("cleaner.delete") {
-                        sourceStorage.delete(file.path)
+        val paths = sourceStorage.listTopicFiles(topic, topicPath, maxFilesPerTopic) { f ->
+            f.lastModified.isBefore(deleteThreshold) &&
+                // ensure that there is a file with a larger offset also
+                // processed, so the largest offset is never removed.
+                offsets.contains(f.range.mapRange { r -> r.incrementTo() })
+        }
+
+        val accountantMutex = Mutex()
+
+        return coroutineScope {
+            paths
+                .map { file ->
+                    async {
+                        if (extractionCheck.isExtracted(file)) {
+                            logger.info("Removing {}", file.path)
+                            Timer.time("cleaner.delete") {
+                                sourceStorage.delete(file.path)
+                            }
+                            true
+                        } else {
+                            // extract the file again at a later time
+                            logger.warn("Source file was not completely extracted: {}", file.path)
+                            val fullRange = file.range.mapRange { it.ensureToOffset() }
+                            accountantMutex.withLock {
+                                accountant.remove(fullRange)
+                            }
+                            false
+                        }
                     }
-                    true
-                } else {
-                    logger.warn("Source file was not completely extracted: {}", file.path)
-                    // extract the file again at a later time
-                    accountant.remove(file.range.mapRange { it.ensureToOffset() })
-                    false
                 }
-            }
+                .awaitAll()
+                .count { it }
+        }
     }
 
-    private suspend fun topicPaths(path: Path): List<Path> = sourceStorage.walker.walkTopics(path, excludeTopics)
+    private suspend fun topicPaths(path: Path): List<Path> = sourceStorage.listTopics(path, excludeTopics)
         .toMutableList()
         // different services start on different topics to decrease lock contention
         .also { it.shuffle() }
 
     override fun close() {
-        isClosed.set(true)
+        scope.cancel()
     }
 
     companion object {
         private val logger = LoggerFactory.getLogger(SourceDataCleaner::class.java)
+
+        fun job(config: RestructureConfig, serviceMutex: Mutex): Job? = if (config.cleaner.enable) {
+            Job("cleaner", config.cleaner.interval, ::runCleaner, serviceMutex)
+        } else null
+
+        private suspend fun runCleaner(factory: FileStoreFactory) {
+            SourceDataCleaner(factory).useSuspended { cleaner ->
+                for (input in factory.config.paths.inputs) {
+                    logger.info("Cleaning {}", input)
+                    cleaner.process(input.toString())
+                }
+                logger.info("Cleaned up {} files", cleaner.deletedFileCount.format())
+            }
+        }
     }
 }
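For context on the concurrency pattern this diff adopts in process(): one coroutine per topic launched on a dedicated scope, throughput bounded by a semaphore, all jobs joined before returning, and cancellation via scope.cancel() replacing the old isClosed flag. Below is a minimal standalone sketch of that pattern using only kotlinx.coroutines; the names cleanTopic, topics, and the permit count are illustrative placeholders, not part of this change.

import kotlinx.coroutines.*
import kotlinx.coroutines.sync.Semaphore
import kotlinx.coroutines.sync.withPermit

// Illustrative stand-in for mapTopic(): pretend each topic yields one deleted file.
suspend fun cleanTopic(topic: String): Long {
    delay(100)
    return if (topic.isNotEmpty()) 1L else 0L
}

fun main() = runBlocking {
    // Dedicated scope; cancelling it stops outstanding work, like scope.cancel() in close().
    val scope = CoroutineScope(Dispatchers.Default)
    // Bounds how many topics are processed at once, mirroring fileStoreFactory.workerSemaphore.
    val workerSemaphore = Semaphore(permits = 4)
    val topics = listOf("topicA", "topicB", "topicC")

    topics.map { topic ->
        scope.launch {
            val deleted = workerSemaphore.withPermit { cleanTopic(topic) }
            if (deleted > 0) println("Removed $deleted files in topic $topic")
        }
    }.joinAll() // wait for every per-topic job, as process() does

    scope.cancel()
}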