Skip to content

Commit 18ebd0d

Browse files
bodom0015 and lmarini authored
Update available archival config options (#194)
* Simplify available configuration options, remove archiveDebug * Update CHANGELOG * Fix typo Unit -> Units Co-authored-by: Luigi Marini <[email protected]>
1 parent 97f58dd commit 18ebd0d

File tree

4 files changed

+51
-54
lines changed

4 files changed

+51
-54
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1313
- Added a `sort` and `order` parameter to `/api/search` endpoint that supports date and numeric field sorting. If only order is specified, created date is used. String fields are not currently supported.
1414
- Added a new `/api/deleteindex` admin endpoint that will queue an action to delete an Elasticsearch index (usually prior to a reindex).
1515

16+
### Changed
17+
- Consolidated field names sent by the EventSinkService to maximize reuse.
18+
- Reworked auto-archival configuration options to make their meanings more clear.
19+
1620
## 1.15.1 - 2021-03-12
1721

1822
### Fixed

app/Global.scala

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,21 +64,18 @@ object Global extends WithFilters(new GzipFilter(), new Jsonp(), CORSFilter()) w
6464

6565
val archiveEnabled = Play.application.configuration.getBoolean("archiveEnabled", false)
6666
if (archiveEnabled && archivalTimer == null) {
67-
val archiveDebug = Play.application.configuration.getBoolean("archiveDebug", false)
68-
val interval = if (archiveDebug) { 5 minutes } else { 1 day }
69-
70-
// Determine time until next midnight
71-
val now = ZonedDateTime.now
72-
val midnight = now.truncatedTo(ChronoUnit.DAYS)
73-
val sinceLastMidnight = Duration.between(midnight, now).getSeconds
74-
val delay = if (archiveDebug) { 10 seconds } else {
75-
(Duration.ofDays(1).getSeconds - sinceLastMidnight) seconds
76-
}
77-
78-
Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval)
79-
archivalTimer = Akka.system.scheduler.schedule(delay, interval) {
80-
Logger.info("Starting auto archive process...")
81-
files.autoArchiveCandidateFiles()
67+
// Set archiveAutoInterval == 0 to disable auto archiving
68+
val archiveAutoInterval = Play.application.configuration.getLong("archiveAutoInterval", 0)
69+
if (archiveAutoInterval > 0) {
70+
val interval = FiniteDuration(archiveAutoInterval, SECONDS)
71+
val archiveAutoDelay = Play.application.configuration.getLong("archiveAutoDelay", 0)
72+
val delay = FiniteDuration(archiveAutoDelay, SECONDS)
73+
74+
Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval)
75+
archivalTimer = Akka.system.scheduler.schedule(delay, interval) {
76+
Logger.info("Starting auto archive process...")
77+
files.autoArchiveCandidateFiles()
78+
}
8279
}
8380
}
8481

app/services/mongodb/MongoDBFileService.scala

Lines changed: 26 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,23 @@ import play.api.mvc.Request
44
import services._
55
import models._
66
import com.mongodb.casbah.commons.{Imports, MongoDBObject}
7-
import java.text.SimpleDateFormat
87

8+
import java.text.SimpleDateFormat
99
import _root_.util.{License, Parsers, SearchUtils}
1010

1111
import scala.collection.mutable.ListBuffer
1212
import Transformation.LidoToCidocConvertion
13-
import java.util.{ArrayList, Calendar}
14-
import java.io._
1513

14+
import java.util.{ArrayList, Calendar, Date}
15+
import java.io._
1616
import org.apache.commons.io.FileUtils
1717
import org.json.JSONObject
1818
import play.api.libs.json.{JsValue, Json}
1919
import com.mongodb.util.JSON
20+
2021
import java.nio.file.{FileSystems, Files}
2122
import java.nio.file.attribute.BasicFileAttributes
22-
import java.time.LocalDateTime
23-
23+
import java.time.Instant
2424
import collection.JavaConverters._
2525
import scala.collection.JavaConversions._
2626
import javax.inject.{Inject, Singleton}
@@ -31,15 +31,16 @@ import scala.util.parsing.json.JSONArray
3131
import play.api.libs.json.JsArray
3232
import models.File
3333
import play.api.libs.json.JsObject
34-
import java.util.Date
35-
3634
import com.novus.salat.dao.{ModelCompanion, SalatDAO}
3735
import MongoContext.context
3836
import play.api.Play._
3937
import com.mongodb.casbah.Imports._
4038
import models.FileStatus.FileStatus
4139
import org.bson.types.ObjectId
4240

41+
import java.time.temporal.ChronoUnit
42+
import scala.concurrent.duration.FiniteDuration
43+
4344

4445
/**
4546
* Use mongo for both metadata and blobs.
@@ -201,48 +202,41 @@ class MongoDBFileService @Inject() (
201202
* This may be expanded to support per-space configuration in the future.
202203
*
203204
* Reads the following parameters from Clowder configuration:
204-
* - archiveAutoAfterDaysInactive - timeout after which files are considered
205+
* - archiveAutoAfterInactiveCount - timeout after which files are considered
205206
* to be candidates for archival (see below)
206-
* - archiveMinimumStorageSize - files below this size (in Bytes) should not be archived
207+
* - archiveAutoAfterInactiveUnits - time unit that should be used for the timeout (see below)
208+
* - archiveAutoAboveMinimumStorageSize - files below this size (in Bytes) should not be archived
207209
* - clowder.rabbitmq.clowderurl - the Clowder hostname to pass to the archival extractor
208210
* - commKey - the admin key to pass to the archival extractor
209211
*
210212
* Archival candidates are currently defined as follows:
211-
* - file must be over `archiveMinimumStorageSize` Bytes in size
212-
* - file must be over `archiveAutoAfterDaysInactive` days old
213+
* - file's size must be greater than `archiveAutoAboveMinimumStorageSize` Bytes
214+
* - file's age must be greater than `archiveAutoAfterInactiveCount` * `archiveAutoAfterInactiveUnits`
215+
* (e.g. 10 days old)
213216
* - AND one of the following must be true:
214217
* - file has never been downloaded (0 downloads)
215218
* OR
216-
* - file has not been downloaded in the past `archiveAutoAfterDaysInactive` days
219+
* - file has not been downloaded in the past `archiveAutoAfterInactiveCount` `archiveAutoAfterInactiveUnits`
217220
*
218221
*
219222
*/
220223
def autoArchiveCandidateFiles() = {
221-
val timeout = configuration(play.api.Play.current).getInt("archiveAutoAfterDaysInactive")
224+
val timeout: Option[Long] = configuration(play.api.Play.current).getLong("archiveAutoAfterInactiveCount")
222225
timeout match {
223226
case None => Logger.info("No archival auto inactivity timeout set - skipping auto archival loop.")
224-
case Some(days) => {
225-
if (days == 0) {
227+
case Some(inactiveTimeout) => {
228+
if (inactiveTimeout == 0) {
226229
Logger.info("Archival auto inactivity timeout set to 0 - skipping auto archival loop.")
227230
} else {
228-
// DEBUG ONLY: query for files that were uploaded within the past hour
229-
val archiveDebug = configuration(play.api.Play.current).getBoolean("archiveDebug").getOrElse(false)
230-
val oneHourAgo = LocalDateTime.now.minusHours(1).toString + "-00:00"
231-
232-
// Query for files that haven't been accessed for at least this many days
233-
val daysAgo = LocalDateTime.now.minusDays(days).toString + "-00:00"
234-
val notDownloadedWithinTimeout = if (archiveDebug) {
235-
("stats.last_downloaded" $gte Parsers.fromISO8601(oneHourAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
236-
} else {
237-
("stats.last_downloaded" $lt Parsers.fromISO8601(daysAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
238-
}
231+
val unit = configuration(play.api.Play.current).getString("archiveAutoAfterInactiveUnits").getOrElse("days")
232+
val timeoutAgo = FiniteDuration(inactiveTimeout, unit)
233+
234+
// Query for files that haven't been accessed for at least this many units
235+
val since = Instant.now().minus(timeoutAgo.length.toLong, ChronoUnit.valueOf(timeoutAgo.unit.toString)).toString + "-00:00"
236+
val notDownloadedWithinTimeout = ("stats.last_downloaded" $lt Parsers.fromISO8601(since)) ++ ("status" $eq FileStatus.PROCESSED.toString)
239237

240238
// Include files that have never been downloaded, but make sure they are old enough
241-
val neverDownloaded = if (archiveDebug) {
242-
("stats.downloads" $eq 0) ++ ("uploadDate" $gte Parsers.fromISO8601(oneHourAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
243-
} else {
244-
("stats.downloads" $eq 0) ++ ("uploadDate" $lt Parsers.fromISO8601(daysAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
245-
}
239+
val neverDownloaded = ("stats.downloads" $eq 0) ++ ("uploadDate" $lt Parsers.fromISO8601(since)) ++ ("status" $eq FileStatus.PROCESSED.toString)
246240

247241
// TODO: How to get host / apiKey / admin internally without a request?
248242
val host = configuration(play.api.Play.current).getString("clowder.rabbitmq.clowderurl").getOrElse("http://localhost:9000")
@@ -257,7 +251,7 @@ class MongoDBFileService @Inject() (
257251
Logger.info("Archival candidates found: " + matchingFiles.length)
258252

259253
// Exclude candidates that do not exceed our minimum file size threshold
260-
val minSize = configuration(play.api.Play.current).getLong("archiveMinimumStorageSize").getOrElse(1000000L)
254+
val minSize = configuration(play.api.Play.current).getLong("archiveAutoAboveMinimumStorageSize").getOrElse(1000000L)
261255

262256
// Loop all candidate files and submit each one for archival
263257
for (file <- matchingFiles) {

conf/application.conf

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -587,18 +587,20 @@ enableUsernamePassword = true
587587
# "archive" and "unarchive" should be purely inverse operations, such
588588
# that unarchive(archive(x)) == x for any valid input.
589589
#
590-
# Available archival extractors:
591-
# - ncsa.archival.disk - https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/extractors-archival-disk/browse
592-
# - ncsa.archival.s3 - https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/extractors-archival-s3/browse
590+
# See https://github.com/clowder-framework/extractors-archival for available extractors
593591
#
594592
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
595593
archiveEnabled=false
596-
archiveDebug=false
594+
archiveAllowUnarchive=false
597595
#archiveExtractorId="ncsa.archival.s3"
598596
archiveExtractorId="ncsa.archival.disk"
599-
archiveAllowUnarchive=false
600-
archiveAutoAfterDaysInactive=90
601-
archiveMinimumStorageSize=1000000
597+
598+
# NOTE: Setting interval to zero will disable automatic archiving
599+
archiveAutoInterval=0 # in seconds (e.g. 86400 == 24 hours)
600+
archiveAutoDelay=120 # delay before the first archival run, in seconds (e.g. 120 == 2 minutes)
601+
archiveAutoAfterInactiveCount=90 # NOTE: Setting count to zero will disable automatic archiving
602+
archiveAutoAfterInactiveUnits="days"
603+
archiveAutoAboveMinimumStorageSize=1000000
602604

603605
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
604606
# Configuration file for securesocial

0 commit comments

Comments
 (0)