Skip to content

Commit fdb11d5

Browse files
authored
Merge branch 'develop' into dependabot/pip/doc/src/sphinx/pygments-2.7.4
2 parents 21f0c57 + e1a6020 commit fdb11d5

File tree

12 files changed

+574
-62
lines changed

12 files changed

+574
-62
lines changed

CHANGELOG.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,26 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](http://keepachangelog.com/)
55
and this project adheres to [Semantic Versioning](http://semver.org/).
66

7-
## Unreleased
7+
## Unreleased
8+
9+
### Added
10+
- Log an event each time that a user archives or unarchives a file.
11+
12+
## 1.16.0 - 2021-03-31
813

914
### Fixed
1015
- Remove the RabbitMQ plugin from the docker version of clowder
1116

1217
### Added
13-
- Added a `sort` and `order` parameter to `/api/search` endpoint that supports date and numeric field sorting. If only order is specified, created date is used. String fields are not currently supported.
18+
- Added a `sort` and `order` parameter to `/api/search` endpoint that supports date and numeric field sorting.
19+
If only order is specified, created date is used. String fields are not currently supported.
1420
- Added a new `/api/deleteindex` admin endpoint that will queue an action to delete an Elasticsearch index (usually prior to a reindex).
21+
- JMeter testing suite.
22+
23+
### Changed
24+
- Consolidated field names sent by the EventSinkService to maximize reuse.
25+
- Add status column to files report to indicate if files are ARCHIVED, etc.
26+
- Reworked auto-archival configuration options to make their meanings more clear.
1527

1628
## 1.15.1 - 2021-03-12
1729

app/Global.scala

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,21 +64,18 @@ object Global extends WithFilters(new GzipFilter(), new Jsonp(), CORSFilter()) w
6464

6565
val archiveEnabled = Play.application.configuration.getBoolean("archiveEnabled", false)
6666
if (archiveEnabled && archivalTimer == null) {
67-
val archiveDebug = Play.application.configuration.getBoolean("archiveDebug", false)
68-
val interval = if (archiveDebug) { 5 minutes } else { 1 day }
69-
70-
// Determine time until next midnight
71-
val now = ZonedDateTime.now
72-
val midnight = now.truncatedTo(ChronoUnit.DAYS)
73-
val sinceLastMidnight = Duration.between(midnight, now).getSeconds
74-
val delay = if (archiveDebug) { 10 seconds } else {
75-
(Duration.ofDays(1).getSeconds - sinceLastMidnight) seconds
76-
}
77-
78-
Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval)
79-
archivalTimer = Akka.system.scheduler.schedule(delay, interval) {
80-
Logger.info("Starting auto archive process...")
81-
files.autoArchiveCandidateFiles()
67+
// Set archiveAutoInterval == 0 to disable auto archiving
68+
val archiveAutoInterval = Play.application.configuration.getLong("archiveAutoInterval", 0)
69+
if (archiveAutoInterval > 0) {
70+
val interval = FiniteDuration(archiveAutoInterval, SECONDS)
71+
val archiveAutoDelay = Play.application.configuration.getLong("archiveAutoDelay", 0)
72+
val delay = FiniteDuration(archiveAutoDelay, SECONDS)
73+
74+
Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval)
75+
archivalTimer = Akka.system.scheduler.schedule(delay, interval) {
76+
Logger.info("Starting auto archive process...")
77+
files.autoArchiveCandidateFiles()
78+
}
8279
}
8380
}
8481

app/api/Files.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1856,9 +1856,11 @@ class Files @Inject()(
18561856
}
18571857

18581858
def archive(id: UUID) = PermissionAction(Permission.ArchiveFile, Some(ResourceRef(ResourceRef.file, id))) { implicit request =>
1859+
implicit val user = request.user
18591860
files.get(id) match {
18601861
case Some(file) => {
18611862
files.setStatus(id, FileStatus.ARCHIVED)
1863+
sinkService.logFileArchiveEvent(file, user)
18621864
Ok(toJson(Map("status" -> "success")))
18631865
}
18641866
case None => {
@@ -1869,9 +1871,11 @@ class Files @Inject()(
18691871
}
18701872

18711873
def unarchive(id: UUID) = PermissionAction(Permission.ArchiveFile, Some(ResourceRef(ResourceRef.file, id))) { implicit request =>
1874+
implicit val user = request.user
18721875
files.get(id) match {
18731876
case Some(file) => {
18741877
files.setStatus(id, FileStatus.PROCESSED)
1878+
sinkService.logFileUnarchiveEvent(file, user)
18751879
Ok(toJson(Map("status" -> "success")))
18761880
}
18771881
case None => {

app/api/Reporting.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class Reporting @Inject()(selections: SelectionService,
3939
var headerRow = true
4040
val enum = Enumerator.generateM({
4141
val chunk = if (headerRow) {
42-
val header = "type,id,name,owner,owner_id,size_kb,uploaded,views,downloads,last_viewed,last_downloaded,location,parent_datasets,parent_collections,parent_spaces\n"
42+
val header = "type,id,name,owner,owner_id,size_kb,uploaded,views,downloads,last_viewed,last_downloaded,location,parent_datasets,parent_collections,parent_spaces,status\n"
4343
headerRow = false
4444
Some(header.getBytes("UTF-8"))
4545
} else {
@@ -137,7 +137,7 @@ class Reporting @Inject()(selections: SelectionService,
137137

138138
// TODO: This will still fail on excessively large instances without Enumerator refactor - should we maintain this endpoint or remove?
139139

140-
var contents: String = "type,id,name,owner,owner_id,size_kb,uploaded/created,views,downloads,last_viewed,last_downloaded,location,parent_datasets,parent_collections,parent_spaces\n"
140+
var contents: String = "type,id,name,owner,owner_id,size_kb,uploaded/created,views,downloads,last_viewed,last_downloaded,location,parent_datasets,parent_collections,parent_spaces,status\n"
141141

142142
collections.getMetrics().foreach(coll => {
143143
contents += _buildCollectionRow(coll, true)
@@ -288,7 +288,8 @@ class Reporting @Inject()(selections: SelectionService,
288288
contents += "\""+f.loader_id+"\","
289289
contents += "\""+ds_list+"\","
290290
contents += "\""+coll_list+"\","
291-
contents += "\""+space_list+"\""
291+
contents += "\""+space_list+"\","
292+
contents += "\""+f.status+"\""
292293
contents += "\n"
293294

294295
return contents
@@ -343,6 +344,7 @@ class Reporting @Inject()(selections: SelectionService,
343344
if (returnAllColums) contents += "," // datasets do not have parent_datasets
344345
contents += "\""+coll_list+"\","
345346
contents += "\""+space_list+"\""
347+
if (returnAllColums) contents += "," // datasets do not have status
346348
contents += "\n"
347349

348350
return contents
@@ -391,6 +393,7 @@ class Reporting @Inject()(selections: SelectionService,
391393
if (returnAllColums) contents += "," // collections do not have parent_datasets
392394
contents += "\""+coll_list+"\","
393395
contents += "\""+space_list+"\""
396+
if (returnAllColums) contents += "," // collections do not have status
394397
contents += "\n"
395398

396399
return contents

app/services/EventSinkService.scala

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,34 @@ class EventSinkService {
261261
"size" -> (dataset.files.length + dataset.folders.length)
262262
))
263263
}
264+
265+
def logFileArchiveEvent(file: File, archiver: Option[User]) = {
266+
logEvent(Json.obj(
267+
"category" -> "archive",
268+
"type" -> "file",
269+
"resource_id" -> file.id,
270+
"resource_name" -> file.filename,
271+
"author_id" -> file.author.id,
272+
"author_name" -> file.author.fullName,
273+
"user_id" -> archiver.get.id,
274+
"user_name" -> archiver.get.getMiniUser.fullName,
275+
"size" -> file.length
276+
))
277+
}
278+
279+
def logFileUnarchiveEvent(file: File, unarchiver: Option[User]) = {
280+
logEvent(Json.obj(
281+
"category" -> "unarchive",
282+
"type" -> "file",
283+
"resource_id" -> file.id,
284+
"resource_name" -> file.filename,
285+
"author_id" -> file.author.id,
286+
"author_name" -> file.author.fullName,
287+
"user_id" -> unarchiver.get.id,
288+
"user_name" -> unarchiver.get.getMiniUser.fullName,
289+
"size" -> file.length
290+
))
291+
}
264292
}
265293

266294
//case class EventSinkMessage(created: Long, category: String, metadata: JsValue)

app/services/mongodb/MongoDBFileService.scala

Lines changed: 26 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,23 @@ import play.api.mvc.Request
44
import services._
55
import models._
66
import com.mongodb.casbah.commons.{Imports, MongoDBObject}
7-
import java.text.SimpleDateFormat
87

8+
import java.text.SimpleDateFormat
99
import _root_.util.{License, Parsers, SearchUtils}
1010

1111
import scala.collection.mutable.ListBuffer
1212
import Transformation.LidoToCidocConvertion
13-
import java.util.{ArrayList, Calendar}
14-
import java.io._
1513

14+
import java.util.{ArrayList, Calendar, Date}
15+
import java.io._
1616
import org.apache.commons.io.FileUtils
1717
import org.json.JSONObject
1818
import play.api.libs.json.{JsValue, Json}
1919
import com.mongodb.util.JSON
20+
2021
import java.nio.file.{FileSystems, Files}
2122
import java.nio.file.attribute.BasicFileAttributes
22-
import java.time.LocalDateTime
23-
23+
import java.time.Instant
2424
import collection.JavaConverters._
2525
import scala.collection.JavaConversions._
2626
import javax.inject.{Inject, Singleton}
@@ -31,15 +31,16 @@ import scala.util.parsing.json.JSONArray
3131
import play.api.libs.json.JsArray
3232
import models.File
3333
import play.api.libs.json.JsObject
34-
import java.util.Date
35-
3634
import com.novus.salat.dao.{ModelCompanion, SalatDAO}
3735
import MongoContext.context
3836
import play.api.Play._
3937
import com.mongodb.casbah.Imports._
4038
import models.FileStatus.FileStatus
4139
import org.bson.types.ObjectId
4240

41+
import java.time.temporal.ChronoUnit
42+
import scala.concurrent.duration.FiniteDuration
43+
4344

4445
/**
4546
* Use mongo for both metadata and blobs.
@@ -201,48 +202,41 @@ class MongoDBFileService @Inject() (
201202
* This may be expanded to support per-space configuration in the future.
202203
*
203204
* Reads the following parameters from Clowder configuration:
204-
* - archiveAutoAfterDaysInactive - timeout after which files are considered
205+
* - archiveAutoAfterInactiveCount - timeout after which files are considered
205206
* to be candidates for archival (see below)
206-
* - archiveMinimumStorageSize - files below this size (in Bytes) should not be archived
207+
* - archiveAutoAfterInactiveUnits - time unit that should be used for the timeout (see below)
208+
* - archiveAutoAboveMinimumStorageSize - files below this size (in Bytes) should not be archived
207209
* - clowder.rabbitmq.clowderurl - the Clowder hostname to pass to the archival extractor
208210
* - commKey - the admin key to pass to the archival extractor
209211
*
210212
* Archival candidates are currently defined as follows:
211-
* - file must be over `archiveMinimumStorageSize` Bytes in size
212-
* - file must be over `archiveAutoAfterDaysInactive` days old
213+
* - file's size must be greater than `archiveAutoAboveMinimumStorageSize` Bytes
214+
* - file's age must be greater than `archiveAutoAfterInactiveCount` * `archiveAutoAfterInactiveUnits`
215+
* (e.g. 10 days old)
213216
* - AND one of the following must be true:
214217
* - file has never been downloaded (0 downloads)
215218
* OR
216-
* - file has not been downloaded in the past `archiveAutoAfterDaysInactive` days
219+
* - file has not been downloaded in the past `archiveAutoAfterInactiveCount` `archiveAutoAfterInactiveUnits`
217220
*
218221
*
219222
*/
220223
def autoArchiveCandidateFiles() = {
221-
val timeout = configuration(play.api.Play.current).getInt("archiveAutoAfterDaysInactive")
224+
val timeout: Option[Long] = configuration(play.api.Play.current).getLong("archiveAutoAfterInactiveCount")
222225
timeout match {
223226
case None => Logger.info("No archival auto inactivity timeout set - skipping auto archival loop.")
224-
case Some(days) => {
225-
if (days == 0) {
227+
case Some(inactiveTimeout) => {
228+
if (inactiveTimeout == 0) {
226229
Logger.info("Archival auto inactivity timeout set to 0 - skipping auto archival loop.")
227230
} else {
228-
// DEBUG ONLY: query for files that were uploaded within the past hour
229-
val archiveDebug = configuration(play.api.Play.current).getBoolean("archiveDebug").getOrElse(false)
230-
val oneHourAgo = LocalDateTime.now.minusHours(1).toString + "-00:00"
231-
232-
// Query for files that haven't been accessed for at least this many days
233-
val daysAgo = LocalDateTime.now.minusDays(days).toString + "-00:00"
234-
val notDownloadedWithinTimeout = if (archiveDebug) {
235-
("stats.last_downloaded" $gte Parsers.fromISO8601(oneHourAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
236-
} else {
237-
("stats.last_downloaded" $lt Parsers.fromISO8601(daysAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
238-
}
231+
val unit = configuration(play.api.Play.current).getString("archiveAutoAfterInactiveUnits").getOrElse("days")
232+
val timeoutAgo = FiniteDuration(inactiveTimeout, unit)
233+
234+
// Query for files that haven't been accessed for at least this many units
235+
val since = Instant.now().minus(timeoutAgo.length.toLong, ChronoUnit.valueOf(timeoutAgo.unit.toString)).toString + "-00:00"
236+
val notDownloadedWithinTimeout = ("stats.last_downloaded" $lt Parsers.fromISO8601(since)) ++ ("status" $eq FileStatus.PROCESSED.toString)
239237

240238
// Include files that have never been downloaded, but make sure they are old enough
241-
val neverDownloaded = if (archiveDebug) {
242-
("stats.downloads" $eq 0) ++ ("uploadDate" $gte Parsers.fromISO8601(oneHourAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
243-
} else {
244-
("stats.downloads" $eq 0) ++ ("uploadDate" $lt Parsers.fromISO8601(daysAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
245-
}
239+
val neverDownloaded = ("stats.downloads" $eq 0) ++ ("uploadDate" $lt Parsers.fromISO8601(since)) ++ ("status" $eq FileStatus.PROCESSED.toString)
246240

247241
// TODO: How to get host / apiKey / admin internally without a request?
248242
val host = configuration(play.api.Play.current).getString("clowder.rabbitmq.clowderurl").getOrElse("http://localhost:9000")
@@ -257,7 +251,7 @@ class MongoDBFileService @Inject() (
257251
Logger.info("Archival candidates found: " + matchingFiles.length)
258252

259253
// Exclude candidates that do not exceed our minimum file size threshold
260-
val minSize = configuration(play.api.Play.current).getLong("archiveMinimumStorageSize").getOrElse(1000000L)
254+
val minSize = configuration(play.api.Play.current).getLong("archiveAutoAboveMinimumStorageSize").getOrElse(1000000L)
261255

262256
// Loop all candidate files and submit each one for archival
263257
for (file <- matchingFiles) {

conf/application.conf

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -587,18 +587,20 @@ enableUsernamePassword = true
587587
# "archive" and "unarchive" should be purely inverse operations, such
588588
# that unarchive(archive(x)) == x for any valid input.
589589
#
590-
# Available archival extractors:
591-
# - ncsa.archival.disk - https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/extractors-archival-disk/browse
592-
# - ncsa.archival.s3 - https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/extractors-archival-s3/browse
590+
# See https://github.com/clowder-framework/extractors-archival for available extractors
593591
#
594592
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
595593
archiveEnabled=false
596-
archiveDebug=false
594+
archiveAllowUnarchive=false
597595
#archiveExtractorId="ncsa.archival.s3"
598596
archiveExtractorId="ncsa.archival.disk"
599-
archiveAllowUnarchive=false
600-
archiveAutoAfterDaysInactive=90
601-
archiveMinimumStorageSize=1000000
597+
598+
# NOTE: Setting interval to zero will disable automatic archiving
599+
archiveAutoInterval=0 # in seconds (e.g. 86400 == 24 hours)
600+
archiveAutoDelay=120 # in seconds (e.g. 86400 == 24 hours)
601+
archiveAutoAfterInactiveCount=90 # NOTE: Setting count to zero will disable automatic archiving
602+
archiveAutoAfterInactiveUnits="days"
603+
archiveAutoAboveMinimumStorageSize=1000000
602604

603605
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
604606
# Configuration file for securesocial

doc/src/sphinx/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
author = 'Luigi Marini'
2323

2424
# The full version, including alpha/beta/rc tags
25-
release = '1.15.1'
25+
release = '1.16.0'
2626

2727

2828
# -- General configuration ---------------------------------------------------

project/Build.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import NativePackagerKeys._
1313
object ApplicationBuild extends Build {
1414

1515
val appName = "clowder"
16-
val version = "1.15.1"
16+
val version = "1.16.0"
1717
val jvm = "1.7"
1818

1919
def appVersion: String = {

public/swagger.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ info:
99
Clowder is a customizable and scalable data management system to support any
1010
data format and multiple research domains. It is under active development
1111
and deployed for a variety of research projects.
12-
version: 1.15.1
12+
version: 1.16.0
1313
termsOfService: https://clowder.ncsa.illinois.edu/clowder/tos
1414
contact:
1515
name: Clowder

0 commit comments

Comments
 (0)