Skip to content

Commit 94a0450

Browse files
authored
Merge branch 'develop' into rda-recommendations
2 parents 07a5102 + 8db88b4 commit 94a0450

25 files changed

+670
-213
lines changed

CHANGELOG.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,20 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](http://keepachangelog.com/)
55
and this project adheres to [Semantic Versioning](http://semver.org/).
66

7-
## Unreleased
7+
## [Unreleased]
88

99
### Added
1010
- Datasets downloaded with Clowder now include DataCite v4 XML files in the output /metadata folder for interoperability purposes.
11+
- Script to clean extractors' tmp files.
12+
- Script for RabbitMQ error queue cleanup.
13+
14+
### Changed
15+
- Improved simple test to report all day success.
16+
- Expose a read-only list of extractors to all users.
17+
18+
### Fixed
19+
- Escape colon characters in search values for the search box and advanced search so that those values can be used in a search.
20+
- Typesafe now only offers HTTPS access.
1121

1222
## 1.10.1 - 2020-07-16
1323

app/api/Extractions.scala

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import play.api.libs.concurrent.Execution.Implicits._
1616
import play.api.libs.json.Json._
1717
import play.api.libs.json._
1818
import play.api.libs.ws.{Response, WS}
19+
import play.api.libs.functional.syntax._
1920
import play.api.mvc.MultipartFormData
2021
import services._
2122

@@ -129,29 +130,31 @@ class Extractions @Inject()(
129130
}
130131

131132
/**
132-
* *
133-
* For DTS service use case: suppose a user posts a file to the extractions API, no extractors and its corresponding queues in the Rabbitmq are available. Now she checks the status
134-
* for extractors, i.e., if any new extractor has subscribed to the Rabbitmq. If yes, she may want to submit the file for extraction again. Since she has already uploaded
135-
* it, this time she will just use the file id to submit the request again.
136-
* This API takes file id and notifies the user that the request has been sent for processing.
137-
* This may change depending on our design of the DTS extraction service.
138133
*
134+
* Given a file id (UUID), submit this file for extraction
139135
*/
140136
def submitExtraction(id: UUID) = PermissionAction(Permission.ViewFile, Some(ResourceRef(ResourceRef.file, id)))(parse.json) { implicit request =>
141137
current.plugin[RabbitmqPlugin] match {
142138
case Some(plugin) => {
143139
if (UUID.isValid(id.stringify)) {
144140
files.get(id) match {
145141
case Some(file) => {
146-
current.plugin[RabbitmqPlugin].foreach {
147-
// FIXME dataset not available?
148-
_.fileCreated(file, None, Utils.baseUrl(request), request.apiKey)
142+
// FIXME dataset not available?
143+
plugin.fileCreated(file, None, Utils.baseUrl(request), request.apiKey) match {
144+
case Some(jobId) => {
145+
Ok(Json.obj("status" -> "OK", "job_id" -> jobId))
146+
}
147+
case None => {
148+
val message = "No jobId found for Extraction"
149+
Logger.error(message)
150+
InternalServerError(toJson(Map("status" -> "KO", "message" -> message)))
151+
}
149152
}
150-
Ok("Sent for Extraction. check the status")
151153
}
152-
case None =>
154+
case None => {
153155
Logger.error("Could not retrieve file that was just saved.")
154156
InternalServerError("Error uploading file")
157+
}
155158
} //file match
156159
} // if Object id
157160
else {
@@ -529,12 +532,20 @@ class Extractions @Inject()(
529532
// if extractor_id is not specified default to execution of all extractors matching mime type
530533
val key = (request.body \ "extractor").asOpt[String] match {
531534
case Some(extractorId) =>
532-
p.submitFileManually(new UUID(originalId), file, Utils.baseUrl(request), extractorId, extra,
535+
val job_id = p.submitFileManually(new UUID(originalId), file, Utils.baseUrl(request), extractorId, extra,
533536
datasetId, newFlags, request.apiKey, request.user)
537+
Ok(Json.obj("status" -> "OK", "job_id" -> job_id))
534538
case None =>
535-
p.fileCreated(file, None, Utils.baseUrl(request), request.apiKey)
539+
p.fileCreated(file, None, Utils.baseUrl(request), request.apiKey) match {
540+
case Some(job_id) => {
541+
Ok(Json.obj("status" -> "OK", "job_id" -> job_id))
542+
}
543+
}
536544
}
537-
Ok(Json.obj("status" -> "OK"))
545+
546+
val message = "No jobId found for Extraction on fileid=" + file_id.stringify
547+
Logger.error(message)
548+
InternalServerError(toJson(Map("status" -> "KO", "msg" -> message)))
538549
} else {
539550
Conflict(toJson(Map("status" -> "error", "msg" -> "File is not ready. Please wait and try again.")))
540551
}
@@ -570,8 +581,8 @@ class Extractions @Inject()(
570581
"parameters" -> parameters.toString,
571582
"action" -> "manual-submission")
572583

573-
p.submitDatasetManually(host, key, extra, ds_id, "", request.apiKey, request.user)
574-
Ok(Json.obj("status" -> "OK"))
584+
val job_id = p.submitDatasetManually(host, key, extra, ds_id, "", request.apiKey, request.user)
585+
Ok(Json.obj("status" -> "OK", "job_id" -> job_id))
575586
}
576587
case None =>
577588
BadRequest(toJson(Map("request" -> "Dataset not found")))
@@ -597,9 +608,10 @@ class Extractions @Inject()(
597608
// check that the file is ready for processing
598609
if (file.status.equals(models.FileStatus.PROCESSED.toString)) {
599610
(request.body \ "extractor").asOpt[String] match {
600-
case Some(extractorId) =>
611+
case Some(extractorId) => {
601612
p.cancelPendingSubmission(file_id, extractorId, msg_id)
602-
Ok(Json.obj("status" -> "OK"))
613+
Ok(Json.obj("status" -> "OK"))
614+
}
603615
case None =>
604616
BadRequest(toJson(Map("request" -> "extractor field not found")))
605617
}
@@ -623,9 +635,10 @@ class Extractions @Inject()(
623635
datasets.get(ds_id) match {
624636
case Some(ds) => {
625637
(request.body \ "extractor").asOpt[String] match {
626-
case Some(extractorId) =>
638+
case Some(extractorId) => {
627639
p.cancelPendingSubmission(ds_id, extractorId, msg_id)
628640
Ok(Json.obj("status" -> "OK"))
641+
}
629642
case None => BadRequest(toJson(Map("request" -> "extractor field not found")))
630643
}
631644
}

app/controllers/Extractors.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ class Extractors @Inject() (extractions: ExtractionService,
7171

7272

7373

74-
def showExtractorInfo(extractorName: String) = ServerAdminAction { implicit request =>
74+
def showExtractorInfo(extractorName: String) = AuthenticatedAction { implicit request =>
7575
implicit val user = request.user
7676
val targetExtractor = extractorService.listExtractorsInfo(List.empty).find(p => p.name == extractorName)
7777
targetExtractor match {

app/models/Extraction.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@ import play.api.libs.functional.syntax._
1212
case class Extraction(
1313
id: UUID = UUID.generate,
1414
file_id: UUID,
15+
job_id: Option[UUID],
1516
extractor_id: String,
1617
status: String = "N/A",
17-
start: Option[Date],
18+
start: Date,
1819
end: Option[Date])
1920

2021
/**
@@ -169,9 +170,10 @@ case class ExtractorProcessTriggers(dataset: List[String] = List.empty,
169170
file: List[String] = List.empty,
170171
metadata: List[String] = List.empty)
171172

173+
172174
case class ExtractionGroup(
173175
firstMsgTime: String,
174176
latestMsgTime: String,
175177
latestMsg: String,
176-
allMsgs: List[Extraction]
178+
allMsgs: Map[UUID, List[Extraction]]
177179
)

app/services/ElasticsearchPlugin.scala

Lines changed: 41 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,8 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
746746
if (key == "_all")
747747
builder.startObject().startObject("regexp").field("_all", wrapRegex(value)).endObject().endObject()
748748
else
749-
builder.startObject().startObject("query_string").field("default_field", key).field("query", value).endObject().endObject()
749+
builder.startObject().startObject("query_string").field("default_field", key)
750+
.field("query", "\""+value+"\"").endObject().endObject()
750751
}
751752
case _ => {}
752753
}
@@ -848,54 +849,64 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
848849

849850
// Use regex to split string into a list, preserving quoted phrases as single value
850851
val matches = ListBuffer[String]()
851-
val m = Pattern.compile("([^\"]\\S*|\".+?\")\\s*").matcher(query.replace(":", " "))
852+
val m = Pattern.compile("([^\"]\\S*|\".+?\")\\s*").matcher(query)
852853
while (m.find()) {
853-
matches += m.group(1).replace("\"", "").replace("__", " ")
854+
var mat = m.group(1).replace("\"", "").replace("__", " ")
855+
if (mat.startsWith(":")) mat = mat.substring(1)
856+
// Strip exactly one trailing ':' (the key/operator separator); length-1 keeps the term's own last character intact.
if (mat.endsWith(":")) mat = mat.substring(0, mat.length-1)
857+
matches += mat
854858
}
855859

856860
// If a term is specified that isn't in this list, it's assumed to be a metadata field
857861
val official_terms = List("name", "creator", "email", "resource_type", "in", "contains", "tag")
858862

859-
// Create list of "key:value" terms for parsing by builder
860-
val terms = ListBuffer[String]()
861-
var currterm = ""
863+
// Create list of (key, operator, value) for passing to builder
864+
val terms = ListBuffer[(String, String, String)]()
865+
var currkey = "_all"
866+
var curropr = ":" // Defaults to 'contains' match on _all if no key:value pairs are found (assumes whole string is the value)
867+
var currval = ""
862868
matches.foreach(mt => {
863-
// Determine if the string was a key or value
869+
// Check if the current term appears before or after one of the operators, and what the operator is
870+
var entryType = "value"
864871
if (query.contains(mt+":") || query.contains("\""+mt+"\":")) {
872+
entryType = "key"
873+
}
874+
875+
// Determine if the string was a key or value
876+
if (entryType == "key") {
865877
// Do some user-friendly replacement
866878
if (mt == "tag")
867-
currterm += "tags:"
879+
currkey = "tags"
868880
else if (mt == "in")
869-
currterm += "child_of:"
881+
currkey = "child_of"
870882
else if (mt == "contains")
871-
currterm += "parent_of:"
883+
currkey = "parent_of"
872884
else if (mt == "creator")
873-
currterm += "creator_name:"
885+
currkey = "creator_name"
874886
else if (mt == "email")
875-
currterm += "creator_email:"
887+
currkey = "creator_email"
876888
else if (!official_terms.contains(mt))
877-
currterm += "metadata."+mt+":"
889+
currkey = "metadata."+mt
878890
else
879-
currterm += mt+":"
880-
} else if (query.contains(":"+mt) || query.contains(":\""+mt+"\"")) {
881-
currterm += mt.toLowerCase()
882-
terms += currterm
883-
currterm = ""
884-
} else {
885-
terms += "_all:"+mt.toLowerCase()
891+
currkey = mt
892+
} else if (entryType == "value") {
893+
currval += mt.toLowerCase()
894+
terms += ((currkey, curropr, currval))
895+
currkey = "_all"
896+
currval = ""
886897
}
887898
})
888899

889900
var builder = jsonBuilder().startObject().startObject("bool")
890901

891902
// First, populate the MUST portion of Bool query
892903
var populatedMust = false
893-
terms.map(term => {
904+
terms.map(entry => {
905+
val key = entry._1
906+
val curropr = entry._2
907+
val value = entry._3
894908
for (operator <- mustOperators) {
895-
if (term.contains(operator)) {
896-
val key = term.substring(0, term.indexOf(operator))
897-
val value = term.substring(term.indexOf(operator)+1, term.length)
898-
909+
if (curropr == operator) {
899910
// Only add a MUST object if we have terms to populate it; empty objects break Elasticsearch
900911
if (mustOperators.contains(operator) && !populatedMust) {
901912
builder.startArray("must")
@@ -939,12 +950,12 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
939950

940951
// Second, populate the MUST NOT portion of Bool query
941952
var populatedMustNot = false
942-
terms.map(term => {
953+
terms.map(entry => {
954+
val key = entry._1
955+
val curropr = entry._2
956+
val value = entry._3
943957
for (operator <- mustNotOperators) {
944-
if (term.contains(operator)) {
945-
val key = term.substring(0, term.indexOf(operator))
946-
val value = term.substring(term.indexOf(operator), term.length)
947-
958+
if (curropr == operator) {
948959
// Only add a MUST NOT object if we have terms to populate it; empty objects break Elasticsearch
949960
if (mustNotOperators.contains(operator) && !populatedMustNot) {
950961
builder.startArray("must_not")

app/services/ExtractionService.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ trait ExtractionService {
1616

1717
def findAll(max: Int = 100): List[Extraction]
1818

19+
def get(msgId: UUID): Option[Extraction]
20+
1921
def findById(resource: ResourceRef): List[Extraction]
2022

2123
def insert(extraction: Extraction): Option[ObjectId]

0 commit comments

Comments
 (0)