clowder-framework
diff --git a/‎CHANGELOG.md‎
Lines changed: 11 additions & 1 deletion b/‎CHANGELOG.md‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎app/api/Extractions.scala‎
Lines changed: 32 additions & 19 deletions b/‎app/api/Extractions.scala‎
Lines changed: 32 additions & 19 deletions
diff --git a/‎app/controllers/Extractors.scala‎
Lines changed: 1 addition & 1 deletion b/‎app/controllers/Extractors.scala‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎app/models/Extraction.scala‎
Lines changed: 4 additions & 2 deletions b/‎app/models/Extraction.scala‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎app/services/ElasticsearchPlugin.scala‎
Lines changed: 41 additions & 30 deletions b/‎app/services/ElasticsearchPlugin.scala‎
Lines changed: 41 additions & 30 deletions
diff --git a/‎app/services/ExtractionService.scala‎
Lines changed: 2 additions & 0 deletions b/‎app/services/ExtractionService.scala‎
Lines changed: 2 additions & 0 deletions
@@ -4,10 +4,20 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
 
-## Unreleased
+## [Unreleased]
 
 ### Added
 - Datasets downloaded with Clowder now include DataCite v4 XML files in the output /metadata folder for interoperability purposes.
+- Script to clean extractors' tmp files.
+- Script for RabbitMQ error queue cleanup.
+
+### Changed
+- Improved simple test to report all day success.
+- Expose a read-only list of extractors to all users.
+
+### Fixed
+- Escape colon characters on search values for search box and advanced search to allow those values in a search.
+- Typesafe now only offers HTTPS access.
 
 ## 1.10.1 - 2020-07-16
 
 
@@ -16,6 +16,7 @@ import play.api.libs.concurrent.Execution.Implicits._
 import play.api.libs.json.Json._
 import play.api.libs.json._
 import play.api.libs.ws.{Response, WS}
+import play.api.libs.functional.syntax._
 import play.api.mvc.MultipartFormData
 import services._
 
@@ -129,29 +130,31 @@ class Extractions @Inject()(
   }
 
   /**
-   * *
-   * For DTS service use case: suppose a user posts a file to the extractions API, no extractors and its corresponding queues in the Rabbitmq are available. Now she checks the status
-   * for extractors, i.e., if any new extractor has subscribed to the Rabbitmq. If yes, she may again wants to submit the file for extraction again. Since she has already uploaded
-   * it, this time will just uses the file id to submit the request again.
-   * This API takes file id and notifies the user that the request has been sent for processing.
-   * This may change depending on our our design on DTS extraction service.
    *
+   * Given a file id (UUID), submit this file for extraction
    */
   def submitExtraction(id: UUID) = PermissionAction(Permission.ViewFile, Some(ResourceRef(ResourceRef.file, id)))(parse.json) { implicit request =>
     current.plugin[RabbitmqPlugin] match {
       case Some(plugin) => {
         if (UUID.isValid(id.stringify)) {
           files.get(id) match {
             case Some(file) => {
-              current.plugin[RabbitmqPlugin].foreach {
-                // FIXME dataset not available?
-                _.fileCreated(file, None, Utils.baseUrl(request), request.apiKey)
+              // FIXME dataset not available?
+              plugin.fileCreated(file, None, Utils.baseUrl(request), request.apiKey) match {
+                case Some(jobId) => {
+                  Ok(Json.obj("status" -> "OK", "job_id" -> jobId))
+                }
+                case None => {
+                  val message = "No jobId found for Extraction"
+                  Logger.error(message)
+                  InternalServerError(toJson(Map("status" -> "KO", "message" -> message)))
+                }
               }
-              Ok("Sent for Extraction. check the status")
             }
-            case None =>
+            case None => {
               Logger.error("Could not retrieve file that was just saved.")
               InternalServerError("Error uploading file")
+            }
           } //file match
         } // if Object id
         else {
@@ -529,12 +532,20 @@ class Extractions @Inject()(
               // if extractor_id is not specified default to execution of all extractors matching mime type
               val key = (request.body \ "extractor").asOpt[String] match {
                 case Some(extractorId) =>
-                  p.submitFileManually(new UUID(originalId), file, Utils.baseUrl(request), extractorId, extra,
+                  val job_id = p.submitFileManually(new UUID(originalId), file, Utils.baseUrl(request), extractorId, extra,
                     datasetId, newFlags, request.apiKey, request.user)
+                  Ok(Json.obj("status" -> "OK", "job_id" -> job_id))
                 case None =>
-                  p.fileCreated(file, None, Utils.baseUrl(request), request.apiKey)
+                  p.fileCreated(file, None, Utils.baseUrl(request), request.apiKey) match {
+                    case Some(job_id) => {
+                      Ok(Json.obj("status" -> "OK", "job_id" -> job_id))
+                    }
+                    // The error response belongs to this branch only. Without a None case the
+                    // match is non-exhaustive and throws MatchError when no job id is returned.
+                    case None => {
+                      val message = "No jobId found for Extraction on fileid=" + file_id.stringify
+                      Logger.error(message)
+                      InternalServerError(toJson(Map("status" -> "KO", "msg" -> message)))
+                    }
+                  }
               }
-              Ok(Json.obj("status" -> "OK"))
+
+              // `key` holds the response built by the match above. It must be the last
+              // expression of this branch; otherwise the Ok(...) results are discarded
+              // and every request ends in the error response.
+              key
             } else {
               Conflict(toJson(Map("status" -> "error", "msg" -> "File is not ready. Please wait and try again.")))
             }
@@ -570,8 +581,8 @@ class Extractions @Inject()(
               "parameters" -> parameters.toString,
               "action" -> "manual-submission")
 
-            p.submitDatasetManually(host, key, extra, ds_id, "", request.apiKey, request.user)
-            Ok(Json.obj("status" -> "OK"))
+            val job_id = p.submitDatasetManually(host, key, extra, ds_id, "", request.apiKey, request.user)
+            Ok(Json.obj("status" -> "OK", "job_id" -> job_id))
           }
           case None =>
             BadRequest(toJson(Map("request" -> "Dataset not found")))
@@ -597,9 +608,10 @@ class Extractions @Inject()(
             // check that the file is ready for processing
             if (file.status.equals(models.FileStatus.PROCESSED.toString)) {
               (request.body \ "extractor").asOpt[String] match {
-                case Some(extractorId) =>
+                case Some(extractorId) => {
                   p.cancelPendingSubmission(file_id, extractorId, msg_id)
-                    Ok(Json.obj("status" -> "OK"))
+                  Ok(Json.obj("status" -> "OK"))
+                }
                 case None =>
                   BadRequest(toJson(Map("request" -> "extractor field not found")))
               }
@@ -623,9 +635,10 @@ class Extractions @Inject()(
         datasets.get(ds_id) match {
           case Some(ds) => {
             (request.body \ "extractor").asOpt[String] match {
-              case Some(extractorId) =>
+              case Some(extractorId) => {
                 p.cancelPendingSubmission(ds_id, extractorId, msg_id)
                 Ok(Json.obj("status" -> "OK"))
+              }
               case None => BadRequest(toJson(Map("request" -> "extractor field not found")))
             }
           }
 
@@ -71,7 +71,7 @@ class Extractors  @Inject() (extractions: ExtractionService,
 
 
 
-  def showExtractorInfo(extractorName: String) = ServerAdminAction { implicit request =>
+  def showExtractorInfo(extractorName: String) = AuthenticatedAction { implicit request =>
     implicit val user = request.user
     val targetExtractor = extractorService.listExtractorsInfo(List.empty).find(p => p.name == extractorName)
     targetExtractor match {
 
@@ -12,9 +12,10 @@ import play.api.libs.functional.syntax._
 case class Extraction(
   id: UUID = UUID.generate,
   file_id: UUID,
+  job_id: Option[UUID],
   extractor_id: String,
   status: String = "N/A",
-  start: Option[Date],
+  start: Date,
   end: Option[Date])
 
 /**
@@ -169,9 +170,10 @@ case class ExtractorProcessTriggers(dataset: List[String] = List.empty,
                                     file: List[String] = List.empty,
                                     metadata: List[String] = List.empty)
 
+
 case class ExtractionGroup(
                           firstMsgTime: String,
                           latestMsgTime: String,
                           latestMsg: String,
-                          allMsgs: List[Extraction]
+                          allMsgs: Map[UUID, List[Extraction]]
                           )
@@ -746,7 +746,8 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
         if (key == "_all")
           builder.startObject().startObject("regexp").field("_all", wrapRegex(value)).endObject().endObject()
         else
-          builder.startObject().startObject("query_string").field("default_field", key).field("query", value).endObject().endObject()
+          builder.startObject().startObject("query_string").field("default_field", key)
+            .field("query", "\""+value+"\"").endObject().endObject()
       }
       case _ => {}
     }
@@ -848,54 +849,64 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
 
     // Use regex to split string into a list, preserving quoted phrases as single value
     val matches = ListBuffer[String]()
-    val m = Pattern.compile("([^\"]\\S*|\".+?\")\\s*").matcher(query.replace(":", " "))
+    val m = Pattern.compile("([^\"]\\S*|\".+?\")\\s*").matcher(query)
     while (m.find()) {
-      matches += m.group(1).replace("\"", "").replace("__", " ")
+      // Remove quotes, expand "__" to a space, then drop at most one leading and one
+      // trailing colon. stripSuffix removes exactly the colon; the earlier
+      // substring(0, length-2) also cut the last real character and threw
+      // StringIndexOutOfBoundsException for the token "::".
+      val mat = m.group(1).replace("\"", "").replace("__", " ").stripPrefix(":").stripSuffix(":")
+      matches += mat
     }
 
     // If a term is specified that isn't in this list, it's assumed to be a metadata field
     val official_terms = List("name", "creator", "email", "resource_type", "in", "contains", "tag")
 
-    // Create list of "key:value" terms for parsing by builder
-    val terms = ListBuffer[String]()
-    var currterm = ""
+    // Create list of (key, operator, value) for passing to builder
+    val terms = ListBuffer[(String, String, String)]()
+    var currkey = "_all"
+    var curropr = ":" // Defaults to 'contains' match on _all if no key:value pairs are found (assumes whole string is the value)
+    var currval = ""
     matches.foreach(mt => {
-      // Determine if the string was a key or value
+      // Check whether the current term appears before or after one of the operators, and what the operator is
+      var entryType = "value"
       if (query.contains(mt+":") || query.contains("\""+mt+"\":")) {
+        entryType = "key"
+      }
+
+      // Determine if the string was a key or value
+      if (entryType == "key") {
         // Do some user-friendly replacement
         if (mt == "tag")
-          currterm += "tags:"
+          currkey = "tags"
         else if (mt == "in")
-          currterm += "child_of:"
+          currkey = "child_of"
         else if (mt == "contains")
-          currterm += "parent_of:"
+          currkey = "parent_of"
         else if (mt == "creator")
-          currterm += "creator_name:"
+          currkey = "creator_name"
         else if (mt == "email")
-          currterm += "creator_email:"
+          currkey = "creator_email"
         else if (!official_terms.contains(mt))
-          currterm += "metadata."+mt+":"
+          currkey = "metadata."+mt
         else
-          currterm += mt+":"
-      } else if (query.contains(":"+mt) || query.contains(":\""+mt+"\"")) {
-        currterm += mt.toLowerCase()
-        terms += currterm
-        currterm = ""
-      } else {
-        terms += "_all:"+mt.toLowerCase()
+          currkey = mt
+      } else if (entryType == "value") {
+        currval += mt.toLowerCase()
+        terms += ((currkey, curropr, currval))
+        currkey = "_all"
+        currval = ""
       }
     })
 
     var builder = jsonBuilder().startObject().startObject("bool")
 
     // First, populate the MUST portion of Bool query
     var populatedMust = false
-    terms.map(term => {
+    terms.map(entry => {
+      val key = entry._1
+      val curropr = entry._2
+      val value = entry._3
       for (operator <- mustOperators) {
-        if (term.contains(operator)) {
-          val key = term.substring(0, term.indexOf(operator))
-          val value = term.substring(term.indexOf(operator)+1, term.length)
-
+        if (curropr == operator) {
           // Only add a MUST object if we have terms to populate it; empty objects break Elasticsearch
           if (mustOperators.contains(operator) && !populatedMust) {
             builder.startArray("must")
@@ -939,12 +950,12 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
 
     // Second, populate the MUST NOT portion of Bool query
     var populatedMustNot = false
-    terms.map(term => {
+    terms.map(entry => {
+      val key = entry._1
+      val curropr = entry._2
+      val value = entry._3
       for (operator <- mustNotOperators) {
-        if (term.contains(operator)) {
-          val key = term.substring(0, term.indexOf(operator))
-          val value = term.substring(term.indexOf(operator), term.length)
-
+        if (curropr == operator) {
           // Only add a MUST NOT object if we have terms to populate it; empty objects break Elasticsearch
           if (mustNotOperators.contains(operator) && !populatedMustNot) {
             builder.startArray("must_not")
 
@@ -16,6 +16,8 @@ trait ExtractionService {
 
   def findAll(max: Int = 100): List[Extraction]
 
+  def get(msgId: UUID): Option[Extraction]
+
   def findById(resource: ResourceRef): List[Extraction]
 
   def insert(extraction: Extraction): Option[ObjectId]