Skip to content

Commit 8db88b4

Browse files
authored
Merge pull request #45 from clowder-framework/Fix-colon-values-in-search-strings-(master)
Better support for colon characters in search
2 parents a4a7fef + 886847a commit 8db88b4

File tree

2 files changed

+42
-30
lines changed

2 files changed

+42
-30
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1515
- Expose a read-only list of extractors to all users.
1616

1717
### Fixed
18+
- Escape colon characters in search values for the search box and advanced search, so that values containing colons can be searched.
1819
- typesafe now only offers https access
1920
[#49](https://github.com/clowder-framework/clowder/issues/49)
2021

app/services/ElasticsearchPlugin.scala

Lines changed: 41 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,8 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
746746
if (key == "_all")
747747
builder.startObject().startObject("regexp").field("_all", wrapRegex(value)).endObject().endObject()
748748
else
749-
builder.startObject().startObject("query_string").field("default_field", key).field("query", value).endObject().endObject()
749+
builder.startObject().startObject("query_string").field("default_field", key)
750+
.field("query", "\""+value+"\"").endObject().endObject()
750751
}
751752
case _ => {}
752753
}
@@ -848,54 +849,64 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
848849

849850
// Use regex to split string into a list, preserving quoted phrases as single value
850851
val matches = ListBuffer[String]()
851-
val m = Pattern.compile("([^\"]\\S*|\".+?\")\\s*").matcher(query.replace(":", " "))
852+
val m = Pattern.compile("([^\"]\\S*|\".+?\")\\s*").matcher(query)
852853
while (m.find()) {
853-
matches += m.group(1).replace("\"", "").replace("__", " ")
854+
var mat = m.group(1).replace("\"", "").replace("__", " ")
855+
if (mat.startsWith(":")) mat = mat.substring(1)
856+
if (mat.endsWith(":")) mat = mat.substring(0, mat.length-2)
857+
matches += mat
854858
}
855859

856860
// If a term is specified that isn't in this list, it's assumed to be a metadata field
857861
val official_terms = List("name", "creator", "email", "resource_type", "in", "contains", "tag")
858862

859-
// Create list of "key:value" terms for parsing by builder
860-
val terms = ListBuffer[String]()
861-
var currterm = ""
863+
// Create list of (key, operator, value) for passing to builder
864+
val terms = ListBuffer[(String, String, String)]()
865+
var currkey = "_all"
866+
var curropr = ":" // Defaults to 'contains' match on _all if no key:value pairs are found (assumes whole string is the value)
867+
var currval = ""
862868
matches.foreach(mt => {
863-
// Determine if the string was a key or value
869+
// Check whether the current term appears before or after one of the operators, and which operator it is
870+
var entryType = "value"
864871
if (query.contains(mt+":") || query.contains("\""+mt+"\":")) {
872+
entryType = "key"
873+
}
874+
875+
// Determine if the string was a key or value
876+
if (entryType == "key") {
865877
// Do some user-friendly replacement
866878
if (mt == "tag")
867-
currterm += "tags:"
879+
currkey = "tags"
868880
else if (mt == "in")
869-
currterm += "child_of:"
881+
currkey = "child_of"
870882
else if (mt == "contains")
871-
currterm += "parent_of:"
883+
currkey = "parent_of"
872884
else if (mt == "creator")
873-
currterm += "creator_name:"
885+
currkey = "creator_name"
874886
else if (mt == "email")
875-
currterm += "creator_email:"
887+
currkey = "creator_email"
876888
else if (!official_terms.contains(mt))
877-
currterm += "metadata."+mt+":"
889+
currkey = "metadata."+mt
878890
else
879-
currterm += mt+":"
880-
} else if (query.contains(":"+mt) || query.contains(":\""+mt+"\"")) {
881-
currterm += mt.toLowerCase()
882-
terms += currterm
883-
currterm = ""
884-
} else {
885-
terms += "_all:"+mt.toLowerCase()
891+
currkey = mt
892+
} else if (entryType == "value") {
893+
currval += mt.toLowerCase()
894+
terms += ((currkey, curropr, currval))
895+
currkey = "_all"
896+
currval = ""
886897
}
887898
})
888899

889900
var builder = jsonBuilder().startObject().startObject("bool")
890901

891902
// First, populate the MUST portion of Bool query
892903
var populatedMust = false
893-
terms.map(term => {
904+
terms.map(entry => {
905+
val key = entry._1
906+
val curropr = entry._2
907+
val value = entry._3
894908
for (operator <- mustOperators) {
895-
if (term.contains(operator)) {
896-
val key = term.substring(0, term.indexOf(operator))
897-
val value = term.substring(term.indexOf(operator)+1, term.length)
898-
909+
if (curropr == operator) {
899910
// Only add a MUST object if we have terms to populate it; empty objects break Elasticsearch
900911
if (mustOperators.contains(operator) && !populatedMust) {
901912
builder.startArray("must")
@@ -939,12 +950,12 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
939950

940951
// Second, populate the MUST NOT portion of Bool query
941952
var populatedMustNot = false
942-
terms.map(term => {
953+
terms.map(entry => {
954+
val key = entry._1
955+
val curropr = entry._2
956+
val value = entry._3
943957
for (operator <- mustNotOperators) {
944-
if (term.contains(operator)) {
945-
val key = term.substring(0, term.indexOf(operator))
946-
val value = term.substring(term.indexOf(operator), term.length)
947-
958+
if (curropr == operator) {
948959
// Only add a MUST NOT object if we have terms to populate it; empty objects break Elasticsearch
949960
if (mustNotOperators.contains(operator) && !populatedMustNot) {
950961
builder.startArray("must_not")

0 commit comments

Comments
 (0)