Skip to content

Commit b298d7f

Browse files
authored
SOLR-14787 - Adding support to use inequalities to the payload check query parser. (#1954)
1 parent 107926e commit b298d7f

File tree

4 files changed

+68
-23
lines changed

4 files changed

+68
-23
lines changed

lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java

100755100644
File mode changed.

solr/core/src/java/org/apache/solr/search/PayloadCheckQParserPlugin.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,16 @@
2121
import java.lang.invoke.MethodHandles;
2222
import java.util.ArrayList;
2323
import java.util.List;
24+
import java.util.Locale;
2425

2526
import org.apache.lucene.analysis.Analyzer;
2627
import org.apache.lucene.analysis.payloads.FloatEncoder;
2728
import org.apache.lucene.analysis.payloads.IdentityEncoder;
2829
import org.apache.lucene.analysis.payloads.IntegerEncoder;
2930
import org.apache.lucene.analysis.payloads.PayloadEncoder;
3031
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery;
32+
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.MatchOperation;
33+
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.PayloadType;
3134
import org.apache.lucene.search.Query;
3235
import org.apache.lucene.search.spans.SpanQuery;
3336
import org.apache.lucene.util.BytesRef;
@@ -53,6 +56,13 @@ public Query parse() throws SyntaxError {
5356
String field = localParams.get(QueryParsing.F);
5457
String value = localParams.get(QueryParsing.V);
5558
String p = localParams.get("payloads");
59+
// payloads and op parameter are probably mutually exclusive. we could consider making a different query
60+
// not a span payload check query, but something that just operates on payloads without the span?
61+
String strOp = localParams.get("op");
62+
MatchOperation op = MatchOperation.EQ;
63+
if (strOp != null) {
64+
op = MatchOperation.valueOf(strOp.toUpperCase(Locale.ROOT));
65+
}
5666

5767
if (field == null) {
5868
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'f' not specified");
@@ -81,12 +91,16 @@ public Query parse() throws SyntaxError {
8191

8292
PayloadEncoder encoder = null;
8393
String e = PayloadUtils.getPayloadEncoder(ft);
94+
PayloadType payloadType = null;
8495
if ("float".equals(e)) { // TODO: centralize this string->PayloadEncoder logic (see DelimitedPayloadTokenFilterFactory)
8596
encoder = new FloatEncoder();
97+
payloadType = PayloadType.FLOAT;
8698
} else if ("integer".equals(e)) {
8799
encoder = new IntegerEncoder();
100+
payloadType = PayloadType.INT;
88101
} else if ("identity".equals(e)) {
89102
encoder = new IdentityEncoder();
103+
payloadType = PayloadType.STRING;
90104
}
91105

92106
if (encoder == null) {
@@ -99,8 +113,7 @@ public Query parse() throws SyntaxError {
99113
if (rawPayload.length() > 0)
100114
payloads.add(encoder.encode(rawPayload.toCharArray()));
101115
}
102-
103-
return new SpanPayloadCheckQuery(query, payloads);
116+
return new SpanPayloadCheckQuery(query, payloads, payloadType, op);
104117
}
105118
};
106119

solr/core/src/test/org/apache/solr/search/TestPayloadCheckQParserPlugin.java

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,20 +46,41 @@ public void test() {
4646
"{!payload_check f=vals_dpi payloads='1 2'}A B",
4747
// "{!payload_check f=vals_dpi payloads='1 2.0'}A B", // ideally this should pass, but IntegerEncoder can't handle "2.0"
4848
"{!payload_check f=vals_dpi payloads='1 2 3'}A B C",
49-
5049
"{!payload_check f=vals_dpf payloads='1 2'}one two",
50+
"{!payload_check f=vals_dpf payloads='1 2' op='eq'}one two",
5151
"{!payload_check f=vals_dpf payloads='1 2.0'}one two", // shows that FloatEncoder can handle "1"
52-
53-
"{!payload_check f=vals_dps payloads='NOUN VERB'}cat jumped"
52+
"{!payload_check f=vals_dps payloads='NOUN VERB'}cat jumped",
53+
"{!payload_check f=vals_dpf payloads='0.75' op='gt'}one",
54+
"{!payload_check f=vals_dpf payloads='0.75 1.5' op='gt'}one two",
55+
"{!payload_check f=vals_dpf payloads='1.25' op='lt'}one", // inequality on float lt
56+
"{!payload_check f=vals_dpf payloads='1.0' op='lte'}one", // inequality on float lte
57+
"{!payload_check f=vals_dpf payloads='0.75' op='gt'}one", // inequality on float gt
58+
"{!payload_check f=vals_dpf payloads='1.0' op='gte'}one", // inequality on float gte
59+
"{!payload_check f=vals_dpi payloads='2' op='lt'}A", // inequality on int lt
60+
"{!payload_check f=vals_dpi payloads='1' op='lte'}A", // inequality on int lte
61+
"{!payload_check f=vals_dpi payloads='0' op='gt'}A", // inequality on int gt
62+
"{!payload_check f=vals_dpi payloads='1' op='gte'}A" // inequality on int gte
5463
};
5564

5665
String[] should_not_matches = new String[] {
66+
"{!payload_check f=vals_dpf payloads='0.75' op='gt'}one two", // too few payloads
67+
"{!payload_check f=vals_dpf payloads='0.75 1.5 2.0' op='gt'}one two", // too many payloads
5768
"{!payload_check f=vals_dpi v=A payloads=2}",
5869
"{!payload_check f=vals_dpi payloads='1 2'}B C",
5970
"{!payload_check f=vals_dpi payloads='1 2 3'}A B",
6071
"{!payload_check f=vals_dpi payloads='1 2'}A B C",
6172
"{!payload_check f=vals_dpf payloads='1 2.0'}two three",
62-
"{!payload_check f=vals_dps payloads='VERB NOUN'}cat jumped"
73+
"{!payload_check f=vals_dps payloads='VERB NOUN'}cat jumped",
74+
"{!payload_check f=vals_dpf payloads='1.25' op='gt'}one",
75+
"{!payload_check f=vals_dpf payloads='0.75 3' op='gt'}one two",
76+
"{!payload_check f=vals_dpf payloads='1.0' op='lt'}one", // inequality on float lt
77+
"{!payload_check f=vals_dpf payloads='0.75' op='lte'}one", // inequality on float lte
78+
"{!payload_check f=vals_dpf payloads='1.0' op='gt'}one", // inequality on float gt
79+
"{!payload_check f=vals_dpf payloads='1.25' op='gte'}one", // inequality on float gte
80+
"{!payload_check f=vals_dpi payloads='1' op='lt'}A", // inequality on int lt
81+
"{!payload_check f=vals_dpi payloads='0' op='lte'}A", // inequality on int lte
82+
"{!payload_check f=vals_dpi payloads='1' op='gt'}A", // inequality on int gt
83+
"{!payload_check f=vals_dpi payloads='2' op='gte'}A" // inequality on int gte
6384
};
6485

6586
for(String should_match : should_matches) {

solr/solr-ref-guide/src/other-parsers.adoc

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -977,16 +977,11 @@ For more information about the possibilities of nested queries, see Yonik Seeley
977977

978978
== Payload Query Parsers
979979

980-
These query parsers utilize payloads encoded on terms during indexing.
981-
982-
The main query, for both of these parsers, is parsed straightforwardly from the field type's query analysis into a `SpanQuery`. The generated `SpanQuery` will be either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted. Payloads can be encoded on terms using either the `DelimitedPayloadTokenFilter` or the `NumericPayloadTokenFilter`. The payload using parsers are:
983-
984-
* `PayloadScoreQParser`
985-
* `PayloadCheckQParser`
980+
These query parsers utilize payloads encoded on terms during indexing. Payloads can be encoded on terms using either the `DelimitedPayloadTokenFilter` or the `NumericPayloadTokenFilter`.
986981

987982
=== Payload Score Parser
988983

989-
`PayloadScoreQParser` incorporates each matching term's numeric (integer or float) payloads into the scores.
984+
`PayloadScoreQParser` incorporates each matching term's numeric (integer or float) payloads into the scores. The main query is parsed from the field type's query analysis into a `SpanQuery` based on the value of the `operator` parameter below.
990985

991986
This parser accepts the following parameters:
992987

@@ -997,7 +992,9 @@ The field to use. This parameter is required.
997992
The payload function. The options are: `min`, `max`, `average`, or `sum`. This parameter is required.
998993

999994
`operator`::
1000-
A search operator. The options are `or` and `phrase`, which is the default. This defines if the search query should be an OR query or a phrase query.
995+
A search operator. The options are:
996+
* `or` will generate either a `SpanTermQuery` or a `SpanOrQuery` depending on the number of tokens emitted.
997+
* `phrase` (default) will generate either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted.
1001998

1002999
`includeSpanScore`::
10031000
If `true`, multiplies the computed payload factor by the score of the original query. If `false`, the default, the computed payload factor is the score.
@@ -1012,26 +1009,40 @@ If `true`, multiples the computed payload factor by the score of the original qu
10121009

10131010
=== Payload Check Parser
10141011

1015-
`PayloadCheckQParser` only matches when the matching terms also have the specified payloads.
1012+
`PayloadCheckQParser` only matches when the matching terms also have the specified relationship to the payloads. The default relationship is equality; however, inequality matching can also be performed. The main query is parsed straightforwardly from the field type's query analysis into a `SpanQuery`. The generated `SpanQuery` will be either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted. The net effect is that the main query always operates in a manner similar to a phrase query in the standard Lucene parser (thus ignoring any value for `q.op`).
1013+
1014+
NOTE: If the field analysis applied to the query alters the number of tokens, the final number of tokens must still match the number of payloads supplied in the `payloads` parameter. If the number of query tokens differs from the number of payload values supplied with this query, the query will not match.
10161015

10171016
This parser accepts the following parameters:
10181017

10191018
`f`::
10201019
The field to use (required).
10211020

10221021
`payloads`::
1023-
A space-separated list of payloads that must match the query terms (required)
1024-
+
1025-
Each specified payload will be encoded using the encoder determined from the field type and encoded accordingly for matching.
1026-
+
1027-
`DelimitedPayloadTokenFilter` 'identity' encoded payloads also work here, as well as float and integer encoded ones.
1022+
A space-separated list of payloads to be compared with payloads in the matching tokens from the document (required). Each specified payload will be encoded using the encoder determined from the field type prior to matching. Integer, float and identity (string) encodings are supported with the same meanings as for `DelimitedPayloadTokenFilter`.
1023+
1024+
`op`::
1025+
The comparison operation to apply to the payload check. All operations require that consecutive tokens derived from the analysis of the query match consecutive tokens in the document, and additionally the payloads on the document tokens must be:
1026+
* `eq` - equal to the specified payloads (default)
1027+
* `gt` - greater than the specified payloads
1028+
* `lt` - less than the specified payloads
1029+
* `gte` - greater than or equal to the specified payloads
1030+
* `lte` - less than or equal to the specified payloads
10281031

1029-
*Example*
1032+
*Examples*
10301033

1034+
Find all documents with the phrase "searching stuff" where "searching" has a payload of "VERB" and "stuff" has a payload of "NOUN".
10311035
[source,text]
1032-
----
10331036
{!payload_check f=words_dps payloads="VERB NOUN"}searching stuff
1034-
----
1037+
1038+
Find all documents with "foo" where "foo" has a payload value greater than or equal to 0.75.
1039+
[source,text]
1040+
{!payload_check f=words_dpf payloads="0.75" op="gte"}foo
1041+
1042+
Find all documents with the phrase "foo bar" where term "foo" has a payload greater than 9 and "bar" has a payload greater than 5
1043+
[source,text]
1044+
{!payload_check f=words_dpi payloads="9 5" op="gt"}foo bar
1045+
10351046

10361047
== Prefix Query Parser
10371048

0 commit comments

Comments
 (0)