10
10
import org .apache .lucene .index .BinaryDocValues ;
11
11
import org .apache .lucene .index .DocValues ;
12
12
import org .apache .lucene .index .LeafReaderContext ;
13
+ import org .apache .lucene .index .Term ;
13
14
import org .apache .lucene .search .ConstantScoreScorer ;
14
15
import org .apache .lucene .search .ConstantScoreWeight ;
15
16
import org .apache .lucene .search .DocIdSetIterator ;
17
+ import org .apache .lucene .search .FuzzyQuery ;
16
18
import org .apache .lucene .search .IndexSearcher ;
17
19
import org .apache .lucene .search .Query ;
18
20
import org .apache .lucene .search .QueryVisitor ;
19
21
import org .apache .lucene .search .ScoreMode ;
20
22
import org .apache .lucene .search .Scorer ;
21
23
import org .apache .lucene .search .ScorerSupplier ;
24
+ import org .apache .lucene .search .TermRangeQuery ;
22
25
import org .apache .lucene .search .TwoPhaseIterator ;
23
26
import org .apache .lucene .search .Weight ;
27
+ import org .apache .lucene .search .WildcardQuery ;
24
28
import org .apache .lucene .util .BytesRef ;
25
29
import org .apache .lucene .util .automaton .Automaton ;
26
30
import org .apache .lucene .util .automaton .ByteRunAutomaton ;
31
+ import org .apache .lucene .util .automaton .Operations ;
32
+ import org .apache .lucene .util .automaton .RegExp ;
27
33
import org .elasticsearch .common .io .stream .ByteArrayStreamInput ;
34
+ import org .elasticsearch .common .lucene .search .AutomatonQueries ;
28
35
29
36
import java .io .IOException ;
30
37
import java .util .Arrays ;
@@ -46,15 +53,65 @@ private BinaryDvConfirmedQuery(Query approximation, String field) {
46
53
}
47
54
48
55
/**
49
- * Returns a query that runs the provided Automaton across all binary doc values (but only for docs that also
50
- * match a provided approximation query which is key to getting good performance).
56
+ * Returns a query that runs the generated Automaton from a range query across
57
+ * all binary doc values (but only for docs that also match a provided approximation query which is key
58
+ * to getting good performance).
51
59
*/
52
- public static Query fromAutomaton (Query approximation , String field , String matchPattern , Automaton automaton ) {
53
- return new BinaryDvConfirmedAutomatonQuery (approximation , field , matchPattern , automaton );
60
+ public static Query fromRangeQuery (
61
+ Query approximation ,
62
+ String field ,
63
+ BytesRef lower ,
64
+ BytesRef upper ,
65
+ boolean includeLower ,
66
+ boolean includeUpper
67
+ ) {
68
+ return new BinaryDvConfirmedAutomatonQuery (
69
+ approximation ,
70
+ field ,
71
+ new RangeAutomatonProvider (lower , upper , includeLower , includeUpper )
72
+ );
54
73
}
55
74
56
75
/**
57
- * Returns a query that checks for equality of at leat one of the provided terms across
76
+ * Returns a query that runs the generated Automaton from a wildcard query across
77
+ * all binary doc values (but only for docs that also match a provided approximation query which is key
78
+ * to getting good performance).
79
+ */
80
+ public static Query fromWildcardQuery (Query approximation , String field , String matchPattern , boolean caseInsensitive ) {
81
+ return new BinaryDvConfirmedAutomatonQuery (approximation , field , new PatternAutomatonProvider (matchPattern , caseInsensitive ));
82
+ }
83
+
84
+ /**
85
+ * Returns a query that runs the generated Automaton from a regexp query across
86
+ * all binary doc values (but only for docs that also match a provided approximation query which is key
87
+ * to getting good performance).
88
+ */
89
+ public static Query fromRegexpQuery (
90
+ Query approximation ,
91
+ String field ,
92
+ String value ,
93
+ int syntaxFlags ,
94
+ int matchFlags ,
95
+ int maxDeterminizedStates
96
+ ) {
97
+ return new BinaryDvConfirmedAutomatonQuery (
98
+ approximation ,
99
+ field ,
100
+ new RegexAutomatonProvider (value , syntaxFlags , matchFlags , maxDeterminizedStates )
101
+ );
102
+ }
103
+
104
+ /**
105
+ * Returns a query that runs the generated Automaton from a fuzzy query across
106
+ * all binary doc values (but only for docs that also match a provided approximation query which is key
107
+ * to getting good performance).
108
+ */
109
+ public static Query fromFuzzyQuery (Query approximation , String field , String searchTerm , FuzzyQuery fuzzyQuery ) {
110
+ return new BinaryDvConfirmedAutomatonQuery (approximation , field , new FuzzyQueryAutomatonProvider (searchTerm , fuzzyQuery ));
111
+ }
112
+
113
+ /**
114
+ * Returns a query that checks for equality of at least one of the provided terms across
58
115
* all binary doc values (but only for docs that also match a provided approximation query which
59
116
* is key to getting good performance).
60
117
*/
@@ -63,7 +120,7 @@ public static Query fromTerms(Query approximation, String field, BytesRef... ter
63
120
return new BinaryDvConfirmedTermsQuery (approximation , field , terms );
64
121
}
65
122
66
- protected abstract boolean matchesBinaryDV ( ByteArrayStreamInput bytes , BytesRef bytesRef , BytesRef scratch ) throws IOException ;
123
+ protected abstract BinaryDVMatcher getBinaryDVMatcher () ;
67
124
68
125
protected abstract Query rewrite (Query approxRewrite ) throws IOException ;
69
126
@@ -79,7 +136,7 @@ public Query rewrite(IndexSearcher searcher) throws IOException {
79
136
@ Override
80
137
public Weight createWeight (IndexSearcher searcher , ScoreMode scoreMode , float boost ) throws IOException {
81
138
final Weight approxWeight = approxQuery .createWeight (searcher , scoreMode , boost );
82
-
139
+ final BinaryDVMatcher matcher = getBinaryDVMatcher ();
83
140
return new ConstantScoreWeight (this , boost ) {
84
141
85
142
@ Override
@@ -106,7 +163,7 @@ public boolean matches() throws IOException {
106
163
}
107
164
final BytesRef bytesRef = values .binaryValue ();
108
165
bytes .reset (bytesRef .bytes , bytesRef .offset , bytesRef .length );
109
- return matchesBinaryDV (bytes , bytesRef , scratch );
166
+ return matcher . matchesBinaryDV (bytes , bytesRef , scratch );
110
167
}
111
168
112
169
@ Override
@@ -157,55 +214,56 @@ public void visit(QueryVisitor visitor) {
157
214
}
158
215
}
159
216
160
- private static class BinaryDvConfirmedAutomatonQuery extends BinaryDvConfirmedQuery {
217
+ interface BinaryDVMatcher {
218
+ boolean matchesBinaryDV (ByteArrayStreamInput bytes , BytesRef bytesRef , BytesRef scratch ) throws IOException ;
219
+ }
161
220
162
- private final ByteRunAutomaton byteRunAutomaton ;
163
- private final String matchPattern ;
221
+ private static class BinaryDvConfirmedAutomatonQuery extends BinaryDvConfirmedQuery {
164
222
165
- private BinaryDvConfirmedAutomatonQuery (Query approximation , String field , String matchPattern , Automaton automaton ) {
166
- this (approximation , field , matchPattern , new ByteRunAutomaton (automaton ));
167
- }
223
+ private final AutomatonProvider automatonProvider ;
168
224
169
- private BinaryDvConfirmedAutomatonQuery (Query approximation , String field , String matchPattern , ByteRunAutomaton byteRunAutomaton ) {
225
+ private BinaryDvConfirmedAutomatonQuery (Query approximation , String field , AutomatonProvider automatonProvider ) {
170
226
super (approximation , field );
171
- this .matchPattern = matchPattern ;
172
- this .byteRunAutomaton = byteRunAutomaton ;
227
+ this .automatonProvider = automatonProvider ;
173
228
}
174
229
175
230
@ Override
176
- protected boolean matchesBinaryDV (ByteArrayStreamInput bytes , BytesRef bytesRef , BytesRef scratch ) throws IOException {
177
- int size = bytes .readVInt ();
178
- for (int i = 0 ; i < size ; i ++) {
179
- int valLength = bytes .readVInt ();
180
- if (byteRunAutomaton .run (bytesRef .bytes , bytes .getPosition (), valLength )) {
181
- return true ;
231
+ protected BinaryDVMatcher getBinaryDVMatcher () {
232
+ final ByteRunAutomaton byteRunAutomaton = new ByteRunAutomaton (automatonProvider .getAutomaton (field ));
233
+ return (bytes , bytesRef , scratch ) -> {
234
+ final int size = bytes .readVInt ();
235
+ for (int i = 0 ; i < size ; i ++) {
236
+ final int valLength = bytes .readVInt ();
237
+ if (byteRunAutomaton .run (bytesRef .bytes , bytes .getPosition (), valLength )) {
238
+ return true ;
239
+ }
240
+ bytes .skipBytes (valLength );
182
241
}
183
- bytes .skipBytes (valLength );
184
- }
185
- return false ;
242
+ return false ;
243
+ };
186
244
}
187
245
188
246
@ Override
189
247
protected Query rewrite (Query approxRewrite ) {
190
- return new BinaryDvConfirmedAutomatonQuery (approxRewrite , field , matchPattern , byteRunAutomaton );
248
+ return new BinaryDvConfirmedAutomatonQuery (approxRewrite , field , automatonProvider );
191
249
}
192
250
193
251
@ Override
194
252
public String toString (String field ) {
195
- return field + ":" + matchPattern ;
253
+ return field + ":" + automatonProvider . toString () ;
196
254
}
197
255
198
256
@ Override
199
257
public boolean equals (Object o ) {
200
258
if (o == null || getClass () != o .getClass ()) return false ;
201
259
if (super .equals (o ) == false ) return false ;
202
260
BinaryDvConfirmedAutomatonQuery other = (BinaryDvConfirmedAutomatonQuery ) o ;
203
- return Objects .equals (byteRunAutomaton , other .byteRunAutomaton ) && Objects . equals ( matchPattern , other . matchPattern );
261
+ return Objects .equals (automatonProvider , other .automatonProvider );
204
262
}
205
263
206
264
@ Override
207
265
public int hashCode () {
208
- return Objects .hash (super .hashCode (), matchPattern , byteRunAutomaton );
266
+ return Objects .hash (super .hashCode (), automatonProvider );
209
267
}
210
268
}
211
269
@@ -220,28 +278,31 @@ private BinaryDvConfirmedTermsQuery(Query approximation, String field, BytesRef[
220
278
}
221
279
222
280
@ Override
223
- protected boolean matchesBinaryDV (ByteArrayStreamInput bytes , BytesRef bytesRef , BytesRef scratch ) throws IOException {
224
- scratch .bytes = bytesRef .bytes ;
225
- final int size = bytes .readVInt ();
226
- for (int i = 0 ; i < size ; i ++) {
227
- final int valLength = bytes .readVInt ();
228
- scratch .offset = bytes .getPosition ();
229
- scratch .length = valLength ;
230
- if (terms .length == 1 ) {
231
- if (terms [0 ].bytesEquals (scratch )) {
232
- return true ;
233
- }
234
- } else {
235
- final int pos = Arrays .binarySearch (terms , scratch , BytesRef ::compareTo );
236
- if (pos >= 0 ) {
237
- assert terms [pos ].bytesEquals (scratch ) : "Expected term at position " + pos + " to match scratch, but it did not." ;
238
- return true ;
281
+ protected BinaryDVMatcher getBinaryDVMatcher () {
282
+ return (bytes , bytesRef , scratch ) -> {
283
+ scratch .bytes = bytesRef .bytes ;
284
+ final int size = bytes .readVInt ();
285
+ for (int i = 0 ; i < size ; i ++) {
286
+ final int valLength = bytes .readVInt ();
287
+ scratch .offset = bytes .getPosition ();
288
+ scratch .length = valLength ;
289
+ if (terms .length == 1 ) {
290
+ if (terms [0 ].bytesEquals (scratch )) {
291
+ return true ;
292
+ }
293
+ } else {
294
+ final int pos = Arrays .binarySearch (terms , scratch , BytesRef ::compareTo );
295
+ if (pos >= 0 ) {
296
+ assert terms [pos ].bytesEquals (scratch )
297
+ : "Expected term at position " + pos + " to match scratch, but it did not." ;
298
+ return true ;
299
+ }
239
300
}
301
+ bytes .skipBytes (valLength );
240
302
}
241
- bytes .skipBytes (valLength );
242
- }
243
- assert bytes .available () == 0 : "Expected no bytes left to read, but found " + bytes .available ();
244
- return false ;
303
+ assert bytes .available () == 0 : "Expected no bytes left to read, but found " + bytes .available ();
304
+ return false ;
305
+ };
245
306
}
246
307
247
308
@ Override
@@ -275,4 +336,43 @@ public int hashCode() {
275
336
return Objects .hash (super .hashCode (), Arrays .hashCode (terms ));
276
337
}
277
338
}
339
+
340
+ private interface AutomatonProvider {
341
+ Automaton getAutomaton (String field );
342
+ }
343
+
344
+ private record PatternAutomatonProvider (String matchPattern , boolean caseInsensitive ) implements AutomatonProvider {
345
+ @ Override
346
+ public Automaton getAutomaton (String field ) {
347
+ return caseInsensitive
348
+ ? AutomatonQueries .toCaseInsensitiveWildcardAutomaton (new Term (field , matchPattern ))
349
+ : WildcardQuery .toAutomaton (new Term (field , matchPattern ), Operations .DEFAULT_DETERMINIZE_WORK_LIMIT );
350
+ }
351
+ }
352
+
353
+ private record RegexAutomatonProvider (String value , int syntaxFlags , int matchFlags , int maxDeterminizedStates )
354
+ implements
355
+ AutomatonProvider {
356
+ @ Override
357
+ public Automaton getAutomaton (String field ) {
358
+ RegExp regex = new RegExp (value , syntaxFlags , matchFlags );
359
+ return Operations .determinize (regex .toAutomaton (), maxDeterminizedStates );
360
+ }
361
+ }
362
+
363
+ private record RangeAutomatonProvider (BytesRef lower , BytesRef upper , boolean includeLower , boolean includeUpper )
364
+ implements
365
+ AutomatonProvider {
366
+ @ Override
367
+ public Automaton getAutomaton (String field ) {
368
+ return TermRangeQuery .toAutomaton (lower , upper , includeLower , includeUpper );
369
+ }
370
+ }
371
+
372
+ private record FuzzyQueryAutomatonProvider (String searchTerm , FuzzyQuery fuzzyQuery ) implements AutomatonProvider {
373
+ @ Override
374
+ public Automaton getAutomaton (String field ) {
375
+ return fuzzyQuery .getAutomata ().automaton ;
376
+ }
377
+ }
278
378
}
0 commit comments