1010import org .apache .lucene .index .BinaryDocValues ;
1111import org .apache .lucene .index .DocValues ;
1212import org .apache .lucene .index .LeafReaderContext ;
13+ import org .apache .lucene .index .Term ;
1314import org .apache .lucene .search .ConstantScoreScorer ;
1415import org .apache .lucene .search .ConstantScoreWeight ;
1516import org .apache .lucene .search .DocIdSetIterator ;
17+ import org .apache .lucene .search .FuzzyQuery ;
1618import org .apache .lucene .search .IndexSearcher ;
1719import org .apache .lucene .search .Query ;
1820import org .apache .lucene .search .QueryVisitor ;
1921import org .apache .lucene .search .ScoreMode ;
2022import org .apache .lucene .search .Scorer ;
2123import org .apache .lucene .search .ScorerSupplier ;
24+ import org .apache .lucene .search .TermRangeQuery ;
2225import org .apache .lucene .search .TwoPhaseIterator ;
2326import org .apache .lucene .search .Weight ;
27+ import org .apache .lucene .search .WildcardQuery ;
2428import org .apache .lucene .util .BytesRef ;
2529import org .apache .lucene .util .automaton .Automaton ;
2630import org .apache .lucene .util .automaton .ByteRunAutomaton ;
31+ import org .apache .lucene .util .automaton .Operations ;
32+ import org .apache .lucene .util .automaton .RegExp ;
2733import org .elasticsearch .common .io .stream .ByteArrayStreamInput ;
34+ import org .elasticsearch .common .lucene .search .AutomatonQueries ;
2835
2936import java .io .IOException ;
3037import java .util .Arrays ;
@@ -46,15 +53,65 @@ private BinaryDvConfirmedQuery(Query approximation, String field) {
4653 }
4754
4855 /**
49- * Returns a query that runs the provided Automaton across all binary doc values (but only for docs that also
50- * match a provided approximation query which is key to getting good performance).
56+ * Returns a query that runs the generated Automaton from a range query across
57+ * all binary doc values (but only for docs that also match a provided approximation query which is key
58+ * to getting good performance).
5159 */
52- public static Query fromAutomaton (Query approximation , String field , String matchPattern , Automaton automaton ) {
53- return new BinaryDvConfirmedAutomatonQuery (approximation , field , matchPattern , automaton );
60+ public static Query fromRangeQuery (
61+ Query approximation ,
62+ String field ,
63+ BytesRef lower ,
64+ BytesRef upper ,
65+ boolean includeLower ,
66+ boolean includeUpper
67+ ) {
68+ return new BinaryDvConfirmedAutomatonQuery (
69+ approximation ,
70+ field ,
71+ new RangeAutomatonProvider (lower , upper , includeLower , includeUpper )
72+ );
5473 }
5574
5675 /**
57- * Returns a query that checks for equality of at leat one of the provided terms across
76+ * Returns a query that runs the generated Automaton from a wildcard query across
77+ * all binary doc values (but only for docs that also match a provided approximation query which is key
78+ * to getting good performance).
79+ */
80+ public static Query fromWildcardQuery (Query approximation , String field , String matchPattern , boolean caseInsensitive ) {
81+ return new BinaryDvConfirmedAutomatonQuery (approximation , field , new PatternAutomatonProvider (matchPattern , caseInsensitive ));
82+ }
83+
84+ /**
85+ * Returns a query that runs the generated Automaton from a regexp query across
86+ * all binary doc values (but only for docs that also match a provided approximation query which is key
87+ * to getting good performance).
88+ */
89+ public static Query fromRegexpQuery (
90+ Query approximation ,
91+ String field ,
92+ String value ,
93+ int syntaxFlags ,
94+ int matchFlags ,
95+ int maxDeterminizedStates
96+ ) {
97+ return new BinaryDvConfirmedAutomatonQuery (
98+ approximation ,
99+ field ,
100+ new RegexAutomatonProvider (value , syntaxFlags , matchFlags , maxDeterminizedStates )
101+ );
102+ }
103+
104+ /**
105+ * Returns a query that runs the generated Automaton from a fuzzy query across
106+ * all binary doc values (but only for docs that also match a provided approximation query which is key
107+ * to getting good performance).
108+ */
109+ public static Query fromFuzzyQuery (Query approximation , String field , String searchTerm , FuzzyQuery fuzzyQuery ) {
110+ return new BinaryDvConfirmedAutomatonQuery (approximation , field , new FuzzyQueryAutomatonProvider (searchTerm , fuzzyQuery ));
111+ }
112+
113+ /**
114+ * Returns a query that checks for equality of at least one of the provided terms across
58115 * all binary doc values (but only for docs that also match a provided approximation query which
59116 * is key to getting good performance).
60117 */
@@ -63,7 +120,7 @@ public static Query fromTerms(Query approximation, String field, BytesRef... ter
63120 return new BinaryDvConfirmedTermsQuery (approximation , field , terms );
64121 }
65122
66- protected abstract boolean matchesBinaryDV ( ByteArrayStreamInput bytes , BytesRef bytesRef , BytesRef scratch ) throws IOException ;
123+ protected abstract BinaryDVMatcher getBinaryDVMatcher () ;
67124
68125 protected abstract Query rewrite (Query approxRewrite ) throws IOException ;
69126
@@ -79,7 +136,7 @@ public Query rewrite(IndexSearcher searcher) throws IOException {
79136 @ Override
80137 public Weight createWeight (IndexSearcher searcher , ScoreMode scoreMode , float boost ) throws IOException {
81138 final Weight approxWeight = approxQuery .createWeight (searcher , scoreMode , boost );
82-
139+ final BinaryDVMatcher matcher = getBinaryDVMatcher ();
83140 return new ConstantScoreWeight (this , boost ) {
84141
85142 @ Override
@@ -106,7 +163,7 @@ public boolean matches() throws IOException {
106163 }
107164 final BytesRef bytesRef = values .binaryValue ();
108165 bytes .reset (bytesRef .bytes , bytesRef .offset , bytesRef .length );
109- return matchesBinaryDV (bytes , bytesRef , scratch );
166+ return matcher . matchesBinaryDV (bytes , bytesRef , scratch );
110167 }
111168
112169 @ Override
@@ -157,55 +214,56 @@ public void visit(QueryVisitor visitor) {
157214 }
158215 }
159216
160- private static class BinaryDvConfirmedAutomatonQuery extends BinaryDvConfirmedQuery {
217+ interface BinaryDVMatcher {
218+ boolean matchesBinaryDV (ByteArrayStreamInput bytes , BytesRef bytesRef , BytesRef scratch ) throws IOException ;
219+ }
161220
162- private final ByteRunAutomaton byteRunAutomaton ;
163- private final String matchPattern ;
221+ private static class BinaryDvConfirmedAutomatonQuery extends BinaryDvConfirmedQuery {
164222
165- private BinaryDvConfirmedAutomatonQuery (Query approximation , String field , String matchPattern , Automaton automaton ) {
166- this (approximation , field , matchPattern , new ByteRunAutomaton (automaton ));
167- }
223+ private final AutomatonProvider automatonProvider ;
168224
169- private BinaryDvConfirmedAutomatonQuery (Query approximation , String field , String matchPattern , ByteRunAutomaton byteRunAutomaton ) {
225+ private BinaryDvConfirmedAutomatonQuery (Query approximation , String field , AutomatonProvider automatonProvider ) {
170226 super (approximation , field );
171- this .matchPattern = matchPattern ;
172- this .byteRunAutomaton = byteRunAutomaton ;
227+ this .automatonProvider = automatonProvider ;
173228 }
174229
175230 @ Override
176- protected boolean matchesBinaryDV (ByteArrayStreamInput bytes , BytesRef bytesRef , BytesRef scratch ) throws IOException {
177- int size = bytes .readVInt ();
178- for (int i = 0 ; i < size ; i ++) {
179- int valLength = bytes .readVInt ();
180- if (byteRunAutomaton .run (bytesRef .bytes , bytes .getPosition (), valLength )) {
181- return true ;
231+ protected BinaryDVMatcher getBinaryDVMatcher () {
232+ final ByteRunAutomaton byteRunAutomaton = new ByteRunAutomaton (automatonProvider .getAutomaton (field ));
233+ return (bytes , bytesRef , scratch ) -> {
234+ final int size = bytes .readVInt ();
235+ for (int i = 0 ; i < size ; i ++) {
236+ final int valLength = bytes .readVInt ();
237+ if (byteRunAutomaton .run (bytesRef .bytes , bytes .getPosition (), valLength )) {
238+ return true ;
239+ }
240+ bytes .skipBytes (valLength );
182241 }
183- bytes .skipBytes (valLength );
184- }
185- return false ;
242+ return false ;
243+ };
186244 }
187245
188246 @ Override
189247 protected Query rewrite (Query approxRewrite ) {
190- return new BinaryDvConfirmedAutomatonQuery (approxRewrite , field , matchPattern , byteRunAutomaton );
248+ return new BinaryDvConfirmedAutomatonQuery (approxRewrite , field , automatonProvider );
191249 }
192250
193251 @ Override
194252 public String toString (String field ) {
195- return field + ":" + matchPattern ;
253+ return field + ":" + automatonProvider . toString () ;
196254 }
197255
198256 @ Override
199257 public boolean equals (Object o ) {
200258 if (o == null || getClass () != o .getClass ()) return false ;
201259 if (super .equals (o ) == false ) return false ;
202260 BinaryDvConfirmedAutomatonQuery other = (BinaryDvConfirmedAutomatonQuery ) o ;
203- return Objects .equals (byteRunAutomaton , other .byteRunAutomaton ) && Objects . equals ( matchPattern , other . matchPattern );
261+ return Objects .equals (automatonProvider , other .automatonProvider );
204262 }
205263
206264 @ Override
207265 public int hashCode () {
208- return Objects .hash (super .hashCode (), matchPattern , byteRunAutomaton );
266+ return Objects .hash (super .hashCode (), automatonProvider );
209267 }
210268 }
211269
@@ -220,28 +278,31 @@ private BinaryDvConfirmedTermsQuery(Query approximation, String field, BytesRef[
220278 }
221279
222280 @ Override
223- protected boolean matchesBinaryDV (ByteArrayStreamInput bytes , BytesRef bytesRef , BytesRef scratch ) throws IOException {
224- scratch .bytes = bytesRef .bytes ;
225- final int size = bytes .readVInt ();
226- for (int i = 0 ; i < size ; i ++) {
227- final int valLength = bytes .readVInt ();
228- scratch .offset = bytes .getPosition ();
229- scratch .length = valLength ;
230- if (terms .length == 1 ) {
231- if (terms [0 ].bytesEquals (scratch )) {
232- return true ;
233- }
234- } else {
235- final int pos = Arrays .binarySearch (terms , scratch , BytesRef ::compareTo );
236- if (pos >= 0 ) {
237- assert terms [pos ].bytesEquals (scratch ) : "Expected term at position " + pos + " to match scratch, but it did not." ;
238- return true ;
281+ protected BinaryDVMatcher getBinaryDVMatcher () {
282+ return (bytes , bytesRef , scratch ) -> {
283+ scratch .bytes = bytesRef .bytes ;
284+ final int size = bytes .readVInt ();
285+ for (int i = 0 ; i < size ; i ++) {
286+ final int valLength = bytes .readVInt ();
287+ scratch .offset = bytes .getPosition ();
288+ scratch .length = valLength ;
289+ if (terms .length == 1 ) {
290+ if (terms [0 ].bytesEquals (scratch )) {
291+ return true ;
292+ }
293+ } else {
294+ final int pos = Arrays .binarySearch (terms , scratch , BytesRef ::compareTo );
295+ if (pos >= 0 ) {
296+ assert terms [pos ].bytesEquals (scratch )
297+ : "Expected term at position " + pos + " to match scratch, but it did not." ;
298+ return true ;
299+ }
239300 }
301+ bytes .skipBytes (valLength );
240302 }
241- bytes .skipBytes (valLength );
242- }
243- assert bytes .available () == 0 : "Expected no bytes left to read, but found " + bytes .available ();
244- return false ;
303+ assert bytes .available () == 0 : "Expected no bytes left to read, but found " + bytes .available ();
304+ return false ;
305+ };
245306 }
246307
247308 @ Override
@@ -275,4 +336,43 @@ public int hashCode() {
275336 return Objects .hash (super .hashCode (), Arrays .hashCode (terms ));
276337 }
277338 }
339+
340+ private interface AutomatonProvider {
341+ Automaton getAutomaton (String field );
342+ }
343+
344+ private record PatternAutomatonProvider (String matchPattern , boolean caseInsensitive ) implements AutomatonProvider {
345+ @ Override
346+ public Automaton getAutomaton (String field ) {
347+ return caseInsensitive
348+ ? AutomatonQueries .toCaseInsensitiveWildcardAutomaton (new Term (field , matchPattern ))
349+ : WildcardQuery .toAutomaton (new Term (field , matchPattern ), Operations .DEFAULT_DETERMINIZE_WORK_LIMIT );
350+ }
351+ }
352+
353+ private record RegexAutomatonProvider (String value , int syntaxFlags , int matchFlags , int maxDeterminizedStates )
354+ implements
355+ AutomatonProvider {
356+ @ Override
357+ public Automaton getAutomaton (String field ) {
358+ RegExp regex = new RegExp (value , syntaxFlags , matchFlags );
359+ return Operations .determinize (regex .toAutomaton (), maxDeterminizedStates );
360+ }
361+ }
362+
363+ private record RangeAutomatonProvider (BytesRef lower , BytesRef upper , boolean includeLower , boolean includeUpper )
364+ implements
365+ AutomatonProvider {
366+ @ Override
367+ public Automaton getAutomaton (String field ) {
368+ return TermRangeQuery .toAutomaton (lower , upper , includeLower , includeUpper );
369+ }
370+ }
371+
372+ private record FuzzyQueryAutomatonProvider (String searchTerm , FuzzyQuery fuzzyQuery ) implements AutomatonProvider {
373+ @ Override
374+ public Automaton getAutomaton (String field ) {
375+ return fuzzyQuery .getAutomata ().automaton ;
376+ }
377+ }
278378}
0 commit comments