Skip to content

Commit 3cc748a

Browse files
authored
[KQL query] Implement the KQL AST builder (#115084)
1 parent 2275894 commit 3cc748a

File tree

22 files changed

+1630
-378
lines changed

22 files changed

+1630
-378
lines changed

server/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,10 @@ protected TermsQueryBuilder doCreateTestQueryBuilder() {
7171
// make between 0 and 5 different values of the same type
7272
String fieldName = randomValueOtherThanMany(
7373
choice -> choice.equals(GEO_POINT_FIELD_NAME)
74+
|| choice.equals(BINARY_FIELD_NAME)
7475
|| choice.equals(GEO_POINT_ALIAS_FIELD_NAME)
7576
|| choice.equals(INT_RANGE_FIELD_NAME)
77+
|| choice.equals(DATE_ALIAS_FIELD_NAME)
7678
|| choice.equals(DATE_RANGE_FIELD_NAME)
7779
|| choice.equals(DATE_NANOS_FIELD_NAME), // TODO: needs testing for date_nanos type
7880
AbstractQueryTestCase::getRandomFieldName
@@ -115,6 +117,7 @@ protected void doAssertLuceneQuery(TermsQueryBuilder queryBuilder, Query query,
115117

116118
// we only do the check below for string fields (otherwise we'd have to decode the values)
117119
if (queryBuilder.fieldName().equals(INT_FIELD_NAME)
120+
|| queryBuilder.fieldName().equals(INT_ALIAS_FIELD_NAME)
118121
|| queryBuilder.fieldName().equals(DOUBLE_FIELD_NAME)
119122
|| queryBuilder.fieldName().equals(BOOLEAN_FIELD_NAME)
120123
|| queryBuilder.fieldName().equals(DATE_FIELD_NAME)) {

test/framework/src/main/java/org/elasticsearch/test/AbstractBuilderTestCase.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,28 +136,36 @@ public abstract class AbstractBuilderTestCase extends ESTestCase {
136136
protected static final String[] MAPPED_FIELD_NAMES = new String[] {
137137
TEXT_FIELD_NAME,
138138
TEXT_ALIAS_FIELD_NAME,
139+
KEYWORD_FIELD_NAME,
139140
INT_FIELD_NAME,
141+
INT_ALIAS_FIELD_NAME,
140142
INT_RANGE_FIELD_NAME,
141143
DOUBLE_FIELD_NAME,
142144
BOOLEAN_FIELD_NAME,
143145
DATE_NANOS_FIELD_NAME,
144146
DATE_FIELD_NAME,
147+
DATE_ALIAS_FIELD_NAME,
145148
DATE_RANGE_FIELD_NAME,
146149
OBJECT_FIELD_NAME,
147150
GEO_POINT_FIELD_NAME,
148-
GEO_POINT_ALIAS_FIELD_NAME };
151+
GEO_POINT_ALIAS_FIELD_NAME,
152+
BINARY_FIELD_NAME };
149153
protected static final String[] MAPPED_LEAF_FIELD_NAMES = new String[] {
150154
TEXT_FIELD_NAME,
151155
TEXT_ALIAS_FIELD_NAME,
156+
KEYWORD_FIELD_NAME,
152157
INT_FIELD_NAME,
158+
INT_ALIAS_FIELD_NAME,
153159
INT_RANGE_FIELD_NAME,
154160
DOUBLE_FIELD_NAME,
155161
BOOLEAN_FIELD_NAME,
156162
DATE_NANOS_FIELD_NAME,
157163
DATE_FIELD_NAME,
164+
DATE_ALIAS_FIELD_NAME,
158165
DATE_RANGE_FIELD_NAME,
159166
GEO_POINT_FIELD_NAME,
160-
GEO_POINT_ALIAS_FIELD_NAME };
167+
GEO_POINT_ALIAS_FIELD_NAME,
168+
BINARY_FIELD_NAME };
161169

162170
private static final Map<String, String> ALIAS_TO_CONCRETE_FIELD_NAME = new HashMap<>();
163171
static {

test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,7 @@ protected static Object getRandomValueForFieldName(String fieldName) {
751751
}
752752
break;
753753
case INT_FIELD_NAME:
754+
case INT_ALIAS_FIELD_NAME:
754755
value = randomIntBetween(0, 10);
755756
break;
756757
case DOUBLE_FIELD_NAME:
@@ -815,6 +816,7 @@ protected static String getRandomRewriteMethod() {
815816
protected static Fuzziness randomFuzziness(String fieldName) {
816817
switch (fieldName) {
817818
case INT_FIELD_NAME:
819+
case INT_ALIAS_FIELD_NAME:
818820
case DOUBLE_FIELD_NAME:
819821
case DATE_FIELD_NAME:
820822
case DATE_NANOS_FIELD_NAME:

x-pack/plugin/kql/src/main/antlr/KqlBase.g4

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ topLevelQuery
2626
;
2727

2828
query
29-
: <assoc=right> query operator=(AND | OR) query #booleanQuery
30-
| NOT subQuery=simpleQuery #notQuery
31-
| simpleQuery #defaultQuery
29+
: <assoc=right> query operator=(AND|OR) query #booleanQuery
30+
| simpleQuery #defaultQuery
3231
;
3332

3433
simpleQuery
35-
: nestedQuery
34+
: notQuery
35+
| nestedQuery
3636
| parenthesizedQuery
3737
| matchAllQuery
3838
| existsQuery
@@ -41,6 +41,10 @@ simpleQuery
4141
| fieldLessQuery
4242
;
4343

44+
// Negation: the NOT token applied to exactly one simpleQuery, exposed to the visitor under the label subQuery.
notQuery
    : NOT subQuery=simpleQuery
    ;
47+
4448
// Nested query: a field name, the COLON token, then a full query wrapped between the
// LEFT_CURLY_BRACKET and RIGHT_CURLY_BRACKET tokens.
nestedQuery
4549
: fieldName COLON LEFT_CURLY_BRACKET query RIGHT_CURLY_BRACKET
4650
;
@@ -77,9 +81,9 @@ fieldLessQuery
7781
;
7882

7983
fieldQueryValue
80-
: (AND|OR)? (UNQUOTED_LITERAL | WILDCARD )+
81-
| (UNQUOTED_LITERAL | WILDCARD )+ (AND|OR)?
82-
| (NOT|AND|OR)
84+
: (AND|OR|NOT)? (UNQUOTED_LITERAL|WILDCARD)+ (NOT|AND|OR)?
85+
| (AND|OR) (AND|OR|NOT)?
86+
| NOT (AND|OR)?
8387
| QUOTED_STRING
8488
;
8589

x-pack/plugin/kql/src/main/java/org/elasticsearch/xpack/kql/parser/KqlAstBuilder.java

Lines changed: 204 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,222 @@
88
package org.elasticsearch.xpack.kql.parser;
99

1010
import org.antlr.v4.runtime.ParserRuleContext;
11+
import org.antlr.v4.runtime.Token;
12+
import org.elasticsearch.index.mapper.MappedFieldType;
13+
import org.elasticsearch.index.query.BoolQueryBuilder;
1114
import org.elasticsearch.index.query.MatchAllQueryBuilder;
15+
import org.elasticsearch.index.query.MatchNoneQueryBuilder;
16+
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
1217
import org.elasticsearch.index.query.QueryBuilder;
13-
import org.elasticsearch.index.query.SearchExecutionContext;
18+
import org.elasticsearch.index.query.QueryBuilders;
19+
import org.elasticsearch.index.query.RangeQueryBuilder;
20+
21+
import java.util.function.BiConsumer;
22+
import java.util.function.BiFunction;
23+
24+
import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
25+
import static org.elasticsearch.xpack.kql.parser.KqlParserExecutionContext.isDateField;
26+
import static org.elasticsearch.xpack.kql.parser.KqlParserExecutionContext.isKeywordField;
27+
import static org.elasticsearch.xpack.kql.parser.KqlParserExecutionContext.isRuntimeField;
28+
import static org.elasticsearch.xpack.kql.parser.ParserUtils.escapeLuceneQueryString;
29+
import static org.elasticsearch.xpack.kql.parser.ParserUtils.hasWildcard;
1430

1531
class KqlAstBuilder extends KqlBaseBaseVisitor<QueryBuilder> {
16-
private final SearchExecutionContext searchExecutionContext;
32+
private final KqlParserExecutionContext kqlParserExecutionContext;
1733

18-
KqlAstBuilder(SearchExecutionContext searchExecutionContext) {
19-
this.searchExecutionContext = searchExecutionContext;
34+
/**
 * Creates an AST builder bound to the given parser execution context.
 *
 * @param kqlParserExecutionContext context kept by this builder and consulted by the visit methods
 */
KqlAstBuilder(KqlParserExecutionContext kqlParserExecutionContext) {
    this.kqlParserExecutionContext = kqlParserExecutionContext;
}
2137

2238
public QueryBuilder toQueryBuilder(ParserRuleContext ctx) {
2339
if (ctx instanceof KqlBaseParser.TopLevelQueryContext topLeveQueryContext) {
40+
if (topLeveQueryContext.query() != null) {
41+
return ParserUtils.typedParsing(this, topLeveQueryContext.query(), QueryBuilder.class);
42+
}
43+
2444
return new MatchAllQueryBuilder();
2545
}
2646

2747
throw new IllegalArgumentException("context should be of type TopLevelQueryContext");
2848
}
49+
50+
@Override
51+
public QueryBuilder visitBooleanQuery(KqlBaseParser.BooleanQueryContext ctx) {
52+
assert ctx.operator != null;
53+
return isAndQuery(ctx) ? visitAndBooleanQuery(ctx) : visitOrBooleanQuery(ctx);
54+
}
55+
56+
public QueryBuilder visitAndBooleanQuery(KqlBaseParser.BooleanQueryContext ctx) {
57+
BoolQueryBuilder builder = QueryBuilders.boolQuery();
58+
59+
// TODO: KQLContext has an option to wrap the clauses into a filter instead of a must clause. Do we need it?
60+
for (ParserRuleContext subQueryCtx : ctx.query()) {
61+
if (subQueryCtx instanceof KqlBaseParser.BooleanQueryContext booleanSubQueryCtx && isAndQuery(booleanSubQueryCtx)) {
62+
ParserUtils.typedParsing(this, subQueryCtx, BoolQueryBuilder.class).must().forEach(builder::must);
63+
} else {
64+
builder.must(ParserUtils.typedParsing(this, subQueryCtx, QueryBuilder.class));
65+
}
66+
}
67+
68+
return rewriteConjunctionQuery(builder);
69+
}
70+
71+
public QueryBuilder visitOrBooleanQuery(KqlBaseParser.BooleanQueryContext ctx) {
72+
BoolQueryBuilder builder = QueryBuilders.boolQuery().minimumShouldMatch(1);
73+
74+
for (ParserRuleContext subQueryCtx : ctx.query()) {
75+
if (subQueryCtx instanceof KqlBaseParser.BooleanQueryContext booleanSubQueryCtx && isOrQuery(booleanSubQueryCtx)) {
76+
ParserUtils.typedParsing(this, subQueryCtx, BoolQueryBuilder.class).should().forEach(builder::should);
77+
} else {
78+
builder.should(ParserUtils.typedParsing(this, subQueryCtx, QueryBuilder.class));
79+
}
80+
}
81+
82+
return rewriteDisjunctionQuery(builder);
83+
}
84+
85+
@Override
86+
public QueryBuilder visitNotQuery(KqlBaseParser.NotQueryContext ctx) {
87+
return QueryBuilders.boolQuery().mustNot(ParserUtils.typedParsing(this, ctx.simpleQuery(), QueryBuilder.class));
88+
}
89+
90+
@Override
91+
public QueryBuilder visitParenthesizedQuery(KqlBaseParser.ParenthesizedQueryContext ctx) {
92+
return ParserUtils.typedParsing(this, ctx.query(), QueryBuilder.class);
93+
}
94+
95+
@Override
96+
public QueryBuilder visitNestedQuery(KqlBaseParser.NestedQueryContext ctx) {
97+
// TODO: implementation
98+
return new MatchNoneQueryBuilder();
99+
}
100+
101+
@Override
102+
public QueryBuilder visitMatchAllQuery(KqlBaseParser.MatchAllQueryContext ctx) {
103+
return new MatchAllQueryBuilder();
104+
}
105+
106+
@Override
107+
public QueryBuilder visitExistsQuery(KqlBaseParser.ExistsQueryContext ctx) {
108+
assert ctx.fieldName() != null; // Should not happen since the grammar does not allow the fieldname to be null.
109+
110+
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().minimumShouldMatch(1);
111+
withFields(ctx.fieldName(), (fieldName, mappedFieldType) -> {
112+
if (isRuntimeField(mappedFieldType) == false) {
113+
boolQueryBuilder.should(QueryBuilders.existsQuery(fieldName));
114+
}
115+
});
116+
117+
return rewriteDisjunctionQuery(boolQueryBuilder);
118+
}
119+
120+
@Override
121+
public QueryBuilder visitRangeQuery(KqlBaseParser.RangeQueryContext ctx) {
122+
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().minimumShouldMatch(1);
123+
124+
String queryText = ParserUtils.extractText(ctx.rangeQueryValue());
125+
BiFunction<RangeQueryBuilder, String, RangeQueryBuilder> rangeOperation = rangeOperation(ctx.operator);
126+
127+
withFields(ctx.fieldName(), (fieldName, mappedFieldType) -> {
128+
RangeQueryBuilder rangeQuery = rangeOperation.apply(QueryBuilders.rangeQuery(fieldName), queryText);
129+
// TODO: add timezone for date fields
130+
boolQueryBuilder.should(rangeQuery);
131+
});
132+
133+
return rewriteDisjunctionQuery(boolQueryBuilder);
134+
}
135+
136+
@Override
137+
public QueryBuilder visitFieldLessQuery(KqlBaseParser.FieldLessQueryContext ctx) {
138+
String queryText = ParserUtils.extractText(ctx.fieldQueryValue());
139+
140+
if (hasWildcard(ctx.fieldQueryValue())) {
141+
// TODO: set default fields.
142+
return QueryBuilders.queryStringQuery(escapeLuceneQueryString(queryText, true));
143+
}
144+
145+
boolean isPhraseMatch = ctx.fieldQueryValue().QUOTED_STRING() != null;
146+
147+
return QueryBuilders.multiMatchQuery(queryText)
148+
// TODO: add default fields?
149+
.type(isPhraseMatch ? MultiMatchQueryBuilder.Type.PHRASE : MultiMatchQueryBuilder.Type.BEST_FIELDS)
150+
.lenient(true);
151+
}
152+
153+
@Override
154+
public QueryBuilder visitFieldQuery(KqlBaseParser.FieldQueryContext ctx) {
155+
156+
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().minimumShouldMatch(1);
157+
String queryText = ParserUtils.extractText(ctx.fieldQueryValue());
158+
boolean hasWildcard = hasWildcard(ctx.fieldQueryValue());
159+
160+
withFields(ctx.fieldName(), (fieldName, mappedFieldType) -> {
161+
QueryBuilder fieldQuery = null;
162+
163+
if (hasWildcard && isKeywordField(mappedFieldType)) {
164+
fieldQuery = QueryBuilders.wildcardQuery(fieldName, queryText)
165+
.caseInsensitive(kqlParserExecutionContext.isCaseSensitive() == false);
166+
} else if (hasWildcard) {
167+
fieldQuery = QueryBuilders.queryStringQuery(escapeLuceneQueryString(queryText, true)).field(fieldName);
168+
} else if (isDateField(mappedFieldType)) {
169+
// TODO: add timezone
170+
fieldQuery = QueryBuilders.rangeQuery(fieldName).gte(queryText).lte(queryText);
171+
} else if (isKeywordField(mappedFieldType)) {
172+
fieldQuery = QueryBuilders.termQuery(fieldName, queryText)
173+
.caseInsensitive(kqlParserExecutionContext.isCaseSensitive() == false);
174+
} else if (ctx.fieldQueryValue().QUOTED_STRING() != null) {
175+
fieldQuery = QueryBuilders.matchPhraseQuery(fieldName, queryText);
176+
} else {
177+
fieldQuery = QueryBuilders.matchQuery(fieldName, queryText);
178+
}
179+
180+
if (fieldQuery != null) {
181+
boolQueryBuilder.should(fieldQuery);
182+
}
183+
});
184+
185+
return rewriteDisjunctionQuery(boolQueryBuilder);
186+
}
187+
188+
private static boolean isAndQuery(KqlBaseParser.BooleanQueryContext ctx) {
189+
return ctx.operator.getType() == KqlBaseParser.AND;
190+
}
191+
192+
private static boolean isOrQuery(KqlBaseParser.BooleanQueryContext ctx) {
193+
return ctx.operator.getType() == KqlBaseParser.OR;
194+
}
195+
196+
private void withFields(KqlBaseParser.FieldNameContext ctx, BiConsumer<String, MappedFieldType> fieldConsummer) {
197+
kqlParserExecutionContext.resolveFields(ctx).forEach(fieldDef -> fieldConsummer.accept(fieldDef.v1(), fieldDef.v2()));
198+
}
199+
200+
private QueryBuilder rewriteDisjunctionQuery(BoolQueryBuilder boolQueryBuilder) {
201+
assert boolQueryBuilder.must().isEmpty() && boolQueryBuilder.filter().isEmpty() && boolQueryBuilder.mustNot().isEmpty();
202+
203+
if (boolQueryBuilder.should().isEmpty()) {
204+
return new MatchNoneQueryBuilder();
205+
}
206+
207+
return boolQueryBuilder.should().size() == 1 ? boolQueryBuilder.should().getFirst() : boolQueryBuilder;
208+
}
209+
210+
private QueryBuilder rewriteConjunctionQuery(BoolQueryBuilder boolQueryBuilder) {
211+
assert boolQueryBuilder.should().isEmpty() && boolQueryBuilder.filter().isEmpty() && boolQueryBuilder.mustNot().isEmpty();
212+
213+
if (boolQueryBuilder.must().isEmpty()) {
214+
return new MatchNoneQueryBuilder();
215+
}
216+
217+
return boolQueryBuilder.must().size() == 1 ? boolQueryBuilder.must().getFirst() : boolQueryBuilder;
218+
}
219+
220+
private BiFunction<RangeQueryBuilder, String, RangeQueryBuilder> rangeOperation(Token operator) {
221+
return switch (operator.getType()) {
222+
case KqlBaseParser.OP_LESS -> RangeQueryBuilder::lt;
223+
case KqlBaseParser.OP_LESS_EQ -> RangeQueryBuilder::lte;
224+
case KqlBaseParser.OP_MORE -> RangeQueryBuilder::gt;
225+
case KqlBaseParser.OP_MORE_EQ -> RangeQueryBuilder::gte;
226+
default -> throw new IllegalArgumentException(format(null, "Invalid range operator {}\"", operator.getText()));
227+
};
228+
}
29229
}

0 commit comments

Comments
 (0)