Skip to content

Commit fcf243a

Browse files
committed
WIP
``` text eq {"took": 13,"documents_found": 1000} not text eq {"took":4482,"documents_found":10000000} ```
1 parent 385e0d9 commit fcf243a

File tree

10 files changed

+194
-54
lines changed

10 files changed

+194
-54
lines changed

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,8 @@ private TextFieldType buildFieldType(
405405
SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields),
406406
meta.getValue(),
407407
eagerGlobalOrdinals.getValue(),
408-
indexPhrases.getValue()
408+
indexPhrases.getValue(),
409+
matchQueryYieldsCandidateMatchesForEquality()
409410
);
410411
if (fieldData.getValue()) {
411412
ft.setFielddata(true, freqFilter.getValue());
@@ -414,6 +415,25 @@ private TextFieldType buildFieldType(
414415
return ft;
415416
}
416417

418+
/**
419+
* Does a `match` query generate all valid candidates for `==`? Meaning,
420+
* if I do a match query for any string, say `foo bar baz`, then that
421+
* query will find all documents that indexed the same string.
422+
* <p>
423+
* This should be true for most sanely configured text fields. That's
424+
* just how we use them for search. But it's quite possible to make
425+
* the index analyzer not agree with the search analyzer, for example.
426+
* </p>
427+
* <p>
428+
* So this implementation is ultra-paranoid.
429+
* </p>
430+
*/
431+
private boolean matchQueryYieldsCandidateMatchesForEquality() {
432+
return index.getValue() == Boolean.TRUE
433+
&& analyzers.indexAnalyzer.isConfigured() == false
434+
&& analyzers.searchAnalyzer.isConfigured() == false;
435+
}
436+
417437
private SubFieldInfo buildPrefixInfo(MapperBuilderContext context, FieldType fieldType, TextFieldType tft) {
418438
if (indexPrefixes.get() == null) {
419439
return null;
@@ -686,6 +706,9 @@ public static class TextFieldType extends StringFieldType {
686706
*/
687707
private final KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate;
688708

709+
// NOCOMMIT docs
710+
private final boolean matchQueryYieldsCandidateMatchesForEquality;
711+
689712
public TextFieldType(
690713
String name,
691714
boolean indexed,
@@ -695,7 +718,8 @@ public TextFieldType(
695718
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate,
696719
Map<String, String> meta,
697720
boolean eagerGlobalOrdinals,
698-
boolean indexPhrases
721+
boolean indexPhrases,
722+
boolean matchQueryYieldsCandidateMatchesForEquality
699723
) {
700724
super(name, indexed, stored, false, tsi, meta);
701725
fielddata = false;
@@ -704,6 +728,7 @@ public TextFieldType(
704728
this.syntheticSourceDelegate = syntheticSourceDelegate;
705729
this.eagerGlobalOrdinals = eagerGlobalOrdinals;
706730
this.indexPhrases = indexPhrases;
731+
this.matchQueryYieldsCandidateMatchesForEquality = matchQueryYieldsCandidateMatchesForEquality;
707732
}
708733

709734
public TextFieldType(String name, boolean indexed, boolean stored, Map<String, String> meta) {
@@ -720,6 +745,7 @@ public TextFieldType(String name, boolean indexed, boolean stored, Map<String, S
720745
syntheticSourceDelegate = null;
721746
eagerGlobalOrdinals = false;
722747
indexPhrases = false;
748+
matchQueryYieldsCandidateMatchesForEquality = true;
723749
}
724750

725751
public TextFieldType(String name, boolean isSyntheticSource) {
@@ -732,7 +758,8 @@ public TextFieldType(String name, boolean isSyntheticSource) {
732758
null,
733759
Collections.emptyMap(),
734760
false,
735-
false
761+
false,
762+
true
736763
);
737764
}
738765

@@ -1019,6 +1046,10 @@ public boolean canUseSyntheticSourceDelegateForQueryingEquality(String str) {
10191046
return str.length() <= syntheticSourceDelegate.ignoreAbove();
10201047
}
10211048

1049+
public boolean matchQueryYieldsCandidateMatchesForEquality() {
1050+
return matchQueryYieldsCandidateMatchesForEquality;
1051+
}
1052+
10221053
@Override
10231054
public BlockLoader blockLoader(BlockLoaderContext blContext) {
10241055
if (canUseSyntheticSourceDelegateForLoading()) {
@@ -1208,7 +1239,7 @@ public KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate() {
12081239
public static class ConstantScoreTextFieldType extends TextFieldType {
12091240

12101241
public ConstantScoreTextFieldType(String name, boolean indexed, boolean stored, TextSearchInfo tsi, Map<String, String> meta) {
1211-
super(name, indexed, stored, tsi, false, null, meta, false, false);
1242+
super(name, indexed, stored, tsi, false, null, meta, false, false, /* unused */ false);
12121243
}
12131244

12141245
public ConstantScoreTextFieldType(String name) {

x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java

Lines changed: 70 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
package org.elasticsearch.xpack.esql.qa.single_node;
99

1010
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
11+
import com.carrotsearch.randomizedtesting.annotations.Repeat;
1112
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
1213

1314
import org.elasticsearch.client.Request;
@@ -50,14 +51,15 @@
5051
/**
5152
* Tests for pushing queries to lucene.
5253
*/
54+
//@Repeat(iterations = 10)
5355
@ThreadLeakFilters(filters = TestClustersThreadFilter.class)
5456
public class PushQueriesIT extends ESRestTestCase {
5557
@ClassRule
5658
public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> spec.plugin("inference-service-test"));
5759

5860
@ParametersFactory(argumentFormatting = "%1s")
5961
public static List<Object[]> args() {
60-
return Stream.of("auto", "text", "match_only_text", "semantic_text").map(s -> new Object[] { s }).toList();
62+
return Stream.of("auto", "text_alone", "text", "match_only_text", "semantic_text").map(s -> new Object[] { s }).toList();
6163
}
6264

6365
private final String type;
@@ -74,13 +76,14 @@ public void testEquality() throws IOException {
7476
""";
7577
String luceneQuery = switch (type) {
7678
case "text", "auto" -> "#test.keyword:%value -_ignored:test.keyword";
79+
case "text_alone" -> "test:%value";
7780
case "match_only_text" -> "*:*";
7881
case "semantic_text" -> "FieldExistsQuery [field=_primary_term]";
7982
default -> throw new UnsupportedOperationException("unknown type [" + type + "]");
8083
};
8184
boolean filterInCompute = switch (type) {
8285
case "text", "auto" -> false;
83-
case "match_only_text", "semantic_text" -> true;
86+
case "text_alone", "match_only_text", "semantic_text" -> true;
8487
default -> throw new UnsupportedOperationException("unknown type [" + type + "]");
8588
};
8689
testPushQuery(value, esqlQuery, List.of(luceneQuery), filterInCompute, true);
@@ -92,12 +95,21 @@ public void testEqualityTooBigToPush() throws IOException {
9295
FROM test
9396
| WHERE test == "%value"
9497
""";
95-
String luceneQuery = switch (type) {
96-
case "text", "auto", "match_only_text" -> "*:*";
97-
case "semantic_text" -> "FieldExistsQuery [field=_primary_term]";
98+
List<String> luceneQueryOptions = switch (type) {
99+
case "text", "auto" -> {
100+
// The standard analyzer splits tokens longer than 255 characters by default.
101+
String first = "#test:" + "a".repeat(255);
102+
if (value.length() % 255 == 0) {
103+
yield List.of(first);
104+
}
105+
String rest = "#test:" + "a".repeat(value.length() % 255);
106+
yield List.of(first + " " + rest, rest + " " + first);
107+
}
108+
case "match_only_text" -> List.of("*:*");
109+
case "semantic_text" -> List.of("FieldExistsQuery [field=_primary_term]");
98110
default -> throw new UnsupportedOperationException("unknown type [" + type + "]");
99111
};
100-
testPushQuery(value, esqlQuery, List.of(luceneQuery), true, true);
112+
testPushQuery(value, esqlQuery, luceneQueryOptions, true, true);
101113
}
102114

103115
/**
@@ -195,6 +207,29 @@ public void testInequalityTooBigToPush() throws IOException {
195207
testPushQuery(value, esqlQuery, List.of(luceneQuery), true, false);
196208
}
197209

210+
/**
211+
* {@code NOT !=} should function just like {@code ==}.
212+
*/
213+
public void testNotInequality() throws IOException {
214+
String value = "v".repeat(between(0, 256));
215+
String esqlQuery = """
216+
FROM test
217+
| WHERE NOT test != "%value"
218+
""";
219+
String luceneQuery = switch (type) {
220+
case "text", "auto" -> "#test.keyword:%value -_ignored:test.keyword";
221+
case "match_only_text" -> "*:*";
222+
case "semantic_text" -> "FieldExistsQuery [field=_primary_term]";
223+
default -> throw new UnsupportedOperationException("unknown type [" + type + "]");
224+
};
225+
boolean filterInCompute = switch (type) {
226+
case "text", "auto" -> false;
227+
case "match_only_text", "semantic_text" -> true;
228+
default -> throw new UnsupportedOperationException("unknown type [" + type + "]");
229+
};
230+
testPushQuery(value, esqlQuery, List.of(luceneQuery), filterInCompute, true);
231+
}
232+
198233
public void testCaseInsensitiveEquality() throws IOException {
199234
String value = "a".repeat(between(0, 256));
200235
String esqlQuery = """
@@ -217,6 +252,7 @@ private void testPushQuery(String value, String esqlQuery, List<String> luceneQu
217252
String replacedQuery = esqlQuery.replaceAll("%value", value).replaceAll("%different_value", differentValue);
218253
RestEsqlTestCase.RequestObjectBuilder builder = requestObjectBuilder().query(replacedQuery + "\n| KEEP test");
219254
builder.profile(true);
255+
builder.allowPartialResults(false);
220256
Map<String, Object> result = runEsql(builder, new AssertWarnings.NoWarnings(), RestEsqlTestCase.Mode.SYNC);
221257
assertResultMap(
222258
result,
@@ -310,22 +346,36 @@ private void indexValue(String value) throws IOException {
310346
}
311347
}
312348
}""";
313-
default -> """
314-
,
315-
"mappings": {
316-
"properties": {
317-
"test": {
318-
"type": "%type",
319-
"fields": {
320-
"keyword": {
321-
"type": "keyword",
322-
"ignore_above": 256
349+
default -> {
350+
if (type.endsWith("_alone")) {
351+
yield """
352+
,
353+
"mappings": {
354+
"properties": {
355+
"test": {
356+
"type": "%type"
323357
}
324358
}
325359
}
326-
}
327-
}
328-
}""".replace("%type", type);
360+
}""".replace("%type", type.replace("_alone", ""));
361+
}
362+
yield """
363+
,
364+
"mappings": {
365+
"properties": {
366+
"test": {
367+
"type": "%type",
368+
"fields": {
369+
"keyword": {
370+
"type": "keyword",
371+
"ignore_above": 256
372+
}
373+
}
374+
}
375+
}
376+
}
377+
}""".replace("%type", type);
378+
}
329379
};
330380
json += "}";
331381
createIndex.setJsonEntity(json);
@@ -365,7 +415,7 @@ protected String getTestRestCluster() {
365415

366416
@Override
367417
protected boolean preserveClusterUponCompletion() {
368-
// Preserve the cluser to speed up the semantic_text tests
418+
// Preserve the cluster to speed up the semantic_text tests
369419
return true;
370420
}
371421

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,11 @@ public boolean isSingleValue(FieldName field) {
302302
public boolean canUseEqualityOnSyntheticSourceDelegate(FieldName name, String value) {
303303
return false;
304304
}
305+
306+
@Override
307+
public boolean matchQueryYieldsCandidateMatchesForEquality(String name) {
308+
return false;
309+
}
305310
}
306311

307312
/**

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/TranslationAware.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.compute.operator.FilterOperator;
1212
import org.elasticsearch.xpack.esql.core.expression.Expression;
1313
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
14+
import org.elasticsearch.xpack.esql.expression.predicate.logical.Not;
1415
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
1516
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
1617

@@ -117,11 +118,19 @@ public FinishedTranslatable finish() {
117118
return finish;
118119
}
119120

121+
/**
122+
* <strong>Essentially</strong> the {@link TranslationAware#translatable}
123+
* implementation for the {@link Not} expression. When you wrap an expression
124+
* in {@link Not} the result is <strong>mostly</strong> pushable in the same
125+
* way as the original expression. But there are some expressions that either
126+
* need rechecks or can't be pushed at all. This handles that.
127+
*/
120128
public Translatable negate() {
121-
if (this == YES_BUT_RECHECK_NEGATED) {
122-
return RECHECK;
123-
}
124-
return this;
129+
return switch (this) {
130+
case YES_BUT_RECHECK_NEGATED -> Translatable.RECHECK;
131+
case RECHECK -> Translatable.NO;
132+
default -> this;
133+
};
125134
}
126135

127136
/**

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/Equals.java

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1111
import org.elasticsearch.common.time.DateUtils;
1212
import org.elasticsearch.compute.ann.Evaluator;
13+
import org.elasticsearch.index.query.MatchQueryBuilder;
1314
import org.elasticsearch.xpack.esql.core.expression.Expression;
1415
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
1516
import org.elasticsearch.xpack.esql.core.expression.Literal;
@@ -24,6 +25,7 @@
2425
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
2526
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2627
import org.elasticsearch.xpack.esql.querydsl.query.EqualsSyntheticSourceDelegate;
28+
import org.elasticsearch.xpack.esql.querydsl.query.MatchQuery;
2729
import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery;
2830

2931
import java.time.ZoneId;
@@ -129,26 +131,37 @@ public Equals(Source source, Expression left, Expression right, ZoneId zoneId) {
129131

130132
@Override
131133
public Translatable translatable(LucenePushdownPredicates pushdownPredicates) {
132-
if (right() instanceof Literal lit) {
133-
if (left().dataType() == DataType.TEXT && left() instanceof FieldAttribute fa) {
134-
if (pushdownPredicates.canUseEqualityOnSyntheticSourceDelegate(fa, ((BytesRef) lit.value()).utf8ToString())) {
135-
return Translatable.YES_BUT_RECHECK_NEGATED;
136-
}
137-
}
134+
if (right() instanceof Literal rhs && left().dataType() == DataType.TEXT && left() instanceof FieldAttribute lhs) {
135+
return translatableText(pushdownPredicates, lhs, ((BytesRef) rhs.value()).utf8ToString());
136+
}
137+
return super.translatable(pushdownPredicates);
138+
}
139+
140+
private Translatable translatableText(LucenePushdownPredicates pushdownPredicates, FieldAttribute lhs, String rhs) {
141+
if (pushdownPredicates.canUseEqualityOnSyntheticSourceDelegate(lhs, rhs)) {
142+
return Translatable.YES_BUT_RECHECK_NEGATED;
143+
}
144+
if (pushdownPredicates.matchQueryYieldsCandidateMatchesForEquality(lhs)) {
145+
return Translatable.RECHECK;
138146
}
139147
return super.translatable(pushdownPredicates);
140148
}
141149

142150
@Override
143151
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
144-
if (right() instanceof Literal lit) {
145-
if (left().dataType() == DataType.TEXT && left() instanceof FieldAttribute fa) {
146-
String value = ((BytesRef) lit.value()).utf8ToString();
147-
if (pushdownPredicates.canUseEqualityOnSyntheticSourceDelegate(fa, value)) {
148-
String name = handler.nameOf(fa);
149-
return new SingleValueQuery(new EqualsSyntheticSourceDelegate(source(), name, value), name, true);
150-
}
151-
}
152+
if (right() instanceof Literal rhs && left().dataType() == DataType.TEXT && left() instanceof FieldAttribute lhs) {
153+
return asQueryText(pushdownPredicates, handler, lhs, ((BytesRef) rhs.value()).utf8ToString());
154+
}
155+
return super.asQuery(pushdownPredicates, handler);
156+
}
157+
158+
private Query asQueryText(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler, FieldAttribute lhs, String rhs) {
159+
String name = handler.nameOf(lhs);
160+
if (pushdownPredicates.canUseEqualityOnSyntheticSourceDelegate(lhs, rhs)) {
161+
return new SingleValueQuery(new EqualsSyntheticSourceDelegate(source(), name, rhs), name, true);
162+
}
163+
if (pushdownPredicates.matchQueryYieldsCandidateMatchesForEquality(lhs)) {
164+
return new MatchQuery(source(), name, rhs, Map.of(MatchQueryBuilder.OPERATOR_FIELD.getPreferredName(), "AND"));
152165
}
153166
return super.asQuery(pushdownPredicates, handler);
154167
}

0 commit comments

Comments
 (0)