Skip to content

Commit a19434c

Browse files
peteralfonsiPeter Alfonsi
andauthored
Disable request cache for queries on fields with non-default use_similarity or split_queries_on_whitespace (opensearch-project#19385)
--------- Signed-off-by: Peter Alfonsi <[email protected]> Signed-off-by: Peter Alfonsi <[email protected]> Co-authored-by: Peter Alfonsi <[email protected]>
1 parent ad4c7f7 commit a19434c

File tree

4 files changed

+146
-0
lines changed

4 files changed

+146
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6868
- [Flaky Test] Fix flaky test IngestFromKinesisIT.testAllActiveIngestion ([#19380](https://github.com/opensearch-project/OpenSearch/pull/19380))
6969
- Fix lag metric for pull-based ingestion when streaming source is empty ([#19393](https://github.com/opensearch-project/OpenSearch/pull/19393))
7070
- Fix ingestion state xcontent serialization in IndexMetadata and fail fast on mapping errors([#19320](https://github.com/opensearch-project/OpenSearch/pull/19320))
71+
- Fix updated keyword field params leading to stale responses from request cache ([#19385](https://github.com/opensearch-project/OpenSearch/pull/19385))
7172

7273
### Dependencies
7374
- Bump `com.gradleup.shadow:shadow-gradle-plugin` from 8.3.5 to 8.3.9 ([#19400](https://github.com/opensearch-project/OpenSearch/pull/19400))

server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.opensearch.action.admin.indices.alias.Alias;
4848
import org.opensearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest;
4949
import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse;
50+
import org.opensearch.action.admin.indices.mapping.put.PutMappingRequest;
5051
import org.opensearch.action.search.SearchResponse;
5152
import org.opensearch.action.search.SearchType;
5253
import org.opensearch.cluster.ClusterState;
@@ -861,6 +862,115 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
861862
assertEquals(0, requestCacheStats.getMemorySizeInBytes());
862863
}
863864

865+
public void testKeywordFieldUseSimilarityCacheability() throws Exception {
866+
testKeywordFieldParameterCacheabilityCase("use_similarity");
867+
}
868+
869+
public void testKeywordFieldSplitWhitespaceCacheability() throws Exception {
870+
testKeywordFieldParameterCacheabilityCase("split_queries_on_whitespace");
871+
}
872+
873+
private void testKeywordFieldParameterCacheabilityCase(String paramName) throws Exception {
874+
// When keyword fields have non-default values for "use_similarity" or "split_queries_on_whitespace",
875+
// queries on those fields shouldn't be cacheable. Default value is false for both parameters.
876+
String node = internalCluster().startNode(
877+
Settings.builder().put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, TimeValue.timeValueMillis(50))
878+
);
879+
Client client = client(node);
880+
String index = "index";
881+
String field1 = "field1";
882+
String field2 = "field2";
883+
assertAcked(
884+
client.admin()
885+
.indices()
886+
.prepareCreate(index)
887+
.setMapping(field1, "type=keyword," + paramName + "=false", field2, "type=keyword," + paramName + "=true")
888+
.setSettings(
889+
Settings.builder()
890+
.put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true)
891+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
892+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
893+
// Disable index refreshing to avoid cache being invalidated mid-test
894+
.put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), TimeValue.timeValueMillis(-1))
895+
)
896+
.get()
897+
);
898+
899+
indexRandom(true, client.prepareIndex(index).setSource(field1, "foo", field2, "foo"));
900+
indexRandom(true, client.prepareIndex(index).setSource(field1, "foo2", field2, "foo2"));
901+
ensureSearchable(index);
902+
// Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache
903+
forceMerge(client, index);
904+
905+
// Show this behavior works for a nested query where the termQuery built by KeywordFieldMapper is not the outermost query
906+
SearchResponse resp = client.prepareSearch(index)
907+
.setRequestCache(true)
908+
.setQuery(
909+
QueryBuilders.boolQuery().should(QueryBuilders.termQuery(field1, "foo")).should(QueryBuilders.termQuery(field1, "foo2"))
910+
)
911+
.get();
912+
assertSearchResponse(resp);
913+
// This query should be cached
914+
RequestCacheStats stats = getNodeCacheStats(client);
915+
assertEquals(1, stats.getMissCount());
916+
long cacheSize = stats.getMemorySizeInBytes();
917+
assertTrue(cacheSize > 0);
918+
919+
// The same query but involving field2 at all should not be cacheable even if setRequestCache is true:
920+
// no extra misses and no extra values in cache
921+
assertQueryCausesCacheState(
922+
client,
923+
index,
924+
QueryBuilders.boolQuery().should(QueryBuilders.termQuery(field1, "foo")).should(QueryBuilders.termQuery(field2, "foo2")),
925+
0,
926+
1,
927+
cacheSize
928+
);
929+
930+
// Now change the parameter value to true for field1, and check the same query again does NOT come from the cache (hits == 0)
931+
// and also shouldn't add any extra entries to the cache
932+
PutMappingRequest mappingRequest = new PutMappingRequest().indices(index).source(field1, "type=keyword," + paramName + "=true");
933+
client.admin().indices().putMapping(mappingRequest).actionGet();
934+
assertQueryCausesCacheState(
935+
client,
936+
index,
937+
QueryBuilders.boolQuery().should(QueryBuilders.termQuery(field1, "foo")).should(QueryBuilders.termQuery(field1, "foo2")),
938+
0,
939+
1,
940+
cacheSize
941+
);
942+
943+
// Now test all query building methods on field1 and ensure none are cacheable
944+
945+
assertQueryCausesCacheState(client, index, QueryBuilders.termsQuery(field1, "foo", "foo2"), 0, 1, cacheSize);
946+
947+
assertQueryCausesCacheState(client, index, QueryBuilders.prefixQuery(field1, "fo"), 0, 1, cacheSize);
948+
949+
assertQueryCausesCacheState(client, index, QueryBuilders.regexpQuery(field1, "f*o"), 0, 1, cacheSize);
950+
951+
assertQueryCausesCacheState(client, index, QueryBuilders.rangeQuery(field1).gte("f").lte("g"), 0, 1, cacheSize);
952+
953+
assertQueryCausesCacheState(client, index, QueryBuilders.fuzzyQuery(field1, "foe"), 0, 1, cacheSize);
954+
955+
assertQueryCausesCacheState(client, index, QueryBuilders.wildcardQuery(field1, "f*"), 0, 1, cacheSize);
956+
}
957+
958+
private void assertQueryCausesCacheState(
959+
Client client,
960+
String index,
961+
QueryBuilder builder,
962+
long expectedHits,
963+
long expectedMisses,
964+
long expectedMemory
965+
) {
966+
SearchResponse resp = client.prepareSearch(index).setRequestCache(true).setQuery(builder).get();
967+
assertSearchResponse(resp);
968+
RequestCacheStats stats = getNodeCacheStats(client);
969+
assertEquals(expectedHits, stats.getHitCount());
970+
assertEquals(expectedMisses, stats.getMissCount());
971+
assertEquals(expectedMemory, stats.getMemorySizeInBytes());
972+
}
973+
864974
private Path[] shardDirectory(String server, Index index, int shard) {
865975
NodeEnvironment env = internalCluster().getInstance(NodeEnvironment.class, server);
866976
final Path[] paths = env.availableShardPaths(new ShardId(index, shard));

server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ public static class KeywordFieldType extends StringFieldType {
312312
private final int ignoreAbove;
313313
private final String nullValue;
314314
private final boolean useSimilarity;
315+
private final boolean splitQueriesOnWhitespace;
315316

316317
public KeywordFieldType(String name, FieldType fieldType, NamedAnalyzer normalizer, NamedAnalyzer searchAnalyzer, Builder builder) {
317318
super(
@@ -328,18 +329,31 @@ public KeywordFieldType(String name, FieldType fieldType, NamedAnalyzer normaliz
328329
this.ignoreAbove = builder.ignoreAbove.getValue();
329330
this.nullValue = builder.nullValue.getValue();
330331
this.useSimilarity = builder.useSimilarity.getValue();
332+
this.splitQueriesOnWhitespace = builder.splitQueriesOnWhitespace.getValue();
331333
}
332334

333335
public KeywordFieldType(String name, boolean isSearchable, boolean hasDocValues, Map<String, String> meta) {
334336
this(name, isSearchable, hasDocValues, false, meta);
335337
}
336338

337339
public KeywordFieldType(String name, boolean isSearchable, boolean hasDocValues, boolean useSimilarity, Map<String, String> meta) {
340+
this(name, isSearchable, hasDocValues, useSimilarity, false, meta);
341+
}
342+
343+
public KeywordFieldType(
344+
String name,
345+
boolean isSearchable,
346+
boolean hasDocValues,
347+
boolean useSimilarity,
348+
boolean splitQueriesOnWhitespace,
349+
Map<String, String> meta
350+
) {
338351
super(name, isSearchable, false, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
339352
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
340353
this.ignoreAbove = Integer.MAX_VALUE;
341354
this.nullValue = null;
342355
this.useSimilarity = useSimilarity;
356+
this.splitQueriesOnWhitespace = splitQueriesOnWhitespace;
343357
}
344358

345359
public KeywordFieldType(String name) {
@@ -358,13 +372,15 @@ public KeywordFieldType(String name, FieldType fieldType) {
358372
this.ignoreAbove = Integer.MAX_VALUE;
359373
this.nullValue = null;
360374
this.useSimilarity = false;
375+
this.splitQueriesOnWhitespace = false;
361376
}
362377

363378
public KeywordFieldType(String name, NamedAnalyzer analyzer) {
364379
super(name, true, false, true, new TextSearchInfo(Defaults.FIELD_TYPE, null, analyzer, analyzer), Collections.emptyMap());
365380
this.ignoreAbove = Integer.MAX_VALUE;
366381
this.nullValue = null;
367382
this.useSimilarity = false;
383+
this.splitQueriesOnWhitespace = false;
368384
}
369385

370386
@Override
@@ -447,6 +463,7 @@ protected Object rewriteForDocValue(Object value) {
447463
@Override
448464
public Query termQueryCaseInsensitive(Object value, QueryShardContext context) {
449465
failIfNotIndexedAndNoDocValues();
466+
checkToDisableCaching(context);
450467
if (isSearchable()) {
451468
return super.termQueryCaseInsensitive(value, context);
452469
} else {
@@ -467,6 +484,7 @@ public Query termQueryCaseInsensitive(Object value, QueryShardContext context) {
467484
@Override
468485
public Query termQuery(Object value, QueryShardContext context) {
469486
failIfNotIndexedAndNoDocValues();
487+
checkToDisableCaching(context);
470488
if (isSearchable()) {
471489
Query query = super.termQuery(value, context);
472490
if (!this.useSimilarity) {
@@ -494,6 +512,7 @@ public Query termQuery(Object value, QueryShardContext context) {
494512
@Override
495513
public Query termsQuery(List<?> values, QueryShardContext context) {
496514
failIfNotIndexedAndNoDocValues();
515+
checkToDisableCaching(context);
497516
// has index and doc_values enabled
498517
if (isSearchable() && hasDocValues()) {
499518
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
@@ -540,6 +559,7 @@ public Query prefixQuery(
540559
);
541560
}
542561
failIfNotIndexedAndNoDocValues();
562+
checkToDisableCaching(context);
543563
if (isSearchable() && hasDocValues()) {
544564
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
545565
return super.prefixQuery(value, method, caseInsensitive, context);
@@ -583,6 +603,7 @@ public Query regexpQuery(
583603
);
584604
}
585605
failIfNotIndexedAndNoDocValues();
606+
checkToDisableCaching(context);
586607
if (isSearchable() && hasDocValues()) {
587608
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
588609
return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context);
@@ -621,6 +642,7 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower
621642
);
622643
}
623644
failIfNotIndexedAndNoDocValues();
645+
checkToDisableCaching(context);
624646
if (isSearchable() && hasDocValues()) {
625647
Query indexQuery = new TermRangeQuery(
626648
name(),
@@ -669,6 +691,7 @@ public Query fuzzyQuery(
669691
QueryShardContext context
670692
) {
671693
failIfNotIndexedAndNoDocValues();
694+
checkToDisableCaching(context);
672695
if (context.allowExpensiveQueries() == false) {
673696
throw new OpenSearchException(
674697
"[fuzzy] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false."
@@ -716,6 +739,7 @@ public Query wildcardQuery(
716739
);
717740
}
718741
failIfNotIndexedAndNoDocValues();
742+
checkToDisableCaching(context);
719743
// keyword field types are always normalized, so ignore case sensitivity and force normalize the
720744
// wildcard
721745
// query text
@@ -745,6 +769,13 @@ public Query wildcardQuery(
745769
return super.wildcardQuery(value, method, caseInsensitive, true, context);
746770
}
747771

772+
private void checkToDisableCaching(QueryShardContext context) {
773+
// Mark the query as non-cacheable if the defaults for useSimilarity or splitQueriesOnWhitespace are not used.
774+
if (useSimilarity || splitQueriesOnWhitespace) {
775+
context.setIsCacheable(false);
776+
}
777+
}
778+
748779
}
749780

750781
private final boolean indexed;

server/src/main/java/org/opensearch/index/query/QueryShardContext.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,10 @@ public final boolean isCacheable() {
652652
return cacheable;
653653
}
654654

655+
public final void setIsCacheable(boolean isCacheable) {
656+
this.cacheable = isCacheable;
657+
}
658+
655659
/**
656660
* Returns the shard ID this context was created for.
657661
*/

0 commit comments

Comments
 (0)