Skip to content

Commit 63b00ff

Browse files
committed
Refactor effective pruning calculation
1 parent c5102cd commit 63b00ff

File tree

1 file changed

+92
-118
lines changed

1 file changed

+92
-118
lines changed

server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java

Lines changed: 92 additions & 118 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
import org.elasticsearch.common.bytes.BytesReference;
2727
import org.elasticsearch.common.compress.CompressedXContent;
2828
import org.elasticsearch.core.CheckedConsumer;
29+
import org.elasticsearch.core.Nullable;
2930
import org.elasticsearch.core.Tuple;
3031
import org.elasticsearch.index.IndexVersion;
3132
import org.elasticsearch.index.IndexVersions;
@@ -40,7 +41,6 @@
4041
import org.elasticsearch.search.lookup.Source;
4142
import org.elasticsearch.search.vectors.SparseVectorQueryWrapper;
4243
import org.elasticsearch.test.index.IndexVersionUtils;
43-
import org.elasticsearch.test.junit.annotations.TestLogging;
4444
import org.elasticsearch.xcontent.ToXContent;
4545
import org.elasticsearch.xcontent.XContentBuilder;
4646
import org.elasticsearch.xcontent.XContentParseException;
@@ -50,10 +50,8 @@
5050
import org.junit.AssumptionViolatedException;
5151

5252
import java.io.IOException;
53-
import java.util.ArrayList;
5453
import java.util.Arrays;
5554
import java.util.Collection;
56-
import java.util.EnumSet;
5755
import java.util.LinkedHashMap;
5856
import java.util.List;
5957
import java.util.Map;
@@ -203,6 +201,30 @@ protected void mappingWithIndexOptionsPruneFalse(XContentBuilder b) throws IOExc
203201
b.endObject();
204202
}
205203

204+
private void mapping(XContentBuilder b, @Nullable Boolean prune, PruningConfig pruningConfig, Boolean previousVersion) throws IOException {
205+
b.field("type", "sparse_vector");
206+
if (previousVersion == false && prune != null) {
207+
b.startObject("index_options");
208+
{
209+
b.field("prune", prune);
210+
if (pruningConfig != PruningConfig.NULL) {
211+
b.startObject("pruning_config");
212+
{
213+
if (pruningConfig == PruningConfig.EXPLICIT_DEFAULT) {
214+
b.field("tokens_freq_ratio_threshold", TokenPruningConfig.DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD);
215+
b.field("tokens_weight_threshold", TokenPruningConfig.DEFAULT_TOKENS_WEIGHT_THRESHOLD);
216+
} else if (pruningConfig == PruningConfig.STRICT) {
217+
b.field("tokens_freq_ratio_threshold", STRICT_TOKENS_FREQ_RATIO_THRESHOLD);
218+
b.field("tokens_weight_threshold", STRICT_TOKENS_WEIGHT_THRESHOLD);
219+
}
220+
}
221+
b.endObject();
222+
}
223+
}
224+
b.endObject();
225+
}
226+
}
227+
206228
@Override
207229
protected boolean supportsStoredFields() {
208230
return false;
@@ -701,27 +723,25 @@ public void testTokensWeightThresholdCorrect() {
701723
);
702724
}
703725

704-
public enum PruningScenario {
726+
private enum PruningScenario {
705727
NO_PRUNING, // No pruning applied - all tokens preserved
706728
DEFAULT_PRUNING, // Default pruning configuration
707729
STRICT_PRUNING // Stricter pruning with higher thresholds
708730
}
709731

710-
public enum QueryPruningScenario {
711-
PRUNE_FALSE_NO_CONFIG,
712-
PRUNE_FALSE_WITH_CONFIG,
713-
PRUNE_TRUE_NO_CONFIG,
714-
PRUNE_TRUE_WITH_CONFIG,
715-
PRUNE_NULL_WITH_CONFIG,
716-
PRUNE_NULL_NO_CONFIG
732+
private enum PruningConfig {
733+
NULL, EXPLICIT_DEFAULT, STRICT
717734
}
718735

719-
public enum IndexPruningScenario {
720-
PRUNE_FALSE_NO_CONFIG,
721-
PRUNE_TRUE_NO_CONFIG,
722-
PRUNE_TRUE_WITH_CONFIG,
723-
PRUNE_NULL_NO_CONFIG
724-
}
736+
private final Set<PruningOptions> validIndexPruningScenarios = Set.of(
737+
new PruningOptions(false, PruningConfig.NULL),
738+
new PruningOptions(true, PruningConfig.NULL),
739+
new PruningOptions(true, PruningConfig.EXPLICIT_DEFAULT),
740+
new PruningOptions(true, PruningConfig.STRICT),
741+
new PruningOptions(null, PruningConfig.NULL)
742+
);
743+
744+
private record PruningOptions(@Nullable Boolean prune, PruningConfig pruningConfig) {}
725745

726746
private void withSearchExecutionContext(MapperService mapperService, CheckedConsumer<SearchExecutionContext, IOException> consumer)
727747
throws IOException {
@@ -757,47 +777,17 @@ private void withSearchExecutionContext(MapperService mapperService, CheckedCons
757777
}
758778

759779
public void testPruningScenarios() throws Exception {
760-
for (int i = 0; i < 60; i++) {
761-
assertPruningScenario(randomFrom(IndexPruningScenario.values()), randomFrom(QueryPruningScenario.values()));
762-
}
763-
}
764-
765-
public void testPruningDefaultsPreIndexOptions() throws Exception {
766-
IndexVersion version = IndexVersionUtils.randomVersionBetween(
767-
random(),
768-
UPGRADE_TO_LUCENE_10_0_0,
769-
IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT)
770-
);
771-
IndexPruningScenario indexPruningScenario = IndexPruningScenario.PRUNE_NULL_NO_CONFIG;
772-
QueryPruningScenario queryPruningScenario = QueryPruningScenario.PRUNE_NULL_NO_CONFIG;
773-
MapperService mapperService = createMapperService(version, getIndexMapping(indexPruningScenario));
774-
Tuple<Boolean, TokenPruningConfig> queryPruneConfig = getQueryPruneConfig(queryPruningScenario);
775-
776-
withSearchExecutionContext(mapperService, (context) -> {
777-
SparseVectorFieldMapper.SparseVectorFieldType ft = (SparseVectorFieldMapper.SparseVectorFieldType) mapperService.fieldType(
778-
"field"
780+
for (int i = 0; i < 120; i++) {
781+
assertPruningScenario(
782+
randomFrom(validIndexPruningScenarios),
783+
new PruningOptions(randomBoolean() ? randomBoolean() : null,
784+
randomFrom(PruningConfig.values()))
779785
);
780-
Query finalizedQuery = ft.finalizeSparseVectorQuery(
781-
context,
782-
"field",
783-
QUERY_VECTORS,
784-
queryPruneConfig.v1(),
785-
queryPruneConfig.v2()
786-
);
787-
// query should _not_ be pruned by default on older index versions
788-
List<Query> expectedQueryClauses = getExpectedQueryClauses(ft, PruningScenario.NO_PRUNING, context);
789-
assertQueryContains(expectedQueryClauses, finalizedQuery);
790-
});
791-
786+
}
792787
}
793788

794-
private XContentBuilder getIndexMapping(IndexPruningScenario pruningScenario) throws IOException {
795-
return switch (pruningScenario) {
796-
case PRUNE_FALSE_NO_CONFIG -> fieldMapping(this::mappingWithIndexOptionsPruneFalse);
797-
case PRUNE_TRUE_NO_CONFIG -> fieldMapping(this::mappingWithIndexOptionsOnlyPruneTrue);
798-
case PRUNE_TRUE_WITH_CONFIG -> fieldMapping(this::minimalMappingWithExplicitIndexOptions);
799-
case PRUNE_NULL_NO_CONFIG -> fieldMapping(this::minimalMapping);
800-
};
789+
private XContentBuilder getIndexMapping(PruningOptions pruningOptions, Boolean usePreviousIndex) throws IOException {
790+
return fieldMapping(b -> mapping(b, pruningOptions.prune(), pruningOptions.pruningConfig(), usePreviousIndex));
801791
}
802792

803793
private void assertQueryContains(List<Query> expectedClauses, Query query) {
@@ -810,71 +800,53 @@ private void assertQueryContains(List<Query> expectedClauses, Query query) {
810800
assertThat(shouldClauses, Matchers.containsInAnyOrder(expectedClauses.toArray()));
811801
}
812802

813-
private void assertQueryHasClauseCount(Query query, int clauseCount) {
814-
SparseVectorQueryWrapper queryWrapper = (SparseVectorQueryWrapper) query;
815-
var termsQuery = queryWrapper.getTermsQuery();
816-
assertNotNull(termsQuery);
817-
var booleanQuery = (BooleanQuery) termsQuery;
818-
Collection<Query> clauses = booleanQuery.getClauses(BooleanClause.Occur.SHOULD);
819-
assertThat(clauses.size(), equalTo(clauseCount));
803+
private PruningScenario getPruningLevel(PruningConfig config) {
804+
if (config == PruningConfig.STRICT) {
805+
return PruningScenario.STRICT_PRUNING;
806+
}
807+
return PruningScenario.DEFAULT_PRUNING;
820808
}
821809

822810
private PruningScenario getEffectivePruningScenario(
823-
IndexPruningScenario indexPruningScenario,
824-
QueryPruningScenario queryPruningScenario
811+
PruningOptions indexPruningOptions,
812+
PruningOptions queryPruningOptions,
813+
Boolean usePreviousIndex
825814
) {
826-
PruningScenario effectivePruningScenario = null;
827-
if (queryPruningScenario != null) {
828-
effectivePruningScenario = switch (queryPruningScenario) {
829-
case PRUNE_FALSE_NO_CONFIG, PRUNE_FALSE_WITH_CONFIG -> PruningScenario.NO_PRUNING;
830-
case PRUNE_TRUE_WITH_CONFIG -> PruningScenario.STRICT_PRUNING;
831-
case PRUNE_TRUE_NO_CONFIG, PRUNE_NULL_NO_CONFIG, PRUNE_NULL_WITH_CONFIG -> null; // Need to compare with
832-
// indexPruningScenario
833-
};
815+
if (usePreviousIndex) {
816+
return (queryPruningOptions.prune != null && queryPruningOptions.prune)
817+
? getPruningLevel(queryPruningOptions.pruningConfig)
818+
: PruningScenario.NO_PRUNING;
834819
}
835820

836-
if (effectivePruningScenario != null) {
837-
return effectivePruningScenario;
838-
}
839-
Set<IndexPruningScenario> indexOptionsWithoutConfig = EnumSet.of(
840-
IndexPruningScenario.PRUNE_NULL_NO_CONFIG,
841-
IndexPruningScenario.PRUNE_TRUE_NO_CONFIG
842-
);
843-
844-
if (queryPruningScenario == QueryPruningScenario.PRUNE_NULL_NO_CONFIG) {
845-
if (indexPruningScenario == IndexPruningScenario.PRUNE_FALSE_NO_CONFIG) return PruningScenario.NO_PRUNING;
846-
else if (indexOptionsWithoutConfig.contains(indexPruningScenario)) return PruningScenario.DEFAULT_PRUNING;
847-
else return PruningScenario.STRICT_PRUNING;
821+
Boolean shouldPrune = indexPruningOptions.prune;
822+
if (queryPruningOptions.prune != null) {
823+
shouldPrune = queryPruningOptions.prune;
848824
}
849825

850-
if (queryPruningScenario == QueryPruningScenario.PRUNE_TRUE_NO_CONFIG) {
851-
if (indexPruningScenario == IndexPruningScenario.PRUNE_TRUE_WITH_CONFIG) return PruningScenario.STRICT_PRUNING;
852-
else return PruningScenario.DEFAULT_PRUNING;
826+
if (shouldPrune != null && shouldPrune == false) {
827+
// Pruning is explicitly disabled
828+
return PruningScenario.NO_PRUNING;
853829
}
854830

855-
if (queryPruningScenario == QueryPruningScenario.PRUNE_NULL_WITH_CONFIG) {
856-
if (indexPruningScenario == IndexPruningScenario.PRUNE_FALSE_NO_CONFIG) return PruningScenario.NO_PRUNING;
857-
else return PruningScenario.STRICT_PRUNING;
831+
return queryPruningOptions.pruningConfig != PruningConfig.NULL
832+
? getPruningLevel(queryPruningOptions.pruningConfig)
833+
: getPruningLevel(indexPruningOptions.pruningConfig);
834+
}
835+
836+
private Tuple<Boolean, TokenPruningConfig> getQueryPruneConfig(PruningOptions queryPruningOptions) {
837+
Boolean prune = queryPruningOptions.prune;
838+
TokenPruningConfig tokenPruningConfig = null;
839+
if (queryPruningOptions.pruningConfig != PruningConfig.NULL) {
840+
switch (queryPruningOptions.pruningConfig) {
841+
case EXPLICIT_DEFAULT -> tokenPruningConfig = new TokenPruningConfig();
842+
case STRICT -> tokenPruningConfig = new TokenPruningConfig(
843+
STRICT_TOKENS_FREQ_RATIO_THRESHOLD,
844+
STRICT_TOKENS_WEIGHT_THRESHOLD,
845+
false
846+
);
847+
}
858848
}
859-
860-
return PruningScenario.DEFAULT_PRUNING;
861-
}
862-
863-
private Tuple<Boolean, TokenPruningConfig> getQueryPruneConfig(QueryPruningScenario queryPruningScenario) {
864-
return switch (queryPruningScenario) {
865-
case PRUNE_FALSE_NO_CONFIG -> new Tuple<>(false, null);
866-
case PRUNE_FALSE_WITH_CONFIG -> new Tuple<>(false, new TokenPruningConfig());
867-
case PRUNE_TRUE_NO_CONFIG -> new Tuple<>(true, null);
868-
case PRUNE_TRUE_WITH_CONFIG -> new Tuple<>(
869-
true,
870-
new TokenPruningConfig(STRICT_TOKENS_FREQ_RATIO_THRESHOLD, STRICT_TOKENS_WEIGHT_THRESHOLD, false)
871-
);
872-
case PRUNE_NULL_WITH_CONFIG -> new Tuple<>(
873-
null,
874-
new TokenPruningConfig(STRICT_TOKENS_FREQ_RATIO_THRESHOLD, STRICT_TOKENS_WEIGHT_THRESHOLD, false)
875-
);
876-
case PRUNE_NULL_NO_CONFIG -> new Tuple<>(null, null);
877-
};
849+
return new Tuple<>(prune, tokenPruningConfig);
878850
}
879851

880852
private List<Query> getExpectedQueryClauses(SparseVectorFieldMapper.SparseVectorFieldType ft, PruningScenario pruningScenario, SearchExecutionContext searchExecutionContext) {
@@ -892,16 +864,18 @@ private List<Query> getExpectedQueryClauses(SparseVectorFieldMapper.SparseVector
892864
}).collect(Collectors.toUnmodifiableList());
893865
}
894866

895-
private void assertPruningScenario(IndexPruningScenario indexPruningScenario, QueryPruningScenario queryPruningScenario)
867+
private void assertPruningScenario(PruningOptions indexPruningOptions, PruningOptions queryPruningOptions)
896868
throws IOException {
897-
IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween(
898-
random(),
899-
SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT,
900-
IndexVersion.current()
901-
);
902-
MapperService mapperService = createMapperService(indexVersion, getIndexMapping(indexPruningScenario));
903-
Tuple<Boolean, TokenPruningConfig> queryPruneConfig = getQueryPruneConfig(queryPruningScenario);
904-
PruningScenario effectivePruningScenario = getEffectivePruningScenario(indexPruningScenario, queryPruningScenario);
869+
870+
boolean usePreIndexOptionsIndex = false;
871+
if (indexPruningOptions.prune == null && indexPruningOptions.pruningConfig == PruningConfig.NULL) {
872+
usePreIndexOptionsIndex = randomBoolean();
873+
}
874+
875+
IndexVersion indexVersion = getIndexVersionForTest(usePreIndexOptionsIndex);
876+
MapperService mapperService = createMapperService(indexVersion, getIndexMapping(indexPruningOptions, usePreIndexOptionsIndex));
877+
Tuple<Boolean, TokenPruningConfig> queryPruneConfig = getQueryPruneConfig(queryPruningOptions);
878+
PruningScenario effectivePruningScenario = getEffectivePruningScenario(indexPruningOptions, queryPruningOptions, usePreIndexOptionsIndex);
905879
withSearchExecutionContext(mapperService, (context) -> {
906880
SparseVectorFieldMapper.SparseVectorFieldType ft = (SparseVectorFieldMapper.SparseVectorFieldType) mapperService.fieldType(
907881
"field"

0 commit comments

Comments
 (0)