2626import org .elasticsearch .common .bytes .BytesReference ;
2727import org .elasticsearch .common .compress .CompressedXContent ;
2828import org .elasticsearch .core .CheckedConsumer ;
29+ import org .elasticsearch .core .Nullable ;
2930import org .elasticsearch .core .Tuple ;
3031import org .elasticsearch .index .IndexVersion ;
3132import org .elasticsearch .index .IndexVersions ;
4041import org .elasticsearch .search .lookup .Source ;
4142import org .elasticsearch .search .vectors .SparseVectorQueryWrapper ;
4243import org .elasticsearch .test .index .IndexVersionUtils ;
43- import org .elasticsearch .test .junit .annotations .TestLogging ;
4444import org .elasticsearch .xcontent .ToXContent ;
4545import org .elasticsearch .xcontent .XContentBuilder ;
4646import org .elasticsearch .xcontent .XContentParseException ;
5050import org .junit .AssumptionViolatedException ;
5151
5252import java .io .IOException ;
53- import java .util .ArrayList ;
5453import java .util .Arrays ;
5554import java .util .Collection ;
56- import java .util .EnumSet ;
5755import java .util .LinkedHashMap ;
5856import java .util .List ;
5957import java .util .Map ;
@@ -203,6 +201,30 @@ protected void mappingWithIndexOptionsPruneFalse(XContentBuilder b) throws IOExc
203201 b .endObject ();
204202 }
205203
204+ private void mapping (XContentBuilder b , @ Nullable Boolean prune , PruningConfig pruningConfig , Boolean previousVersion ) throws IOException {
205+ b .field ("type" , "sparse_vector" );
206+ if (previousVersion == false && prune != null ) {
207+ b .startObject ("index_options" );
208+ {
209+ b .field ("prune" , prune );
210+ if (pruningConfig != PruningConfig .NULL ) {
211+ b .startObject ("pruning_config" );
212+ {
213+ if (pruningConfig == PruningConfig .EXPLICIT_DEFAULT ) {
214+ b .field ("tokens_freq_ratio_threshold" , TokenPruningConfig .DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD );
215+ b .field ("tokens_weight_threshold" , TokenPruningConfig .DEFAULT_TOKENS_WEIGHT_THRESHOLD );
216+ } else if (pruningConfig == PruningConfig .STRICT ) {
217+ b .field ("tokens_freq_ratio_threshold" , STRICT_TOKENS_FREQ_RATIO_THRESHOLD );
218+ b .field ("tokens_weight_threshold" , STRICT_TOKENS_WEIGHT_THRESHOLD );
219+ }
220+ }
221+ b .endObject ();
222+ }
223+ }
224+ b .endObject ();
225+ }
226+ }
227+
206228 @ Override
207229 protected boolean supportsStoredFields () {
208230 return false ;
@@ -701,27 +723,25 @@ public void testTokensWeightThresholdCorrect() {
701723 );
702724 }
703725
704- public enum PruningScenario {
726+ private enum PruningScenario {
705727 NO_PRUNING , // No pruning applied - all tokens preserved
706728 DEFAULT_PRUNING , // Default pruning configuration
707729 STRICT_PRUNING // Stricter pruning with higher thresholds
708730 }
709731
710- public enum QueryPruningScenario {
711- PRUNE_FALSE_NO_CONFIG ,
712- PRUNE_FALSE_WITH_CONFIG ,
713- PRUNE_TRUE_NO_CONFIG ,
714- PRUNE_TRUE_WITH_CONFIG ,
715- PRUNE_NULL_WITH_CONFIG ,
716- PRUNE_NULL_NO_CONFIG
732+ private enum PruningConfig {
733+ NULL , EXPLICIT_DEFAULT , STRICT
717734 }
718735
719- public enum IndexPruningScenario {
720- PRUNE_FALSE_NO_CONFIG ,
721- PRUNE_TRUE_NO_CONFIG ,
722- PRUNE_TRUE_WITH_CONFIG ,
723- PRUNE_NULL_NO_CONFIG
724- }
736+ private final Set <PruningOptions > validIndexPruningScenarios = Set .of (
737+ new PruningOptions (false , PruningConfig .NULL ),
738+ new PruningOptions (true , PruningConfig .NULL ),
739+ new PruningOptions (true , PruningConfig .EXPLICIT_DEFAULT ),
740+ new PruningOptions (true , PruningConfig .STRICT ),
741+ new PruningOptions (null , PruningConfig .NULL )
742+ );
743+
744+ private record PruningOptions (@ Nullable Boolean prune , PruningConfig pruningConfig ) {}
725745
726746 private void withSearchExecutionContext (MapperService mapperService , CheckedConsumer <SearchExecutionContext , IOException > consumer )
727747 throws IOException {
@@ -757,47 +777,17 @@ private void withSearchExecutionContext(MapperService mapperService, CheckedCons
757777 }
758778
759779 public void testPruningScenarios () throws Exception {
760- for (int i = 0 ; i < 60 ; i ++) {
761- assertPruningScenario (randomFrom (IndexPruningScenario .values ()), randomFrom (QueryPruningScenario .values ()));
762- }
763- }
764-
765- public void testPruningDefaultsPreIndexOptions () throws Exception {
766- IndexVersion version = IndexVersionUtils .randomVersionBetween (
767- random (),
768- UPGRADE_TO_LUCENE_10_0_0 ,
769- IndexVersionUtils .getPreviousVersion (SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT )
770- );
771- IndexPruningScenario indexPruningScenario = IndexPruningScenario .PRUNE_NULL_NO_CONFIG ;
772- QueryPruningScenario queryPruningScenario = QueryPruningScenario .PRUNE_NULL_NO_CONFIG ;
773- MapperService mapperService = createMapperService (version , getIndexMapping (indexPruningScenario ));
774- Tuple <Boolean , TokenPruningConfig > queryPruneConfig = getQueryPruneConfig (queryPruningScenario );
775-
776- withSearchExecutionContext (mapperService , (context ) -> {
777- SparseVectorFieldMapper .SparseVectorFieldType ft = (SparseVectorFieldMapper .SparseVectorFieldType ) mapperService .fieldType (
778- "field"
780+ for (int i = 0 ; i < 120 ; i ++) {
781+ assertPruningScenario (
782+ randomFrom (validIndexPruningScenarios ),
783+ new PruningOptions (randomBoolean () ? randomBoolean () : null ,
784+ randomFrom (PruningConfig .values ()))
779785 );
780- Query finalizedQuery = ft .finalizeSparseVectorQuery (
781- context ,
782- "field" ,
783- QUERY_VECTORS ,
784- queryPruneConfig .v1 (),
785- queryPruneConfig .v2 ()
786- );
787- // query should _not_ be pruned by default on older index versions
788- List <Query > expectedQueryClauses = getExpectedQueryClauses (ft , PruningScenario .NO_PRUNING , context );
789- assertQueryContains (expectedQueryClauses , finalizedQuery );
790- });
791-
786+ }
792787 }
793788
794- private XContentBuilder getIndexMapping (IndexPruningScenario pruningScenario ) throws IOException {
795- return switch (pruningScenario ) {
796- case PRUNE_FALSE_NO_CONFIG -> fieldMapping (this ::mappingWithIndexOptionsPruneFalse );
797- case PRUNE_TRUE_NO_CONFIG -> fieldMapping (this ::mappingWithIndexOptionsOnlyPruneTrue );
798- case PRUNE_TRUE_WITH_CONFIG -> fieldMapping (this ::minimalMappingWithExplicitIndexOptions );
799- case PRUNE_NULL_NO_CONFIG -> fieldMapping (this ::minimalMapping );
800- };
789+ private XContentBuilder getIndexMapping (PruningOptions pruningOptions , Boolean usePreviousIndex ) throws IOException {
790+ return fieldMapping (b -> mapping (b , pruningOptions .prune (), pruningOptions .pruningConfig (), usePreviousIndex ));
801791 }
802792
803793 private void assertQueryContains (List <Query > expectedClauses , Query query ) {
@@ -810,71 +800,53 @@ private void assertQueryContains(List<Query> expectedClauses, Query query) {
810800 assertThat (shouldClauses , Matchers .containsInAnyOrder (expectedClauses .toArray ()));
811801 }
812802
813- private void assertQueryHasClauseCount (Query query , int clauseCount ) {
814- SparseVectorQueryWrapper queryWrapper = (SparseVectorQueryWrapper ) query ;
815- var termsQuery = queryWrapper .getTermsQuery ();
816- assertNotNull (termsQuery );
817- var booleanQuery = (BooleanQuery ) termsQuery ;
818- Collection <Query > clauses = booleanQuery .getClauses (BooleanClause .Occur .SHOULD );
819- assertThat (clauses .size (), equalTo (clauseCount ));
803+ private PruningScenario getPruningLevel (PruningConfig config ) {
804+ if (config == PruningConfig .STRICT ) {
805+ return PruningScenario .STRICT_PRUNING ;
806+ }
807+ return PruningScenario .DEFAULT_PRUNING ;
820808 }
821809
822810 private PruningScenario getEffectivePruningScenario (
823- IndexPruningScenario indexPruningScenario ,
824- QueryPruningScenario queryPruningScenario
811+ PruningOptions indexPruningOptions ,
812+ PruningOptions queryPruningOptions ,
813+ Boolean usePreviousIndex
825814 ) {
826- PruningScenario effectivePruningScenario = null ;
827- if (queryPruningScenario != null ) {
828- effectivePruningScenario = switch (queryPruningScenario ) {
829- case PRUNE_FALSE_NO_CONFIG , PRUNE_FALSE_WITH_CONFIG -> PruningScenario .NO_PRUNING ;
830- case PRUNE_TRUE_WITH_CONFIG -> PruningScenario .STRICT_PRUNING ;
831- case PRUNE_TRUE_NO_CONFIG , PRUNE_NULL_NO_CONFIG , PRUNE_NULL_WITH_CONFIG -> null ; // Need to compare with
832- // indexPruningScenario
833- };
815+ if (usePreviousIndex ) {
816+ return (queryPruningOptions .prune != null && queryPruningOptions .prune )
817+ ? getPruningLevel (queryPruningOptions .pruningConfig )
818+ : PruningScenario .NO_PRUNING ;
834819 }
835820
836- if (effectivePruningScenario != null ) {
837- return effectivePruningScenario ;
838- }
839- Set <IndexPruningScenario > indexOptionsWithoutConfig = EnumSet .of (
840- IndexPruningScenario .PRUNE_NULL_NO_CONFIG ,
841- IndexPruningScenario .PRUNE_TRUE_NO_CONFIG
842- );
843-
844- if (queryPruningScenario == QueryPruningScenario .PRUNE_NULL_NO_CONFIG ) {
845- if (indexPruningScenario == IndexPruningScenario .PRUNE_FALSE_NO_CONFIG ) return PruningScenario .NO_PRUNING ;
846- else if (indexOptionsWithoutConfig .contains (indexPruningScenario )) return PruningScenario .DEFAULT_PRUNING ;
847- else return PruningScenario .STRICT_PRUNING ;
821+ Boolean shouldPrune = indexPruningOptions .prune ;
822+ if (queryPruningOptions .prune != null ) {
823+ shouldPrune = queryPruningOptions .prune ;
848824 }
849825
850- if (queryPruningScenario == QueryPruningScenario . PRUNE_TRUE_NO_CONFIG ) {
851- if ( indexPruningScenario == IndexPruningScenario . PRUNE_TRUE_WITH_CONFIG ) return PruningScenario . STRICT_PRUNING ;
852- else return PruningScenario .DEFAULT_PRUNING ;
826+ if (shouldPrune != null && shouldPrune == false ) {
827+ // Pruning is explicitly disabled
828+ return PruningScenario .NO_PRUNING ;
853829 }
854830
855- if (queryPruningScenario == QueryPruningScenario .PRUNE_NULL_WITH_CONFIG ) {
856- if (indexPruningScenario == IndexPruningScenario .PRUNE_FALSE_NO_CONFIG ) return PruningScenario .NO_PRUNING ;
857- else return PruningScenario .STRICT_PRUNING ;
831+ return queryPruningOptions .pruningConfig != PruningConfig .NULL
832+ ? getPruningLevel (queryPruningOptions .pruningConfig )
833+ : getPruningLevel (indexPruningOptions .pruningConfig );
834+ }
835+
836+ private Tuple <Boolean , TokenPruningConfig > getQueryPruneConfig (PruningOptions queryPruningOptions ) {
837+ Boolean prune = queryPruningOptions .prune ;
838+ TokenPruningConfig tokenPruningConfig = null ;
839+ if (queryPruningOptions .pruningConfig != PruningConfig .NULL ) {
840+ switch (queryPruningOptions .pruningConfig ) {
841+ case EXPLICIT_DEFAULT -> tokenPruningConfig = new TokenPruningConfig ();
842+ case STRICT -> tokenPruningConfig = new TokenPruningConfig (
843+ STRICT_TOKENS_FREQ_RATIO_THRESHOLD ,
844+ STRICT_TOKENS_WEIGHT_THRESHOLD ,
845+ false
846+ );
847+ }
858848 }
859-
860- return PruningScenario .DEFAULT_PRUNING ;
861- }
862-
863- private Tuple <Boolean , TokenPruningConfig > getQueryPruneConfig (QueryPruningScenario queryPruningScenario ) {
864- return switch (queryPruningScenario ) {
865- case PRUNE_FALSE_NO_CONFIG -> new Tuple <>(false , null );
866- case PRUNE_FALSE_WITH_CONFIG -> new Tuple <>(false , new TokenPruningConfig ());
867- case PRUNE_TRUE_NO_CONFIG -> new Tuple <>(true , null );
868- case PRUNE_TRUE_WITH_CONFIG -> new Tuple <>(
869- true ,
870- new TokenPruningConfig (STRICT_TOKENS_FREQ_RATIO_THRESHOLD , STRICT_TOKENS_WEIGHT_THRESHOLD , false )
871- );
872- case PRUNE_NULL_WITH_CONFIG -> new Tuple <>(
873- null ,
874- new TokenPruningConfig (STRICT_TOKENS_FREQ_RATIO_THRESHOLD , STRICT_TOKENS_WEIGHT_THRESHOLD , false )
875- );
876- case PRUNE_NULL_NO_CONFIG -> new Tuple <>(null , null );
877- };
849+ return new Tuple <>(prune , tokenPruningConfig );
878850 }
879851
880852 private List <Query > getExpectedQueryClauses (SparseVectorFieldMapper .SparseVectorFieldType ft , PruningScenario pruningScenario , SearchExecutionContext searchExecutionContext ) {
@@ -892,16 +864,18 @@ private List<Query> getExpectedQueryClauses(SparseVectorFieldMapper.SparseVector
892864 }).collect (Collectors .toUnmodifiableList ());
893865 }
894866
895- private void assertPruningScenario (IndexPruningScenario indexPruningScenario , QueryPruningScenario queryPruningScenario )
867+ private void assertPruningScenario (PruningOptions indexPruningOptions , PruningOptions queryPruningOptions )
896868 throws IOException {
897- IndexVersion indexVersion = IndexVersionUtils .randomVersionBetween (
898- random (),
899- SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT ,
900- IndexVersion .current ()
901- );
902- MapperService mapperService = createMapperService (indexVersion , getIndexMapping (indexPruningScenario ));
903- Tuple <Boolean , TokenPruningConfig > queryPruneConfig = getQueryPruneConfig (queryPruningScenario );
904- PruningScenario effectivePruningScenario = getEffectivePruningScenario (indexPruningScenario , queryPruningScenario );
869+
870+ boolean usePreIndexOptionsIndex = false ;
871+ if (indexPruningOptions .prune == null && indexPruningOptions .pruningConfig == PruningConfig .NULL ) {
872+ usePreIndexOptionsIndex = randomBoolean ();
873+ }
874+
875+ IndexVersion indexVersion = getIndexVersionForTest (usePreIndexOptionsIndex );
876+ MapperService mapperService = createMapperService (indexVersion , getIndexMapping (indexPruningOptions , usePreIndexOptionsIndex ));
877+ Tuple <Boolean , TokenPruningConfig > queryPruneConfig = getQueryPruneConfig (queryPruningOptions );
878+ PruningScenario effectivePruningScenario = getEffectivePruningScenario (indexPruningOptions , queryPruningOptions , usePreIndexOptionsIndex );
905879 withSearchExecutionContext (mapperService , (context ) -> {
906880 SparseVectorFieldMapper .SparseVectorFieldType ft = (SparseVectorFieldMapper .SparseVectorFieldType ) mapperService .fieldType (
907881 "field"
0 commit comments