|
17 | 17 | import org.elasticsearch.common.io.stream.StreamInput; |
18 | 18 | import org.elasticsearch.common.io.stream.StreamOutput; |
19 | 19 | import org.elasticsearch.core.Nullable; |
| 20 | +import org.elasticsearch.index.IndexVersions; |
20 | 21 | import org.elasticsearch.index.mapper.MappedFieldType; |
21 | 22 | import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; |
22 | 23 | import org.elasticsearch.index.mapper.vectors.TokenPruningConfig; |
@@ -235,23 +236,10 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { |
235 | 236 | ); |
236 | 237 | } |
237 | 238 |
|
238 | | - Boolean pruneTokensToUse = shouldPruneTokens; |
239 | | - TokenPruningConfig pruningConfigToUse = tokenPruningConfig; |
| 239 | + TokenPruningConfig pruningConfig = getTokenPruningConfigForQuery(ft, context); |
240 | 240 |
|
241 | | - // if the query options for pruning are not set, we need to check the index options for this field |
242 | | - // and use those if set - however, only if the index was created after we added this support. |
243 | | - if (ft.getClass().equals(SparseVectorFieldMapper.SparseVectorFieldType.class) |
244 | | - && context.indexVersionCreated().onOrAfter(SparseVectorFieldMapper.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)) { |
245 | | - SparseVectorFieldMapper.SparseVectorFieldType asSVFieldType = (SparseVectorFieldMapper.SparseVectorFieldType) ft; |
246 | | - |
247 | | - if (asSVFieldType.getIndexOptions() != null) { |
248 | | - pruneTokensToUse = pruneTokensToUse == null ? asSVFieldType.getIndexOptions().getPrune() : pruneTokensToUse; |
249 | | - pruningConfigToUse = pruningConfigToUse == null ? asSVFieldType.getIndexOptions().getPruningConfig() : pruningConfigToUse; |
250 | | - } |
251 | | - } |
252 | | - |
253 | | - return (pruneTokensToUse != null && pruneTokensToUse) |
254 | | - ? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, pruningConfigToUse, queryVectors, ft, context) |
| 241 | + return pruningConfig != null |
| 242 | + ? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, pruningConfig, queryVectors, ft, context) |
255 | 243 | : WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, ft, context); |
256 | 244 | } |
257 | 245 |
|
@@ -362,6 +350,52 @@ public TransportVersion getMinimalSupportedVersion() { |
362 | 350 | return TransportVersions.V_8_15_0; |
363 | 351 | } |
364 | 352 |
|
| 353 | + private TokenPruningConfig getTokenPruningConfigForQuery(MappedFieldType ft, SearchExecutionContext context) { |
| 354 | + TokenPruningConfig queryPruningConfig = (shouldPruneTokens != null && shouldPruneTokens) ? tokenPruningConfig : null; |
| 355 | + |
| 356 | + // query values should always override any index options |
| 357 | + if (shouldPruneTokens != null) { |
| 358 | + return queryPruningConfig; |
| 359 | + } |
| 360 | + |
| 361 | + // if we are not on a supported index version, do not prune by default |
| 362 | + // nor do we check the index options |
| 363 | + if (context.indexVersionCreated().onOrAfter(IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT) == false |
| 364 | + && context.indexVersionCreated() |
| 365 | + .between( |
| 366 | + IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X, |
| 367 | + IndexVersions.UPGRADE_TO_LUCENE_10_0_0 |
| 368 | + ) == false) { |
| 369 | + return null; |
| 370 | + } |
| 371 | + |
| 372 | + if (ft instanceof SparseVectorFieldMapper.SparseVectorFieldType asSVFieldType) { |
| 373 | + Boolean pruneTokensToUse = shouldPruneTokens; |
| 374 | + TokenPruningConfig pruningConfigToUse = tokenPruningConfig; |
| 375 | + |
| 376 | + if (asSVFieldType.getIndexOptions() != null) { |
| 377 | + pruneTokensToUse = asSVFieldType.getIndexOptions().getPrune(); |
| 378 | + pruningConfigToUse = pruningConfigToUse == null ? asSVFieldType.getIndexOptions().getPruningConfig() : pruningConfigToUse; |
| 379 | + } |
| 380 | + |
| 381 | + // if we're still null, use defaults |
| 382 | + pruneTokensToUse = pruneTokensToUse == null ? true : pruneTokensToUse; |
| 383 | + pruningConfigToUse = pruningConfigToUse == null |
| 384 | + ? new TokenPruningConfig( |
| 385 | + TokenPruningConfig.DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD, |
| 386 | + TokenPruningConfig.DEFAULT_TOKENS_WEIGHT_THRESHOLD, |
| 387 | + false |
| 388 | + ) |
| 389 | + : pruningConfigToUse; |
| 390 | + |
| 391 | + return pruneTokensToUse ? pruningConfigToUse : null; |
| 392 | + } |
| 393 | + |
| 394 | + // should never happen that we're not operating on a SparseVectorFieldType |
| 395 | + // but just in case, return null and do not prune |
| 396 | + return null; |
| 397 | + } |
| 398 | + |
365 | 399 | private static final ConstructingObjectParser<SparseVectorQueryBuilder, Void> PARSER = new ConstructingObjectParser<>(NAME, a -> { |
366 | 400 | String fieldName = (String) a[0]; |
367 | 401 | @SuppressWarnings("unchecked") |
|
0 commit comments