From a0d0b770f60157a4014a810fa8abcba39a7dc581 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 1 Aug 2025 15:08:17 -0400 Subject: [PATCH 01/46] add multi_match intercepter --- .../xpack/inference/InferencePlugin.java | 2 + ...nticMultiMatchQueryRewriteInterceptor.java | 227 ++++++++++++++++++ 2 files changed, 229 insertions(+) create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index 1a2de4cc6b31f..8fbaf11091528 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -95,6 +95,7 @@ import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; import org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor; import org.elasticsearch.xpack.inference.queries.SemanticMatchQueryRewriteInterceptor; +import org.elasticsearch.xpack.inference.queries.SemanticMultiMatchQueryRewriteInterceptor; import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; import org.elasticsearch.xpack.inference.queries.SemanticSparseVectorQueryRewriteInterceptor; import org.elasticsearch.xpack.inference.rank.random.RandomRankBuilder; @@ -571,6 +572,7 @@ public List getQueryRewriteInterceptors() { return List.of( new SemanticKnnVectorQueryRewriteInterceptor(), new SemanticMatchQueryRewriteInterceptor(), + new SemanticMultiMatchQueryRewriteInterceptor(), new SemanticSparseVectorQueryRewriteInterceptor() ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java new file mode 100644 index 0000000000000..358f2354a3b03 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -0,0 +1,227 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.queries; + +import org.elasticsearch.action.ResolvedIndices; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; +import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.mapper.IndexFieldMapper; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.MultiMatchQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryRewriteContext; +import org.elasticsearch.index.query.TermsQueryBuilder; +import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class SemanticMultiMatchQueryRewriteInterceptor implements QueryRewriteInterceptor { + + public static final NodeFeature SEMANTIC_MULTI_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED = new NodeFeature( + "search.semantic_multi_match_query_rewrite_interception_supported" + ); + + public SemanticMultiMatchQueryRewriteInterceptor() {} + + @Override + public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilder queryBuilder) { + assert (queryBuilder instanceof 
MultiMatchQueryBuilder); + MultiMatchQueryBuilder multiMatchQueryBuilder = (MultiMatchQueryBuilder) queryBuilder; + + ResolvedIndices resolvedIndices = context.getResolvedIndices(); + if (resolvedIndices == null) { + // No resolved indices, so return the original query. + return queryBuilder; + } + + Map fields = multiMatchQueryBuilder.fields(); + if (fields == null || fields.isEmpty()) { + // No fields specified, return original query + return queryBuilder; + } + + MultiFieldInferenceInfo inferenceInfo = resolveInferenceInfoForFields(fields.keySet(), resolvedIndices); + + if (inferenceInfo.getInferenceFields().isEmpty()) { + // No inference fields were identified, so return the original query. + return queryBuilder; + } else if (inferenceInfo.hasNonInferenceFields()) { + // Combined case where some fields are semantic_text and others are not + return buildCombinedInferenceAndNonInferenceQuery(multiMatchQueryBuilder, inferenceInfo); + } else { + // All specified fields are inference fields (semantic_text) + return buildInferenceQuery(multiMatchQueryBuilder, inferenceInfo); + } + } + + @Override + public String getQueryName() { + return MultiMatchQueryBuilder.NAME; + } + + private MultiFieldInferenceInfo resolveInferenceInfoForFields(Set fieldNames, ResolvedIndices resolvedIndices) { + Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); + Map> inferenceFieldsPerIndex = new HashMap<>(); + List nonInferenceIndices = new ArrayList<>(); + Map> inferenceFieldsByIndex = new HashMap<>(); + + for (IndexMetadata indexMetadata : indexMetadataCollection) { + String indexName = indexMetadata.getIndex().getName(); + Map indexInferenceFields = new HashMap<>(); + Set indexInferenceFieldNames = fieldNames.stream() + .filter(fieldName -> indexMetadata.getInferenceFields().containsKey(fieldName)) + .collect(Collectors.toSet()); + + if (indexInferenceFieldNames.isEmpty()) { + nonInferenceIndices.add(indexName); + } else { + for (String 
fieldName : indexInferenceFieldNames) { + indexInferenceFields.put(fieldName, indexMetadata.getInferenceFields().get(fieldName)); + } + inferenceFieldsPerIndex.put(indexName, indexInferenceFields); + inferenceFieldsByIndex.put(indexName, indexInferenceFieldNames); + } + } + + return new MultiFieldInferenceInfo(fieldNames, inferenceFieldsPerIndex, nonInferenceIndices, inferenceFieldsByIndex); + } + + private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, MultiFieldInferenceInfo inferenceInfo) { + String queryValue = (String) originalQuery.value(); + Set inferenceFields = inferenceInfo.getInferenceFields(); + + if (inferenceFields.size() == 1) { + // Single inference field - create a simple semantic query + String fieldName = inferenceFields.iterator().next(); + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); + semanticQuery.boost(originalQuery.boost()); + semanticQuery.queryName(originalQuery.queryName()); + return semanticQuery; + } else { + // Multiple inference fields - create a boolean query with semantic subqueries + BoolQueryBuilder boolQuery = new BoolQueryBuilder(); + for (String fieldName : inferenceFields) { + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); + boolQuery.should(semanticQuery); + } + boolQuery.boost(originalQuery.boost()); + boolQuery.queryName(originalQuery.queryName()); + return boolQuery; + } + } + + private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( + MultiMatchQueryBuilder originalQuery, + MultiFieldInferenceInfo inferenceInfo + ) { + BoolQueryBuilder combinedQuery = new BoolQueryBuilder(); + String queryValue = (String) originalQuery.value(); + + // Add semantic queries for inference fields per index + Map> inferenceFieldsPerIndex = inferenceInfo.getInferenceFieldsPerIndex(); + for (Map.Entry> entry : inferenceFieldsPerIndex.entrySet()) { + String indexName = entry.getKey(); + Map indexInferenceFields = 
entry.getValue(); + + for (String fieldName : indexInferenceFields.keySet()) { + BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); + indexSpecificQuery.must(new SemanticQueryBuilder(fieldName, queryValue, true)); + indexSpecificQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); + combinedQuery.should(indexSpecificQuery); + } + } + + // Add non-inference query for indices without semantic_text fields + if (inferenceInfo.getNonInferenceIndices().isEmpty() == false) { + MultiMatchQueryBuilder nonInferenceQuery = copyMultiMatchQueryBuilder(originalQuery); + BoolQueryBuilder indexFilteredQuery = new BoolQueryBuilder(); + indexFilteredQuery.must(nonInferenceQuery); + indexFilteredQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, inferenceInfo.getNonInferenceIndices())); + combinedQuery.should(indexFilteredQuery); + } + + combinedQuery.boost(originalQuery.boost()); + combinedQuery.queryName(originalQuery.queryName()); + return combinedQuery; + } + + private MultiMatchQueryBuilder copyMultiMatchQueryBuilder(MultiMatchQueryBuilder original) { + MultiMatchQueryBuilder copy = new MultiMatchQueryBuilder(original.value()); + copy.fields(original.fields()); + copy.type(original.type()); + copy.operator(original.operator()); + copy.slop(original.slop()); + copy.analyzer(original.analyzer()); + copy.minimumShouldMatch(original.minimumShouldMatch()); + copy.fuzzyRewrite(original.fuzzyRewrite()); + copy.prefixLength(original.prefixLength()); + copy.maxExpansions(original.maxExpansions()); + copy.fuzzyTranspositions(original.fuzzyTranspositions()); + copy.lenient(original.lenient()); + copy.zeroTermsQuery(original.zeroTermsQuery()); + copy.autoGenerateSynonymsPhraseQuery(original.autoGenerateSynonymsPhraseQuery()); + copy.tieBreaker(original.tieBreaker()); + + if (original.fuzziness() != null) { + copy.fuzziness(original.fuzziness()); + } + + return copy; + } + + /** + * Represents the inference information for multiple fields across 
indices. + */ + public static class MultiFieldInferenceInfo { + private final Set originalFields; + private final Map> inferenceFieldsPerIndex; + private final List nonInferenceIndices; + private final Map> inferenceFieldsByIndex; + + public MultiFieldInferenceInfo( + Set originalFields, + Map> inferenceFieldsPerIndex, + List nonInferenceIndices, + Map> inferenceFieldsByIndex + ) { + this.originalFields = originalFields; + this.inferenceFieldsPerIndex = inferenceFieldsPerIndex; + this.nonInferenceIndices = nonInferenceIndices; + this.inferenceFieldsByIndex = inferenceFieldsByIndex; + } + + public Set getInferenceFields() { + return inferenceFieldsPerIndex.values().stream() + .flatMap(fields -> fields.keySet().stream()) + .collect(Collectors.toSet()); + } + + public Map> getInferenceFieldsPerIndex() { + return inferenceFieldsPerIndex; + } + + public List getNonInferenceIndices() { + return nonInferenceIndices; + } + + public boolean hasNonInferenceFields() { + return !nonInferenceIndices.isEmpty(); + } + + public Map> getInferenceFieldsByIndex() { + return inferenceFieldsByIndex; + } + } +} From dabbb85cfcf10f2b320f3a9e152e53c5d45ee205 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 1 Aug 2025 15:54:22 -0400 Subject: [PATCH 02/46] implement types --- .../xpack/inference/InferenceFeatures.java | 2 + ...nticMultiMatchQueryRewriteInterceptor.java | 94 +++++++++++++++++-- 2 files changed, 86 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 996f4e601289a..2a0d2ed772a2c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -24,6 +24,7 @@ import static 
org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX; import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; import static org.elasticsearch.xpack.inference.queries.SemanticMatchQueryRewriteInterceptor.SEMANTIC_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED; +import static org.elasticsearch.xpack.inference.queries.SemanticMultiMatchQueryRewriteInterceptor.SEMANTIC_MULTI_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED; import static org.elasticsearch.xpack.inference.queries.SemanticSparseVectorQueryRewriteInterceptor.SEMANTIC_SPARSE_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.RERANK_SNIPPETS; import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_SNIPPETS; @@ -60,6 +61,7 @@ public Set getTestFeatures() { SemanticTextFieldMapper.SEMANTIC_TEXT_SKIP_INFERENCE_FIELDS, SEMANTIC_TEXT_HIGHLIGHTER, SEMANTIC_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED, + SEMANTIC_MULTI_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED, SEMANTIC_SPARSE_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED, SemanticInferenceMetadataFieldsMapper.EXPLICIT_NULL_FIXES, SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 358f2354a3b03..242e1ae62a39d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ 
-13,6 +13,7 @@ import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.mapper.IndexFieldMapper; import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; @@ -110,15 +111,88 @@ private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, M semanticQuery.queryName(originalQuery.queryName()); return semanticQuery; } else { - // Multiple inference fields - create a boolean query with semantic subqueries - BoolQueryBuilder boolQuery = new BoolQueryBuilder(); - for (String fieldName : inferenceFields) { - SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); - boolQuery.should(semanticQuery); - } - boolQuery.boost(originalQuery.boost()); - boolQuery.queryName(originalQuery.queryName()); - return boolQuery; + // Multiple inference fields - handle based on multi-match query type + return buildMultiFieldSemanticQuery(originalQuery, inferenceFields, queryValue); + } + } + + private QueryBuilder buildMultiFieldSemanticQuery( + MultiMatchQueryBuilder originalQuery, + Set inferenceFields, + String queryValue + ) { + switch (originalQuery.type()) { + case BEST_FIELDS: + // For best_fields, use dis_max to find the single best matching field + // This mimics the behavior of multi_match best_fields which wraps match queries in dis_max + DisMaxQueryBuilder disMaxQuery = new DisMaxQueryBuilder(); + for (String fieldName : inferenceFields) { + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); + disMaxQuery.add(semanticQuery); + } + // Apply tie_breaker if specified + if (originalQuery.tieBreaker() != null) { + disMaxQuery.tieBreaker(originalQuery.tieBreaker()); + } + disMaxQuery.boost(originalQuery.boost()); + 
disMaxQuery.queryName(originalQuery.queryName()); + return disMaxQuery; + + case MOST_FIELDS: + // For most_fields, we want to score across all fields and sum the scores + // This can be reasonably approximated with semantic queries + BoolQueryBuilder boolQuery = new BoolQueryBuilder(); + for (String fieldName : inferenceFields) { + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); + boolQuery.should(semanticQuery); + } + boolQuery.minimumShouldMatch("1"); + boolQuery.boost(originalQuery.boost()); + boolQuery.queryName(originalQuery.queryName()); + return boolQuery; + + case CROSS_FIELDS: + // Cross-fields requires term-level analysis across fields which doesn't translate + // meaningfully to semantic queries that work with dense vectors + throw new IllegalArgumentException( + "multi_match query with type [cross_fields] is not supported for semantic_text fields. " + + "Use [best_fields] or [most_fields] instead." + ); + + case PHRASE: + // Phrase queries require positional information which semantic queries don't have + throw new IllegalArgumentException( + "multi_match query with type [phrase] is not supported for semantic_text fields. " + + "Use [best_fields] instead." + ); + + case PHRASE_PREFIX: + // Phrase prefix queries require positional and prefix information + throw new IllegalArgumentException( + "multi_match query with type [phrase_prefix] is not supported for semantic_text fields. " + + "Use [best_fields] instead." + ); + + case BOOL_PREFIX: + // Bool prefix requires term-level prefix analysis + throw new IllegalArgumentException( + "multi_match query with type [bool_prefix] is not supported for semantic_text fields. " + + "Use [best_fields] or [most_fields] instead." 
+ ); + + default: + // Fallback to best_fields behavior for unknown types + DisMaxQueryBuilder defaultDisMaxQuery = new DisMaxQueryBuilder(); + for (String fieldName : inferenceFields) { + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); + defaultDisMaxQuery.add(semanticQuery); + } + if (originalQuery.tieBreaker() != null) { + defaultDisMaxQuery.tieBreaker(originalQuery.tieBreaker()); + } + defaultDisMaxQuery.boost(originalQuery.boost()); + defaultDisMaxQuery.queryName(originalQuery.queryName()); + return defaultDisMaxQuery; } } @@ -217,7 +291,7 @@ public List getNonInferenceIndices() { } public boolean hasNonInferenceFields() { - return !nonInferenceIndices.isEmpty(); + return nonInferenceIndices.isEmpty() == false; } public Map> getInferenceFieldsByIndex() { From 21245c3c395bc5e992bf0a667b49301ead387f0b Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Tue, 5 Aug 2025 11:38:47 -0400 Subject: [PATCH 03/46] fixing warning header logic --- .../java/org/elasticsearch/xpack/inference/InferencePlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index 8fbaf11091528..6525fb4eb948e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -572,7 +572,7 @@ public List getQueryRewriteInterceptors() { return List.of( new SemanticKnnVectorQueryRewriteInterceptor(), new SemanticMatchQueryRewriteInterceptor(), - new SemanticMultiMatchQueryRewriteInterceptor(), + new SemanticMultiMatchQueryRewriteInterceptor(getModelRegistry()), new SemanticSparseVectorQueryRewriteInterceptor() ); } From 8a3ca94f7b73fcf7f246fc533ebc80b5790c6b0e Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: 
Tue, 5 Aug 2025 11:39:09 -0400 Subject: [PATCH 04/46] fix warning header logic in semantic multi_match --- ...nticMultiMatchQueryRewriteInterceptor.java | 102 ++++++++++++++++-- 1 file changed, 94 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 242e1ae62a39d..91efc74fc99f4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -10,6 +10,7 @@ import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; +import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.mapper.IndexFieldMapper; import org.elasticsearch.index.query.BoolQueryBuilder; @@ -18,14 +19,19 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.TermsQueryBuilder; +import org.elasticsearch.inference.MinimalServiceSettings; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor; +import org.elasticsearch.xpack.inference.registry.ModelRegistry; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Supplier; import java.util.stream.Collectors; public class SemanticMultiMatchQueryRewriteInterceptor implements QueryRewriteInterceptor { @@ -34,7 +40,12 
@@ public class SemanticMultiMatchQueryRewriteInterceptor implements QueryRewriteIn "search.semantic_multi_match_query_rewrite_interception_supported" ); - public SemanticMultiMatchQueryRewriteInterceptor() {} + private final Supplier modelRegistrySupplier; + + + public SemanticMultiMatchQueryRewriteInterceptor(Supplier modelRegistrySupplier) { + this.modelRegistrySupplier = modelRegistrySupplier; + } @Override public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilder queryBuilder) { @@ -96,7 +107,17 @@ private MultiFieldInferenceInfo resolveInferenceInfoForFields(Set fieldN } } - return new MultiFieldInferenceInfo(fieldNames, inferenceFieldsPerIndex, nonInferenceIndices, inferenceFieldsByIndex); + MultiFieldInferenceInfo inferenceInfo = new MultiFieldInferenceInfo( + fieldNames, + inferenceFieldsPerIndex, + nonInferenceIndices, + inferenceFieldsByIndex + ); + + // Perform early detection of score range mismatches and emit warning if needed + detectAndWarnScoreRangeMismatch(inferenceInfo); + + return inferenceInfo; } private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, MultiFieldInferenceInfo inferenceInfo) { @@ -137,7 +158,7 @@ private QueryBuilder buildMultiFieldSemanticQuery( disMaxQuery.boost(originalQuery.boost()); disMaxQuery.queryName(originalQuery.queryName()); return disMaxQuery; - + case MOST_FIELDS: // For most_fields, we want to score across all fields and sum the scores // This can be reasonably approximated with semantic queries @@ -150,7 +171,7 @@ private QueryBuilder buildMultiFieldSemanticQuery( boolQuery.boost(originalQuery.boost()); boolQuery.queryName(originalQuery.queryName()); return boolQuery; - + case CROSS_FIELDS: // Cross-fields requires term-level analysis across fields which doesn't translate // meaningfully to semantic queries that work with dense vectors @@ -158,28 +179,28 @@ private QueryBuilder buildMultiFieldSemanticQuery( "multi_match query with type [cross_fields] is not supported 
for semantic_text fields. " + "Use [best_fields] or [most_fields] instead." ); - + case PHRASE: // Phrase queries require positional information which semantic queries don't have throw new IllegalArgumentException( "multi_match query with type [phrase] is not supported for semantic_text fields. " + "Use [best_fields] instead." ); - + case PHRASE_PREFIX: // Phrase prefix queries require positional and prefix information throw new IllegalArgumentException( "multi_match query with type [phrase_prefix] is not supported for semantic_text fields. " + "Use [best_fields] instead." ); - + case BOOL_PREFIX: // Bool prefix requires term-level prefix analysis throw new IllegalArgumentException( "multi_match query with type [bool_prefix] is not supported for semantic_text fields. " + "Use [best_fields] or [most_fields] instead." ); - + default: // Fallback to best_fields behavior for unknown types DisMaxQueryBuilder defaultDisMaxQuery = new DisMaxQueryBuilder(); @@ -231,6 +252,71 @@ private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( return combinedQuery; } + /** + * Detects and warns about score range mismatches when a multi_match query has at least one dense vector model (TEXT_EMBEDDING) + * mixed with sparse vector models (SPARSE_EMBEDDING) or non-inference fields. + * Dense vector models typically produce bounded scores (0-1) while sparse vector models and + * non-inference fields produce unbounded scores, causing score range mismatches. + */ + private void detectAndWarnScoreRangeMismatch(MultiFieldInferenceInfo inferenceInfo) { + ModelRegistry modelRegistry = modelRegistrySupplier.get(); + if (modelRegistry == null) { + // Fallback: warn for any mixed semantic_text + non-inference combination + // since we can't determine the exact task types + if (inferenceInfo.hasNonInferenceFields() && inferenceInfo.getInferenceFields().isEmpty() == false) { + HeaderWarning.addWarning( + "Query spans both semantic_text and non-inference fields. 
" + + "Dense vector models (TEXT_EMBEDDING) produce bounded scores (0-1) while sparse vector models " + + "(SPARSE_EMBEDDING) and non-inference fields produce unbounded scores, which may cause score " + + "range mismatches and affect result ranking. Consider using separate queries or score normalization." + ); + } + return; + } + + // Check if we have any dense vector models mixed with sparse vector models or non-inference fields + boolean hasDenseVectorModel = false; + boolean hasSparseVectorModel = false; + boolean hasNonInferenceFields = inferenceInfo.hasNonInferenceFields(); + + // Collect all inference IDs from all fields + Set allInferenceIds = new HashSet<>(); + for (Map indexFields : inferenceInfo.getInferenceFieldsPerIndex().values()) { + for (InferenceFieldMetadata fieldMetadata : indexFields.values()) { + allInferenceIds.add(fieldMetadata.getSearchInferenceId()); + } + } + + // Check task types for each inference ID + for (String inferenceId : allInferenceIds) { + try { + MinimalServiceSettings settings = modelRegistry.getMinimalServiceSettings(inferenceId); + if (settings != null) { + TaskType taskType = settings.taskType(); + if (taskType == TaskType.TEXT_EMBEDDING) { + hasDenseVectorModel = true; + } else if (taskType == TaskType.SPARSE_EMBEDDING) { + hasSparseVectorModel = true; + } + } + } catch (Exception e) { + // If we can't get model info, skip this inference ID + // Or maybe we can throw an error + } + } + + // Emit warning only if we have dense vector model mixed with sparse vector or non-inference fields + if (hasDenseVectorModel && (hasSparseVectorModel || hasNonInferenceFields)) { + HeaderWarning.addWarning( + "Query contains dense vector model (TEXT_EMBEDDING) with bounded scores (0-1) mixed with " + + (hasSparseVectorModel ? "sparse vector model (SPARSE_EMBEDDING) and/or " : "") + + (hasNonInferenceFields ? "non-inference fields " : "") + + "that produce unbounded scores. This may cause score range mismatches and affect result ranking. 
" + + "Consider using separate queries or score normalization for optimal results." + ); + } + } + private MultiMatchQueryBuilder copyMultiMatchQueryBuilder(MultiMatchQueryBuilder original) { MultiMatchQueryBuilder copy = new MultiMatchQueryBuilder(original.value()); copy.fields(original.fields()); From d5a315d95a16f0ea995d9a0ac1872c57b4272a9f Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Tue, 5 Aug 2025 17:45:59 -0400 Subject: [PATCH 05/46] centralize validation logic for both mixed and single field --- ...nticMultiMatchQueryRewriteInterceptor.java | 188 ++++++++++++------ 1 file changed, 129 insertions(+), 59 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 91efc74fc99f4..65bc65840779f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -142,88 +142,117 @@ private QueryBuilder buildMultiFieldSemanticQuery( Set inferenceFields, String queryValue ) { + validateQueryTypeSupported(originalQuery.type()); + switch (originalQuery.type()) { case BEST_FIELDS: - // For best_fields, use dis_max to find the single best matching field - // This mimics the behavior of multi_match best_fields which wraps match queries in dis_max - DisMaxQueryBuilder disMaxQuery = new DisMaxQueryBuilder(); - for (String fieldName : inferenceFields) { - SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); - disMaxQuery.add(semanticQuery); - } - // Apply tie_breaker if specified - if (originalQuery.tieBreaker() != null) { - disMaxQuery.tieBreaker(originalQuery.tieBreaker()); - } - 
disMaxQuery.boost(originalQuery.boost()); - disMaxQuery.queryName(originalQuery.queryName()); - return disMaxQuery; - + return buildBestFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); case MOST_FIELDS: - // For most_fields, we want to score across all fields and sum the scores - // This can be reasonably approximated with semantic queries - BoolQueryBuilder boolQuery = new BoolQueryBuilder(); - for (String fieldName : inferenceFields) { - SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); - boolQuery.should(semanticQuery); - } - boolQuery.minimumShouldMatch("1"); - boolQuery.boost(originalQuery.boost()); - boolQuery.queryName(originalQuery.queryName()); - return boolQuery; + return buildMostFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); + default: + // Fallback to best_fields behavior for unknown types + return buildBestFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); + } + } + private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( + MultiMatchQueryBuilder originalQuery, + MultiFieldInferenceInfo inferenceInfo + ) { + validateQueryTypeSupported(originalQuery.type()); + + String queryValue = (String) originalQuery.value(); + + switch (originalQuery.type()) { + case BEST_FIELDS: + return buildBestFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); + case MOST_FIELDS: + return buildMostFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); + default: + // Fallback to best_fields behavior + return buildBestFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); + } + } + + /** + * Validates that the multi_match query type is supported for semantic_text fields. + * Throws IllegalArgumentException for unsupported types. 
+ */ + private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { + switch (queryType) { case CROSS_FIELDS: - // Cross-fields requires term-level analysis across fields which doesn't translate - // meaningfully to semantic queries that work with dense vectors throw new IllegalArgumentException( "multi_match query with type [cross_fields] is not supported for semantic_text fields. " + "Use [best_fields] or [most_fields] instead." ); - case PHRASE: - // Phrase queries require positional information which semantic queries don't have throw new IllegalArgumentException( "multi_match query with type [phrase] is not supported for semantic_text fields. " + "Use [best_fields] instead." ); - case PHRASE_PREFIX: - // Phrase prefix queries require positional and prefix information throw new IllegalArgumentException( "multi_match query with type [phrase_prefix] is not supported for semantic_text fields. " + "Use [best_fields] instead." ); - case BOOL_PREFIX: - // Bool prefix requires term-level prefix analysis throw new IllegalArgumentException( "multi_match query with type [bool_prefix] is not supported for semantic_text fields. " + "Use [best_fields] or [most_fields] instead." 
); - - default: - // Fallback to best_fields behavior for unknown types - DisMaxQueryBuilder defaultDisMaxQuery = new DisMaxQueryBuilder(); - for (String fieldName : inferenceFields) { - SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); - defaultDisMaxQuery.add(semanticQuery); - } - if (originalQuery.tieBreaker() != null) { - defaultDisMaxQuery.tieBreaker(originalQuery.tieBreaker()); - } - defaultDisMaxQuery.boost(originalQuery.boost()); - defaultDisMaxQuery.queryName(originalQuery.queryName()); - return defaultDisMaxQuery; + // BEST_FIELDS and MOST_FIELDS are supported - no validation needed } } - - private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( + + /** + * Builds a best_fields query for pure semantic fields using DisMaxQueryBuilder. + */ + private QueryBuilder buildBestFieldsSemanticQuery( MultiMatchQueryBuilder originalQuery, - MultiFieldInferenceInfo inferenceInfo + Set inferenceFields, + String queryValue ) { - BoolQueryBuilder combinedQuery = new BoolQueryBuilder(); - String queryValue = (String) originalQuery.value(); - + DisMaxQueryBuilder disMaxQuery = new DisMaxQueryBuilder(); + for (String fieldName : inferenceFields) { + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); + disMaxQuery.add(semanticQuery); + } + // Apply tie_breaker if specified + if (originalQuery.tieBreaker() != null) { + disMaxQuery.tieBreaker(originalQuery.tieBreaker()); + } + disMaxQuery.boost(originalQuery.boost()); + disMaxQuery.queryName(originalQuery.queryName()); + return disMaxQuery; + } + + /** + * Builds a most_fields query for pure semantic fields using BoolQueryBuilder. 
+ */ + private QueryBuilder buildMostFieldsSemanticQuery( + MultiMatchQueryBuilder originalQuery, + Set inferenceFields, + String queryValue + ) { + BoolQueryBuilder boolQuery = new BoolQueryBuilder(); + for (String fieldName : inferenceFields) { + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); + boolQuery.should(semanticQuery); + } + boolQuery.minimumShouldMatch("1"); + boolQuery.boost(originalQuery.boost()); + boolQuery.queryName(originalQuery.queryName()); + return boolQuery; + } + + private QueryBuilder buildBestFieldsCombinedQuery( + MultiMatchQueryBuilder originalQuery, + MultiFieldInferenceInfo inferenceInfo, + String queryValue + ) { + // For best_fields, use dis_max to find the single best matching field across all field types + DisMaxQueryBuilder disMaxQuery = new DisMaxQueryBuilder(); + // Add semantic queries for inference fields per index Map> inferenceFieldsPerIndex = inferenceInfo.getInferenceFieldsPerIndex(); for (Map.Entry> entry : inferenceFieldsPerIndex.entrySet()) { @@ -234,7 +263,7 @@ private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); indexSpecificQuery.must(new SemanticQueryBuilder(fieldName, queryValue, true)); indexSpecificQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); - combinedQuery.should(indexSpecificQuery); + disMaxQuery.add(indexSpecificQuery); } } @@ -244,12 +273,53 @@ private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( BoolQueryBuilder indexFilteredQuery = new BoolQueryBuilder(); indexFilteredQuery.must(nonInferenceQuery); indexFilteredQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, inferenceInfo.getNonInferenceIndices())); - combinedQuery.should(indexFilteredQuery); + disMaxQuery.add(indexFilteredQuery); } + + // Apply tie_breaker if specified + if (originalQuery.tieBreaker() != null) { + disMaxQuery.tieBreaker(originalQuery.tieBreaker()); + } + 
disMaxQuery.boost(originalQuery.boost()); + disMaxQuery.queryName(originalQuery.queryName()); + return disMaxQuery; + } + + private QueryBuilder buildMostFieldsCombinedQuery( + MultiMatchQueryBuilder originalQuery, + MultiFieldInferenceInfo inferenceInfo, + String queryValue + ) { + // For most_fields, use bool should to score across all fields + BoolQueryBuilder boolQuery = new BoolQueryBuilder(); + + // Add semantic queries for inference fields per index + Map> inferenceFieldsPerIndex = inferenceInfo.getInferenceFieldsPerIndex(); + for (Map.Entry> entry : inferenceFieldsPerIndex.entrySet()) { + String indexName = entry.getKey(); + Map indexInferenceFields = entry.getValue(); - combinedQuery.boost(originalQuery.boost()); - combinedQuery.queryName(originalQuery.queryName()); - return combinedQuery; + for (String fieldName : indexInferenceFields.keySet()) { + BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); + indexSpecificQuery.must(new SemanticQueryBuilder(fieldName, queryValue, true)); + indexSpecificQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); + boolQuery.should(indexSpecificQuery); + } + } + + // Add non-inference query for indices without semantic_text fields + if (inferenceInfo.getNonInferenceIndices().isEmpty() == false) { + MultiMatchQueryBuilder nonInferenceQuery = copyMultiMatchQueryBuilder(originalQuery); + BoolQueryBuilder indexFilteredQuery = new BoolQueryBuilder(); + indexFilteredQuery.must(nonInferenceQuery); + indexFilteredQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, inferenceInfo.getNonInferenceIndices())); + boolQuery.should(indexFilteredQuery); + } + + boolQuery.minimumShouldMatch("1"); + boolQuery.boost(originalQuery.boost()); + boolQuery.queryName(originalQuery.queryName()); + return boolQuery; } /** From a71aa73f3a055470bce3ae0c649d202e3e52b0fa Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Tue, 5 Aug 2025 17:56:47 -0400 Subject: [PATCH 06/46] added comments to better understanding 
and todo --- ...nticMultiMatchQueryRewriteInterceptor.java | 47 +++++++++---------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 65bc65840779f..258d0ce9fa565 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -143,16 +143,14 @@ private QueryBuilder buildMultiFieldSemanticQuery( String queryValue ) { validateQueryTypeSupported(originalQuery.type()); - - switch (originalQuery.type()) { - case BEST_FIELDS: - return buildBestFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); - case MOST_FIELDS: - return buildMostFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); - default: + + return switch (originalQuery.type()) { + case BEST_FIELDS -> buildBestFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); + case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); + default -> // Fallback to best_fields behavior for unknown types - return buildBestFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); - } + buildBestFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); + }; } private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( @@ -160,9 +158,9 @@ private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( MultiFieldInferenceInfo inferenceInfo ) { validateQueryTypeSupported(originalQuery.type()); - + String queryValue = (String) originalQuery.value(); - + switch (originalQuery.type()) { case BEST_FIELDS: return buildBestFieldsCombinedQuery(originalQuery, inferenceInfo, 
queryValue); @@ -173,7 +171,7 @@ private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( return buildBestFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); } } - + /** * Validates that the multi_match query type is supported for semantic_text fields. * Throws IllegalArgumentException for unsupported types. @@ -203,7 +201,7 @@ private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { // BEST_FIELDS and MOST_FIELDS are supported - no validation needed } } - + /** * Builds a best_fields query for pure semantic fields using DisMaxQueryBuilder. */ @@ -225,7 +223,7 @@ private QueryBuilder buildBestFieldsSemanticQuery( disMaxQuery.queryName(originalQuery.queryName()); return disMaxQuery; } - + /** * Builds a most_fields query for pure semantic fields using BoolQueryBuilder. */ @@ -244,7 +242,7 @@ private QueryBuilder buildMostFieldsSemanticQuery( boolQuery.queryName(originalQuery.queryName()); return boolQuery; } - + private QueryBuilder buildBestFieldsCombinedQuery( MultiMatchQueryBuilder originalQuery, MultiFieldInferenceInfo inferenceInfo, @@ -252,7 +250,7 @@ private QueryBuilder buildBestFieldsCombinedQuery( ) { // For best_fields, use dis_max to find the single best matching field across all field types DisMaxQueryBuilder disMaxQuery = new DisMaxQueryBuilder(); - + // Add semantic queries for inference fields per index Map> inferenceFieldsPerIndex = inferenceInfo.getInferenceFieldsPerIndex(); for (Map.Entry> entry : inferenceFieldsPerIndex.entrySet()) { @@ -275,7 +273,7 @@ private QueryBuilder buildBestFieldsCombinedQuery( indexFilteredQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, inferenceInfo.getNonInferenceIndices())); disMaxQuery.add(indexFilteredQuery); } - + // Apply tie_breaker if specified if (originalQuery.tieBreaker() != null) { disMaxQuery.tieBreaker(originalQuery.tieBreaker()); @@ -284,7 +282,7 @@ private QueryBuilder buildBestFieldsCombinedQuery( disMaxQuery.queryName(originalQuery.queryName()); 
return disMaxQuery; } - + private QueryBuilder buildMostFieldsCombinedQuery( MultiMatchQueryBuilder originalQuery, MultiFieldInferenceInfo inferenceInfo, @@ -292,7 +290,7 @@ private QueryBuilder buildMostFieldsCombinedQuery( ) { // For most_fields, use bool should to score across all fields BoolQueryBuilder boolQuery = new BoolQueryBuilder(); - + // Add semantic queries for inference fields per index Map> inferenceFieldsPerIndex = inferenceInfo.getInferenceFieldsPerIndex(); for (Map.Entry> entry : inferenceFieldsPerIndex.entrySet()) { @@ -315,7 +313,7 @@ private QueryBuilder buildMostFieldsCombinedQuery( indexFilteredQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, inferenceInfo.getNonInferenceIndices())); boolQuery.should(indexFilteredQuery); } - + boolQuery.minimumShouldMatch("1"); boolQuery.boost(originalQuery.boost()); boolQuery.queryName(originalQuery.queryName()); @@ -330,6 +328,7 @@ private QueryBuilder buildMostFieldsCombinedQuery( */ private void detectAndWarnScoreRangeMismatch(MultiFieldInferenceInfo inferenceInfo) { ModelRegistry modelRegistry = modelRegistrySupplier.get(); + // TODO: validate if we need to check if modelRegistry is null or not if (modelRegistry == null) { // Fallback: warn for any mixed semantic_text + non-inference combination // since we can't determine the exact task types @@ -338,7 +337,7 @@ private void detectAndWarnScoreRangeMismatch(MultiFieldInferenceInfo inferenceIn "Query spans both semantic_text and non-inference fields. " + "Dense vector models (TEXT_EMBEDDING) produce bounded scores (0-1) while sparse vector models " + "(SPARSE_EMBEDDING) and non-inference fields produce unbounded scores, which may cause score " + - "range mismatches and affect result ranking. Consider using separate queries or score normalization." + "range mismatches and affect result ranking. Consider using Linear or RRF retrievers." 
); } return; @@ -370,8 +369,7 @@ private void detectAndWarnScoreRangeMismatch(MultiFieldInferenceInfo inferenceIn } } } catch (Exception e) { - // If we can't get model info, skip this inference ID - // Or maybe we can throw an error + // TODO: validate If we can't get model info, skip this inference ID or throw an error } } @@ -382,7 +380,7 @@ private void detectAndWarnScoreRangeMismatch(MultiFieldInferenceInfo inferenceIn (hasSparseVectorModel ? "sparse vector model (SPARSE_EMBEDDING) and/or " : "") + (hasNonInferenceFields ? "non-inference fields " : "") + "that produce unbounded scores. This may cause score range mismatches and affect result ranking. " + - "Consider using separate queries or score normalization for optimal results." + "Consider using Linear or RRF retrievers." ); } } @@ -415,6 +413,7 @@ private MultiMatchQueryBuilder copyMultiMatchQueryBuilder(MultiMatchQueryBuilder * Represents the inference information for multiple fields across indices. */ public static class MultiFieldInferenceInfo { + // TODO: Remove originalFields if not needed private final Set originalFields; private final Map> inferenceFieldsPerIndex; private final List nonInferenceIndices; From c17fed5cc1f65933082b7b36f53cb39c0d415554 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Tue, 5 Aug 2025 18:16:24 -0400 Subject: [PATCH 07/46] validate query types for single inference field --- ...nticMultiMatchQueryRewriteInterceptor.java | 31 +++++++------------ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 258d0ce9fa565..ca244ab84314c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -13,12 +13,7 @@ import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.mapper.IndexFieldMapper; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.DisMaxQueryBuilder; -import org.elasticsearch.index.query.MultiMatchQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryRewriteContext; -import org.elasticsearch.index.query.TermsQueryBuilder; +import org.elasticsearch.index.query.*; import org.elasticsearch.inference.MinimalServiceSettings; import org.elasticsearch.inference.TaskType; import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor; @@ -125,14 +120,15 @@ private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, M Set inferenceFields = inferenceInfo.getInferenceFields(); if (inferenceFields.size() == 1) { - // Single inference field - create a simple semantic query + // Single inference field - all multi_match types work the same (like original Elasticsearch) + // No validation needed since single field queries don't require type-specific combination logic String fieldName = inferenceFields.iterator().next(); SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); semanticQuery.boost(originalQuery.boost()); semanticQuery.queryName(originalQuery.queryName()); return semanticQuery; } else { - // Multiple inference fields - handle based on multi-match query type + // Multiple inference fields - handle based on multi-match query type (validation happens here) return buildMultiFieldSemanticQuery(originalQuery, inferenceFields, queryValue); } } @@ -142,8 +138,6 @@ private QueryBuilder buildMultiFieldSemanticQuery( Set inferenceFields, String queryValue ) { - 
validateQueryTypeSupported(originalQuery.type()); - return switch (originalQuery.type()) { case BEST_FIELDS -> buildBestFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); @@ -161,15 +155,13 @@ private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( String queryValue = (String) originalQuery.value(); - switch (originalQuery.type()) { - case BEST_FIELDS: - return buildBestFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); - case MOST_FIELDS: - return buildMostFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); - default: + return switch (originalQuery.type()) { + case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); + case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); + default -> // Fallback to best_fields behavior - return buildBestFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); - } + buildBestFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); + }; } /** @@ -198,7 +190,6 @@ private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { "multi_match query with type [bool_prefix] is not supported for semantic_text fields. " + "Use [best_fields] or [most_fields] instead." 
); - // BEST_FIELDS and MOST_FIELDS are supported - no validation needed } } @@ -210,7 +201,7 @@ private QueryBuilder buildBestFieldsSemanticQuery( Set inferenceFields, String queryValue ) { - DisMaxQueryBuilder disMaxQuery = new DisMaxQueryBuilder(); + DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); for (String fieldName : inferenceFields) { SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); disMaxQuery.add(semanticQuery); From d7a7640b3c120c09c96ce7a7c02e29cded651831 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 6 Aug 2025 13:49:05 -0400 Subject: [PATCH 08/46] apply field and query boost --- ...nticMultiMatchQueryRewriteInterceptor.java | 79 ++++++++++++++++--- 1 file changed, 68 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index ca244ab84314c..986d97122d596 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -13,7 +13,13 @@ import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.mapper.IndexFieldMapper; -import org.elasticsearch.index.query.*; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.DisMaxQueryBuilder; +import org.elasticsearch.index.query.MultiMatchQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.QueryRewriteContext; +import org.elasticsearch.index.query.TermsQueryBuilder; import 
org.elasticsearch.inference.MinimalServiceSettings; import org.elasticsearch.inference.TaskType; import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor; @@ -117,6 +123,7 @@ private MultiFieldInferenceInfo resolveInferenceInfoForFields(Set fieldN private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, MultiFieldInferenceInfo inferenceInfo) { String queryValue = (String) originalQuery.value(); + Map fieldsBoosts = originalQuery.fields(); Set inferenceFields = inferenceInfo.getInferenceFields(); if (inferenceFields.size() == 1) { @@ -124,26 +131,39 @@ private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, M // No validation needed since single field queries don't require type-specific combination logic String fieldName = inferenceFields.iterator().next(); SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); - semanticQuery.boost(originalQuery.boost()); + + // Apply per-field boost if specified + Float fieldBoost = fieldsBoosts.get(fieldName); + if (fieldBoost != null && fieldBoost != 1.0f) { + semanticQuery.boost(fieldBoost); + } + + // Apply top-level query boost and name + if (originalQuery.boost() != 1.0f) { + // If we already have field boost, combine with query boost + float finalBoost = semanticQuery.boost() * originalQuery.boost(); + semanticQuery.boost(finalBoost); + } semanticQuery.queryName(originalQuery.queryName()); return semanticQuery; } else { // Multiple inference fields - handle based on multi-match query type (validation happens here) - return buildMultiFieldSemanticQuery(originalQuery, inferenceFields, queryValue); + return buildMultiFieldSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); } } private QueryBuilder buildMultiFieldSemanticQuery( MultiMatchQueryBuilder originalQuery, + Map fieldsBoosts, Set inferenceFields, String queryValue ) { return switch (originalQuery.type()) { - case BEST_FIELDS -> 
buildBestFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); - case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); + case BEST_FIELDS -> buildBestFieldsSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); + case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); default -> // Fallback to best_fields behavior for unknown types - buildBestFieldsSemanticQuery(originalQuery, inferenceFields, queryValue); + buildBestFieldsSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); }; } @@ -154,13 +174,14 @@ private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( validateQueryTypeSupported(originalQuery.type()); String queryValue = (String) originalQuery.value(); + Map fieldsBoosts = originalQuery.fields(); return switch (originalQuery.type()) { - case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); - case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); + case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, inferenceInfo, queryValue); + case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, fieldsBoosts, inferenceInfo, queryValue); default -> // Fallback to best_fields behavior - buildBestFieldsCombinedQuery(originalQuery, inferenceInfo, queryValue); + buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, inferenceInfo, queryValue); }; } @@ -198,12 +219,20 @@ private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { */ private QueryBuilder buildBestFieldsSemanticQuery( MultiMatchQueryBuilder originalQuery, + Map fieldsBoosts, Set inferenceFields, String queryValue ) { DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); for (String fieldName : inferenceFields) { SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); + + // Apply per-field 
boost if specified + Float fieldBoost = fieldsBoosts.get(fieldName); + if (fieldBoost != null && fieldBoost != 1.0f) { + semanticQuery.boost(fieldBoost); + } + disMaxQuery.add(semanticQuery); } // Apply tie_breaker if specified @@ -220,12 +249,20 @@ private QueryBuilder buildBestFieldsSemanticQuery( */ private QueryBuilder buildMostFieldsSemanticQuery( MultiMatchQueryBuilder originalQuery, + Map fieldsBoosts, Set inferenceFields, String queryValue ) { BoolQueryBuilder boolQuery = new BoolQueryBuilder(); for (String fieldName : inferenceFields) { SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); + + // Apply per-field boost if specified + Float fieldBoost = fieldsBoosts.get(fieldName); + if (fieldBoost != null && fieldBoost != 1.0f) { + semanticQuery.boost(fieldBoost); + } + boolQuery.should(semanticQuery); } boolQuery.minimumShouldMatch("1"); @@ -236,6 +273,7 @@ private QueryBuilder buildMostFieldsSemanticQuery( private QueryBuilder buildBestFieldsCombinedQuery( MultiMatchQueryBuilder originalQuery, + Map fieldsBoosts, MultiFieldInferenceInfo inferenceInfo, String queryValue ) { @@ -249,14 +287,23 @@ private QueryBuilder buildBestFieldsCombinedQuery( Map indexInferenceFields = entry.getValue(); for (String fieldName : indexInferenceFields.keySet()) { + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, true); + + // Apply per-field boost if specified + Float fieldBoost = fieldsBoosts.get(fieldName); + if (fieldBoost != null && fieldBoost != 1.0f) { + semanticQuery.boost(fieldBoost); + } + BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); - indexSpecificQuery.must(new SemanticQueryBuilder(fieldName, queryValue, true)); + indexSpecificQuery.must(semanticQuery); indexSpecificQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); disMaxQuery.add(indexSpecificQuery); } } // Add non-inference query for indices without semantic_text fields + // Note: Field 
boosts are preserved in the copied MultiMatchQueryBuilder if (inferenceInfo.getNonInferenceIndices().isEmpty() == false) { MultiMatchQueryBuilder nonInferenceQuery = copyMultiMatchQueryBuilder(originalQuery); BoolQueryBuilder indexFilteredQuery = new BoolQueryBuilder(); @@ -276,6 +323,7 @@ private QueryBuilder buildBestFieldsCombinedQuery( private QueryBuilder buildMostFieldsCombinedQuery( MultiMatchQueryBuilder originalQuery, + Map fieldsBoosts, MultiFieldInferenceInfo inferenceInfo, String queryValue ) { @@ -289,14 +337,23 @@ private QueryBuilder buildMostFieldsCombinedQuery( Map indexInferenceFields = entry.getValue(); for (String fieldName : indexInferenceFields.keySet()) { + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, true); + + // Apply per-field boost if specified + Float fieldBoost = fieldsBoosts.get(fieldName); + if (fieldBoost != null && fieldBoost != 1.0f) { + semanticQuery.boost(fieldBoost); + } + BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); - indexSpecificQuery.must(new SemanticQueryBuilder(fieldName, queryValue, true)); + indexSpecificQuery.must(semanticQuery); indexSpecificQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); boolQuery.should(indexSpecificQuery); } } // Add non-inference query for indices without semantic_text fields + // Note: Field boosts are preserved in the copied MultiMatchQueryBuilder if (inferenceInfo.getNonInferenceIndices().isEmpty() == false) { MultiMatchQueryBuilder nonInferenceQuery = copyMultiMatchQueryBuilder(originalQuery); BoolQueryBuilder indexFilteredQuery = new BoolQueryBuilder(); From d8b97f321c0ede5425f480bc2c54ddc03ba4dd95 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 6 Aug 2025 13:56:52 -0400 Subject: [PATCH 09/46] fix duplicate boosting retrieve --- ...nticMultiMatchQueryRewriteInterceptor.java | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 986d97122d596..1ff3de7295b09 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -72,10 +72,10 @@ public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilde return queryBuilder; } else if (inferenceInfo.hasNonInferenceFields()) { // Combined case where some fields are semantic_text and others are not - return buildCombinedInferenceAndNonInferenceQuery(multiMatchQueryBuilder, inferenceInfo); + return buildCombinedInferenceAndNonInferenceQuery(multiMatchQueryBuilder, inferenceInfo, fields); } else { // All specified fields are inference fields (semantic_text) - return buildInferenceQuery(multiMatchQueryBuilder, inferenceInfo); + return buildInferenceQuery(multiMatchQueryBuilder, inferenceInfo, fields); } } @@ -88,7 +88,6 @@ private MultiFieldInferenceInfo resolveInferenceInfoForFields(Set fieldN Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); Map> inferenceFieldsPerIndex = new HashMap<>(); List nonInferenceIndices = new ArrayList<>(); - Map> inferenceFieldsByIndex = new HashMap<>(); for (IndexMetadata indexMetadata : indexMetadataCollection) { String indexName = indexMetadata.getIndex().getName(); @@ -104,15 +103,13 @@ private MultiFieldInferenceInfo resolveInferenceInfoForFields(Set fieldN indexInferenceFields.put(fieldName, indexMetadata.getInferenceFields().get(fieldName)); } inferenceFieldsPerIndex.put(indexName, indexInferenceFields); - inferenceFieldsByIndex.put(indexName, indexInferenceFieldNames); } } 
MultiFieldInferenceInfo inferenceInfo = new MultiFieldInferenceInfo( fieldNames, inferenceFieldsPerIndex, - nonInferenceIndices, - inferenceFieldsByIndex + nonInferenceIndices ); // Perform early detection of score range mismatches and emit warning if needed @@ -121,9 +118,8 @@ private MultiFieldInferenceInfo resolveInferenceInfoForFields(Set fieldN return inferenceInfo; } - private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, MultiFieldInferenceInfo inferenceInfo) { + private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, MultiFieldInferenceInfo inferenceInfo, Map fieldsBoosts) { String queryValue = (String) originalQuery.value(); - Map fieldsBoosts = originalQuery.fields(); Set inferenceFields = inferenceInfo.getInferenceFields(); if (inferenceFields.size() == 1) { @@ -169,12 +165,12 @@ private QueryBuilder buildMultiFieldSemanticQuery( private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( MultiMatchQueryBuilder originalQuery, - MultiFieldInferenceInfo inferenceInfo + MultiFieldInferenceInfo inferenceInfo, + Map fieldsBoosts ) { validateQueryTypeSupported(originalQuery.type()); String queryValue = (String) originalQuery.value(); - Map fieldsBoosts = originalQuery.fields(); return switch (originalQuery.type()) { case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, inferenceInfo, queryValue); @@ -465,18 +461,15 @@ public static class MultiFieldInferenceInfo { private final Set originalFields; private final Map> inferenceFieldsPerIndex; private final List nonInferenceIndices; - private final Map> inferenceFieldsByIndex; public MultiFieldInferenceInfo( Set originalFields, Map> inferenceFieldsPerIndex, - List nonInferenceIndices, - Map> inferenceFieldsByIndex + List nonInferenceIndices ) { this.originalFields = originalFields; this.inferenceFieldsPerIndex = inferenceFieldsPerIndex; this.nonInferenceIndices = nonInferenceIndices; - this.inferenceFieldsByIndex = inferenceFieldsByIndex; 
} public Set getInferenceFields() { @@ -497,8 +490,5 @@ public boolean hasNonInferenceFields() { return nonInferenceIndices.isEmpty() == false; } - public Map> getInferenceFieldsByIndex() { - return inferenceFieldsByIndex; - } } } From 944053fe848460021f937f756ec97b0163de80d9 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 6 Aug 2025 14:06:22 -0400 Subject: [PATCH 10/46] refactor boosting checks --- ...nticMultiMatchQueryRewriteInterceptor.java | 43 +++++++------------ 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 1ff3de7295b09..33674027a03d7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -42,6 +42,7 @@ public class SemanticMultiMatchQueryRewriteInterceptor implements QueryRewriteIn ); private final Supplier modelRegistrySupplier; + private final float DEFAULT_BOOST_FIELD = 1.0f; public SemanticMultiMatchQueryRewriteInterceptor(Supplier modelRegistrySupplier) { @@ -128,18 +129,12 @@ private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, M String fieldName = inferenceFields.iterator().next(); SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); - // Apply per-field boost if specified - Float fieldBoost = fieldsBoosts.get(fieldName); - if (fieldBoost != null && fieldBoost != 1.0f) { - semanticQuery.boost(fieldBoost); - } + // Apply per-field boost + float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); + semanticQuery.boost(fieldBoost); // Apply top-level query boost and name 
- if (originalQuery.boost() != 1.0f) { - // If we already have field boost, combine with query boost - float finalBoost = semanticQuery.boost() * originalQuery.boost(); - semanticQuery.boost(finalBoost); - } + semanticQuery.boost(semanticQuery.boost() * originalQuery.boost()); semanticQuery.queryName(originalQuery.queryName()); return semanticQuery; } else { @@ -224,10 +219,8 @@ private QueryBuilder buildBestFieldsSemanticQuery( SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); // Apply per-field boost if specified - Float fieldBoost = fieldsBoosts.get(fieldName); - if (fieldBoost != null && fieldBoost != 1.0f) { - semanticQuery.boost(fieldBoost); - } + float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); + semanticQuery.boost(fieldBoost); disMaxQuery.add(semanticQuery); } @@ -254,10 +247,8 @@ private QueryBuilder buildMostFieldsSemanticQuery( SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); // Apply per-field boost if specified - Float fieldBoost = fieldsBoosts.get(fieldName); - if (fieldBoost != null && fieldBoost != 1.0f) { - semanticQuery.boost(fieldBoost); - } + float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); + semanticQuery.boost(fieldBoost); boolQuery.should(semanticQuery); } @@ -285,11 +276,9 @@ private QueryBuilder buildBestFieldsCombinedQuery( for (String fieldName : indexInferenceFields.keySet()) { SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, true); - // Apply per-field boost if specified - Float fieldBoost = fieldsBoosts.get(fieldName); - if (fieldBoost != null && fieldBoost != 1.0f) { - semanticQuery.boost(fieldBoost); - } + // Apply per-field boost + float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); + semanticQuery.boost(fieldBoost); BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); indexSpecificQuery.must(semanticQuery); @@ -335,11 
+324,9 @@ private QueryBuilder buildMostFieldsCombinedQuery( for (String fieldName : indexInferenceFields.keySet()) { SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, true); - // Apply per-field boost if specified - Float fieldBoost = fieldsBoosts.get(fieldName); - if (fieldBoost != null && fieldBoost != 1.0f) { - semanticQuery.boost(fieldBoost); - } + // Apply per-field boost + float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); + semanticQuery.boost(fieldBoost); BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); indexSpecificQuery.must(semanticQuery); From 69f439f43f3fd808aea51b747999b5040781c788 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 6 Aug 2025 16:15:58 -0400 Subject: [PATCH 11/46] Convert MultiFieldInferenceInfo to record --- ...nticMultiMatchQueryRewriteInterceptor.java | 285 +++++++++--------- 1 file changed, 144 insertions(+), 141 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 33674027a03d7..134aa01a0ef3b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -25,7 +25,6 @@ import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor; import org.elasticsearch.xpack.inference.registry.ModelRegistry; -import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -88,29 +87,36 @@ public String getQueryName() { private MultiFieldInferenceInfo resolveInferenceInfoForFields(Set fieldNames, ResolvedIndices resolvedIndices) { Collection indexMetadataCollection = 
resolvedIndices.getConcreteLocalIndicesMetadata().values(); Map> inferenceFieldsPerIndex = new HashMap<>(); - List nonInferenceIndices = new ArrayList<>(); + Map> nonInferenceFieldsPerIndex = new HashMap<>(); for (IndexMetadata indexMetadata : indexMetadataCollection) { String indexName = indexMetadata.getIndex().getName(); Map indexInferenceFields = new HashMap<>(); - Set indexInferenceFieldNames = fieldNames.stream() - .filter(fieldName -> indexMetadata.getInferenceFields().containsKey(fieldName)) - .collect(Collectors.toSet()); - - if (indexInferenceFieldNames.isEmpty()) { - nonInferenceIndices.add(indexName); - } else { - for (String fieldName : indexInferenceFieldNames) { + Set indexNonInferenceFields = new HashSet<>(); + + // Classify each field as inference or non-inference + for (String fieldName : fieldNames) { + if (indexMetadata.getInferenceFields().containsKey(fieldName)) { indexInferenceFields.put(fieldName, indexMetadata.getInferenceFields().get(fieldName)); + } else { + indexNonInferenceFields.add(fieldName); } + } + + // Store inference fields if any exist + if (indexInferenceFields.isEmpty() == false) { inferenceFieldsPerIndex.put(indexName, indexInferenceFields); } + + // Store non-inference fields if any exist + if (indexNonInferenceFields.isEmpty() == false) { + nonInferenceFieldsPerIndex.put(indexName, indexNonInferenceFields); + } } MultiFieldInferenceInfo inferenceInfo = new MultiFieldInferenceInfo( - fieldNames, inferenceFieldsPerIndex, - nonInferenceIndices + nonInferenceFieldsPerIndex ); // Perform early detection of score range mismatches and emit warning if needed @@ -131,10 +137,9 @@ private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, M // Apply per-field boost float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); - semanticQuery.boost(fieldBoost); - // Apply top-level query boost and name - semanticQuery.boost(semanticQuery.boost() * originalQuery.boost()); + // Apply top-level query 
boost with per field and name + semanticQuery.boost(fieldBoost * originalQuery.boost()); semanticQuery.queryName(originalQuery.queryName()); return semanticQuery; } else { @@ -206,8 +211,73 @@ private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { } /** - * Builds a best_fields query for pure semantic fields using DisMaxQueryBuilder. + * Creates a semantic query with field boost applied. */ + private SemanticQueryBuilder createSemanticQuery(String fieldName, String queryValue, Map fieldsBoosts, boolean lenient) { + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, lenient); + float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); + semanticQuery.boost(fieldBoost); + return semanticQuery; + } + + /** + * Adds semantic queries for inference fields per index to the provided query builder. + */ + private void addInferenceQueriesPerIndex( + QueryBuilder parentQuery, + MultiFieldInferenceInfo inferenceInfo, + String queryValue, + Map fieldsBoosts + ) { + Map> inferenceFieldsPerIndex = inferenceInfo.inferenceFieldsPerIndex(); + for (Map.Entry> entry : inferenceFieldsPerIndex.entrySet()) { + String indexName = entry.getKey(); + Map indexInferenceFields = entry.getValue(); + + for (String fieldName : indexInferenceFields.keySet()) { + SemanticQueryBuilder semanticQuery = createSemanticQuery(fieldName, queryValue, fieldsBoosts, true); + + BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); + indexSpecificQuery.must(semanticQuery); + indexSpecificQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); + + if (parentQuery instanceof DisMaxQueryBuilder) { + ((DisMaxQueryBuilder) parentQuery).add(indexSpecificQuery); + } else if (parentQuery instanceof BoolQueryBuilder) { + ((BoolQueryBuilder) parentQuery).should(indexSpecificQuery); + } + } + } + } + + /** + * Adds non-inference queries for non-inference fields per index to the provided query builder. 
+ */ + private void addNonInferenceQueriesPerIndex( + QueryBuilder parentQuery, + MultiFieldInferenceInfo inferenceInfo, + MultiMatchQueryBuilder originalQuery, + Map fieldsBoosts + ) { + Map> nonInferenceFieldsPerIndex = inferenceInfo.nonInferenceFieldsPerIndex(); + for (Map.Entry> entry : nonInferenceFieldsPerIndex.entrySet()) { + String indexName = entry.getKey(); + Set nonInferenceFields = entry.getValue(); + + MultiMatchQueryBuilder indexSpecificQuery = createNonInferenceQueryForIndex(originalQuery, nonInferenceFields, fieldsBoosts); + + BoolQueryBuilder indexFilteredQuery = new BoolQueryBuilder(); + indexFilteredQuery.must(indexSpecificQuery); + indexFilteredQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); + + if (parentQuery instanceof DisMaxQueryBuilder) { + ((DisMaxQueryBuilder) parentQuery).add(indexFilteredQuery); + } else if (parentQuery instanceof BoolQueryBuilder) { + ((BoolQueryBuilder) parentQuery).should(indexFilteredQuery); + } + } + } + private QueryBuilder buildBestFieldsSemanticQuery( MultiMatchQueryBuilder originalQuery, Map fieldsBoosts, @@ -216,13 +286,7 @@ private QueryBuilder buildBestFieldsSemanticQuery( ) { DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); for (String fieldName : inferenceFields) { - SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); - - // Apply per-field boost if specified - float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); - semanticQuery.boost(fieldBoost); - - disMaxQuery.add(semanticQuery); + disMaxQuery.add(createSemanticQuery(fieldName, queryValue, fieldsBoosts, false)); } // Apply tie_breaker if specified if (originalQuery.tieBreaker() != null) { @@ -244,13 +308,7 @@ private QueryBuilder buildMostFieldsSemanticQuery( ) { BoolQueryBuilder boolQuery = new BoolQueryBuilder(); for (String fieldName : inferenceFields) { - SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, 
false); - - // Apply per-field boost if specified - float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); - semanticQuery.boost(fieldBoost); - - boolQuery.should(semanticQuery); + boolQuery.should(createSemanticQuery(fieldName, queryValue, fieldsBoosts, false)); } boolQuery.minimumShouldMatch("1"); boolQuery.boost(originalQuery.boost()); @@ -264,38 +322,10 @@ private QueryBuilder buildBestFieldsCombinedQuery( MultiFieldInferenceInfo inferenceInfo, String queryValue ) { - // For best_fields, use dis_max to find the single best matching field across all field types - DisMaxQueryBuilder disMaxQuery = new DisMaxQueryBuilder(); - - // Add semantic queries for inference fields per index - Map> inferenceFieldsPerIndex = inferenceInfo.getInferenceFieldsPerIndex(); - for (Map.Entry> entry : inferenceFieldsPerIndex.entrySet()) { - String indexName = entry.getKey(); - Map indexInferenceFields = entry.getValue(); - - for (String fieldName : indexInferenceFields.keySet()) { - SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, true); + DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); - // Apply per-field boost - float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); - semanticQuery.boost(fieldBoost); - - BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); - indexSpecificQuery.must(semanticQuery); - indexSpecificQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); - disMaxQuery.add(indexSpecificQuery); - } - } - - // Add non-inference query for indices without semantic_text fields - // Note: Field boosts are preserved in the copied MultiMatchQueryBuilder - if (inferenceInfo.getNonInferenceIndices().isEmpty() == false) { - MultiMatchQueryBuilder nonInferenceQuery = copyMultiMatchQueryBuilder(originalQuery); - BoolQueryBuilder indexFilteredQuery = new BoolQueryBuilder(); - indexFilteredQuery.must(nonInferenceQuery); - indexFilteredQuery.filter(new 
TermsQueryBuilder(IndexFieldMapper.NAME, inferenceInfo.getNonInferenceIndices())); - disMaxQuery.add(indexFilteredQuery); - } + addInferenceQueriesPerIndex(disMaxQuery, inferenceInfo, queryValue, fieldsBoosts); + addNonInferenceQueriesPerIndex(disMaxQuery, inferenceInfo, originalQuery, fieldsBoosts); // Apply tie_breaker if specified if (originalQuery.tieBreaker() != null) { @@ -312,38 +342,10 @@ private QueryBuilder buildMostFieldsCombinedQuery( MultiFieldInferenceInfo inferenceInfo, String queryValue ) { - // For most_fields, use bool should to score across all fields BoolQueryBuilder boolQuery = new BoolQueryBuilder(); - // Add semantic queries for inference fields per index - Map> inferenceFieldsPerIndex = inferenceInfo.getInferenceFieldsPerIndex(); - for (Map.Entry> entry : inferenceFieldsPerIndex.entrySet()) { - String indexName = entry.getKey(); - Map indexInferenceFields = entry.getValue(); - - for (String fieldName : indexInferenceFields.keySet()) { - SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, true); - - // Apply per-field boost - float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); - semanticQuery.boost(fieldBoost); - - BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); - indexSpecificQuery.must(semanticQuery); - indexSpecificQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); - boolQuery.should(indexSpecificQuery); - } - } - - // Add non-inference query for indices without semantic_text fields - // Note: Field boosts are preserved in the copied MultiMatchQueryBuilder - if (inferenceInfo.getNonInferenceIndices().isEmpty() == false) { - MultiMatchQueryBuilder nonInferenceQuery = copyMultiMatchQueryBuilder(originalQuery); - BoolQueryBuilder indexFilteredQuery = new BoolQueryBuilder(); - indexFilteredQuery.must(nonInferenceQuery); - indexFilteredQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, inferenceInfo.getNonInferenceIndices())); - 
boolQuery.should(indexFilteredQuery); - } + addInferenceQueriesPerIndex(boolQuery, inferenceInfo, queryValue, fieldsBoosts); + addNonInferenceQueriesPerIndex(boolQuery, inferenceInfo, originalQuery, fieldsBoosts); boolQuery.minimumShouldMatch("1"); boolQuery.boost(originalQuery.boost()); @@ -381,7 +383,7 @@ private void detectAndWarnScoreRangeMismatch(MultiFieldInferenceInfo inferenceIn // Collect all inference IDs from all fields Set allInferenceIds = new HashSet<>(); - for (Map indexFields : inferenceInfo.getInferenceFieldsPerIndex().values()) { + for (Map indexFields : inferenceInfo.inferenceFieldsPerIndex().values()) { for (InferenceFieldMetadata fieldMetadata : indexFields.values()) { allInferenceIds.add(fieldMetadata.getSearchInferenceId()); } @@ -416,66 +418,67 @@ private void detectAndWarnScoreRangeMismatch(MultiFieldInferenceInfo inferenceIn } } - private MultiMatchQueryBuilder copyMultiMatchQueryBuilder(MultiMatchQueryBuilder original) { - MultiMatchQueryBuilder copy = new MultiMatchQueryBuilder(original.value()); - copy.fields(original.fields()); - copy.type(original.type()); - copy.operator(original.operator()); - copy.slop(original.slop()); - copy.analyzer(original.analyzer()); - copy.minimumShouldMatch(original.minimumShouldMatch()); - copy.fuzzyRewrite(original.fuzzyRewrite()); - copy.prefixLength(original.prefixLength()); - copy.maxExpansions(original.maxExpansions()); - copy.fuzzyTranspositions(original.fuzzyTranspositions()); - copy.lenient(original.lenient()); - copy.zeroTermsQuery(original.zeroTermsQuery()); - copy.autoGenerateSynonymsPhraseQuery(original.autoGenerateSynonymsPhraseQuery()); - copy.tieBreaker(original.tieBreaker()); + /** + * Copies all properties from original query to target query except fields. 
+ */ + private void copyQueryProperties(MultiMatchQueryBuilder original, MultiMatchQueryBuilder target) { + target.type(original.type()); + target.operator(original.operator()); + target.slop(original.slop()); + target.analyzer(original.analyzer()); + target.minimumShouldMatch(original.minimumShouldMatch()); + target.fuzzyRewrite(original.fuzzyRewrite()); + target.prefixLength(original.prefixLength()); + target.maxExpansions(original.maxExpansions()); + target.fuzzyTranspositions(original.fuzzyTranspositions()); + target.lenient(original.lenient()); + target.zeroTermsQuery(original.zeroTermsQuery()); + target.autoGenerateSynonymsPhraseQuery(original.autoGenerateSynonymsPhraseQuery()); + target.tieBreaker(original.tieBreaker()); if (original.fuzziness() != null) { - copy.fuzziness(original.fuzziness()); + target.fuzziness(original.fuzziness()); } - - return copy; } /** - * Represents the inference information for multiple fields across indices. + * Creates a non-inference MultiMatchQuery for a specific index with only the specified fields. 
*/ - public static class MultiFieldInferenceInfo { - // TODO: Remove originalFields if not needed - private final Set originalFields; - private final Map> inferenceFieldsPerIndex; - private final List nonInferenceIndices; - - public MultiFieldInferenceInfo( - Set originalFields, - Map> inferenceFieldsPerIndex, - List nonInferenceIndices - ) { - this.originalFields = originalFields; - this.inferenceFieldsPerIndex = inferenceFieldsPerIndex; - this.nonInferenceIndices = nonInferenceIndices; - } + private MultiMatchQueryBuilder createNonInferenceQueryForIndex( + MultiMatchQueryBuilder originalQuery, + Set nonInferenceFields, + Map fieldsBoosts + ) { + MultiMatchQueryBuilder query = new MultiMatchQueryBuilder(originalQuery.value()); - public Set getInferenceFields() { - return inferenceFieldsPerIndex.values().stream() - .flatMap(fields -> fields.keySet().stream()) - .collect(Collectors.toSet()); + // Set only the non-inference fields with their boosts + Map filteredFields = new HashMap<>(); + for (String fieldName : nonInferenceFields) { + float boost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); + filteredFields.put(fieldName, boost); } + query.fields(filteredFields); - public Map> getInferenceFieldsPerIndex() { - return inferenceFieldsPerIndex; - } + copyQueryProperties(originalQuery, query); - public List getNonInferenceIndices() { - return nonInferenceIndices; - } + return query; + } - public boolean hasNonInferenceFields() { - return nonInferenceIndices.isEmpty() == false; - } + /** + * Represents the inference information for multiple fields across indices. 
+ */ + public record MultiFieldInferenceInfo(Map> inferenceFieldsPerIndex, + Map> nonInferenceFieldsPerIndex) { - } + public Set getInferenceFields() { + return inferenceFieldsPerIndex.values().stream() + .flatMap(fields -> fields.keySet().stream()) + .collect(Collectors.toSet()); + } + + public boolean hasNonInferenceFields() { + return nonInferenceFieldsPerIndex.isEmpty() == false; + } + + } } From 776a65c703c146519965f4e21bc4f78258a9c7d5 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 6 Aug 2025 16:17:01 -0400 Subject: [PATCH 12/46] linting --- ...nticMultiMatchQueryRewriteInterceptor.java | 75 ++++++++++--------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 134aa01a0ef3b..83151276f1bc6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -43,7 +43,6 @@ public class SemanticMultiMatchQueryRewriteInterceptor implements QueryRewriteIn private final Supplier modelRegistrySupplier; private final float DEFAULT_BOOST_FIELD = 1.0f; - public SemanticMultiMatchQueryRewriteInterceptor(Supplier modelRegistrySupplier) { this.modelRegistrySupplier = modelRegistrySupplier; } @@ -114,10 +113,7 @@ private MultiFieldInferenceInfo resolveInferenceInfoForFields(Set fieldN } } - MultiFieldInferenceInfo inferenceInfo = new MultiFieldInferenceInfo( - inferenceFieldsPerIndex, - nonInferenceFieldsPerIndex - ); + MultiFieldInferenceInfo inferenceInfo = new MultiFieldInferenceInfo(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex); // Perform early detection of score range mismatches 
and emit warning if needed detectAndWarnScoreRangeMismatch(inferenceInfo); @@ -125,7 +121,11 @@ private MultiFieldInferenceInfo resolveInferenceInfoForFields(Set fieldN return inferenceInfo; } - private QueryBuilder buildInferenceQuery(MultiMatchQueryBuilder originalQuery, MultiFieldInferenceInfo inferenceInfo, Map fieldsBoosts) { + private QueryBuilder buildInferenceQuery( + MultiMatchQueryBuilder originalQuery, + MultiFieldInferenceInfo inferenceInfo, + Map fieldsBoosts + ) { String queryValue = (String) originalQuery.value(); Set inferenceFields = inferenceInfo.getInferenceFields(); @@ -159,7 +159,7 @@ private QueryBuilder buildMultiFieldSemanticQuery( case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); default -> // Fallback to best_fields behavior for unknown types - buildBestFieldsSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); + buildBestFieldsSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); }; } @@ -177,7 +177,7 @@ private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, fieldsBoosts, inferenceInfo, queryValue); default -> // Fallback to best_fields behavior - buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, inferenceInfo, queryValue); + buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, inferenceInfo, queryValue); }; } @@ -189,23 +189,21 @@ private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { switch (queryType) { case CROSS_FIELDS: throw new IllegalArgumentException( - "multi_match query with type [cross_fields] is not supported for semantic_text fields. " + - "Use [best_fields] or [most_fields] instead." + "multi_match query with type [cross_fields] is not supported for semantic_text fields. " + + "Use [best_fields] or [most_fields] instead." 
); case PHRASE: throw new IllegalArgumentException( - "multi_match query with type [phrase] is not supported for semantic_text fields. " + - "Use [best_fields] instead." + "multi_match query with type [phrase] is not supported for semantic_text fields. " + "Use [best_fields] instead." ); case PHRASE_PREFIX: throw new IllegalArgumentException( - "multi_match query with type [phrase_prefix] is not supported for semantic_text fields. " + - "Use [best_fields] instead." + "multi_match query with type [phrase_prefix] is not supported for semantic_text fields. " + "Use [best_fields] instead." ); case BOOL_PREFIX: throw new IllegalArgumentException( - "multi_match query with type [bool_prefix] is not supported for semantic_text fields. " + - "Use [best_fields] or [most_fields] instead." + "multi_match query with type [bool_prefix] is not supported for semantic_text fields. " + + "Use [best_fields] or [most_fields] instead." ); } } @@ -213,7 +211,12 @@ private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { /** * Creates a semantic query with field boost applied. */ - private SemanticQueryBuilder createSemanticQuery(String fieldName, String queryValue, Map fieldsBoosts, boolean lenient) { + private SemanticQueryBuilder createSemanticQuery( + String fieldName, + String queryValue, + Map fieldsBoosts, + boolean lenient + ) { SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, lenient); float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); semanticQuery.boost(fieldBoost); @@ -367,10 +370,10 @@ private void detectAndWarnScoreRangeMismatch(MultiFieldInferenceInfo inferenceIn // since we can't determine the exact task types if (inferenceInfo.hasNonInferenceFields() && inferenceInfo.getInferenceFields().isEmpty() == false) { HeaderWarning.addWarning( - "Query spans both semantic_text and non-inference fields. 
" + - "Dense vector models (TEXT_EMBEDDING) produce bounded scores (0-1) while sparse vector models " + - "(SPARSE_EMBEDDING) and non-inference fields produce unbounded scores, which may cause score " + - "range mismatches and affect result ranking. Consider using Linear or RRF retrievers." + "Query spans both semantic_text and non-inference fields. " + + "Dense vector models (TEXT_EMBEDDING) produce bounded scores (0-1) while sparse vector models " + + "(SPARSE_EMBEDDING) and non-inference fields produce unbounded scores, which may cause score " + + "range mismatches and affect result ranking. Consider using Linear or RRF retrievers." ); } return; @@ -409,11 +412,11 @@ private void detectAndWarnScoreRangeMismatch(MultiFieldInferenceInfo inferenceIn // Emit warning only if we have dense vector model mixed with sparse vector or non-inference fields if (hasDenseVectorModel && (hasSparseVectorModel || hasNonInferenceFields)) { HeaderWarning.addWarning( - "Query contains dense vector model (TEXT_EMBEDDING) with bounded scores (0-1) mixed with " + - (hasSparseVectorModel ? "sparse vector model (SPARSE_EMBEDDING) and/or " : "") + - (hasNonInferenceFields ? "non-inference fields " : "") + - "that produce unbounded scores. This may cause score range mismatches and affect result ranking. " + - "Consider using Linear or RRF retrievers." + "Query contains dense vector model (TEXT_EMBEDDING) with bounded scores (0-1) mixed with " + + (hasSparseVectorModel ? "sparse vector model (SPARSE_EMBEDDING) and/or " : "") + + (hasNonInferenceFields ? "non-inference fields " : "") + + "that produce unbounded scores. This may cause score range mismatches and affect result ranking. " + + "Consider using Linear or RRF retrievers." ); } } @@ -467,18 +470,18 @@ private MultiMatchQueryBuilder createNonInferenceQueryForIndex( /** * Represents the inference information for multiple fields across indices. 
*/ - public record MultiFieldInferenceInfo(Map> inferenceFieldsPerIndex, - Map> nonInferenceFieldsPerIndex) { + public record MultiFieldInferenceInfo( + Map> inferenceFieldsPerIndex, + Map> nonInferenceFieldsPerIndex + ) { public Set getInferenceFields() { - return inferenceFieldsPerIndex.values().stream() - .flatMap(fields -> fields.keySet().stream()) - .collect(Collectors.toSet()); - } - - public boolean hasNonInferenceFields() { - return nonInferenceFieldsPerIndex.isEmpty() == false; - } + return inferenceFieldsPerIndex.values().stream().flatMap(fields -> fields.keySet().stream()).collect(Collectors.toSet()); + } + public boolean hasNonInferenceFields() { + return nonInferenceFieldsPerIndex.isEmpty() == false; } + + } } From a785a19e66c91d1aafb6fed6aba78e04067a23b5 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 8 Aug 2025 10:32:37 -0400 Subject: [PATCH 13/46] refactoring multimatch intercepter --- .../SemanticQueryRewriteInterceptor.java | 135 +++++++++++++++--- 1 file changed, 113 insertions(+), 22 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index bb76ef0be24e9..22feb6417ca25 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -20,8 +20,10 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; /** @@ -33,7 +35,6 @@ public SemanticQueryRewriteInterceptor() {} @Override public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilder queryBuilder) { - String 
fieldName = getFieldName(queryBuilder); ResolvedIndices resolvedIndices = context.getResolvedIndices(); if (resolvedIndices == null) { @@ -41,18 +42,15 @@ public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilde return queryBuilder; } - InferenceIndexInformationForField indexInformation = resolveIndicesForField(fieldName, resolvedIndices); - if (indexInformation.getInferenceIndices().isEmpty()) { + InferenceIndexInformationForField indexInformation = resolveIndicesForFields(queryBuilder, resolvedIndices); + if (!indexInformation.hasInferenceFields()) { // No inference fields were identified, so return the original query. return queryBuilder; - } else if (indexInformation.nonInferenceIndices().isEmpty() == false) { - // Combined case where the field name requested by this query contains both - // semantic_text and non-inference fields, so we have to combine queries per index - // containing each field type. + } else if (indexInformation.hasNonInferenceFields()) { + // Combined case where some fields are semantic_text and others are not return buildCombinedInferenceAndNonInferenceQuery(queryBuilder, indexInformation); } else { - // The only fields we've identified are inference fields (e.g. semantic_text), - // so rewrite the entire query to work on a semantic_text field. + // All specified fields are inference fields (semantic_text) return buildInferenceQuery(queryBuilder, indexInformation); } } @@ -63,6 +61,16 @@ public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilde */ protected abstract String getFieldName(QueryBuilder queryBuilder); + /** + * @param queryBuilder {@link QueryBuilder} + * @return The field names with their weights requested by the provided query builder. 
+ */ + protected Map getFieldsWithWeights(QueryBuilder queryBuilder) { + // Default implementation for single-field queries + String fieldName = getFieldName(queryBuilder); + return Map.of(fieldName, 1.0f); + } + /** * @param queryBuilder {@link QueryBuilder} * @return The text/query string requested by the provided query builder. @@ -90,6 +98,42 @@ protected abstract QueryBuilder buildCombinedInferenceAndNonInferenceQuery( InferenceIndexInformationForField indexInformation ); + private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder queryBuilder, ResolvedIndices resolvedIndices) { + Map fieldsWithWeights = getFieldsWithWeights(queryBuilder); + Set fieldNames = fieldsWithWeights.keySet(); + Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); + + Map> inferenceFieldsPerIndex = new HashMap<>(); + Map> nonInferenceFieldsPerIndex = new HashMap<>(); + + for (IndexMetadata indexMetadata : indexMetadataCollection) { + String indexName = indexMetadata.getIndex().getName(); + Map indexInferenceFields = new HashMap<>(); + Set indexNonInferenceFields = new HashSet<>(); + + // Classify each field as inference or non-inference + for (String fieldName : fieldNames) { + if (indexMetadata.getInferenceFields().containsKey(fieldName)) { + indexInferenceFields.put(fieldName, indexMetadata.getInferenceFields().get(fieldName)); + } else { + indexNonInferenceFields.add(fieldName); + } + } + + // Store inference fields if any exist + if (!indexInferenceFields.isEmpty()) { + inferenceFieldsPerIndex.put(indexName, indexInferenceFields); + } + + // Store non-inference fields if any exist + if (!indexNonInferenceFields.isEmpty()) { + nonInferenceFieldsPerIndex.put(indexName, indexNonInferenceFields); + } + } + + return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex); + } + private InferenceIndexInformationForField resolveIndicesForField(String fieldName, ResolvedIndices 
resolvedIndices) { Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); Map inferenceIndicesMetadata = new HashMap<>(); @@ -122,27 +166,74 @@ protected QueryBuilder createSemanticSubQuery(Collection indices, String } /** - * Represents the indices and associated inference information for a field. + * Represents the indices and associated inference information for fields. */ public record InferenceIndexInformationForField( - String fieldName, - Map inferenceIndicesMetadata, - List nonInferenceIndices + // Map: IndexName -> (FieldName -> InferenceFieldMetadata) + Map> inferenceFieldsPerIndex, + // Map: IndexName -> Set of non-inference field names + Map> nonInferenceFieldsPerIndex ) { + // Backward compatibility for single-field queries + public InferenceIndexInformationForField(String fieldName, Map inferenceIndicesMetadata, List nonInferenceIndices) { + this( + // Convert single field metadata to multi-field structure + inferenceIndicesMetadata.entrySet().stream() + .collect(Collectors.toMap( + Map.Entry::getKey, + entry -> Map.of(fieldName, entry.getValue()) + )), + // Convert non-inference indices to multi-field structure + nonInferenceIndices.stream() + .collect(Collectors.toMap( + indexName -> indexName, + indexName -> Set.of(fieldName) + )) + ); + } + + public Set getAllInferenceFields() { + return inferenceFieldsPerIndex.values() + .stream() + .flatMap(fields -> fields.keySet().stream()) + .collect(Collectors.toSet()); + } + + public Set getAllNonInferenceFields() { + return nonInferenceFieldsPerIndex.values() + .stream() + .flatMap(Set::stream) + .collect(Collectors.toSet()); + } + + public boolean hasInferenceFields() { + return !inferenceFieldsPerIndex.isEmpty(); + } + + public boolean hasNonInferenceFields() { + return !nonInferenceFieldsPerIndex.isEmpty(); + } + + // Backward compatibility methods public Collection getInferenceIndices() { - return inferenceIndicesMetadata.keySet(); + return 
inferenceFieldsPerIndex.keySet(); + } + + public List nonInferenceIndices() { + return new ArrayList<>(nonInferenceFieldsPerIndex.keySet()); } public Map> getInferenceIdsIndices() { - return inferenceIndicesMetadata.entrySet() - .stream() - .collect( - Collectors.groupingBy( - entry -> entry.getValue().getSearchInferenceId(), - Collectors.mapping(Map.Entry::getKey, Collectors.toList()) - ) - ); + Map> result = new HashMap<>(); + for (Map.Entry> indexEntry : inferenceFieldsPerIndex.entrySet()) { + String indexName = indexEntry.getKey(); + for (InferenceFieldMetadata metadata : indexEntry.getValue().values()) { + String inferenceId = metadata.getSearchInferenceId(); + result.computeIfAbsent(inferenceId, k -> new ArrayList<>()).add(indexName); + } + } + return result; } } } From 1b7501f853e357074be20697ec74c5bf94ec5311 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 8 Aug 2025 10:32:44 -0400 Subject: [PATCH 14/46] refactoring multimatch intercepter --- .../SemanticMatchQueryRewriteInterceptor.java | 2 +- ...nticMultiMatchQueryRewriteInterceptor.java | 365 +++++------------- .../SemanticQueryRewriteInterceptor.java | 21 +- 3 files changed, 118 insertions(+), 270 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index a6599afc66c3f..bc977a5ec8e14 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -36,7 +36,7 @@ protected String getQuery(QueryBuilder queryBuilder) { @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { - SemanticQueryBuilder 
semanticQueryBuilder = new SemanticQueryBuilder(indexInformation.fieldName(), getQuery(queryBuilder), false); + SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(getFieldName(queryBuilder), getQuery(queryBuilder), false); semanticQueryBuilder.boost(queryBuilder.boost()); semanticQueryBuilder.queryName(queryBuilder.queryName()); return semanticQueryBuilder; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 83151276f1bc6..19445ac44e330 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -7,127 +7,61 @@ package org.elasticsearch.xpack.inference.queries; -import org.elasticsearch.action.ResolvedIndices; -import org.elasticsearch.cluster.metadata.IndexMetadata; -import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; -import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.features.NodeFeature; -import org.elasticsearch.index.mapper.IndexFieldMapper; +import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.query.QueryRewriteContext; -import org.elasticsearch.index.query.TermsQueryBuilder; -import org.elasticsearch.inference.MinimalServiceSettings; -import org.elasticsearch.inference.TaskType; -import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor; 
import org.elasticsearch.xpack.inference.registry.ModelRegistry; -import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Supplier; -import java.util.stream.Collectors; -public class SemanticMultiMatchQueryRewriteInterceptor implements QueryRewriteInterceptor { +public class SemanticMultiMatchQueryRewriteInterceptor extends SemanticQueryRewriteInterceptor { public static final NodeFeature SEMANTIC_MULTI_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED = new NodeFeature( "search.semantic_multi_match_query_rewrite_interception_supported" ); private final Supplier modelRegistrySupplier; - private final float DEFAULT_BOOST_FIELD = 1.0f; public SemanticMultiMatchQueryRewriteInterceptor(Supplier modelRegistrySupplier) { + super(); this.modelRegistrySupplier = modelRegistrySupplier; } @Override - public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilder queryBuilder) { - assert (queryBuilder instanceof MultiMatchQueryBuilder); - MultiMatchQueryBuilder multiMatchQueryBuilder = (MultiMatchQueryBuilder) queryBuilder; - - ResolvedIndices resolvedIndices = context.getResolvedIndices(); - if (resolvedIndices == null) { - // No resolved indices, so return the original query. - return queryBuilder; - } - - Map fields = multiMatchQueryBuilder.fields(); - if (fields == null || fields.isEmpty()) { - // No fields specified, return original query - return queryBuilder; - } - - MultiFieldInferenceInfo inferenceInfo = resolveInferenceInfoForFields(fields.keySet(), resolvedIndices); - - if (inferenceInfo.getInferenceFields().isEmpty()) { - // No inference fields were identified, so return the original query. 
- return queryBuilder; - } else if (inferenceInfo.hasNonInferenceFields()) { - // Combined case where some fields are semantic_text and others are not - return buildCombinedInferenceAndNonInferenceQuery(multiMatchQueryBuilder, inferenceInfo, fields); - } else { - // All specified fields are inference fields (semantic_text) - return buildInferenceQuery(multiMatchQueryBuilder, inferenceInfo, fields); + protected String getFieldName(QueryBuilder queryBuilder) { + MultiMatchQueryBuilder multiMatchQuery = (MultiMatchQueryBuilder) queryBuilder; + Map fields = multiMatchQuery.fields(); + if (fields.size() > 1) { + throw new IllegalArgumentException("getFieldName() called on MultiMatchQuery with multiple fields"); } + return fields.keySet().iterator().next(); } @Override - public String getQueryName() { - return MultiMatchQueryBuilder.NAME; + protected Map getFieldsWithWeights(QueryBuilder queryBuilder) { + MultiMatchQueryBuilder multiMatchQuery = (MultiMatchQueryBuilder) queryBuilder; + return multiMatchQuery.fields(); } - private MultiFieldInferenceInfo resolveInferenceInfoForFields(Set fieldNames, ResolvedIndices resolvedIndices) { - Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); - Map> inferenceFieldsPerIndex = new HashMap<>(); - Map> nonInferenceFieldsPerIndex = new HashMap<>(); - - for (IndexMetadata indexMetadata : indexMetadataCollection) { - String indexName = indexMetadata.getIndex().getName(); - Map indexInferenceFields = new HashMap<>(); - Set indexNonInferenceFields = new HashSet<>(); - - // Classify each field as inference or non-inference - for (String fieldName : fieldNames) { - if (indexMetadata.getInferenceFields().containsKey(fieldName)) { - indexInferenceFields.put(fieldName, indexMetadata.getInferenceFields().get(fieldName)); - } else { - indexNonInferenceFields.add(fieldName); - } - } - - // Store inference fields if any exist - if (indexInferenceFields.isEmpty() == false) { - 
inferenceFieldsPerIndex.put(indexName, indexInferenceFields); - } - - // Store non-inference fields if any exist - if (indexNonInferenceFields.isEmpty() == false) { - nonInferenceFieldsPerIndex.put(indexName, indexNonInferenceFields); - } - } - - MultiFieldInferenceInfo inferenceInfo = new MultiFieldInferenceInfo(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex); - - // Perform early detection of score range mismatches and emit warning if needed - detectAndWarnScoreRangeMismatch(inferenceInfo); - - return inferenceInfo; + @Override + protected String getQuery(QueryBuilder queryBuilder) { + MultiMatchQueryBuilder multiMatchQuery = (MultiMatchQueryBuilder) queryBuilder; + return (String) multiMatchQuery.value(); } - private QueryBuilder buildInferenceQuery( - MultiMatchQueryBuilder originalQuery, - MultiFieldInferenceInfo inferenceInfo, - Map fieldsBoosts - ) { - String queryValue = (String) originalQuery.value(); - Set inferenceFields = inferenceInfo.getInferenceFields(); + @Override + protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { + MultiMatchQueryBuilder originalQuery = (MultiMatchQueryBuilder) queryBuilder; + Map fieldsBoosts = getFieldsWithWeights(queryBuilder); + String queryValue = getQuery(queryBuilder); + Set inferenceFields = indexInformation.getAllInferenceFields(); if (inferenceFields.size() == 1) { // Single inference field - all multi_match types work the same (like original Elasticsearch) @@ -136,7 +70,7 @@ private QueryBuilder buildInferenceQuery( SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); // Apply per-field boost - float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); + float fieldBoost = fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST); // Apply top-level query boost with per field and name semanticQuery.boost(fieldBoost * originalQuery.boost()); @@ -148,6 +82,31 @@ private 
QueryBuilder buildInferenceQuery( } } + @Override + protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( + QueryBuilder queryBuilder, + InferenceIndexInformationForField indexInformation + ) { + MultiMatchQueryBuilder originalQuery = (MultiMatchQueryBuilder) queryBuilder; + Map fieldsBoosts = getFieldsWithWeights(queryBuilder); + String queryValue = getQuery(queryBuilder); + + validateQueryTypeSupported(originalQuery.type()); + + return switch (originalQuery.type()) { + case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, indexInformation, queryValue); + case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, fieldsBoosts, indexInformation, queryValue); + default -> + // Fallback to best_fields behavior + buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, indexInformation, queryValue); + }; + } + + @Override + public String getQueryName() { + return MultiMatchQueryBuilder.NAME; + } + private QueryBuilder buildMultiFieldSemanticQuery( MultiMatchQueryBuilder originalQuery, Map fieldsBoosts, @@ -163,23 +122,6 @@ private QueryBuilder buildMultiFieldSemanticQuery( }; } - private QueryBuilder buildCombinedInferenceAndNonInferenceQuery( - MultiMatchQueryBuilder originalQuery, - MultiFieldInferenceInfo inferenceInfo, - Map fieldsBoosts - ) { - validateQueryTypeSupported(originalQuery.type()); - - String queryValue = (String) originalQuery.value(); - - return switch (originalQuery.type()) { - case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, inferenceInfo, queryValue); - case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, fieldsBoosts, inferenceInfo, queryValue); - default -> - // Fallback to best_fields behavior - buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, inferenceInfo, queryValue); - }; - } /** * Validates that the multi_match query type is supported for semantic_text fields. 
@@ -218,70 +160,13 @@ private SemanticQueryBuilder createSemanticQuery( boolean lenient ) { SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, lenient); - float fieldBoost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); + float fieldBoost = fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST); semanticQuery.boost(fieldBoost); return semanticQuery; } - /** - * Adds semantic queries for inference fields per index to the provided query builder. - */ - private void addInferenceQueriesPerIndex( - QueryBuilder parentQuery, - MultiFieldInferenceInfo inferenceInfo, - String queryValue, - Map fieldsBoosts - ) { - Map> inferenceFieldsPerIndex = inferenceInfo.inferenceFieldsPerIndex(); - for (Map.Entry> entry : inferenceFieldsPerIndex.entrySet()) { - String indexName = entry.getKey(); - Map indexInferenceFields = entry.getValue(); - - for (String fieldName : indexInferenceFields.keySet()) { - SemanticQueryBuilder semanticQuery = createSemanticQuery(fieldName, queryValue, fieldsBoosts, true); - - BoolQueryBuilder indexSpecificQuery = new BoolQueryBuilder(); - indexSpecificQuery.must(semanticQuery); - indexSpecificQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); - - if (parentQuery instanceof DisMaxQueryBuilder) { - ((DisMaxQueryBuilder) parentQuery).add(indexSpecificQuery); - } else if (parentQuery instanceof BoolQueryBuilder) { - ((BoolQueryBuilder) parentQuery).should(indexSpecificQuery); - } - } - } - } - - /** - * Adds non-inference queries for non-inference fields per index to the provided query builder. 
- */ - private void addNonInferenceQueriesPerIndex( - QueryBuilder parentQuery, - MultiFieldInferenceInfo inferenceInfo, - MultiMatchQueryBuilder originalQuery, - Map fieldsBoosts - ) { - Map> nonInferenceFieldsPerIndex = inferenceInfo.nonInferenceFieldsPerIndex(); - for (Map.Entry> entry : nonInferenceFieldsPerIndex.entrySet()) { - String indexName = entry.getKey(); - Set nonInferenceFields = entry.getValue(); - - MultiMatchQueryBuilder indexSpecificQuery = createNonInferenceQueryForIndex(originalQuery, nonInferenceFields, fieldsBoosts); - - BoolQueryBuilder indexFilteredQuery = new BoolQueryBuilder(); - indexFilteredQuery.must(indexSpecificQuery); - indexFilteredQuery.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, List.of(indexName))); - - if (parentQuery instanceof DisMaxQueryBuilder) { - ((DisMaxQueryBuilder) parentQuery).add(indexFilteredQuery); - } else if (parentQuery instanceof BoolQueryBuilder) { - ((BoolQueryBuilder) parentQuery).should(indexFilteredQuery); - } - } - } - private QueryBuilder buildBestFieldsSemanticQuery( + private QueryBuilder buildBestFieldsSemanticQuery( MultiMatchQueryBuilder originalQuery, Map fieldsBoosts, Set inferenceFields, @@ -291,9 +176,12 @@ private QueryBuilder buildBestFieldsSemanticQuery( for (String fieldName : inferenceFields) { disMaxQuery.add(createSemanticQuery(fieldName, queryValue, fieldsBoosts, false)); } - // Apply tie_breaker if specified - if (originalQuery.tieBreaker() != null) { - disMaxQuery.tieBreaker(originalQuery.tieBreaker()); + // Apply tie_breaker - use explicit value or fall back to type's default + Float tieBreaker = originalQuery.tieBreaker(); + if (tieBreaker != null) { + disMaxQuery.tieBreaker(tieBreaker); + } else { + disMaxQuery.tieBreaker(originalQuery.type().tieBreaker()); } disMaxQuery.boost(originalQuery.boost()); disMaxQuery.queryName(originalQuery.queryName()); @@ -322,17 +210,38 @@ private QueryBuilder buildMostFieldsSemanticQuery( private QueryBuilder buildBestFieldsCombinedQuery( 
MultiMatchQueryBuilder originalQuery, Map fieldsBoosts, - MultiFieldInferenceInfo inferenceInfo, + InferenceIndexInformationForField inferenceInfo, String queryValue ) { DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); - addInferenceQueriesPerIndex(disMaxQuery, inferenceInfo, queryValue, fieldsBoosts); - addNonInferenceQueriesPerIndex(disMaxQuery, inferenceInfo, originalQuery, fieldsBoosts); + // Add semantic queries for each inference field across different indices + for (String fieldName : inferenceInfo.getAllInferenceFields()) { + disMaxQuery.add( + createSemanticSubQuery( + inferenceInfo.getInferenceIndices(), + fieldName, + queryValue + ).boost(fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)) + ); + } - // Apply tie_breaker if specified - if (originalQuery.tieBreaker() != null) { - disMaxQuery.tieBreaker(originalQuery.tieBreaker()); + // Add non-inference query for non-inference fields + if (inferenceInfo.hasNonInferenceFields()) { + MultiMatchQueryBuilder nonInferenceQuery = createNonInferenceQueryForIndex( + originalQuery, + inferenceInfo.getAllNonInferenceFields(), + fieldsBoosts + ); + disMaxQuery.add(createSubQueryForIndices(inferenceInfo.nonInferenceIndices(), nonInferenceQuery)); + } + + // Apply tie_breaker - use explicit value or fall back to type's default + Float tieBreaker = originalQuery.tieBreaker(); + if (tieBreaker != null) { + disMaxQuery.tieBreaker(tieBreaker); + } else { + disMaxQuery.tieBreaker(originalQuery.type().tieBreaker()); } disMaxQuery.boost(originalQuery.boost()); disMaxQuery.queryName(originalQuery.queryName()); @@ -342,13 +251,31 @@ private QueryBuilder buildBestFieldsCombinedQuery( private QueryBuilder buildMostFieldsCombinedQuery( MultiMatchQueryBuilder originalQuery, Map fieldsBoosts, - MultiFieldInferenceInfo inferenceInfo, + InferenceIndexInformationForField inferenceInfo, String queryValue ) { BoolQueryBuilder boolQuery = new BoolQueryBuilder(); - addInferenceQueriesPerIndex(boolQuery, 
inferenceInfo, queryValue, fieldsBoosts); - addNonInferenceQueriesPerIndex(boolQuery, inferenceInfo, originalQuery, fieldsBoosts); + // Add semantic queries for each inference field + for (String fieldName : inferenceInfo.getAllInferenceFields()) { + boolQuery.should( + createSemanticSubQuery( + inferenceInfo.getInferenceIndices(), + fieldName, + queryValue + ).boost(fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)) + ); + } + + // Add non-inference query for non-inference fields + if (inferenceInfo.hasNonInferenceFields()) { + MultiMatchQueryBuilder nonInferenceQuery = createNonInferenceQueryForIndex( + originalQuery, + inferenceInfo.getAllNonInferenceFields(), + fieldsBoosts + ); + boolQuery.should(createSubQueryForIndices(inferenceInfo.nonInferenceIndices(), nonInferenceQuery)); + } boolQuery.minimumShouldMatch("1"); boolQuery.boost(originalQuery.boost()); @@ -356,71 +283,6 @@ private QueryBuilder buildMostFieldsCombinedQuery( return boolQuery; } - /** - * Detects and warns about score range mismatches when a multi_match query has at least one dense vector model (TEXT_EMBEDDING) - * mixed with sparse vector models (SPARSE_EMBEDDING) or non-inference fields. - * Dense vector models typically produce bounded scores (0-1) while sparse vector models and - * non-inference fields produce unbounded scores, causing score range mismatches. - */ - private void detectAndWarnScoreRangeMismatch(MultiFieldInferenceInfo inferenceInfo) { - ModelRegistry modelRegistry = modelRegistrySupplier.get(); - // TODO: validate if we need to check if modelRegistry is null or not - if (modelRegistry == null) { - // Fallback: warn for any mixed semantic_text + non-inference combination - // since we can't determine the exact task types - if (inferenceInfo.hasNonInferenceFields() && inferenceInfo.getInferenceFields().isEmpty() == false) { - HeaderWarning.addWarning( - "Query spans both semantic_text and non-inference fields. 
" - + "Dense vector models (TEXT_EMBEDDING) produce bounded scores (0-1) while sparse vector models " - + "(SPARSE_EMBEDDING) and non-inference fields produce unbounded scores, which may cause score " - + "range mismatches and affect result ranking. Consider using Linear or RRF retrievers." - ); - } - return; - } - - // Check if we have any dense vector models mixed with sparse vector models or non-inference fields - boolean hasDenseVectorModel = false; - boolean hasSparseVectorModel = false; - boolean hasNonInferenceFields = inferenceInfo.hasNonInferenceFields(); - - // Collect all inference IDs from all fields - Set allInferenceIds = new HashSet<>(); - for (Map indexFields : inferenceInfo.inferenceFieldsPerIndex().values()) { - for (InferenceFieldMetadata fieldMetadata : indexFields.values()) { - allInferenceIds.add(fieldMetadata.getSearchInferenceId()); - } - } - - // Check task types for each inference ID - for (String inferenceId : allInferenceIds) { - try { - MinimalServiceSettings settings = modelRegistry.getMinimalServiceSettings(inferenceId); - if (settings != null) { - TaskType taskType = settings.taskType(); - if (taskType == TaskType.TEXT_EMBEDDING) { - hasDenseVectorModel = true; - } else if (taskType == TaskType.SPARSE_EMBEDDING) { - hasSparseVectorModel = true; - } - } - } catch (Exception e) { - // TODO: validate If we can't get model info, skip this inference ID or throw an error - } - } - - // Emit warning only if we have dense vector model mixed with sparse vector or non-inference fields - if (hasDenseVectorModel && (hasSparseVectorModel || hasNonInferenceFields)) { - HeaderWarning.addWarning( - "Query contains dense vector model (TEXT_EMBEDDING) with bounded scores (0-1) mixed with " - + (hasSparseVectorModel ? "sparse vector model (SPARSE_EMBEDDING) and/or " : "") - + (hasNonInferenceFields ? "non-inference fields " : "") - + "that produce unbounded scores. This may cause score range mismatches and affect result ranking. 
" - + "Consider using Linear or RRF retrievers." - ); - } - } - /** * Copies all properties from original query to target query except fields. */ @@ -457,7 +319,7 @@ private MultiMatchQueryBuilder createNonInferenceQueryForIndex( // Set only the non-inference fields with their boosts Map filteredFields = new HashMap<>(); for (String fieldName : nonInferenceFields) { - float boost = fieldsBoosts.getOrDefault(fieldName, DEFAULT_BOOST_FIELD); + float boost = fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST); filteredFields.put(fieldName, boost); } query.fields(filteredFields); @@ -467,21 +329,4 @@ private MultiMatchQueryBuilder createNonInferenceQueryForIndex( return query; } - /** - * Represents the inference information for multiple fields across indices. - */ - public record MultiFieldInferenceInfo( - Map> inferenceFieldsPerIndex, - Map> nonInferenceFieldsPerIndex - ) { - - public Set getInferenceFields() { - return inferenceFieldsPerIndex.values().stream().flatMap(fields -> fields.keySet().stream()).collect(Collectors.toSet()); - } - - public boolean hasNonInferenceFields() { - return nonInferenceFieldsPerIndex.isEmpty() == false; - } - - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 22feb6417ca25..89ff82934674e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -43,14 +43,17 @@ public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilde } InferenceIndexInformationForField indexInformation = resolveIndicesForFields(queryBuilder, resolvedIndices); - if (!indexInformation.hasInferenceFields()) { + if 
(indexInformation.hasInferenceFields() == false) { // No inference fields were identified, so return the original query. return queryBuilder; } else if (indexInformation.hasNonInferenceFields()) { - // Combined case where some fields are semantic_text and others are not + // Combined case where the field name(s) requested by this query contain both + // semantic_text and non-inference fields, so we have to combine queries per index + // containing each field type. return buildCombinedInferenceAndNonInferenceQuery(queryBuilder, indexInformation); } else { - // All specified fields are inference fields (semantic_text) + // The only fields we've identified are inference fields (e.g. semantic_text), + // so rewrite the entire query to work on semantic_text field(s). return buildInferenceQuery(queryBuilder, indexInformation); } } @@ -102,7 +105,7 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q Map fieldsWithWeights = getFieldsWithWeights(queryBuilder); Set fieldNames = fieldsWithWeights.keySet(); Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); - + Map> inferenceFieldsPerIndex = new HashMap<>(); Map> nonInferenceFieldsPerIndex = new HashMap<>(); @@ -199,27 +202,27 @@ public Set getAllInferenceFields() { .flatMap(fields -> fields.keySet().stream()) .collect(Collectors.toSet()); } - + public Set getAllNonInferenceFields() { return nonInferenceFieldsPerIndex.values() .stream() .flatMap(Set::stream) .collect(Collectors.toSet()); } - + public boolean hasInferenceFields() { return !inferenceFieldsPerIndex.isEmpty(); } - + public boolean hasNonInferenceFields() { return !nonInferenceFieldsPerIndex.isEmpty(); } - + // Backward compatibility methods public Collection getInferenceIndices() { return inferenceFieldsPerIndex.keySet(); } - + public List nonInferenceIndices() { return new ArrayList<>(nonInferenceFieldsPerIndex.keySet()); } From 00f14de5f970982393839df51f12c7f04e3e0a1c Mon Sep 17 
00:00:00 2001 From: Samiul Monir Date: Fri, 8 Aug 2025 10:55:39 -0400 Subject: [PATCH 15/46] fix tiebreaker default issue --- .../SemanticMultiMatchQueryRewriteInterceptor.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 19445ac44e330..56422b1bd873b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -201,7 +201,9 @@ private QueryBuilder buildMostFieldsSemanticQuery( for (String fieldName : inferenceFields) { boolQuery.should(createSemanticQuery(fieldName, queryValue, fieldsBoosts, false)); } - boolQuery.minimumShouldMatch("1"); + // Apply minimumShouldMatch - use original query's value or default to "1" + String minimumShouldMatch = originalQuery.minimumShouldMatch(); + boolQuery.minimumShouldMatch(minimumShouldMatch != null ? minimumShouldMatch : "1"); boolQuery.boost(originalQuery.boost()); boolQuery.queryName(originalQuery.queryName()); return boolQuery; @@ -277,7 +279,9 @@ private QueryBuilder buildMostFieldsCombinedQuery( boolQuery.should(createSubQueryForIndices(inferenceInfo.nonInferenceIndices(), nonInferenceQuery)); } - boolQuery.minimumShouldMatch("1"); + // Apply minimumShouldMatch - use original query's value or default to "1" + String minimumShouldMatch = originalQuery.minimumShouldMatch(); + boolQuery.minimumShouldMatch(minimumShouldMatch != null ? 
minimumShouldMatch : "1"); boolQuery.boost(originalQuery.boost()); boolQuery.queryName(originalQuery.queryName()); return boolQuery; From 6f618e481f84392d2c8fa54c8ced63d0730a9c95 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 8 Aug 2025 12:13:03 -0400 Subject: [PATCH 16/46] small fix --- ...nticMultiMatchQueryRewriteInterceptor.java | 50 +++++++++++++++++++ .../SemanticQueryRewriteInterceptor.java | 8 +-- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 56422b1bd873b..9d64deecb8bc7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.inference.queries; +import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.BoolQueryBuilder; @@ -14,6 +15,8 @@ import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.inference.MinimalServiceSettings; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.registry.ModelRegistry; import java.util.HashMap; @@ -78,6 +81,7 @@ protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceI return semanticQuery; } else { // Multiple inference fields - handle based on multi-match query type (validation happens here) + detectAndWarnScoreRangeMismatch(indexInformation); return 
buildMultiFieldSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); } } @@ -92,6 +96,7 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( String queryValue = getQuery(queryBuilder); validateQueryTypeSupported(originalQuery.type()); + detectAndWarnScoreRangeMismatch(indexInformation); return switch (originalQuery.type()) { case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, indexInformation, queryValue); @@ -333,4 +338,49 @@ private MultiMatchQueryBuilder createNonInferenceQueryForIndex( return query; } + /** + * Detects and warns about score range mismatches when a multi_match query has at least one dense vector model (TEXT_EMBEDDING) + * mixed with sparse vector models (SPARSE_EMBEDDING) or non-inference fields. + * Dense vector models typically produce bounded scores (0-1) while sparse vector models and + * non-inference fields produce unbounded scores, causing score range mismatches. + */ + private void detectAndWarnScoreRangeMismatch(InferenceIndexInformationForField indexInformation) { + ModelRegistry modelRegistry = modelRegistrySupplier.get(); + // Check if we have any dense vector models mixed with sparse vector models or non-inference fields + boolean hasDenseVectorModel = false; + boolean hasSparseVectorModel = false; + boolean hasNonInferenceFields = indexInformation.hasNonInferenceFields(); + + // Collect all inference IDs from all fields using the public API + Set allInferenceIds = indexInformation.getInferenceIdsIndices().keySet(); + + // Check task types for each inference ID + for (String inferenceId : allInferenceIds) { + try { + MinimalServiceSettings settings = modelRegistry.getMinimalServiceSettings(inferenceId); + if (settings != null) { + TaskType taskType = settings.taskType(); + if (taskType == TaskType.TEXT_EMBEDDING) { + hasDenseVectorModel = true; + } else if (taskType == TaskType.SPARSE_EMBEDDING) { + hasSparseVectorModel = true; + } + } + } catch (Exception e) { + // TODO: 
validate If we can't get model info, skip this inference ID or throw an error + } + } + + // Emit warning only if we have dense vector model mixed with sparse vector or non-inference fields + if (hasDenseVectorModel && (hasSparseVectorModel || hasNonInferenceFields)) { + HeaderWarning.addWarning( + "Query contains dense vector model (TEXT_EMBEDDING) with bounded scores (0-1) mixed with " + + (hasSparseVectorModel ? "sparse vector model (SPARSE_EMBEDDING) and/or " : "") + + (hasNonInferenceFields ? "non-inference fields " : "") + + "that produce unbounded scores. This may cause score range mismatches and affect result ranking. " + + "Consider using Linear or RRF retrievers." + ); + } + } + } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 89ff82934674e..b0f917bc5c920 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -124,12 +124,12 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q } // Store inference fields if any exist - if (!indexInferenceFields.isEmpty()) { + if (indexInferenceFields.isEmpty() == false) { inferenceFieldsPerIndex.put(indexName, indexInferenceFields); } // Store non-inference fields if any exist - if (!indexNonInferenceFields.isEmpty()) { + if (indexNonInferenceFields.isEmpty() == false) { nonInferenceFieldsPerIndex.put(indexName, indexNonInferenceFields); } } @@ -211,11 +211,11 @@ public Set getAllNonInferenceFields() { } public boolean hasInferenceFields() { - return !inferenceFieldsPerIndex.isEmpty(); + return inferenceFieldsPerIndex.isEmpty() == false; } public boolean hasNonInferenceFields() { - return 
!nonInferenceFieldsPerIndex.isEmpty(); + return nonInferenceFieldsPerIndex.isEmpty() == false; } // Backward compatibility methods From c74080414eef9595b1d592f769fb6a9257938f27 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 8 Aug 2025 12:16:06 -0400 Subject: [PATCH 17/46] small refactor --- ...nticMultiMatchQueryRewriteInterceptor.java | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 9d64deecb8bc7..244ba564a8b20 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -21,6 +21,7 @@ import java.util.HashMap; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.function.Supplier; @@ -161,10 +162,9 @@ private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { private SemanticQueryBuilder createSemanticQuery( String fieldName, String queryValue, - Map fieldsBoosts, - boolean lenient + Map fieldsBoosts ) { - SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, lenient); + SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); float fieldBoost = fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST); semanticQuery.boost(fieldBoost); return semanticQuery; @@ -179,15 +179,11 @@ private QueryBuilder buildBestFieldsSemanticQuery( ) { DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); for (String fieldName : inferenceFields) { - disMaxQuery.add(createSemanticQuery(fieldName, queryValue, fieldsBoosts, 
false)); + disMaxQuery.add(createSemanticQuery(fieldName, queryValue, fieldsBoosts)); } // Apply tie_breaker - use explicit value or fall back to type's default Float tieBreaker = originalQuery.tieBreaker(); - if (tieBreaker != null) { - disMaxQuery.tieBreaker(tieBreaker); - } else { - disMaxQuery.tieBreaker(originalQuery.type().tieBreaker()); - } + disMaxQuery.tieBreaker(Objects.requireNonNullElseGet(tieBreaker, () -> originalQuery.type().tieBreaker())); disMaxQuery.boost(originalQuery.boost()); disMaxQuery.queryName(originalQuery.queryName()); return disMaxQuery; @@ -204,7 +200,7 @@ private QueryBuilder buildMostFieldsSemanticQuery( ) { BoolQueryBuilder boolQuery = new BoolQueryBuilder(); for (String fieldName : inferenceFields) { - boolQuery.should(createSemanticQuery(fieldName, queryValue, fieldsBoosts, false)); + boolQuery.should(createSemanticQuery(fieldName, queryValue, fieldsBoosts)); } // Apply minimumShouldMatch - use original query's value or default to "1" String minimumShouldMatch = originalQuery.minimumShouldMatch(); @@ -245,11 +241,7 @@ private QueryBuilder buildBestFieldsCombinedQuery( // Apply tie_breaker - use explicit value or fall back to type's default Float tieBreaker = originalQuery.tieBreaker(); - if (tieBreaker != null) { - disMaxQuery.tieBreaker(tieBreaker); - } else { - disMaxQuery.tieBreaker(originalQuery.type().tieBreaker()); - } + disMaxQuery.tieBreaker(Objects.requireNonNullElseGet(tieBreaker, () -> originalQuery.type().tieBreaker())); disMaxQuery.boost(originalQuery.boost()); disMaxQuery.queryName(originalQuery.queryName()); return disMaxQuery; From 8b472691dda96dade85478b160f0479c37f0fa72 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 8 Aug 2025 16:30:09 -0400 Subject: [PATCH 18/46] fix multi index broken search --- ...nticMultiMatchQueryRewriteInterceptor.java | 99 +++++++++---------- .../SemanticQueryRewriteInterceptor.java | 26 +++++ 2 files changed, 70 insertions(+), 55 deletions(-) diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 244ba564a8b20..e3d3d742e6280 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -19,7 +19,6 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.registry.ModelRegistry; -import java.util.HashMap; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -218,25 +217,31 @@ private QueryBuilder buildBestFieldsCombinedQuery( ) { DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); - // Add semantic queries for each inference field across different indices + // Add semantic queries for each inference field, but only in indices where that field is semantic for (String fieldName : inferenceInfo.getAllInferenceFields()) { - disMaxQuery.add( - createSemanticSubQuery( - inferenceInfo.getInferenceIndices(), - fieldName, - queryValue - ).boost(fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)) - ); + Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); + if (semanticIndices.isEmpty() == false) { + disMaxQuery.add( + createSemanticSubQuery( + semanticIndices, + fieldName, + queryValue + ).boost(fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)) + ); + } } - // Add non-inference query for non-inference fields - if (inferenceInfo.hasNonInferenceFields()) { - MultiMatchQueryBuilder nonInferenceQuery = createNonInferenceQueryForIndex( - originalQuery, - inferenceInfo.getAllNonInferenceFields(), - fieldsBoosts - ); - 
disMaxQuery.add(createSubQueryForIndices(inferenceInfo.nonInferenceIndices(), nonInferenceQuery)); + // Add separate queries for non-inference fields, but only in indices where they are non-inference + for (String fieldName : inferenceInfo.getAllNonInferenceFields()) { + Set nonInferenceIndices = inferenceInfo.getNonInferenceIndicesForField(fieldName); + if (nonInferenceIndices.isEmpty() == false) { + // Create a single-field multi_match query for this field + MultiMatchQueryBuilder singleFieldQuery = new MultiMatchQueryBuilder(originalQuery.value()); + singleFieldQuery.field(fieldName, fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)); + copyQueryProperties(originalQuery, singleFieldQuery); + + disMaxQuery.add(createSubQueryForIndices(nonInferenceIndices, singleFieldQuery)); + } } // Apply tie_breaker - use explicit value or fall back to type's default @@ -255,25 +260,31 @@ private QueryBuilder buildMostFieldsCombinedQuery( ) { BoolQueryBuilder boolQuery = new BoolQueryBuilder(); - // Add semantic queries for each inference field + // Add semantic queries for each inference field, but only in indices where that field is semantic for (String fieldName : inferenceInfo.getAllInferenceFields()) { - boolQuery.should( - createSemanticSubQuery( - inferenceInfo.getInferenceIndices(), - fieldName, - queryValue - ).boost(fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)) - ); + Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); + if (semanticIndices.isEmpty() == false) { + boolQuery.should( + createSemanticSubQuery( + semanticIndices, + fieldName, + queryValue + ).boost(fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)) + ); + } } - // Add non-inference query for non-inference fields - if (inferenceInfo.hasNonInferenceFields()) { - MultiMatchQueryBuilder nonInferenceQuery = createNonInferenceQueryForIndex( - originalQuery, - inferenceInfo.getAllNonInferenceFields(), - fieldsBoosts 
- ); - boolQuery.should(createSubQueryForIndices(inferenceInfo.nonInferenceIndices(), nonInferenceQuery)); + // Add separate queries for non-inference fields, but only in indices where they are non-inference + for (String fieldName : inferenceInfo.getAllNonInferenceFields()) { + Set nonInferenceIndices = inferenceInfo.getNonInferenceIndicesForField(fieldName); + if (nonInferenceIndices.isEmpty() == false) { + // Create a single-field multi_match query for this field + MultiMatchQueryBuilder singleFieldQuery = new MultiMatchQueryBuilder(originalQuery.value()); + singleFieldQuery.field(fieldName, fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)); + copyQueryProperties(originalQuery, singleFieldQuery); + + boolQuery.should(createSubQueryForIndices(nonInferenceIndices, singleFieldQuery)); + } } // Apply minimumShouldMatch - use original query's value or default to "1" @@ -307,28 +318,6 @@ private void copyQueryProperties(MultiMatchQueryBuilder original, MultiMatchQuer } } - /** - * Creates a non-inference MultiMatchQuery for a specific index with only the specified fields. 
- */ - private MultiMatchQueryBuilder createNonInferenceQueryForIndex( - MultiMatchQueryBuilder originalQuery, - Set nonInferenceFields, - Map fieldsBoosts - ) { - MultiMatchQueryBuilder query = new MultiMatchQueryBuilder(originalQuery.value()); - - // Set only the non-inference fields with their boosts - Map filteredFields = new HashMap<>(); - for (String fieldName : nonInferenceFields) { - float boost = fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST); - filteredFields.put(fieldName, boost); - } - query.fields(filteredFields); - - copyQueryProperties(originalQuery, query); - - return query; - } /** * Detects and warns about score range mismatches when a multi_match query has at least one dense vector model (TEXT_EMBEDDING) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index b0f917bc5c920..f8bd024abee96 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -238,5 +238,31 @@ public Map> getInferenceIdsIndices() { } return result; } + + /** + * Returns the set of indices where the given field is a semantic field (has inference metadata). + */ + public Set getInferenceIndicesForField(String fieldName) { + Set indices = new HashSet<>(); + for (Map.Entry> entry : inferenceFieldsPerIndex.entrySet()) { + if (entry.getValue().containsKey(fieldName)) { + indices.add(entry.getKey()); + } + } + return indices; + } + + /** + * Returns the set of indices where the given field is a non-semantic field. 
+ */ + public Set getNonInferenceIndicesForField(String fieldName) { + Set indices = new HashSet<>(); + for (Map.Entry> entry : nonInferenceFieldsPerIndex.entrySet()) { + if (entry.getValue().contains(fieldName)) { + indices.add(entry.getKey()); + } + } + return indices; + } } } From 7064442367c7f38deac05fa0d65e70e111081387 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 8 Aug 2025 17:44:58 -0400 Subject: [PATCH 19/46] refactor recalculating of non-inference fields --- ...nticMultiMatchQueryRewriteInterceptor.java | 66 ++++++++----------- .../SemanticQueryRewriteInterceptor.java | 60 +++++------------ 2 files changed, 45 insertions(+), 81 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index e3d3d742e6280..357be2c4a6d19 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -19,6 +19,7 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.registry.ModelRegistry; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -127,7 +128,6 @@ private QueryBuilder buildMultiFieldSemanticQuery( }; } - /** * Validates that the multi_match query type is supported for semantic_text fields. * Throws IllegalArgumentException for unsupported types. @@ -158,19 +158,14 @@ private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { /** * Creates a semantic query with field boost applied. 
*/ - private SemanticQueryBuilder createSemanticQuery( - String fieldName, - String queryValue, - Map fieldsBoosts - ) { + private SemanticQueryBuilder createSemanticQuery(String fieldName, String queryValue, Map fieldsBoosts) { SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); float fieldBoost = fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST); semanticQuery.boost(fieldBoost); return semanticQuery; } - - private QueryBuilder buildBestFieldsSemanticQuery( + private QueryBuilder buildBestFieldsSemanticQuery( MultiMatchQueryBuilder originalQuery, Map fieldsBoosts, Set inferenceFields, @@ -222,26 +217,23 @@ private QueryBuilder buildBestFieldsCombinedQuery( Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); if (semanticIndices.isEmpty() == false) { disMaxQuery.add( - createSemanticSubQuery( - semanticIndices, - fieldName, - queryValue - ).boost(fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)) + createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost( + fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST) + ) ); } } - // Add separate queries for non-inference fields, but only in indices where they are non-inference - for (String fieldName : inferenceInfo.getAllNonInferenceFields()) { - Set nonInferenceIndices = inferenceInfo.getNonInferenceIndicesForField(fieldName); - if (nonInferenceIndices.isEmpty() == false) { - // Create a single-field multi_match query for this field - MultiMatchQueryBuilder singleFieldQuery = new MultiMatchQueryBuilder(originalQuery.value()); - singleFieldQuery.field(fieldName, fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)); - copyQueryProperties(originalQuery, singleFieldQuery); + // Add one multi_match query per index containing all non-inference fields in that index + for (Map.Entry> entry : inferenceInfo.nonInferenceFieldsPerIndex().entrySet()) { + String indexName = 
entry.getKey(); + Map indexFields = entry.getValue(); - disMaxQuery.add(createSubQueryForIndices(nonInferenceIndices, singleFieldQuery)); - } + MultiMatchQueryBuilder indexQuery = new MultiMatchQueryBuilder(originalQuery.value()); + indexQuery.fields(indexFields); + copyQueryProperties(originalQuery, indexQuery); + + disMaxQuery.add(createSubQueryForIndices(List.of(indexName), indexQuery)); } // Apply tie_breaker - use explicit value or fall back to type's default @@ -265,26 +257,23 @@ private QueryBuilder buildMostFieldsCombinedQuery( Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); if (semanticIndices.isEmpty() == false) { boolQuery.should( - createSemanticSubQuery( - semanticIndices, - fieldName, - queryValue - ).boost(fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)) + createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost( + fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST) + ) ); } } - // Add separate queries for non-inference fields, but only in indices where they are non-inference - for (String fieldName : inferenceInfo.getAllNonInferenceFields()) { - Set nonInferenceIndices = inferenceInfo.getNonInferenceIndicesForField(fieldName); - if (nonInferenceIndices.isEmpty() == false) { - // Create a single-field multi_match query for this field - MultiMatchQueryBuilder singleFieldQuery = new MultiMatchQueryBuilder(originalQuery.value()); - singleFieldQuery.field(fieldName, fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST)); - copyQueryProperties(originalQuery, singleFieldQuery); + // Add one multi_match query per index containing all non-inference fields in that index + for (Map.Entry> entry : inferenceInfo.nonInferenceFieldsPerIndex().entrySet()) { + String indexName = entry.getKey(); + Map indexFields = entry.getValue(); - boolQuery.should(createSubQueryForIndices(nonInferenceIndices, singleFieldQuery)); - } + MultiMatchQueryBuilder indexQuery = new 
MultiMatchQueryBuilder(originalQuery.value()); + indexQuery.fields(indexFields); + copyQueryProperties(originalQuery, indexQuery); + + boolQuery.should(createSubQueryForIndices(List.of(indexName), indexQuery)); } // Apply minimumShouldMatch - use original query's value or default to "1" @@ -318,7 +307,6 @@ private void copyQueryProperties(MultiMatchQueryBuilder original, MultiMatchQuer } } - /** * Detects and warns about score range mismatches when a multi_match query has at least one dense vector model (TEXT_EMBEDDING) * mixed with sparse vector models (SPARSE_EMBEDDING) or non-inference fields. diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index f8bd024abee96..8c500314c5afd 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -107,19 +107,19 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); Map> inferenceFieldsPerIndex = new HashMap<>(); - Map> nonInferenceFieldsPerIndex = new HashMap<>(); + Map> nonInferenceFieldsPerIndex = new HashMap<>(); for (IndexMetadata indexMetadata : indexMetadataCollection) { String indexName = indexMetadata.getIndex().getName(); Map indexInferenceFields = new HashMap<>(); - Set indexNonInferenceFields = new HashSet<>(); + Map indexNonInferenceFields = new HashMap<>(); // Classify each field as inference or non-inference for (String fieldName : fieldNames) { if (indexMetadata.getInferenceFields().containsKey(fieldName)) { indexInferenceFields.put(fieldName, indexMetadata.getInferenceFields().get(fieldName)); } 
else { - indexNonInferenceFields.add(fieldName); + indexNonInferenceFields.put(fieldName, fieldsWithWeights.get(fieldName)); } } @@ -174,40 +174,28 @@ protected QueryBuilder createSemanticSubQuery(Collection indices, String public record InferenceIndexInformationForField( // Map: IndexName -> (FieldName -> InferenceFieldMetadata) Map> inferenceFieldsPerIndex, - // Map: IndexName -> Set of non-inference field names - Map> nonInferenceFieldsPerIndex + // Map: IndexName -> (FieldName -> Boost) + Map> nonInferenceFieldsPerIndex ) { // Backward compatibility for single-field queries - public InferenceIndexInformationForField(String fieldName, Map inferenceIndicesMetadata, List nonInferenceIndices) { + public InferenceIndexInformationForField( + String fieldName, + Map inferenceIndicesMetadata, + List nonInferenceIndices + ) { this( // Convert single field metadata to multi-field structure - inferenceIndicesMetadata.entrySet().stream() - .collect(Collectors.toMap( - Map.Entry::getKey, - entry -> Map.of(fieldName, entry.getValue()) - )), - // Convert non-inference indices to multi-field structure - nonInferenceIndices.stream() - .collect(Collectors.toMap( - indexName -> indexName, - indexName -> Set.of(fieldName) - )) + inferenceIndicesMetadata.entrySet() + .stream() + .collect(Collectors.toMap(Map.Entry::getKey, entry -> Map.of(fieldName, entry.getValue()))), + // Convert non-inference indices to multi-field structure with default boost + nonInferenceIndices.stream().collect(Collectors.toMap(indexName -> indexName, indexName -> Map.of(fieldName, 1.0f))) ); } public Set getAllInferenceFields() { - return inferenceFieldsPerIndex.values() - .stream() - .flatMap(fields -> fields.keySet().stream()) - .collect(Collectors.toSet()); - } - - public Set getAllNonInferenceFields() { - return nonInferenceFieldsPerIndex.values() - .stream() - .flatMap(Set::stream) - .collect(Collectors.toSet()); + return inferenceFieldsPerIndex.values().stream().flatMap(fields -> 
fields.keySet().stream()).collect(Collectors.toSet()); } public boolean hasInferenceFields() { @@ -238,7 +226,7 @@ public Map> getInferenceIdsIndices() { } return result; } - + /** * Returns the set of indices where the given field is a semantic field (has inference metadata). */ @@ -251,18 +239,6 @@ public Set getInferenceIndicesForField(String fieldName) { } return indices; } - - /** - * Returns the set of indices where the given field is a non-semantic field. - */ - public Set getNonInferenceIndicesForField(String fieldName) { - Set indices = new HashSet<>(); - for (Map.Entry> entry : nonInferenceFieldsPerIndex.entrySet()) { - if (entry.getValue().contains(fieldName)) { - indices.add(entry.getKey()); - } - } - return indices; - } + } } From 76af4b09b6d2c9b4c75bc12879971c889568560a Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Mon, 11 Aug 2025 14:19:33 -0400 Subject: [PATCH 20/46] wildcard resolution with field boost --- ...nticMultiMatchQueryRewriteInterceptor.java | 53 +++++++------- .../SemanticQueryRewriteInterceptor.java | 71 ++++++++++++++++--- 2 files changed, 86 insertions(+), 38 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 357be2c4a6d19..62890fec456fc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -74,7 +74,7 @@ protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceI SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); // Apply per-field boost - float fieldBoost = fieldsBoosts.getOrDefault(fieldName, 
AbstractQueryBuilder.DEFAULT_BOOST); + float fieldBoost = indexInformation.getInferenceFieldBoost(fieldName); // Apply top-level query boost with per field and name semanticQuery.boost(fieldBoost * originalQuery.boost()); @@ -83,7 +83,7 @@ protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceI } else { // Multiple inference fields - handle based on multi-match query type (validation happens here) detectAndWarnScoreRangeMismatch(indexInformation); - return buildMultiFieldSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); + return buildMultiFieldSemanticQuery(originalQuery, inferenceFields, queryValue, indexInformation); } } @@ -100,11 +100,11 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( detectAndWarnScoreRangeMismatch(indexInformation); return switch (originalQuery.type()) { - case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, indexInformation, queryValue); - case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, fieldsBoosts, indexInformation, queryValue); + case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, indexInformation, queryValue); + case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, indexInformation, queryValue); default -> // Fallback to best_fields behavior - buildBestFieldsCombinedQuery(originalQuery, fieldsBoosts, indexInformation, queryValue); + buildBestFieldsCombinedQuery(originalQuery, indexInformation, queryValue); }; } @@ -115,16 +115,16 @@ public String getQueryName() { private QueryBuilder buildMultiFieldSemanticQuery( MultiMatchQueryBuilder originalQuery, - Map fieldsBoosts, Set inferenceFields, - String queryValue + String queryValue, + InferenceIndexInformationForField indexInformation ) { return switch (originalQuery.type()) { - case BEST_FIELDS -> buildBestFieldsSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); - case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, 
fieldsBoosts, inferenceFields, queryValue); + case BEST_FIELDS -> buildBestFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); + case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); default -> // Fallback to best_fields behavior for unknown types - buildBestFieldsSemanticQuery(originalQuery, fieldsBoosts, inferenceFields, queryValue); + buildBestFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); }; } @@ -156,24 +156,25 @@ private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { } /** - * Creates a semantic query with field boost applied. + * Creates a semantic query with field boost applied, supporting wildcard-resolved boosts. */ - private SemanticQueryBuilder createSemanticQuery(String fieldName, String queryValue, Map fieldsBoosts) { + private SemanticQueryBuilder createSemanticQuery(String fieldName, String queryValue, InferenceIndexInformationForField inferenceInfo) { SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); - float fieldBoost = fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST); + // Use resolved wildcard boost from inference info + float fieldBoost = inferenceInfo.getInferenceFieldBoost(fieldName); semanticQuery.boost(fieldBoost); return semanticQuery; } private QueryBuilder buildBestFieldsSemanticQuery( MultiMatchQueryBuilder originalQuery, - Map fieldsBoosts, + InferenceIndexInformationForField indexInformation, Set inferenceFields, String queryValue ) { DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); for (String fieldName : inferenceFields) { - disMaxQuery.add(createSemanticQuery(fieldName, queryValue, fieldsBoosts)); + disMaxQuery.add(createSemanticQuery(fieldName, queryValue, indexInformation)); } // Apply tie_breaker - use explicit value or fall back to type's default Float tieBreaker = originalQuery.tieBreaker(); @@ 
-188,13 +189,13 @@ private QueryBuilder buildBestFieldsSemanticQuery( */ private QueryBuilder buildMostFieldsSemanticQuery( MultiMatchQueryBuilder originalQuery, - Map fieldsBoosts, + InferenceIndexInformationForField indexInformation, Set inferenceFields, String queryValue ) { BoolQueryBuilder boolQuery = new BoolQueryBuilder(); for (String fieldName : inferenceFields) { - boolQuery.should(createSemanticQuery(fieldName, queryValue, fieldsBoosts)); + boolQuery.should(createSemanticQuery(fieldName, queryValue, indexInformation)); } // Apply minimumShouldMatch - use original query's value or default to "1" String minimumShouldMatch = originalQuery.minimumShouldMatch(); @@ -206,7 +207,6 @@ private QueryBuilder buildMostFieldsSemanticQuery( private QueryBuilder buildBestFieldsCombinedQuery( MultiMatchQueryBuilder originalQuery, - Map fieldsBoosts, InferenceIndexInformationForField inferenceInfo, String queryValue ) { @@ -216,11 +216,9 @@ private QueryBuilder buildBestFieldsCombinedQuery( for (String fieldName : inferenceInfo.getAllInferenceFields()) { Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); if (semanticIndices.isEmpty() == false) { - disMaxQuery.add( - createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost( - fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST) - ) - ); + // Create semantic query with resolved wildcard boost + float fieldBoost = inferenceInfo.getInferenceFieldBoost(fieldName); + disMaxQuery.add(createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(fieldBoost)); } } @@ -246,7 +244,6 @@ private QueryBuilder buildBestFieldsCombinedQuery( private QueryBuilder buildMostFieldsCombinedQuery( MultiMatchQueryBuilder originalQuery, - Map fieldsBoosts, InferenceIndexInformationForField inferenceInfo, String queryValue ) { @@ -256,11 +253,9 @@ private QueryBuilder buildMostFieldsCombinedQuery( for (String fieldName : inferenceInfo.getAllInferenceFields()) { Set semanticIndices = 
inferenceInfo.getInferenceIndicesForField(fieldName); if (semanticIndices.isEmpty() == false) { - boolQuery.should( - createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost( - fieldsBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST) - ) - ); + // Create semantic query with resolved wildcard boost + float fieldBoost = inferenceInfo.getInferenceFieldBoost(fieldName); + boolQuery.should(createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(fieldBoost)); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 8c500314c5afd..38f7d6883ad7c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -10,7 +10,9 @@ import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.index.mapper.IndexFieldMapper; +import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; @@ -103,26 +105,55 @@ protected abstract QueryBuilder buildCombinedInferenceAndNonInferenceQuery( private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder queryBuilder, ResolvedIndices resolvedIndices) { Map fieldsWithWeights = getFieldsWithWeights(queryBuilder); - Set fieldNames = fieldsWithWeights.keySet(); Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); Map> 
inferenceFieldsPerIndex = new HashMap<>(); Map> nonInferenceFieldsPerIndex = new HashMap<>(); + Map globalResolvedInferenceFieldBoosts = new HashMap<>(); for (IndexMetadata indexMetadata : indexMetadataCollection) { String indexName = indexMetadata.getIndex().getName(); Map indexInferenceFields = new HashMap<>(); Map indexNonInferenceFields = new HashMap<>(); - // Classify each field as inference or non-inference - for (String fieldName : fieldNames) { - if (indexMetadata.getInferenceFields().containsKey(fieldName)) { - indexInferenceFields.put(fieldName, indexMetadata.getInferenceFields().get(fieldName)); + Map indexInferenceMetadata = indexMetadata.getInferenceFields(); + + // Expand wildcards for inference fields only (following RRF pattern) + Map resolvedInferenceFields = new HashMap<>(); + for (Map.Entry entry : fieldsWithWeights.entrySet()) { + String field = entry.getKey(); + Float weight = entry.getValue(); + + if (Regex.isMatchAllPattern(field)) { + // Handle "*" - match all inference fields + indexInferenceMetadata.keySet().forEach(f -> + addToInferenceFieldsMap(resolvedInferenceFields, f, weight)); + } else if (Regex.isSimpleMatchPattern(field)) { + // Handle wildcards like "text*", "*field", etc. 
+ indexInferenceMetadata.keySet() + .stream() + .filter(f -> Regex.simpleMatch(field, f)) + .forEach(f -> addToInferenceFieldsMap(resolvedInferenceFields, f, weight)); } else { - indexNonInferenceFields.put(fieldName, fieldsWithWeights.get(fieldName)); + // No wildcards in field name - exact match + if (indexInferenceMetadata.containsKey(field)) { + addToInferenceFieldsMap(resolvedInferenceFields, field, weight); + } } } + // Copy resolved inference fields to metadata map and aggregate global boosts + for (String fieldName : resolvedInferenceFields.keySet()) { + indexInferenceFields.put(fieldName, indexInferenceMetadata.get(fieldName)); + // Store the resolved boost globally (same field should have same boost across indices) + globalResolvedInferenceFieldBoosts.put(fieldName, resolvedInferenceFields.get(fieldName)); + } + + // Non-inference fields: start with all original patterns, remove only resolved inference field names + // This preserves wildcard patterns that MultiMatchQueryBuilder will expand itself + indexNonInferenceFields = new HashMap<>(fieldsWithWeights); + indexNonInferenceFields.keySet().removeAll(resolvedInferenceFields.keySet()); + // Store inference fields if any exist if (indexInferenceFields.isEmpty() == false) { inferenceFieldsPerIndex.put(indexName, indexInferenceFields); @@ -134,7 +165,14 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q } } - return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex); + return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, globalResolvedInferenceFieldBoosts); + } + + /** + * Helper method to add inference fields with weight handling like in RRF + */ + private void addToInferenceFieldsMap(Map inferenceFields, String fieldName, Float weight) { + inferenceFields.compute(fieldName, (k, v) -> v == null ? 
weight : v * weight); } private InferenceIndexInformationForField resolveIndicesForField(String fieldName, ResolvedIndices resolvedIndices) { @@ -175,7 +213,9 @@ public record InferenceIndexInformationForField( // Map: IndexName -> (FieldName -> InferenceFieldMetadata) Map> inferenceFieldsPerIndex, // Map: IndexName -> (FieldName -> Boost) - Map> nonInferenceFieldsPerIndex + Map> nonInferenceFieldsPerIndex, + // Map: FieldName -> ResolvedBoost - stores resolved wildcard boosts for inference fields + Map resolvedInferenceFieldBoosts ) { // Backward compatibility for single-field queries @@ -190,7 +230,9 @@ public InferenceIndexInformationForField( .stream() .collect(Collectors.toMap(Map.Entry::getKey, entry -> Map.of(fieldName, entry.getValue()))), // Convert non-inference indices to multi-field structure with default boost - nonInferenceIndices.stream().collect(Collectors.toMap(indexName -> indexName, indexName -> Map.of(fieldName, 1.0f))) + nonInferenceIndices.stream().collect(Collectors.toMap(indexName -> indexName, indexName -> Map.of(fieldName, 1.0f))), + // Default boost for single-field (no wildcards) + Map.of(fieldName, 1.0f) ); } @@ -240,5 +282,16 @@ public Set getInferenceIndicesForField(String fieldName) { return indices; } + /** + * Returns the resolved boost for an inference field. + * This accounts for wildcard expansion boosts. 
+ * + * @param fieldName the field name + * @return the resolved boost for the field + */ + public float getInferenceFieldBoost(String fieldName) { + return resolvedInferenceFieldBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST); + } + } } From 53a9a6fc4d0787f1b2415bd371a9ab228b141559 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Mon, 11 Aug 2025 16:10:31 -0400 Subject: [PATCH 21/46] refactor and simplify data structure --- ...nticMultiMatchQueryRewriteInterceptor.java | 41 +++---- .../SemanticQueryRewriteInterceptor.java | 108 +++++------------- 2 files changed, 51 insertions(+), 98 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 62890fec456fc..34bbe3153c514 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -9,7 +9,6 @@ import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.features.NodeFeature; -import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; @@ -19,6 +18,7 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.registry.ModelRegistry; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -63,7 +63,6 @@ protected String getQuery(QueryBuilder queryBuilder) { @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { 
MultiMatchQueryBuilder originalQuery = (MultiMatchQueryBuilder) queryBuilder; - Map fieldsBoosts = getFieldsWithWeights(queryBuilder); String queryValue = getQuery(queryBuilder); Set inferenceFields = indexInformation.getAllInferenceFields(); @@ -73,11 +72,8 @@ protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceI String fieldName = inferenceFields.iterator().next(); SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); - // Apply per-field boost - float fieldBoost = indexInformation.getInferenceFieldBoost(fieldName); - // Apply top-level query boost with per field and name - semanticQuery.boost(fieldBoost * originalQuery.boost()); + semanticQuery.boost(indexInformation.getFieldBoost(fieldName) * originalQuery.boost()); semanticQuery.queryName(originalQuery.queryName()); return semanticQuery; } else { @@ -93,7 +89,6 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( InferenceIndexInformationForField indexInformation ) { MultiMatchQueryBuilder originalQuery = (MultiMatchQueryBuilder) queryBuilder; - Map fieldsBoosts = getFieldsWithWeights(queryBuilder); String queryValue = getQuery(queryBuilder); validateQueryTypeSupported(originalQuery.type()); @@ -160,9 +155,7 @@ private void validateQueryTypeSupported(MultiMatchQueryBuilder.Type queryType) { */ private SemanticQueryBuilder createSemanticQuery(String fieldName, String queryValue, InferenceIndexInformationForField inferenceInfo) { SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); - // Use resolved wildcard boost from inference info - float fieldBoost = inferenceInfo.getInferenceFieldBoost(fieldName); - semanticQuery.boost(fieldBoost); + semanticQuery.boost(inferenceInfo.getFieldBoost(fieldName)); return semanticQuery; } @@ -216,16 +209,20 @@ private QueryBuilder buildBestFieldsCombinedQuery( for (String fieldName : inferenceInfo.getAllInferenceFields()) { Set semanticIndices = 
inferenceInfo.getInferenceIndicesForField(fieldName); if (semanticIndices.isEmpty() == false) { - // Create semantic query with resolved wildcard boost - float fieldBoost = inferenceInfo.getInferenceFieldBoost(fieldName); - disMaxQuery.add(createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(fieldBoost)); + disMaxQuery.add(createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName))); } } // Add one multi_match query per index containing all non-inference fields in that index - for (Map.Entry> entry : inferenceInfo.nonInferenceFieldsPerIndex().entrySet()) { + for (Map.Entry> entry : inferenceInfo.nonInferenceFieldsPerIndex().entrySet()) { String indexName = entry.getKey(); - Map indexFields = entry.getValue(); + Set indexFieldNames = entry.getValue(); + + // Build field map with boosts from fieldBoosts Map + Map indexFields = new HashMap<>(); + for (String fieldName : indexFieldNames) { + indexFields.put(fieldName, inferenceInfo.getFieldBoost(fieldName)); + } MultiMatchQueryBuilder indexQuery = new MultiMatchQueryBuilder(originalQuery.value()); indexQuery.fields(indexFields); @@ -253,16 +250,20 @@ private QueryBuilder buildMostFieldsCombinedQuery( for (String fieldName : inferenceInfo.getAllInferenceFields()) { Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); if (semanticIndices.isEmpty() == false) { - // Create semantic query with resolved wildcard boost - float fieldBoost = inferenceInfo.getInferenceFieldBoost(fieldName); - boolQuery.should(createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(fieldBoost)); + boolQuery.should(createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName))); } } // Add one multi_match query per index containing all non-inference fields in that index - for (Map.Entry> entry : inferenceInfo.nonInferenceFieldsPerIndex().entrySet()) { + for (Map.Entry> entry : 
inferenceInfo.nonInferenceFieldsPerIndex().entrySet()) { String indexName = entry.getKey(); - Map indexFields = entry.getValue(); + Set indexFieldNames = entry.getValue(); + + // Build field map with boosts from global fieldBoosts + Map indexFields = new HashMap<>(); + for (String fieldName : indexFieldNames) { + indexFields.put(fieldName, inferenceInfo.getFieldBoost(fieldName)); + } MultiMatchQueryBuilder indexQuery = new MultiMatchQueryBuilder(originalQuery.value()); indexQuery.fields(indexFields); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 38f7d6883ad7c..40d6389ec3c1c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -108,89 +108,59 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); Map> inferenceFieldsPerIndex = new HashMap<>(); - Map> nonInferenceFieldsPerIndex = new HashMap<>(); - Map globalResolvedInferenceFieldBoosts = new HashMap<>(); + Map> nonInferenceFieldsPerIndex = new HashMap<>(); + Map fieldBoosts = new HashMap<>(); for (IndexMetadata indexMetadata : indexMetadataCollection) { String indexName = indexMetadata.getIndex().getName(); Map indexInferenceFields = new HashMap<>(); - Map indexNonInferenceFields = new HashMap<>(); - Map indexInferenceMetadata = indexMetadata.getInferenceFields(); - // Expand wildcards for inference fields only (following RRF pattern) - Map resolvedInferenceFields = new HashMap<>(); + // Resolve wildcards for inference fields and store boosts for (Map.Entry entry : 
fieldsWithWeights.entrySet()) { String field = entry.getKey(); - Float weight = entry.getValue(); + Float boost = entry.getValue(); if (Regex.isMatchAllPattern(field)) { - // Handle "*" - match all inference fields - indexInferenceMetadata.keySet().forEach(f -> - addToInferenceFieldsMap(resolvedInferenceFields, f, weight)); + indexInferenceMetadata.keySet().forEach(f -> { + indexInferenceFields.put(f, indexInferenceMetadata.get(f)); + fieldBoosts.put(f, boost); + }); } else if (Regex.isSimpleMatchPattern(field)) { - // Handle wildcards like "text*", "*field", etc. indexInferenceMetadata.keySet() .stream() .filter(f -> Regex.simpleMatch(field, f)) - .forEach(f -> addToInferenceFieldsMap(resolvedInferenceFields, f, weight)); - } else { - // No wildcards in field name - exact match - if (indexInferenceMetadata.containsKey(field)) { - addToInferenceFieldsMap(resolvedInferenceFields, field, weight); - } + .forEach(f -> { + indexInferenceFields.put(f, indexInferenceMetadata.get(f)); + fieldBoosts.put(f, boost); + }); + } else if (indexInferenceMetadata.containsKey(field)) { + indexInferenceFields.put(field, indexInferenceMetadata.get(field)); + fieldBoosts.put(field, boost); } } - // Copy resolved inference fields to metadata map and aggregate global boosts - for (String fieldName : resolvedInferenceFields.keySet()) { - indexInferenceFields.put(fieldName, indexInferenceMetadata.get(fieldName)); - // Store the resolved boost globally (same field should have same boost across indices) - globalResolvedInferenceFieldBoosts.put(fieldName, resolvedInferenceFields.get(fieldName)); + // Non-inference fields: original fields minus resolved inference fields + Set indexNonInferenceFields = new HashSet<>(fieldsWithWeights.keySet()); + indexNonInferenceFields.removeAll(indexInferenceFields.keySet()); + + // Store boosts for non-inference fields in global fieldBoosts map + for (String nonInferenceField : indexNonInferenceFields) { + fieldBoosts.put(nonInferenceField, 
fieldsWithWeights.get(nonInferenceField)); } - // Non-inference fields: start with all original patterns, remove only resolved inference field names - // This preserves wildcard patterns that MultiMatchQueryBuilder will expand itself - indexNonInferenceFields = new HashMap<>(fieldsWithWeights); - indexNonInferenceFields.keySet().removeAll(resolvedInferenceFields.keySet()); - - // Store inference fields if any exist if (indexInferenceFields.isEmpty() == false) { inferenceFieldsPerIndex.put(indexName, indexInferenceFields); } - // Store non-inference fields if any exist if (indexNonInferenceFields.isEmpty() == false) { nonInferenceFieldsPerIndex.put(indexName, indexNonInferenceFields); } } - return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, globalResolvedInferenceFieldBoosts); + return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, fieldBoosts); } - /** - * Helper method to add inference fields with weight handling like in RRF - */ - private void addToInferenceFieldsMap(Map inferenceFields, String fieldName, Float weight) { - inferenceFields.compute(fieldName, (k, v) -> v == null ? 
weight : v * weight); - } - - private InferenceIndexInformationForField resolveIndicesForField(String fieldName, ResolvedIndices resolvedIndices) { - Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); - Map inferenceIndicesMetadata = new HashMap<>(); - List nonInferenceIndices = new ArrayList<>(); - for (IndexMetadata indexMetadata : indexMetadataCollection) { - String indexName = indexMetadata.getIndex().getName(); - InferenceFieldMetadata inferenceFieldMetadata = indexMetadata.getInferenceFields().get(fieldName); - if (inferenceFieldMetadata != null) { - inferenceIndicesMetadata.put(indexName, inferenceFieldMetadata); - } else { - nonInferenceIndices.add(indexName); - } - } - - return new InferenceIndexInformationForField(fieldName, inferenceIndicesMetadata, nonInferenceIndices); - } protected QueryBuilder createSubQueryForIndices(Collection indices, QueryBuilder queryBuilder) { BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); @@ -212,30 +182,12 @@ protected QueryBuilder createSemanticSubQuery(Collection indices, String public record InferenceIndexInformationForField( // Map: IndexName -> (FieldName -> InferenceFieldMetadata) Map> inferenceFieldsPerIndex, - // Map: IndexName -> (FieldName -> Boost) - Map> nonInferenceFieldsPerIndex, - // Map: FieldName -> ResolvedBoost - stores resolved wildcard boosts for inference fields - Map resolvedInferenceFieldBoosts + // Map: IndexName -> Set - non-inference fields per index (boosts stored in fieldBoosts) + Map> nonInferenceFieldsPerIndex, + // Map: FieldName -> Boost - stores boosts for all fields (both inference and non-inference) + Map fieldBoosts ) { - // Backward compatibility for single-field queries - public InferenceIndexInformationForField( - String fieldName, - Map inferenceIndicesMetadata, - List nonInferenceIndices - ) { - this( - // Convert single field metadata to multi-field structure - inferenceIndicesMetadata.entrySet() - .stream() - 
.collect(Collectors.toMap(Map.Entry::getKey, entry -> Map.of(fieldName, entry.getValue()))), - // Convert non-inference indices to multi-field structure with default boost - nonInferenceIndices.stream().collect(Collectors.toMap(indexName -> indexName, indexName -> Map.of(fieldName, 1.0f))), - // Default boost for single-field (no wildcards) - Map.of(fieldName, 1.0f) - ); - } - public Set getAllInferenceFields() { return inferenceFieldsPerIndex.values().stream().flatMap(fields -> fields.keySet().stream()).collect(Collectors.toSet()); } @@ -283,14 +235,14 @@ public Set getInferenceIndicesForField(String fieldName) { } /** - * Returns the resolved boost for an inference field. + * Returns the resolved boost for a field (inference or non-inference). * This accounts for wildcard expansion boosts. * * @param fieldName the field name * @return the resolved boost for the field */ - public float getInferenceFieldBoost(String fieldName) { - return resolvedInferenceFieldBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST); + public float getFieldBoost(String fieldName) { + return fieldBoosts.getOrDefault(fieldName, AbstractQueryBuilder.DEFAULT_BOOST); } } From de5aa6cc803a30be257bab6f7c6f5a956be75f1b Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Mon, 11 Aug 2025 16:10:57 -0400 Subject: [PATCH 22/46] linting --- .../SemanticMultiMatchQueryRewriteInterceptor.java | 8 ++++++-- .../queries/SemanticQueryRewriteInterceptor.java | 14 +++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 34bbe3153c514..b1ae85a52e591 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -209,7 +209,9 @@ private QueryBuilder buildBestFieldsCombinedQuery( for (String fieldName : inferenceInfo.getAllInferenceFields()) { Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); if (semanticIndices.isEmpty() == false) { - disMaxQuery.add(createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName))); + disMaxQuery.add( + createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName)) + ); } } @@ -250,7 +252,9 @@ private QueryBuilder buildMostFieldsCombinedQuery( for (String fieldName : inferenceInfo.getAllInferenceFields()) { Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); if (semanticIndices.isEmpty() == false) { - boolQuery.should(createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName))); + boolQuery.should( + createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName)) + ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 40d6389ec3c1c..a01a820966661 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -127,13 +127,10 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q fieldBoosts.put(f, boost); }); } else if (Regex.isSimpleMatchPattern(field)) { - indexInferenceMetadata.keySet() - .stream() - .filter(f -> Regex.simpleMatch(field, f)) - .forEach(f -> 
{ - indexInferenceFields.put(f, indexInferenceMetadata.get(f)); - fieldBoosts.put(f, boost); - }); + indexInferenceMetadata.keySet().stream().filter(f -> Regex.simpleMatch(field, f)).forEach(f -> { + indexInferenceFields.put(f, indexInferenceMetadata.get(f)); + fieldBoosts.put(f, boost); + }); } else if (indexInferenceMetadata.containsKey(field)) { indexInferenceFields.put(field, indexInferenceMetadata.get(field)); fieldBoosts.put(field, boost); @@ -143,7 +140,7 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q // Non-inference fields: original fields minus resolved inference fields Set indexNonInferenceFields = new HashSet<>(fieldsWithWeights.keySet()); indexNonInferenceFields.removeAll(indexInferenceFields.keySet()); - + // Store boosts for non-inference fields in global fieldBoosts map for (String nonInferenceField : indexNonInferenceFields) { fieldBoosts.put(nonInferenceField, fieldsWithWeights.get(nonInferenceField)); @@ -161,7 +158,6 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, fieldBoosts); } - protected QueryBuilder createSubQueryForIndices(Collection indices, QueryBuilder queryBuilder) { BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); boolQueryBuilder.must(queryBuilder); From 22a958dc0ad28fe98fcfe0e298dce3ec8595dfea Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Mon, 11 Aug 2025 16:50:04 -0400 Subject: [PATCH 23/46] adding yaml tests for multi_match --- .../100_semantic_text_multi_match.yml | 344 ++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml 
b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml new file mode 100644 index 0000000000000..3df3394feccc1 --- /dev/null +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml @@ -0,0 +1,344 @@ +setup: + - requires: + cluster_features: "search.semantic_multi_match_query_rewrite_interception_supported" + reason: semantic_text multi_match support introduced in 9.2.0 + + - do: + inference.put: + task_type: sparse_embedding + inference_id: sparse-inference-id + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 10, + "api_key": "abc64", + "similarity": "COSINE" + }, + "task_settings": { + } + } + + - do: + indices.create: + index: test-semantic-index + body: + mappings: + properties: + title: + type: semantic_text + inference_id: sparse-inference-id + content: + type: semantic_text + inference_id: sparse-inference-id + summary: + type: semantic_text + inference_id: dense-inference-id + + - do: + indices.create: + index: test-mixed-index + body: + mappings: + properties: + title: + type: text + semantic_content: + type: semantic_text + inference_id: sparse-inference-id + tags: + type: keyword + + - do: + indices.create: + index: test-text-only-index + body: + mappings: + properties: + title: + type: text + content: + type: text + +--- +"Multi-match query on semantic_text fields": + - do: + index: + index: test-semantic-index + id: doc_1 + body: + title: "Machine learning algorithms" + content: "Deep neural networks for computer vision" + summary: "AI and machine learning fundamentals" + refresh: true + + - do: + index: + index: 
test-semantic-index + id: doc_2 + body: + title: "Data science techniques" + content: "Statistical analysis and data processing" + summary: "Data analysis and visualization methods" + refresh: true + + - do: + search: + index: test-semantic-index + body: + query: + multi_match: + query: "machine learning neural networks" + fields: ["title", "content"] + type: "best_fields" + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "doc_1" } + +--- +"Multi-match query with field boosts on semantic_text": + - do: + index: + index: test-semantic-index + id: doc_1 + body: + title: "Advanced algorithms" + content: "Machine learning and artificial intelligence" + summary: "Comprehensive AI guide" + refresh: true + + - do: + headers: + Content-Type: application/json + search: + index: test-semantic-index + body: + query: + multi_match: + query: "machine learning" + fields: ["title^2", "content^1.5", "summary"] + type: "best_fields" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + +--- +"Multi-match query on mixed semantic_text and text fields": + - do: + index: + index: test-mixed-index + id: doc_1 + body: + title: "Quantum computing breakthrough" + semantic_content: "Revolutionary quantum algorithms for cryptography" + tags: ["quantum", "computing"] + refresh: true + + - do: + index: + index: test-mixed-index + id: doc_2 + body: + title: "AI research advances" + semantic_content: "Neural network architectures and deep learning" + tags: ["ai", "research"] + refresh: true + + - do: + search: + index: test-mixed-index + body: + query: + multi_match: + query: "quantum algorithms" + fields: ["title", "semantic_content"] + type: "best_fields" + + - match: { hits.total.value: 2 } + +--- +"Multi-match query with wildcard fields on semantic_text": + - do: + index: + index: test-semantic-index + id: doc_1 + body: + title: "Natural language processing" + content: "Text analysis and language models" + summary: "NLP fundamentals and applications" 
+ refresh: true + + - do: + search: + index: test-semantic-index + body: + query: + multi_match: + query: "language processing" + fields: ["*"] + type: "most_fields" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + +--- +"Multi-match most_fields query on semantic_text": + - do: + index: + index: test-semantic-index + id: doc_1 + body: + title: "Computer vision systems" + content: "Image recognition and computer vision algorithms" + summary: "Visual processing and pattern recognition" + refresh: true + + - do: + search: + index: test-semantic-index + body: + query: + multi_match: + query: "computer vision" + fields: ["title", "content", "summary"] + type: "most_fields" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + +--- +"Multi-match query with overall boost on mixed fields": + - do: + index: + index: test-mixed-index + id: doc_1 + body: + title: "Blockchain technology" + semantic_content: "Distributed ledger systems and cryptocurrency" + tags: ["blockchain", "crypto"] + refresh: true + + - do: + search: + index: test-mixed-index + body: + query: + multi_match: + query: "blockchain systems" + fields: ["title^2", "semantic_content"] + type: "best_fields" + boost: 2.0 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + +--- +"Multi-match query across multiple indices with different field types": + - do: + index: + index: test-semantic-index + id: doc_1 + body: + title: "Robotics automation" + content: "Autonomous systems and robotic controls" + refresh: true + + - do: + index: + index: test-text-only-index + id: doc_2 + body: + title: "Robotics research" + content: "Industrial automation and robotics" + refresh: true + + - do: + search: + index: "test-semantic-index,test-text-only-index" + body: + query: + multi_match: + query: "robotics automation" + fields: ["title", "content"] + type: "best_fields" + + - match: { hits.total.value: 2 } + +--- +"Multi-match query with pattern 
fields and boosts": + - do: + index: + index: test-semantic-index + id: doc_1 + body: + title: "Artificial intelligence" + content: "AI algorithms and machine learning" + summary: "Comprehensive AI overview" + refresh: true + + - do: + search: + index: test-semantic-index + body: + query: + multi_match: + query: "artificial intelligence" + fields: ["title^3", "*content*^1.5", "summ*^2"] + type: "best_fields" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + +--- +"Multi-match query error on unsupported type": + - do: + catch: /multi_match query with type \[cross_fields\] is not supported for semantic_text fields/ + search: + index: test-semantic-index + body: + query: + multi_match: + query: "test query" + fields: ["title", "content"] + type: "cross_fields" + +--- +"Multi-match single field optimization": + - do: + index: + index: test-semantic-index + id: doc_1 + body: + title: "Single field test" + content: "This should be optimized to single semantic query" + refresh: true + + - do: + search: + index: test-semantic-index + body: + query: + multi_match: + query: "single field optimization" + fields: ["title^2"] + type: "best_fields" + boost: 1.5 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } From 6763057b4464fece58039beeeab41fdec6bacc61 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Mon, 11 Aug 2025 16:54:45 -0400 Subject: [PATCH 24/46] adding assertion --- .../queries/SemanticMultiMatchQueryRewriteInterceptor.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index b1ae85a52e591..10c4b0417b3be 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -40,6 +40,7 @@ public SemanticMultiMatchQueryRewriteInterceptor(Supplier modelRe @Override protected String getFieldName(QueryBuilder queryBuilder) { + assert (queryBuilder instanceof MultiMatchQueryBuilder); MultiMatchQueryBuilder multiMatchQuery = (MultiMatchQueryBuilder) queryBuilder; Map fields = multiMatchQuery.fields(); if (fields.size() > 1) { @@ -50,18 +51,21 @@ protected String getFieldName(QueryBuilder queryBuilder) { @Override protected Map getFieldsWithWeights(QueryBuilder queryBuilder) { + assert (queryBuilder instanceof MultiMatchQueryBuilder); MultiMatchQueryBuilder multiMatchQuery = (MultiMatchQueryBuilder) queryBuilder; return multiMatchQuery.fields(); } @Override protected String getQuery(QueryBuilder queryBuilder) { + assert (queryBuilder instanceof MultiMatchQueryBuilder); MultiMatchQueryBuilder multiMatchQuery = (MultiMatchQueryBuilder) queryBuilder; return (String) multiMatchQuery.value(); } @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { + assert (queryBuilder instanceof MultiMatchQueryBuilder); MultiMatchQueryBuilder originalQuery = (MultiMatchQueryBuilder) queryBuilder; String queryValue = getQuery(queryBuilder); Set inferenceFields = indexInformation.getAllInferenceFields(); @@ -88,6 +92,7 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation ) { + assert (queryBuilder instanceof MultiMatchQueryBuilder); MultiMatchQueryBuilder originalQuery = (MultiMatchQueryBuilder) queryBuilder; String queryValue = getQuery(queryBuilder); From fd8fbb69e012b740579552fadd87a65321f20cea Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Mon, 11 Aug 2025 16:59:01 -0400 Subject: [PATCH 25/46] update comments --- 
.../queries/SemanticMultiMatchQueryRewriteInterceptor.java | 6 ++---- .../inference/queries/SemanticQueryRewriteInterceptor.java | 4 ---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 10c4b0417b3be..be0f54b8fcd91 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -103,7 +103,7 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, indexInformation, queryValue); case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, indexInformation, queryValue); default -> - // Fallback to best_fields behavior + // Fallback to best_fields behavior as default type buildBestFieldsCombinedQuery(originalQuery, indexInformation, queryValue); }; } @@ -123,7 +123,7 @@ private QueryBuilder buildMultiFieldSemanticQuery( case BEST_FIELDS -> buildBestFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); default -> - // Fallback to best_fields behavior for unknown types + // Fallback to best_fields behavior for default types buildBestFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); }; } @@ -195,7 +195,6 @@ private QueryBuilder buildMostFieldsSemanticQuery( for (String fieldName : inferenceFields) { boolQuery.should(createSemanticQuery(fieldName, queryValue, indexInformation)); } - // Apply minimumShouldMatch - use original query's value or 
default to "1" String minimumShouldMatch = originalQuery.minimumShouldMatch(); boolQuery.minimumShouldMatch(minimumShouldMatch != null ? minimumShouldMatch : "1"); boolQuery.boost(originalQuery.boost()); @@ -281,7 +280,6 @@ private QueryBuilder buildMostFieldsCombinedQuery( boolQuery.should(createSubQueryForIndices(List.of(indexName), indexQuery)); } - // Apply minimumShouldMatch - use original query's value or default to "1" String minimumShouldMatch = originalQuery.minimumShouldMatch(); boolQuery.minimumShouldMatch(minimumShouldMatch != null ? minimumShouldMatch : "1"); boolQuery.boost(originalQuery.boost()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index a01a820966661..9130464177e02 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -196,7 +196,6 @@ public boolean hasNonInferenceFields() { return nonInferenceFieldsPerIndex.isEmpty() == false; } - // Backward compatibility methods public Collection getInferenceIndices() { return inferenceFieldsPerIndex.keySet(); } @@ -231,9 +230,6 @@ public Set getInferenceIndicesForField(String fieldName) { } /** - * Returns the resolved boost for a field (inference or non-inference). - * This accounts for wildcard expansion boosts. 
- * * @param fieldName the field name * @return the resolved boost for the field */ From 949a28837299d9b8edc9aa82fea1bb7fe47bf631 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Tue, 12 Aug 2025 14:52:09 -0400 Subject: [PATCH 26/46] validate query types for single inference field --- ...SemanticMultiMatchQueryRewriteInterceptor.java | 5 ++--- .../inference/100_semantic_text_multi_match.yml | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index be0f54b8fcd91..bc24a917f15a1 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -68,11 +68,11 @@ protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceI assert (queryBuilder instanceof MultiMatchQueryBuilder); MultiMatchQueryBuilder originalQuery = (MultiMatchQueryBuilder) queryBuilder; String queryValue = getQuery(queryBuilder); + + validateQueryTypeSupported(originalQuery.type()); Set inferenceFields = indexInformation.getAllInferenceFields(); if (inferenceFields.size() == 1) { - // Single inference field - all multi_match types work the same (like original Elasticsearch) - // No validation needed since single field queries don't require type-specific combination logic String fieldName = inferenceFields.iterator().next(); SemanticQueryBuilder semanticQuery = new SemanticQueryBuilder(fieldName, queryValue, false); @@ -81,7 +81,6 @@ protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceI semanticQuery.queryName(originalQuery.queryName()); return semanticQuery; } else { 
- // Multiple inference fields - handle based on multi-match query type (validation happens here) detectAndWarnScoreRangeMismatch(indexInformation); return buildMultiFieldSemanticQuery(originalQuery, inferenceFields, queryValue, indexInformation); } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml index 3df3394feccc1..6354b3da480b1 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml @@ -306,7 +306,7 @@ setup: - match: { hits.hits.0._id: "doc_1" } --- -"Multi-match query error on unsupported type": +"Multi-match query error on unsupported type with multiple fields": - do: catch: /multi_match query with type \[cross_fields\] is not supported for semantic_text fields/ search: @@ -318,6 +318,19 @@ setup: fields: ["title", "content"] type: "cross_fields" +--- +"Multi-match query error on unsupported type with single field": + - do: + catch: /multi_match query with type \[phrase\] is not supported for semantic_text fields/ + search: + index: test-semantic-index + body: + query: + multi_match: + query: "test query" + fields: ["title"] + type: "phrase" + --- "Multi-match single field optimization": - do: From f1de7b41058250be5a50c5a7376f746a9e04c3af Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Tue, 12 Aug 2025 14:53:51 -0400 Subject: [PATCH 27/46] remove score mismatch issue --- ...nticMultiMatchQueryRewriteInterceptor.java | 48 ------------------- 1 file changed, 48 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index bc24a917f15a1..8042bb8660987 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -81,7 +81,6 @@ protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceI semanticQuery.queryName(originalQuery.queryName()); return semanticQuery; } else { - detectAndWarnScoreRangeMismatch(indexInformation); return buildMultiFieldSemanticQuery(originalQuery, inferenceFields, queryValue, indexInformation); } } @@ -96,7 +95,6 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( String queryValue = getQuery(queryBuilder); validateQueryTypeSupported(originalQuery.type()); - detectAndWarnScoreRangeMismatch(indexInformation); return switch (originalQuery.type()) { case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, indexInformation, queryValue); @@ -308,50 +306,4 @@ private void copyQueryProperties(MultiMatchQueryBuilder original, MultiMatchQuer target.fuzziness(original.fuzziness()); } } - - /** - * Detects and warns about score range mismatches when a multi_match query has at least one dense vector model (TEXT_EMBEDDING) - * mixed with sparse vector models (SPARSE_EMBEDDING) or non-inference fields. - * Dense vector models typically produce bounded scores (0-1) while sparse vector models and - * non-inference fields produce unbounded scores, causing score range mismatches. 
- */ - private void detectAndWarnScoreRangeMismatch(InferenceIndexInformationForField indexInformation) { - ModelRegistry modelRegistry = modelRegistrySupplier.get(); - // Check if we have any dense vector models mixed with sparse vector models or non-inference fields - boolean hasDenseVectorModel = false; - boolean hasSparseVectorModel = false; - boolean hasNonInferenceFields = indexInformation.hasNonInferenceFields(); - - // Collect all inference IDs from all fields using the public API - Set allInferenceIds = indexInformation.getInferenceIdsIndices().keySet(); - - // Check task types for each inference ID - for (String inferenceId : allInferenceIds) { - try { - MinimalServiceSettings settings = modelRegistry.getMinimalServiceSettings(inferenceId); - if (settings != null) { - TaskType taskType = settings.taskType(); - if (taskType == TaskType.TEXT_EMBEDDING) { - hasDenseVectorModel = true; - } else if (taskType == TaskType.SPARSE_EMBEDDING) { - hasSparseVectorModel = true; - } - } - } catch (Exception e) { - // TODO: validate If we can't get model info, skip this inference ID or throw an error - } - } - - // Emit warning only if we have dense vector model mixed with sparse vector or non-inference fields - if (hasDenseVectorModel && (hasSparseVectorModel || hasNonInferenceFields)) { - HeaderWarning.addWarning( - "Query contains dense vector model (TEXT_EMBEDDING) with bounded scores (0-1) mixed with " - + (hasSparseVectorModel ? "sparse vector model (SPARSE_EMBEDDING) and/or " : "") - + (hasNonInferenceFields ? "non-inference fields " : "") - + "that produce unbounded scores. This may cause score range mismatches and affect result ranking. " - + "Consider using Linear or RRF retrievers." 
- ); - } - } - } From cd213bb74396ea7d13f18ae1e16996bd20db7a69 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Tue, 12 Aug 2025 15:20:00 -0400 Subject: [PATCH 28/46] throw exception if type is unrecognizable --- .../SemanticMultiMatchQueryRewriteInterceptor.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 8042bb8660987..b205a2d59eada 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -99,9 +99,9 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( return switch (originalQuery.type()) { case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, indexInformation, queryValue); case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, indexInformation, queryValue); - default -> - // Fallback to best_fields behavior as default type - buildBestFieldsCombinedQuery(originalQuery, indexInformation, queryValue); + default -> throw new IllegalArgumentException( + "Unsupported query type [" + originalQuery.type() + "] for semantic_text fields" + ); }; } @@ -119,9 +119,9 @@ private QueryBuilder buildMultiFieldSemanticQuery( return switch (originalQuery.type()) { case BEST_FIELDS -> buildBestFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); - default -> - // Fallback to best_fields behavior for default types - buildBestFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, 
queryValue); + default -> throw new IllegalArgumentException( + "Unsupported query type [" + originalQuery.type() + "] for semantic_text fields" + ); }; } From eb30e4cc62c1fcfe11c209e68acbedd1057999cb Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Tue, 12 Aug 2025 16:58:23 -0400 Subject: [PATCH 29/46] default field implementation --- .../SemanticQueryRewriteInterceptor.java | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 9130464177e02..d1aca752cfeec 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -11,14 +11,18 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.mapper.IndexFieldMapper; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.TermsQueryBuilder; +import org.elasticsearch.index.search.QueryParserHelper; import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor; +import static org.elasticsearch.index.IndexSettings.DEFAULT_FIELD_SETTING; + import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -116,8 +120,16 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q Map indexInferenceFields = new 
HashMap<>(); Map indexInferenceMetadata = indexMetadata.getInferenceFields(); + // Handle default fields per index when no fields are specified - following RRF pattern + Map fieldsToProcess = fieldsWithWeights; + if (fieldsToProcess.isEmpty()) { + Settings settings = indexMetadata.getSettings(); + List defaultFields = settings.getAsList(DEFAULT_FIELD_SETTING.getKey(), DEFAULT_FIELD_SETTING.getDefault(settings)); + fieldsToProcess = QueryParserHelper.parseFieldsAndWeights(defaultFields); + } + // Resolve wildcards for inference fields and store boosts - for (Map.Entry entry : fieldsWithWeights.entrySet()) { + for (Map.Entry entry : fieldsToProcess.entrySet()) { String field = entry.getKey(); Float boost = entry.getValue(); @@ -138,12 +150,12 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q } // Non-inference fields: original fields minus resolved inference fields - Set indexNonInferenceFields = new HashSet<>(fieldsWithWeights.keySet()); + Set indexNonInferenceFields = new HashSet<>(fieldsToProcess.keySet()); indexNonInferenceFields.removeAll(indexInferenceFields.keySet()); // Store boosts for non-inference fields in global fieldBoosts map for (String nonInferenceField : indexNonInferenceFields) { - fieldBoosts.put(nonInferenceField, fieldsWithWeights.get(nonInferenceField)); + fieldBoosts.put(nonInferenceField, fieldsToProcess.get(nonInferenceField)); } if (indexInferenceFields.isEmpty() == false) { From 851cdd23ecbe3c674650960b83638297383c6638 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 13 Aug 2025 10:33:42 -0400 Subject: [PATCH 30/46] adding transport layer --- server/src/main/java/org/elasticsearch/TransportVersions.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index f1f892f11ecf1..f47277a4fcf70 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ 
b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -214,6 +214,7 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_FIXED_INDEX_LIKE_8_19 = def(8_841_0_64); public static final TransportVersion INITIAL_ELASTICSEARCH_8_19_1 = def(8_841_0_65); public static final TransportVersion INITIAL_ELASTICSEARCH_8_19_2 = def(8_841_0_66); + public static final TransportVersion MULTI_MATCH_SEMANTIC_TEXT_SUPPORT_8_19 = def(8_841_0_67); public static final TransportVersion V_9_0_0 = def(9_000_0_09); public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10); public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11); @@ -360,6 +361,7 @@ static TransportVersion def(int id) { public static final TransportVersion INDEX_TEMPLATE_TRACKING_INFO = def(9_136_0_00); public static final TransportVersion EXTENDED_SNAPSHOT_STATS_IN_NODE_INFO = def(9_137_0_00); public static final TransportVersion SIMULATE_INGEST_MAPPING_MERGE_TYPE = def(9_138_0_00); + public static final TransportVersion MULTI_MATCH_SEMANTIC_TEXT_SUPPORT = def(9_139_0_00); /* * STOP! READ THIS FIRST! 
No, really, From 2e8fea2caa6cb749bc882a003b34f6f218e9fa42 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 13 Aug 2025 11:50:34 -0400 Subject: [PATCH 31/46] logic not to resolve by default --- .../index/query/MultiMatchQueryBuilder.java | 26 +++++++++++++++++-- ...nticMultiMatchQueryRewriteInterceptor.java | 1 + .../SemanticQueryRewriteInterceptor.java | 24 ++++++++++++++--- .../rank/MultiFieldsInnerRetrieverUtils.java | 2 +- 4 files changed, 46 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index cfd2fdcda853c..3ab2b9727c407 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -52,6 +52,7 @@ public final class MultiMatchQueryBuilder extends AbstractQueryBuilder fieldsWithWeights = getFieldsWithWeights(queryBuilder); Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); @@ -133,12 +142,12 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q String field = entry.getKey(); Float boost = entry.getValue(); - if (Regex.isMatchAllPattern(field)) { + if (resolveInferenceFieldWildcards && Regex.isMatchAllPattern(field)) { indexInferenceMetadata.keySet().forEach(f -> { indexInferenceFields.put(f, indexInferenceMetadata.get(f)); fieldBoosts.put(f, boost); }); - } else if (Regex.isSimpleMatchPattern(field)) { + } else if (resolveInferenceFieldWildcards && Regex.isSimpleMatchPattern(field)) { indexInferenceMetadata.keySet().stream().filter(f -> Regex.simpleMatch(field, f)).forEach(f -> { indexInferenceFields.put(f, indexInferenceMetadata.get(f)); fieldBoosts.put(f, boost); @@ -170,6 +179,13 @@ private InferenceIndexInformationForField resolveIndicesForFields(QueryBuilder q return new 
InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, fieldBoosts); } + private boolean extractResolveInferenceFieldWildcards(QueryBuilder queryBuilder) { + if (queryBuilder instanceof org.elasticsearch.index.query.MultiMatchQueryBuilder multiMatchQuery) { + return multiMatchQuery.resolveInferenceFieldWildcards(); + } + return false; + } + protected QueryBuilder createSubQueryForIndices(Collection indices, QueryBuilder queryBuilder) { BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); boolQueryBuilder.must(queryBuilder); diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/MultiFieldsInnerRetrieverUtils.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/MultiFieldsInnerRetrieverUtils.java index 8aa5dbf366a7a..61f4d13a3f6bd 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/MultiFieldsInnerRetrieverUtils.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/MultiFieldsInnerRetrieverUtils.java @@ -207,7 +207,7 @@ private static List generateInnerRetrieversForIndex( if (nonInferenceFields.isEmpty() == false) { MultiMatchQueryBuilder nonInferenceFieldQueryBuilder = new MultiMatchQueryBuilder(query).type( MultiMatchQueryBuilder.Type.MOST_FIELDS - ).fields(nonInferenceFields); + ).fields(nonInferenceFields).resolveInferenceFieldWildcards(false); innerRetrievers.add(new StandardRetrieverBuilder(nonInferenceFieldQueryBuilder)); } if (inferenceFields.isEmpty() == false) { From b17f8d2bec725cdfbde44046ef6953dc709aef6e Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 13 Aug 2025 12:04:29 -0400 Subject: [PATCH 32/46] refactor extracting resolvewildcard field --- ...anticKnnVectorQueryRewriteInterceptor.java | 5 ++++ .../SemanticMatchQueryRewriteInterceptor.java | 5 ++++ ...nticMultiMatchQueryRewriteInterceptor.java | 7 +++++ .../SemanticQueryRewriteInterceptor.java | 27 ++++++++++--------- 
...icSparseVectorQueryRewriteInterceptor.java | 5 ++++ 5 files changed, 37 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index b1f5c240371f8..671e4dfb33d3e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -47,6 +47,11 @@ protected String getQuery(QueryBuilder queryBuilder) { return queryVectorBuilder != null ? queryVectorBuilder.getModelText() : null; } + @Override + protected boolean isResolveInferenceFieldWildcardsRequired(QueryBuilder queryBuilder) { + return false; + } + @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { assert (queryBuilder instanceof KnnVectorQueryBuilder); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index bc977a5ec8e14..30073841ad00e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -34,6 +34,11 @@ protected String getQuery(QueryBuilder queryBuilder) { return (String) matchQueryBuilder.value(); } + @Override + protected boolean isResolveInferenceFieldWildcardsRequired(QueryBuilder queryBuilder) { + return false; + } + @Override protected QueryBuilder 
buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(getFieldName(queryBuilder), getQuery(queryBuilder), false); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index f3973b8ba4a4c..ca6ee2a564161 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -110,6 +110,13 @@ public String getQueryName() { return MultiMatchQueryBuilder.NAME; } + @Override + public boolean isResolveInferenceFieldWildcardsRequired(QueryBuilder queryBuilder) { + assert (queryBuilder instanceof MultiMatchQueryBuilder); + MultiMatchQueryBuilder multiMatchQuery = (MultiMatchQueryBuilder) queryBuilder; + return multiMatchQuery.resolveInferenceFieldWildcards(); + } + private QueryBuilder buildMultiFieldSemanticQuery( MultiMatchQueryBuilder originalQuery, Set inferenceFields, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 37c55803fdd69..730b4ea275d0f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -48,10 +48,11 @@ public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilde return queryBuilder; } - boolean 
resolveInferenceFieldWildcards = extractResolveInferenceFieldWildcards(queryBuilder); + boolean resolveInferenceFieldWildcards = + isResolveInferenceFieldWildcardsRequired(queryBuilder); InferenceIndexInformationForField indexInformation = resolveIndicesForFields( - queryBuilder, - resolvedIndices, + queryBuilder, + resolvedIndices, resolveInferenceFieldWildcards ); if (indexInformation.hasInferenceFields() == false) { @@ -91,6 +92,15 @@ protected Map getFieldsWithWeights(QueryBuilder queryBuilder) { */ protected abstract String getQuery(QueryBuilder queryBuilder); + /** + * Determines if inference field wildcards should be resolved. + * This is typically used to expand wildcard queries to all inference fields. + * + * @param queryBuilder {@link QueryBuilder} + * @return true if inference field wildcards should be resolved, false otherwise. + */ + protected abstract boolean isResolveInferenceFieldWildcardsRequired(QueryBuilder queryBuilder); + /** * Builds the inference query * @@ -113,8 +123,8 @@ protected abstract QueryBuilder buildCombinedInferenceAndNonInferenceQuery( ); private InferenceIndexInformationForField resolveIndicesForFields( - QueryBuilder queryBuilder, - ResolvedIndices resolvedIndices, + QueryBuilder queryBuilder, + ResolvedIndices resolvedIndices, boolean resolveInferenceFieldWildcards ) { Map fieldsWithWeights = getFieldsWithWeights(queryBuilder); @@ -179,13 +189,6 @@ private InferenceIndexInformationForField resolveIndicesForFields( return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, fieldBoosts); } - private boolean extractResolveInferenceFieldWildcards(QueryBuilder queryBuilder) { - if (queryBuilder instanceof org.elasticsearch.index.query.MultiMatchQueryBuilder multiMatchQuery) { - return multiMatchQuery.resolveInferenceFieldWildcards(); - } - return false; - } - protected QueryBuilder createSubQueryForIndices(Collection indices, QueryBuilder queryBuilder) { BoolQueryBuilder boolQueryBuilder = 
new BoolQueryBuilder(); boolQueryBuilder.must(queryBuilder); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index c85a21f10301d..9b39b38f977ce 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -40,6 +40,11 @@ protected String getQuery(QueryBuilder queryBuilder) { return sparseVectorQueryBuilder.getQuery(); } + @Override + protected boolean isResolveInferenceFieldWildcardsRequired(QueryBuilder queryBuilder) { + return false; + } + @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { Map> inferenceIdsIndices = indexInformation.getInferenceIdsIndices(); From 046ff0f4ec3dab6c657c6d702ba16873b75040d2 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 14 Aug 2025 01:29:32 -0400 Subject: [PATCH 33/46] multiple boosting per field --- .../queries/SemanticQueryRewriteInterceptor.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 730b4ea275d0f..b0549b0746082 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -122,6 +122,10 @@ protected abstract QueryBuilder 
buildCombinedInferenceAndNonInferenceQuery( InferenceIndexInformationForField indexInformation ); + private static void addToFieldBoostsMap(Map fieldBoosts, String field, Float boost) { + fieldBoosts.compute(field, (k, v) -> v == null ? boost : v * boost); + } + private InferenceIndexInformationForField resolveIndicesForFields( QueryBuilder queryBuilder, ResolvedIndices resolvedIndices, @@ -147,7 +151,7 @@ private InferenceIndexInformationForField resolveIndicesForFields( fieldsToProcess = QueryParserHelper.parseFieldsAndWeights(defaultFields); } - // Resolve wildcards for inference fields and store boosts + // Resolve wildcards for inference fields and multiply boosts when field matches multiple patterns for (Map.Entry entry : fieldsToProcess.entrySet()) { String field = entry.getKey(); Float boost = entry.getValue(); @@ -155,16 +159,16 @@ private InferenceIndexInformationForField resolveIndicesForFields( if (resolveInferenceFieldWildcards && Regex.isMatchAllPattern(field)) { indexInferenceMetadata.keySet().forEach(f -> { indexInferenceFields.put(f, indexInferenceMetadata.get(f)); - fieldBoosts.put(f, boost); + addToFieldBoostsMap(fieldBoosts, f, boost); }); } else if (resolveInferenceFieldWildcards && Regex.isSimpleMatchPattern(field)) { indexInferenceMetadata.keySet().stream().filter(f -> Regex.simpleMatch(field, f)).forEach(f -> { indexInferenceFields.put(f, indexInferenceMetadata.get(f)); - fieldBoosts.put(f, boost); + addToFieldBoostsMap(fieldBoosts, f, boost); }); } else if (indexInferenceMetadata.containsKey(field)) { indexInferenceFields.put(field, indexInferenceMetadata.get(field)); - fieldBoosts.put(field, boost); + addToFieldBoostsMap(fieldBoosts, field, boost); } } @@ -174,7 +178,7 @@ private InferenceIndexInformationForField resolveIndicesForFields( // Store boosts for non-inference fields in global fieldBoosts map for (String nonInferenceField : indexNonInferenceFields) { - fieldBoosts.put(nonInferenceField, fieldsToProcess.get(nonInferenceField)); + 
addToFieldBoostsMap(fieldBoosts, nonInferenceField, fieldsToProcess.get(nonInferenceField)); } if (indexInferenceFields.isEmpty() == false) { From 409e320cd37c3711131174e185b867b261124d1a Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 14 Aug 2025 01:31:07 -0400 Subject: [PATCH 34/46] linting --- .../SemanticMultiMatchQueryRewriteInterceptor.java | 11 ++--------- .../queries/SemanticQueryRewriteInterceptor.java | 7 +++---- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index ca6ee2a564161..d9e55ae1b8593 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -7,15 +7,12 @@ package org.elasticsearch.xpack.inference.queries; -import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.inference.MinimalServiceSettings; -import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.registry.ModelRegistry; import java.util.HashMap; @@ -99,9 +96,7 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( return switch (originalQuery.type()) { case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, indexInformation, queryValue); case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, 
indexInformation, queryValue); - default -> throw new IllegalArgumentException( - "Unsupported query type [" + originalQuery.type() + "] for semantic_text fields" - ); + default -> throw new IllegalArgumentException("Unsupported query type [" + originalQuery.type() + "] for semantic_text fields"); }; } @@ -126,9 +121,7 @@ private QueryBuilder buildMultiFieldSemanticQuery( return switch (originalQuery.type()) { case BEST_FIELDS -> buildBestFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); - default -> throw new IllegalArgumentException( - "Unsupported query type [" + originalQuery.type() + "] for semantic_text fields" - ); + default -> throw new IllegalArgumentException("Unsupported query type [" + originalQuery.type() + "] for semantic_text fields"); }; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index b0549b0746082..a6ecaa7de3093 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -21,8 +21,6 @@ import org.elasticsearch.index.search.QueryParserHelper; import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor; -import static org.elasticsearch.index.IndexSettings.DEFAULT_FIELD_SETTING; - import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -32,6 +30,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.elasticsearch.index.IndexSettings.DEFAULT_FIELD_SETTING; + /** * Intercepts and adapts a query to be rewritten to work seamlessly on a 
semantic_text field. */ @@ -48,8 +48,7 @@ public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilde return queryBuilder; } - boolean resolveInferenceFieldWildcards = - isResolveInferenceFieldWildcardsRequired(queryBuilder); + boolean resolveInferenceFieldWildcards = isResolveInferenceFieldWildcardsRequired(queryBuilder); InferenceIndexInformationForField indexInformation = resolveIndicesForFields( queryBuilder, resolvedIndices, From e964f2ea67219331fb950c0e606a0234e846e430 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 14 Aug 2025 09:14:32 -0400 Subject: [PATCH 35/46] remove unnecessary headers --- .../test/inference/100_semantic_text_multi_match.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml index 6354b3da480b1..af1ac2f7ce184 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/100_semantic_text_multi_match.yml @@ -124,8 +124,6 @@ setup: refresh: true - do: - headers: - Content-Type: application/json search: index: test-semantic-index body: From 80560be82ed1a5e6172aadca6695abe819d56ae3 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 14 Aug 2025 09:30:23 -0400 Subject: [PATCH 36/46] remove modelResitry --- .../org/elasticsearch/xpack/inference/InferencePlugin.java | 2 +- .../queries/SemanticMultiMatchQueryRewriteInterceptor.java | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index a132cb6d4cbb4..63f061357b944 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -582,7 +582,7 @@ public List getQueryRewriteInterceptors() { return List.of( new SemanticKnnVectorQueryRewriteInterceptor(), new SemanticMatchQueryRewriteInterceptor(), - new SemanticMultiMatchQueryRewriteInterceptor(getModelRegistry()), + new SemanticMultiMatchQueryRewriteInterceptor(), new SemanticSparseVectorQueryRewriteInterceptor() ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index d9e55ae1b8593..5595ed2bdc7ef 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -28,12 +28,7 @@ public class SemanticMultiMatchQueryRewriteInterceptor extends SemanticQueryRewr "search.semantic_multi_match_query_rewrite_interception_supported" ); - private final Supplier modelRegistrySupplier; - - public SemanticMultiMatchQueryRewriteInterceptor(Supplier modelRegistrySupplier) { - super(); - this.modelRegistrySupplier = modelRegistrySupplier; - } + public SemanticMultiMatchQueryRewriteInterceptor() {} @Override protected String getFieldName(QueryBuilder queryBuilder) { From 98ec933b550773db64ca033d1ae3a5fc60bb5286 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 14 Aug 2025 10:20:25 -0400 Subject: [PATCH 37/46] clean up --- .../queries/SemanticMultiMatchQueryRewriteInterceptor.java | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 5595ed2bdc7ef..9809a87986108 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -13,14 +13,12 @@ import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.xpack.inference.registry.ModelRegistry; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.function.Supplier; public class SemanticMultiMatchQueryRewriteInterceptor extends SemanticQueryRewriteInterceptor { From 2595efbe642ae380a976094263ea8b84ec659a1a Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 14 Aug 2025 11:50:18 -0400 Subject: [PATCH 38/46] fix yaml test failure --- .../index/query/MultiMatchQueryBuilder.java | 2 +- .../test/inference/100_semantic_text_multi_match.yml | 12 +----------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index 3ab2b9727c407..f7c721af82887 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -52,7 +52,7 @@ public final class MultiMatchQueryBuilder extends AbstractQueryBuilder Date: Thu, 14 Aug 2025 13:31:18 -0400 Subject: [PATCH 39/46] fix retrievers unit tests --- 
.../xpack/rank/linear/LinearRetrieverBuilderTests.java | 1 + .../elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java | 1 + 2 files changed, 2 insertions(+) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index c211440d10bae..8c2a18812e001 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -242,6 +242,7 @@ private static void assertMultiFieldsParamsRewrite( new StandardRetrieverBuilder( new MultiMatchQueryBuilder(expectedQuery).type(MultiMatchQueryBuilder.Type.MOST_FIELDS) .fields(expectedNonInferenceFields) + .resolveInferenceFieldWildcards(false) ), 1.0f, expectedNormalizer diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 7885ac9df2aa8..8e8a152ea8f3c 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -322,6 +322,7 @@ private static void assertMultiFieldsParamsRewrite( new StandardRetrieverBuilder( new MultiMatchQueryBuilder(expectedQuery).type(MultiMatchQueryBuilder.Type.MOST_FIELDS) .fields(expectedNonInferenceFields) + .resolveInferenceFieldWildcards(false) ) ), Set.of(expectedInferenceFields.entrySet().stream().map(e -> { From 7c7fbf831a9f44908bc82a5be0857d0faa9cf286 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 14 Aug 2025 18:37:40 -0400 Subject: [PATCH 40/46] rename resolve function --- .../queries/SemanticKnnVectorQueryRewriteInterceptor.java | 2 +- 
.../queries/SemanticMatchQueryRewriteInterceptor.java | 2 +- .../queries/SemanticMultiMatchQueryRewriteInterceptor.java | 2 +- .../inference/queries/SemanticQueryRewriteInterceptor.java | 4 ++-- .../queries/SemanticSparseVectorQueryRewriteInterceptor.java | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index 671e4dfb33d3e..12f1d9fcc1a7e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -48,7 +48,7 @@ protected String getQuery(QueryBuilder queryBuilder) { } @Override - protected boolean isResolveInferenceFieldWildcardsRequired(QueryBuilder queryBuilder) { + protected boolean shouldResolveInferenceFieldWildcards(QueryBuilder queryBuilder) { return false; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index 30073841ad00e..0534da8f98eff 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -35,7 +35,7 @@ protected String getQuery(QueryBuilder queryBuilder) { } @Override - protected boolean isResolveInferenceFieldWildcardsRequired(QueryBuilder queryBuilder) { + protected boolean shouldResolveInferenceFieldWildcards(QueryBuilder queryBuilder) { return false; } 
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 9809a87986108..af2109a095492 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -99,7 +99,7 @@ public String getQueryName() { } @Override - public boolean isResolveInferenceFieldWildcardsRequired(QueryBuilder queryBuilder) { + public boolean shouldResolveInferenceFieldWildcards(QueryBuilder queryBuilder) { assert (queryBuilder instanceof MultiMatchQueryBuilder); MultiMatchQueryBuilder multiMatchQuery = (MultiMatchQueryBuilder) queryBuilder; return multiMatchQuery.resolveInferenceFieldWildcards(); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index a6ecaa7de3093..b4bfb7e8cdc3a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -48,7 +48,7 @@ public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilde return queryBuilder; } - boolean resolveInferenceFieldWildcards = isResolveInferenceFieldWildcardsRequired(queryBuilder); + boolean resolveInferenceFieldWildcards = shouldResolveInferenceFieldWildcards(queryBuilder); InferenceIndexInformationForField indexInformation = resolveIndicesForFields( queryBuilder, resolvedIndices, @@ -98,7 +98,7 @@ protected Map 
getFieldsWithWeights(QueryBuilder queryBuilder) { * @param queryBuilder {@link QueryBuilder} * @return true if inference field wildcards should be resolved, false otherwise. */ - protected abstract boolean isResolveInferenceFieldWildcardsRequired(QueryBuilder queryBuilder); + protected abstract boolean shouldResolveInferenceFieldWildcards(QueryBuilder queryBuilder); /** * Builds the inference query diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index 9b39b38f977ce..e03d5590fdd40 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -41,7 +41,7 @@ protected String getQuery(QueryBuilder queryBuilder) { } @Override - protected boolean isResolveInferenceFieldWildcardsRequired(QueryBuilder queryBuilder) { + protected boolean shouldResolveInferenceFieldWildcards(QueryBuilder queryBuilder) { return false; } From dfd27d2956a239e6b8d701e82d912e1c67d79c0b Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 15 Aug 2025 09:29:07 -0400 Subject: [PATCH 41/46] combine logic to simplify best fields and most fields --- ...nticMultiMatchQueryRewriteInterceptor.java | 132 ++++++++---------- 1 file changed, 55 insertions(+), 77 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index af2109a095492..19ca799ffabc2 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -87,8 +87,7 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( validateQueryTypeSupported(originalQuery.type()); return switch (originalQuery.type()) { - case BEST_FIELDS -> buildBestFieldsCombinedQuery(originalQuery, indexInformation, queryValue); - case MOST_FIELDS -> buildMostFieldsCombinedQuery(originalQuery, indexInformation, queryValue); + case BEST_FIELDS, MOST_FIELDS -> buildCombinedQuery(originalQuery, indexInformation, queryValue); default -> throw new IllegalArgumentException("Unsupported query type [" + originalQuery.type() + "] for semantic_text fields"); }; } @@ -112,8 +111,7 @@ private QueryBuilder buildMultiFieldSemanticQuery( InferenceIndexInformationForField indexInformation ) { return switch (originalQuery.type()) { - case BEST_FIELDS -> buildBestFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); - case MOST_FIELDS -> buildMostFieldsSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); + case BEST_FIELDS, MOST_FIELDS -> buildSemanticQuery(originalQuery, indexInformation, inferenceFields, queryValue); default -> throw new IllegalArgumentException("Unsupported query type [" + originalQuery.type() + "] for semantic_text fields"); }; } @@ -154,7 +152,10 @@ private SemanticQueryBuilder createSemanticQuery(String fieldName, String queryV return semanticQuery; } - private QueryBuilder buildBestFieldsSemanticQuery( + /** + * Builds a semantic query for multiple fields using DisMax. 
+ */ + private QueryBuilder buildSemanticQuery( MultiMatchQueryBuilder originalQuery, InferenceIndexInformationForField indexInformation, Set inferenceFields, @@ -173,91 +174,74 @@ private QueryBuilder buildBestFieldsSemanticQuery( } /** - * Builds a most_fields query for pure semantic fields using BoolQueryBuilder. + * Builds a combined query for both inference and non-inference fields. + * Uses DisMax for BEST_FIELDS and Bool for MOST_FIELDS to properly handle minimum_should_match. */ - private QueryBuilder buildMostFieldsSemanticQuery( - MultiMatchQueryBuilder originalQuery, - InferenceIndexInformationForField indexInformation, - Set inferenceFields, - String queryValue - ) { - BoolQueryBuilder boolQuery = new BoolQueryBuilder(); - for (String fieldName : inferenceFields) { - boolQuery.should(createSemanticQuery(fieldName, queryValue, indexInformation)); - } - String minimumShouldMatch = originalQuery.minimumShouldMatch(); - boolQuery.minimumShouldMatch(minimumShouldMatch != null ? minimumShouldMatch : "1"); - boolQuery.boost(originalQuery.boost()); - boolQuery.queryName(originalQuery.queryName()); - return boolQuery; - } - - private QueryBuilder buildBestFieldsCombinedQuery( + private QueryBuilder buildCombinedQuery( MultiMatchQueryBuilder originalQuery, InferenceIndexInformationForField inferenceInfo, String queryValue ) { - DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); - - // Add semantic queries for each inference field, but only in indices where that field is semantic - for (String fieldName : inferenceInfo.getAllInferenceFields()) { - Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); - if (semanticIndices.isEmpty() == false) { - disMaxQuery.add( - createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName)) - ); + boolean isMostFields = originalQuery.type() == MultiMatchQueryBuilder.Type.MOST_FIELDS; + + if (isMostFields) { + // Use BoolQuery for MOST_FIELDS to 
properly handle minimum_should_match + BoolQueryBuilder boolQuery = new BoolQueryBuilder(); + + // Add semantic queries + for (String fieldName : inferenceInfo.getAllInferenceFields()) { + Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); + if (!semanticIndices.isEmpty()) { + boolQuery.should( + createSemanticSubQuery(semanticIndices, fieldName, queryValue) + .boost(inferenceInfo.getFieldBoost(fieldName)) + ); + } } - } - // Add one multi_match query per index containing all non-inference fields in that index - for (Map.Entry> entry : inferenceInfo.nonInferenceFieldsPerIndex().entrySet()) { - String indexName = entry.getKey(); - Set indexFieldNames = entry.getValue(); + // Add non-inference queries + addNonInferenceQueries(boolQuery::should, originalQuery, inferenceInfo); - // Build field map with boosts from fieldBoosts Map - Map indexFields = new HashMap<>(); - for (String fieldName : indexFieldNames) { - indexFields.put(fieldName, inferenceInfo.getFieldBoost(fieldName)); + String minimumShouldMatch = originalQuery.minimumShouldMatch(); + boolQuery.minimumShouldMatch(minimumShouldMatch != null ? 
minimumShouldMatch : "1"); + boolQuery.boost(originalQuery.boost()); + boolQuery.queryName(originalQuery.queryName()); + return boolQuery; + } else { + // Use DisMaxQuery for BEST_FIELDS + DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); + + // Add semantic queries + for (String fieldName : inferenceInfo.getAllInferenceFields()) { + Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); + if (!semanticIndices.isEmpty()) { + disMaxQuery.add( + createSemanticSubQuery(semanticIndices, fieldName, queryValue) + .boost(inferenceInfo.getFieldBoost(fieldName)) + ); + } } - MultiMatchQueryBuilder indexQuery = new MultiMatchQueryBuilder(originalQuery.value()); - indexQuery.fields(indexFields); - copyQueryProperties(originalQuery, indexQuery); + // Add non-inference queries + addNonInferenceQueries(disMaxQuery::add, originalQuery, inferenceInfo); - disMaxQuery.add(createSubQueryForIndices(List.of(indexName), indexQuery)); + Float tieBreaker = originalQuery.tieBreaker(); + disMaxQuery.tieBreaker(Objects.requireNonNullElseGet(tieBreaker, () -> originalQuery.type().tieBreaker())); + disMaxQuery.boost(originalQuery.boost()); + disMaxQuery.queryName(originalQuery.queryName()); + return disMaxQuery; } - - // Apply tie_breaker - use explicit value or fall back to type's default - Float tieBreaker = originalQuery.tieBreaker(); - disMaxQuery.tieBreaker(Objects.requireNonNullElseGet(tieBreaker, () -> originalQuery.type().tieBreaker())); - disMaxQuery.boost(originalQuery.boost()); - disMaxQuery.queryName(originalQuery.queryName()); - return disMaxQuery; } - private QueryBuilder buildMostFieldsCombinedQuery( + private void addNonInferenceQueries( + java.util.function.Consumer addQuery, MultiMatchQueryBuilder originalQuery, - InferenceIndexInformationForField inferenceInfo, - String queryValue + InferenceIndexInformationForField inferenceInfo ) { - BoolQueryBuilder boolQuery = new BoolQueryBuilder(); - - // Add semantic queries for each inference field, 
but only in indices where that field is semantic - for (String fieldName : inferenceInfo.getAllInferenceFields()) { - Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); - if (semanticIndices.isEmpty() == false) { - boolQuery.should( - createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName)) - ); - } - } - - // Add one multi_match query per index containing all non-inference fields in that index for (Map.Entry> entry : inferenceInfo.nonInferenceFieldsPerIndex().entrySet()) { String indexName = entry.getKey(); Set indexFieldNames = entry.getValue(); - // Build field map with boosts from global fieldBoosts Map indexFields = new HashMap<>(); for (String fieldName : indexFieldNames) { indexFields.put(fieldName, inferenceInfo.getFieldBoost(fieldName)); @@ -267,14 +251,8 @@ private QueryBuilder buildMostFieldsCombinedQuery( indexQuery.fields(indexFields); copyQueryProperties(originalQuery, indexQuery); - boolQuery.should(createSubQueryForIndices(List.of(indexName), indexQuery)); + addQuery.accept(createSubQueryForIndices(List.of(indexName), indexQuery)); } - - String minimumShouldMatch = originalQuery.minimumShouldMatch(); - boolQuery.minimumShouldMatch(minimumShouldMatch != null ? 
minimumShouldMatch : "1"); - boolQuery.boost(originalQuery.boost()); - boolQuery.queryName(originalQuery.queryName()); - return boolQuery; } /** From b42bc6b3a1a7998a9d90c875253ab5c10489ac77 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 15 Aug 2025 09:48:18 -0400 Subject: [PATCH 42/46] update combine logic to handle user defined tiebreaker and minimum_should_match --- ...nticMultiMatchQueryRewriteInterceptor.java | 60 ++++++++++++------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 19ca799ffabc2..32ea9681411b2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -153,7 +153,8 @@ private SemanticQueryBuilder createSemanticQuery(String fieldName, String queryV } /** - * Builds a semantic query for multiple fields using DisMax. + * Builds a semantic query for multiple fields. + * Uses BoolQuery when minimum_should_match is specified, otherwise uses DisMax with appropriate tie_breaker. 
*/ private QueryBuilder buildSemanticQuery( MultiMatchQueryBuilder originalQuery, @@ -161,40 +162,56 @@ private QueryBuilder buildSemanticQuery( Set inferenceFields, String queryValue ) { - DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); - for (String fieldName : inferenceFields) { - disMaxQuery.add(createSemanticQuery(fieldName, queryValue, indexInformation)); + String minimumShouldMatch = originalQuery.minimumShouldMatch(); + + if (minimumShouldMatch != null) { + // Use BoolQuery when minimum_should_match is specified + BoolQueryBuilder boolQuery = new BoolQueryBuilder(); + for (String fieldName : inferenceFields) { + boolQuery.should(createSemanticQuery(fieldName, queryValue, indexInformation)); + } + boolQuery.minimumShouldMatch(minimumShouldMatch); + boolQuery.boost(originalQuery.boost()); + boolQuery.queryName(originalQuery.queryName()); + return boolQuery; + } else { + // Use DisMax for default behavior with tie_breaker + DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); + for (String fieldName : inferenceFields) { + disMaxQuery.add(createSemanticQuery(fieldName, queryValue, indexInformation)); + } + // Apply tie_breaker - use explicit value or fall back to type's default + Float tieBreaker = originalQuery.tieBreaker(); + disMaxQuery.tieBreaker(Objects.requireNonNullElseGet(tieBreaker, () -> originalQuery.type().tieBreaker())); + disMaxQuery.boost(originalQuery.boost()); + disMaxQuery.queryName(originalQuery.queryName()); + return disMaxQuery; } - // Apply tie_breaker - use explicit value or fall back to type's default - Float tieBreaker = originalQuery.tieBreaker(); - disMaxQuery.tieBreaker(Objects.requireNonNullElseGet(tieBreaker, () -> originalQuery.type().tieBreaker())); - disMaxQuery.boost(originalQuery.boost()); - disMaxQuery.queryName(originalQuery.queryName()); - return disMaxQuery; } /** * Builds a combined query for both inference and non-inference fields. 
- * Uses DisMax for BEST_FIELDS and Bool for MOST_FIELDS to properly handle minimum_should_match. + * Uses BoolQuery when minimum_should_match is specified, otherwise uses DisMax for BEST_FIELDS + * and BoolQuery for MOST_FIELDS to match core multi_match behavior. */ private QueryBuilder buildCombinedQuery( MultiMatchQueryBuilder originalQuery, InferenceIndexInformationForField inferenceInfo, String queryValue ) { + String minimumShouldMatch = originalQuery.minimumShouldMatch(); boolean isMostFields = originalQuery.type() == MultiMatchQueryBuilder.Type.MOST_FIELDS; - if (isMostFields) { - // Use BoolQuery for MOST_FIELDS to properly handle minimum_should_match + if (minimumShouldMatch != null || isMostFields) { + // Use BoolQuery when minimum_should_match is specified or for MOST_FIELDS BoolQueryBuilder boolQuery = new BoolQueryBuilder(); // Add semantic queries for (String fieldName : inferenceInfo.getAllInferenceFields()) { Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); - if (!semanticIndices.isEmpty()) { + if (semanticIndices.isEmpty() == false) { boolQuery.should( - createSemanticSubQuery(semanticIndices, fieldName, queryValue) - .boost(inferenceInfo.getFieldBoost(fieldName)) + createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName)) ); } } @@ -202,22 +219,21 @@ private QueryBuilder buildCombinedQuery( // Add non-inference queries addNonInferenceQueries(boolQuery::should, originalQuery, inferenceInfo); - String minimumShouldMatch = originalQuery.minimumShouldMatch(); - boolQuery.minimumShouldMatch(minimumShouldMatch != null ? 
minimumShouldMatch : "1"); + // Set minimum_should_match - default to "1" for MOST_FIELDS when not specified + boolQuery.minimumShouldMatch(Objects.requireNonNullElse(minimumShouldMatch, "1")); boolQuery.boost(originalQuery.boost()); boolQuery.queryName(originalQuery.queryName()); return boolQuery; } else { - // Use DisMaxQuery for BEST_FIELDS + // Use DisMaxQuery for BEST_FIELDS without minimum_should_match DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); // Add semantic queries for (String fieldName : inferenceInfo.getAllInferenceFields()) { Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); - if (!semanticIndices.isEmpty()) { + if (semanticIndices.isEmpty() == false) { disMaxQuery.add( - createSemanticSubQuery(semanticIndices, fieldName, queryValue) - .boost(inferenceInfo.getFieldBoost(fieldName)) + createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName)) ); } } From 13cd7c073292fa26c9e6e161f2fb0b682e1eee8d Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 20 Aug 2025 14:29:56 -0400 Subject: [PATCH 43/46] remove duplicate boosting by creating a global boostig fields --- .../SemanticQueryRewriteInterceptor.java | 78 +++++++++++++------ 1 file changed, 53 insertions(+), 25 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index b4bfb7e8cdc3a..294ae3ecbe019 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -133,16 +133,40 @@ private InferenceIndexInformationForField resolveIndicesForFields( Map fieldsWithWeights = getFieldsWithWeights(queryBuilder); Collection 
indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); + // STEP 1: Global wildcard resolution for inference fields + Map globalInferenceFieldBoosts = new HashMap<>(); + if (resolveInferenceFieldWildcards) { + // Get all unique inference fields across all indices + Set allInferenceFields = indexMetadataCollection.stream() + .flatMap(idx -> idx.getInferenceFields().keySet().stream()) + .collect(Collectors.toSet()); + + // Calculate boost for each inference field based on matching patterns + for (String inferenceField : allInferenceFields) { + for (Map.Entry entry : fieldsWithWeights.entrySet()) { + String pattern = entry.getKey(); + Float boost = entry.getValue(); + + if (Regex.isMatchAllPattern(pattern) + || (Regex.isSimpleMatchPattern(pattern) && Regex.simpleMatch(pattern, inferenceField)) + || pattern.equals(inferenceField)) { + addToFieldBoostsMap(globalInferenceFieldBoosts, inferenceField, boost); + } + } + } + } + + // STEP 2: Per-index processing using pre-calculated global boosts Map> inferenceFieldsPerIndex = new HashMap<>(); Map> nonInferenceFieldsPerIndex = new HashMap<>(); - Map fieldBoosts = new HashMap<>(); + Map allFieldBoosts = new HashMap<>(globalInferenceFieldBoosts); for (IndexMetadata indexMetadata : indexMetadataCollection) { String indexName = indexMetadata.getIndex().getName(); Map indexInferenceFields = new HashMap<>(); Map indexInferenceMetadata = indexMetadata.getInferenceFields(); - // Handle default fields per index when no fields are specified - following RRF pattern + // Handle default fields per index when no fields are specified Map fieldsToProcess = fieldsWithWeights; if (fieldsToProcess.isEmpty()) { Settings settings = indexMetadata.getSettings(); @@ -150,34 +174,38 @@ private InferenceIndexInformationForField resolveIndicesForFields( fieldsToProcess = QueryParserHelper.parseFieldsAndWeights(defaultFields); } - // Resolve wildcards for inference fields and multiply boosts when field matches multiple 
patterns - for (Map.Entry entry : fieldsToProcess.entrySet()) { - String field = entry.getKey(); - Float boost = entry.getValue(); - - if (resolveInferenceFieldWildcards && Regex.isMatchAllPattern(field)) { - indexInferenceMetadata.keySet().forEach(f -> { - indexInferenceFields.put(f, indexInferenceMetadata.get(f)); - addToFieldBoostsMap(fieldBoosts, f, boost); - }); - } else if (resolveInferenceFieldWildcards && Regex.isSimpleMatchPattern(field)) { - indexInferenceMetadata.keySet().stream().filter(f -> Regex.simpleMatch(field, f)).forEach(f -> { - indexInferenceFields.put(f, indexInferenceMetadata.get(f)); - addToFieldBoostsMap(fieldBoosts, f, boost); - }); - } else if (indexInferenceMetadata.containsKey(field)) { - indexInferenceFields.put(field, indexInferenceMetadata.get(field)); - addToFieldBoostsMap(fieldBoosts, field, boost); + // Collect resolved inference fields for this index + Set resolvedInferenceFields = new HashSet<>(); + + if (resolveInferenceFieldWildcards) { + // Add inference fields that exist in this index (using pre-calculated boosts) + for (String inferenceField : globalInferenceFieldBoosts.keySet()) { + if (indexInferenceMetadata.containsKey(inferenceField)) { + indexInferenceFields.put(inferenceField, indexInferenceMetadata.get(inferenceField)); + resolvedInferenceFields.add(inferenceField); + } + } + } else { + // Handle explicit inference fields (non-wildcard) + for (Map.Entry entry : fieldsToProcess.entrySet()) { + String field = entry.getKey(); + Float boost = entry.getValue(); + + if (indexInferenceMetadata.containsKey(field)) { + indexInferenceFields.put(field, indexInferenceMetadata.get(field)); + resolvedInferenceFields.add(field); + addToFieldBoostsMap(allFieldBoosts, field, boost); + } } } - // Non-inference fields: original fields minus resolved inference fields + // Non-inference fields: all patterns minus resolved inference fields (simple approach like MultiFieldsInnerRetrieverUtils) Set indexNonInferenceFields = new 
HashSet<>(fieldsToProcess.keySet()); - indexNonInferenceFields.removeAll(indexInferenceFields.keySet()); + indexNonInferenceFields.removeAll(resolvedInferenceFields); - // Store boosts for non-inference fields in global fieldBoosts map + // Store boosts for non-inference field patterns for (String nonInferenceField : indexNonInferenceFields) { - addToFieldBoostsMap(fieldBoosts, nonInferenceField, fieldsToProcess.get(nonInferenceField)); + addToFieldBoostsMap(allFieldBoosts, nonInferenceField, fieldsToProcess.get(nonInferenceField)); } if (indexInferenceFields.isEmpty() == false) { @@ -189,7 +217,7 @@ private InferenceIndexInformationForField resolveIndicesForFields( } } - return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, fieldBoosts); + return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, allFieldBoosts); } protected QueryBuilder createSubQueryForIndices(Collection indices, QueryBuilder queryBuilder) { From 0accbe2f2718044c6e24a60979e7a5a1e0b8801d Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 21 Aug 2025 01:57:26 -0400 Subject: [PATCH 44/46] combine logic to simplify best fields and most fields --- ...nticMultiMatchQueryRewriteInterceptor.java | 103 +++++------------- 1 file changed, 29 insertions(+), 74 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 32ea9681411b2..414bf07af4ad5 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -8,7 +8,6 @@ package org.elasticsearch.xpack.inference.queries; import 
org.elasticsearch.features.NodeFeature; -import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; @@ -153,8 +152,7 @@ private SemanticQueryBuilder createSemanticQuery(String fieldName, String queryV } /** - * Builds a semantic query for multiple fields. - * Uses BoolQuery when minimum_should_match is specified, otherwise uses DisMax with appropriate tie_breaker. + * Builds a semantic query for multiple fields using Dismax. */ private QueryBuilder buildSemanticQuery( MultiMatchQueryBuilder originalQuery, @@ -162,91 +160,48 @@ private QueryBuilder buildSemanticQuery( Set inferenceFields, String queryValue ) { - String minimumShouldMatch = originalQuery.minimumShouldMatch(); - - if (minimumShouldMatch != null) { - // Use BoolQuery when minimum_should_match is specified - BoolQueryBuilder boolQuery = new BoolQueryBuilder(); - for (String fieldName : inferenceFields) { - boolQuery.should(createSemanticQuery(fieldName, queryValue, indexInformation)); - } - boolQuery.minimumShouldMatch(minimumShouldMatch); - boolQuery.boost(originalQuery.boost()); - boolQuery.queryName(originalQuery.queryName()); - return boolQuery; - } else { - // Use DisMax for default behavior with tie_breaker - DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); - for (String fieldName : inferenceFields) { - disMaxQuery.add(createSemanticQuery(fieldName, queryValue, indexInformation)); - } - // Apply tie_breaker - use explicit value or fall back to type's default - Float tieBreaker = originalQuery.tieBreaker(); - disMaxQuery.tieBreaker(Objects.requireNonNullElseGet(tieBreaker, () -> originalQuery.type().tieBreaker())); - disMaxQuery.boost(originalQuery.boost()); - disMaxQuery.queryName(originalQuery.queryName()); - return disMaxQuery; + DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); + for (String fieldName : 
inferenceFields) { + disMaxQuery.add(createSemanticQuery(fieldName, queryValue, indexInformation)); } + + // Apply tie_breaker - use explicit value or fall back to type's default + Float tieBreaker = originalQuery.tieBreaker(); + disMaxQuery.tieBreaker(Objects.requireNonNullElseGet(tieBreaker, () -> originalQuery.type().tieBreaker())); + disMaxQuery.boost(originalQuery.boost()); + disMaxQuery.queryName(originalQuery.queryName()); + return disMaxQuery; } /** * Builds a combined query for both inference and non-inference fields. - * Uses BoolQuery when minimum_should_match is specified, otherwise uses DisMax for BEST_FIELDS - * and BoolQuery for MOST_FIELDS to match core multi_match behavior. */ private QueryBuilder buildCombinedQuery( MultiMatchQueryBuilder originalQuery, InferenceIndexInformationForField inferenceInfo, String queryValue ) { - String minimumShouldMatch = originalQuery.minimumShouldMatch(); - boolean isMostFields = originalQuery.type() == MultiMatchQueryBuilder.Type.MOST_FIELDS; - - if (minimumShouldMatch != null || isMostFields) { - // Use BoolQuery when minimum_should_match is specified or for MOST_FIELDS - BoolQueryBuilder boolQuery = new BoolQueryBuilder(); - - // Add semantic queries - for (String fieldName : inferenceInfo.getAllInferenceFields()) { - Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); - if (semanticIndices.isEmpty() == false) { - boolQuery.should( - createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName)) - ); - } - } - - // Add non-inference queries - addNonInferenceQueries(boolQuery::should, originalQuery, inferenceInfo); - - // Set minimum_should_match - default to "1" for MOST_FIELDS when not specified - boolQuery.minimumShouldMatch(Objects.requireNonNullElse(minimumShouldMatch, "1")); - boolQuery.boost(originalQuery.boost()); - boolQuery.queryName(originalQuery.queryName()); - return boolQuery; - } else { - // Use DisMaxQuery for BEST_FIELDS 
without minimum_should_match - DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); - - // Add semantic queries - for (String fieldName : inferenceInfo.getAllInferenceFields()) { - Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); - if (semanticIndices.isEmpty() == false) { - disMaxQuery.add( - createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName)) - ); - } + DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery(); + + // Add semantic queries + for (String fieldName : inferenceInfo.getAllInferenceFields()) { + Set semanticIndices = inferenceInfo.getInferenceIndicesForField(fieldName); + if (semanticIndices.isEmpty() == false) { + disMaxQuery.add( + createSemanticSubQuery(semanticIndices, fieldName, queryValue).boost(inferenceInfo.getFieldBoost(fieldName)) + ); } + } - // Add non-inference queries - addNonInferenceQueries(disMaxQuery::add, originalQuery, inferenceInfo); + // Add non-inference queries + addNonInferenceQueries(disMaxQuery::add, originalQuery, inferenceInfo); - Float tieBreaker = originalQuery.tieBreaker(); - disMaxQuery.tieBreaker(Objects.requireNonNullElseGet(tieBreaker, () -> originalQuery.type().tieBreaker())); - disMaxQuery.boost(originalQuery.boost()); - disMaxQuery.queryName(originalQuery.queryName()); - return disMaxQuery; - } + // Apply tie_breaker - use explicit value or fall back to type's default + Float tieBreaker = originalQuery.tieBreaker(); + disMaxQuery.tieBreaker(Objects.requireNonNullElseGet(tieBreaker, () -> originalQuery.type().tieBreaker())); + disMaxQuery.boost(originalQuery.boost()); + disMaxQuery.queryName(originalQuery.queryName()); + return disMaxQuery; } private void addNonInferenceQueries( From 8b45313ba4c7ff24f02ff0d79cd5703c4600bbcd Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 21 Aug 2025 02:25:51 -0400 Subject: [PATCH 45/46] make decisions to resolve wildcard any fields for only multi_match --- 
...SemanticMultiMatchQueryRewriteInterceptor.java | 5 +++++ .../queries/SemanticQueryRewriteInterceptor.java | 15 +++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 414bf07af4ad5..1c57c56fc57b3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -103,6 +103,11 @@ public boolean shouldResolveInferenceFieldWildcards(QueryBuilder queryBuilder) { return multiMatchQuery.resolveInferenceFieldWildcards(); } + @Override + protected boolean shouldUseDefaultFields() { + return true; + } + private QueryBuilder buildMultiFieldSemanticQuery( MultiMatchQueryBuilder originalQuery, Set inferenceFields, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 294ae3ecbe019..347546d1f5732 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -100,6 +100,17 @@ protected Map getFieldsWithWeights(QueryBuilder queryBuilder) { */ protected abstract boolean shouldResolveInferenceFieldWildcards(QueryBuilder queryBuilder); + /** + * Determines if this query type should use default fields when no fields are specified. + * This is typically only needed for multi_match queries. 
+ * Default implementation returns false for most query types. + * + * @return true if default fields should be used when no fields are specified, false otherwise. + */ + protected boolean shouldUseDefaultFields() { + return false; + } + /** * Builds the inference query * @@ -166,9 +177,9 @@ private InferenceIndexInformationForField resolveIndicesForFields( Map indexInferenceFields = new HashMap<>(); Map indexInferenceMetadata = indexMetadata.getInferenceFields(); - // Handle default fields per index when no fields are specified + // Handle default fields per index when no fields are specified (only for multi_match queries) Map fieldsToProcess = fieldsWithWeights; - if (fieldsToProcess.isEmpty()) { + if (fieldsToProcess.isEmpty() && shouldUseDefaultFields()) { Settings settings = indexMetadata.getSettings(); List defaultFields = settings.getAsList(DEFAULT_FIELD_SETTING.getKey(), DEFAULT_FIELD_SETTING.getDefault(settings)); fieldsToProcess = QueryParserHelper.parseFieldsAndWeights(defaultFields); From 658f171baa8f146d7b26e26ab3d9789c27193df7 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 21 Aug 2025 14:38:26 -0400 Subject: [PATCH 46/46] refactor to push wildcard related work into multi_match --- ...nticMultiMatchQueryRewriteInterceptor.java | 123 ++++++++++++++++++ .../SemanticQueryRewriteInterceptor.java | 60 ++------- 2 files changed, 136 insertions(+), 47 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java index 1c57c56fc57b3..6e06f9af82ae3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMultiMatchQueryRewriteInterceptor.java @@ -7,17 +7,28 @@ 
package org.elasticsearch.xpack.inference.queries; +import org.elasticsearch.action.ResolvedIndices; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; +import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.search.QueryParserHelper; +import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; + +import static org.elasticsearch.index.IndexSettings.DEFAULT_FIELD_SETTING; public class SemanticMultiMatchQueryRewriteInterceptor extends SemanticQueryRewriteInterceptor { @@ -108,6 +119,118 @@ protected boolean shouldUseDefaultFields() { return true; } + @Override + protected InferenceIndexInformationForField resolveIndicesForFields( + QueryBuilder queryBuilder, + ResolvedIndices resolvedIndices, + boolean resolveInferenceFieldWildcards + ) { + assert (queryBuilder instanceof MultiMatchQueryBuilder); + MultiMatchQueryBuilder multiMatchQuery = (MultiMatchQueryBuilder) queryBuilder; + + // If wildcard resolution is disabled, use the simple parent class implementation + if (resolveInferenceFieldWildcards == false || multiMatchQuery.resolveInferenceFieldWildcards() == false) { + return super.resolveIndicesForFields(queryBuilder, resolvedIndices, resolveInferenceFieldWildcards); + } + + return resolveIndicesForFieldsWithWildcards(queryBuilder, resolvedIndices); + } + + private InferenceIndexInformationForField resolveIndicesForFieldsWithWildcards( + QueryBuilder queryBuilder, + ResolvedIndices resolvedIndices + ) { + 
Map fieldsWithWeights = getFieldsWithWeights(queryBuilder); + Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); + + // Global wildcard resolution for inference fields + Map globalInferenceFieldBoosts = new HashMap<>(); + + // Get all unique inference fields across all indices + Set allInferenceFields = indexMetadataCollection.stream() + .flatMap(idx -> idx.getInferenceFields().keySet().stream()) + .collect(Collectors.toSet()); + + // Calculate boost for each inference field based on matching patterns + for (String inferenceField : allInferenceFields) { + for (Map.Entry entry : fieldsWithWeights.entrySet()) { + String pattern = entry.getKey(); + Float boost = entry.getValue(); + + if (Regex.isMatchAllPattern(pattern) + || (Regex.isSimpleMatchPattern(pattern) && Regex.simpleMatch(pattern, inferenceField)) + || pattern.equals(inferenceField)) { + addToFieldBoostsMap(globalInferenceFieldBoosts, inferenceField, boost); + } + } + } + + // Per-index processing using pre-calculated global boosts + Map> inferenceFieldsPerIndex = new HashMap<>(); + Map> nonInferenceFieldsPerIndex = new HashMap<>(); + Map allFieldBoosts = new HashMap<>(globalInferenceFieldBoosts); + + for (IndexMetadata indexMetadata : indexMetadataCollection) { + String indexName = indexMetadata.getIndex().getName(); + Map indexInferenceFields = new HashMap<>(); + Map indexInferenceMetadata = indexMetadata.getInferenceFields(); + + // Handle default fields per index when no fields are specified + Map fieldsToProcess = fieldsWithWeights; + if (fieldsToProcess.isEmpty() && shouldUseDefaultFields()) { + Settings settings = indexMetadata.getSettings(); + List defaultFields = settings.getAsList(DEFAULT_FIELD_SETTING.getKey(), DEFAULT_FIELD_SETTING.getDefault(settings)); + fieldsToProcess = QueryParserHelper.parseFieldsAndWeights(defaultFields); + } + + // Collect resolved inference fields for this index + Set resolvedInferenceFields = new HashSet<>(); + + // Add 
wildcard-resolved inference fields that exist in this index + for (String inferenceField : globalInferenceFieldBoosts.keySet()) { + if (indexInferenceMetadata.containsKey(inferenceField)) { + indexInferenceFields.put(inferenceField, indexInferenceMetadata.get(inferenceField)); + resolvedInferenceFields.add(inferenceField); + } + } + + // Always handle explicit inference fields (both wildcard and non-wildcard cases) + for (Map.Entry entry : fieldsToProcess.entrySet()) { + String field = entry.getKey(); + Float boost = entry.getValue(); + + if (indexInferenceMetadata.containsKey(field)) { + indexInferenceFields.put(field, indexInferenceMetadata.get(field)); + resolvedInferenceFields.add(field); + addToFieldBoostsMap(allFieldBoosts, field, boost); + } + } + + // Non-inference fields: all patterns minus resolved inference fields + Set indexNonInferenceFields = new HashSet<>(fieldsToProcess.keySet()); + indexNonInferenceFields.removeAll(resolvedInferenceFields); + + // Store boosts for non-inference field patterns + for (String nonInferenceField : indexNonInferenceFields) { + addToFieldBoostsMap(allFieldBoosts, nonInferenceField, fieldsToProcess.get(nonInferenceField)); + } + + if (indexInferenceFields.isEmpty() == false) { + inferenceFieldsPerIndex.put(indexName, indexInferenceFields); + } + + if (indexNonInferenceFields.isEmpty() == false) { + nonInferenceFieldsPerIndex.put(indexName, indexNonInferenceFields); + } + } + + return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, allFieldBoosts); + } + + private static void addToFieldBoostsMap(Map fieldBoosts, String field, Float boost) { + fieldBoosts.compute(field, (k, v) -> v == null ? 
boost : v * boost); + } + private QueryBuilder buildMultiFieldSemanticQuery( MultiMatchQueryBuilder originalQuery, Set inferenceFields, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 347546d1f5732..b335f85e9897d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -10,7 +10,6 @@ import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; -import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.mapper.IndexFieldMapper; import org.elasticsearch.index.query.AbstractQueryBuilder; @@ -136,7 +135,7 @@ private static void addToFieldBoostsMap(Map fieldBoosts, String f fieldBoosts.compute(field, (k, v) -> v == null ? 
boost : v * boost); } - private InferenceIndexInformationForField resolveIndicesForFields( + protected InferenceIndexInformationForField resolveIndicesForFields( QueryBuilder queryBuilder, ResolvedIndices resolvedIndices, boolean resolveInferenceFieldWildcards @@ -144,33 +143,10 @@ private InferenceIndexInformationForField resolveIndicesForFields( Map fieldsWithWeights = getFieldsWithWeights(queryBuilder); Collection indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values(); - // STEP 1: Global wildcard resolution for inference fields - Map globalInferenceFieldBoosts = new HashMap<>(); - if (resolveInferenceFieldWildcards) { - // Get all unique inference fields across all indices - Set allInferenceFields = indexMetadataCollection.stream() - .flatMap(idx -> idx.getInferenceFields().keySet().stream()) - .collect(Collectors.toSet()); - - // Calculate boost for each inference field based on matching patterns - for (String inferenceField : allInferenceFields) { - for (Map.Entry entry : fieldsWithWeights.entrySet()) { - String pattern = entry.getKey(); - Float boost = entry.getValue(); - - if (Regex.isMatchAllPattern(pattern) - || (Regex.isSimpleMatchPattern(pattern) && Regex.simpleMatch(pattern, inferenceField)) - || pattern.equals(inferenceField)) { - addToFieldBoostsMap(globalInferenceFieldBoosts, inferenceField, boost); - } - } - } - } - - // STEP 2: Per-index processing using pre-calculated global boosts + // Simple implementation: only handle explicit inference fields (no wildcards) Map> inferenceFieldsPerIndex = new HashMap<>(); Map> nonInferenceFieldsPerIndex = new HashMap<>(); - Map allFieldBoosts = new HashMap<>(globalInferenceFieldBoosts); + Map allFieldBoosts = new HashMap<>(); for (IndexMetadata indexMetadata : indexMetadataCollection) { String indexName = indexMetadata.getIndex().getName(); @@ -188,29 +164,19 @@ private InferenceIndexInformationForField resolveIndicesForFields( // Collect resolved inference fields for this index 
Set resolvedInferenceFields = new HashSet<>(); - if (resolveInferenceFieldWildcards) { - // Add inference fields that exist in this index (using pre-calculated boosts) - for (String inferenceField : globalInferenceFieldBoosts.keySet()) { - if (indexInferenceMetadata.containsKey(inferenceField)) { - indexInferenceFields.put(inferenceField, indexInferenceMetadata.get(inferenceField)); - resolvedInferenceFields.add(inferenceField); - } - } - } else { - // Handle explicit inference fields (non-wildcard) - for (Map.Entry entry : fieldsToProcess.entrySet()) { - String field = entry.getKey(); - Float boost = entry.getValue(); - - if (indexInferenceMetadata.containsKey(field)) { - indexInferenceFields.put(field, indexInferenceMetadata.get(field)); - resolvedInferenceFields.add(field); - addToFieldBoostsMap(allFieldBoosts, field, boost); - } + // Handle explicit inference fields only + for (Map.Entry entry : fieldsToProcess.entrySet()) { + String field = entry.getKey(); + Float boost = entry.getValue(); + + if (indexInferenceMetadata.containsKey(field)) { + indexInferenceFields.put(field, indexInferenceMetadata.get(field)); + resolvedInferenceFields.add(field); + addToFieldBoostsMap(allFieldBoosts, field, boost); } } - // Non-inference fields: all patterns minus resolved inference fields (simple approach like MultiFieldsInnerRetrieverUtils) + // Non-inference fields Set indexNonInferenceFields = new HashSet<>(fieldsToProcess.keySet()); indexNonInferenceFields.removeAll(resolvedInferenceFields);