Skip to content

Commit 13cd7c0

Browse files
remove duplicate boosting by creating global boosting fields
1 parent c1f284a commit 13cd7c0

File tree

1 file changed

+53
-25
lines changed

1 file changed

+53
-25
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -133,51 +133,79 @@ private InferenceIndexInformationForField resolveIndicesForFields(
133133
Map<String, Float> fieldsWithWeights = getFieldsWithWeights(queryBuilder);
134134
Collection<IndexMetadata> indexMetadataCollection = resolvedIndices.getConcreteLocalIndicesMetadata().values();
135135

136+
// STEP 1: Global wildcard resolution for inference fields
137+
Map<String, Float> globalInferenceFieldBoosts = new HashMap<>();
138+
if (resolveInferenceFieldWildcards) {
139+
// Get all unique inference fields across all indices
140+
Set<String> allInferenceFields = indexMetadataCollection.stream()
141+
.flatMap(idx -> idx.getInferenceFields().keySet().stream())
142+
.collect(Collectors.toSet());
143+
144+
// Calculate boost for each inference field based on matching patterns
145+
for (String inferenceField : allInferenceFields) {
146+
for (Map.Entry<String, Float> entry : fieldsWithWeights.entrySet()) {
147+
String pattern = entry.getKey();
148+
Float boost = entry.getValue();
149+
150+
if (Regex.isMatchAllPattern(pattern)
151+
|| (Regex.isSimpleMatchPattern(pattern) && Regex.simpleMatch(pattern, inferenceField))
152+
|| pattern.equals(inferenceField)) {
153+
addToFieldBoostsMap(globalInferenceFieldBoosts, inferenceField, boost);
154+
}
155+
}
156+
}
157+
}
158+
159+
// STEP 2: Per-index processing using pre-calculated global boosts
136160
Map<String, Map<String, InferenceFieldMetadata>> inferenceFieldsPerIndex = new HashMap<>();
137161
Map<String, Set<String>> nonInferenceFieldsPerIndex = new HashMap<>();
138-
Map<String, Float> fieldBoosts = new HashMap<>();
162+
Map<String, Float> allFieldBoosts = new HashMap<>(globalInferenceFieldBoosts);
139163

140164
for (IndexMetadata indexMetadata : indexMetadataCollection) {
141165
String indexName = indexMetadata.getIndex().getName();
142166
Map<String, InferenceFieldMetadata> indexInferenceFields = new HashMap<>();
143167
Map<String, InferenceFieldMetadata> indexInferenceMetadata = indexMetadata.getInferenceFields();
144168

145-
// Handle default fields per index when no fields are specified - following RRF pattern
169+
// Handle default fields per index when no fields are specified
146170
Map<String, Float> fieldsToProcess = fieldsWithWeights;
147171
if (fieldsToProcess.isEmpty()) {
148172
Settings settings = indexMetadata.getSettings();
149173
List<String> defaultFields = settings.getAsList(DEFAULT_FIELD_SETTING.getKey(), DEFAULT_FIELD_SETTING.getDefault(settings));
150174
fieldsToProcess = QueryParserHelper.parseFieldsAndWeights(defaultFields);
151175
}
152176

153-
// Resolve wildcards for inference fields and multiply boosts when field matches multiple patterns
154-
for (Map.Entry<String, Float> entry : fieldsToProcess.entrySet()) {
155-
String field = entry.getKey();
156-
Float boost = entry.getValue();
157-
158-
if (resolveInferenceFieldWildcards && Regex.isMatchAllPattern(field)) {
159-
indexInferenceMetadata.keySet().forEach(f -> {
160-
indexInferenceFields.put(f, indexInferenceMetadata.get(f));
161-
addToFieldBoostsMap(fieldBoosts, f, boost);
162-
});
163-
} else if (resolveInferenceFieldWildcards && Regex.isSimpleMatchPattern(field)) {
164-
indexInferenceMetadata.keySet().stream().filter(f -> Regex.simpleMatch(field, f)).forEach(f -> {
165-
indexInferenceFields.put(f, indexInferenceMetadata.get(f));
166-
addToFieldBoostsMap(fieldBoosts, f, boost);
167-
});
168-
} else if (indexInferenceMetadata.containsKey(field)) {
169-
indexInferenceFields.put(field, indexInferenceMetadata.get(field));
170-
addToFieldBoostsMap(fieldBoosts, field, boost);
177+
// Collect resolved inference fields for this index
178+
Set<String> resolvedInferenceFields = new HashSet<>();
179+
180+
if (resolveInferenceFieldWildcards) {
181+
// Add inference fields that exist in this index (using pre-calculated boosts)
182+
for (String inferenceField : globalInferenceFieldBoosts.keySet()) {
183+
if (indexInferenceMetadata.containsKey(inferenceField)) {
184+
indexInferenceFields.put(inferenceField, indexInferenceMetadata.get(inferenceField));
185+
resolvedInferenceFields.add(inferenceField);
186+
}
187+
}
188+
} else {
189+
// Handle explicit inference fields (non-wildcard)
190+
for (Map.Entry<String, Float> entry : fieldsToProcess.entrySet()) {
191+
String field = entry.getKey();
192+
Float boost = entry.getValue();
193+
194+
if (indexInferenceMetadata.containsKey(field)) {
195+
indexInferenceFields.put(field, indexInferenceMetadata.get(field));
196+
resolvedInferenceFields.add(field);
197+
addToFieldBoostsMap(allFieldBoosts, field, boost);
198+
}
171199
}
172200
}
173201

174-
// Non-inference fields: original fields minus resolved inference fields
202+
// Non-inference fields: all patterns minus resolved inference fields (simple approach like MultiFieldsInnerRetrieverUtils)
175203
Set<String> indexNonInferenceFields = new HashSet<>(fieldsToProcess.keySet());
176-
indexNonInferenceFields.removeAll(indexInferenceFields.keySet());
204+
indexNonInferenceFields.removeAll(resolvedInferenceFields);
177205

178-
// Store boosts for non-inference fields in global fieldBoosts map
206+
// Store boosts for non-inference field patterns
179207
for (String nonInferenceField : indexNonInferenceFields) {
180-
addToFieldBoostsMap(fieldBoosts, nonInferenceField, fieldsToProcess.get(nonInferenceField));
208+
addToFieldBoostsMap(allFieldBoosts, nonInferenceField, fieldsToProcess.get(nonInferenceField));
181209
}
182210

183211
if (indexInferenceFields.isEmpty() == false) {
@@ -189,7 +217,7 @@ private InferenceIndexInformationForField resolveIndicesForFields(
189217
}
190218
}
191219

192-
return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, fieldBoosts);
220+
return new InferenceIndexInformationForField(inferenceFieldsPerIndex, nonInferenceFieldsPerIndex, allFieldBoosts);
193221
}
194222

195223
protected QueryBuilder createSubQueryForIndices(Collection<String> indices, QueryBuilder queryBuilder) {

0 commit comments

Comments
 (0)