From f4242d3c68d647e10fe07787afcdb0cef3a2f96e Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 28 Aug 2025 01:33:45 +0300 Subject: [PATCH] ESQL: Fix wrong marking of a field as unmapped when indices shared the same mapping (#133298) A follow up fixing https://github.com/elastic/elasticsearch/pull/119886/files/e32a4463e8a4015c7be17cb984a80e79ed1e45c5#r2287545802. The original issue, as mentioned in the comment, was triggered by multiple indices sharing the same mapping. This PR fixes it by reading the FieldCapabilitiesResponse directly instead. --- .../xpack/esql/session/IndexResolver.java | 47 ++++++++++++++----- .../xpack/esql/analysis/AnalyzerTests.java | 29 +++++++++++- 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java index 16401574b0f58..945fe4a8dc4b3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java @@ -92,13 +92,15 @@ public void resolveAsMergedMapping( // public for testing only public static IndexResolution mergedMappings(String indexPattern, FieldCapabilitiesResponse fieldCapsResponse) { - var numberOfIndices = fieldCapsResponse.getIndexResponses().size(); assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.SEARCH_COORDINATION); // too expensive to run this on a transport worker + var numberOfIndices = fieldCapsResponse.getIndexResponses().size(); if (fieldCapsResponse.getIndexResponses().isEmpty()) { return IndexResolution.notFound(indexPattern); } - Map> fieldsCaps = collectFieldCaps(fieldCapsResponse); + var collectedFieldCaps = collectFieldCaps(fieldCapsResponse); + Map fieldsCaps = collectedFieldCaps.fieldsCaps; + Map indexMappingHashDuplicates = collectedFieldCaps.indexMappingHashDuplicates; // Build hierarchical fields - it's easier to do it in sorted order so the object fields come first. // TODO flattened is simpler - could we get away with that? @@ -130,7 +132,8 @@ public static IndexResolution mergedMappings(String indexPattern, FieldCapabilit } // TODO we're careful to make isAlias match IndexResolver - but do we use it? - List fcs = fieldsCaps.get(fullName); + var fieldCap = fieldsCaps.get(fullName); + List fcs = fieldCap.fieldCapabilities; EsField field = firstUnsupportedParent == null ? createField(fieldCapsResponse, name, fullName, fcs, isAlias) : new UnsupportedEsField( @@ -140,8 +143,7 @@ public static IndexResolution mergedMappings(String indexPattern, FieldCapabilit new HashMap<>() ); fields.put(name, field); - - var isPartiallyUnmapped = fcs.size() < numberOfIndices; + var isPartiallyUnmapped = fcs.size() + indexMappingHashDuplicates.getOrDefault(fieldCap.indexMappingHash, 0) < numberOfIndices; if (isPartiallyUnmapped) { partiallyUnmappedFields.add(fullName); } @@ -162,11 +164,20 @@ public static IndexResolution mergedMappings(String indexPattern, FieldCapabilit return IndexResolution.valid(index, concreteIndices.keySet(), failures); } - private static Map> collectFieldCaps(FieldCapabilitiesResponse fieldCapsResponse) { - Set seenHashes = new HashSet<>(); - Map> fieldsCaps = new HashMap<>(); + private record IndexFieldCapabilitiesWithSourceHash(List fieldCapabilities, String indexMappingHash) {} + + private record CollectedFieldCaps( + Map fieldsCaps, + // The map won't contain entries without duplicates, i.e., it's number of occurrences - 1. + Map indexMappingHashDuplicates + ) {} + + private static CollectedFieldCaps collectFieldCaps(FieldCapabilitiesResponse fieldCapsResponse) { + Map indexMappingHashToDuplicateCount = new HashMap<>(); + Map fieldsCaps = new HashMap<>(); + for (FieldCapabilitiesIndexResponse response : fieldCapsResponse.getIndexResponses()) { - if (seenHashes.add(response.getIndexMappingHash()) == false) { + if (indexMappingHashToDuplicateCount.compute(response.getIndexMappingHash(), (k, v) -> v == null ? 1 : v + 1) > 1) { continue; } for (IndexFieldCapabilities fc : response.get().values()) { @@ -174,11 +185,25 @@ private static Map> collectFieldCaps(FieldC // ESQL builds the metadata fields if they are asked for without using the resolution. continue; } - List all = fieldsCaps.computeIfAbsent(fc.name(), (_key) -> new ArrayList<>()); + List all = fieldsCaps.computeIfAbsent( + fc.name(), + (_key) -> new IndexFieldCapabilitiesWithSourceHash(new ArrayList<>(), response.getIndexMappingHash()) + ).fieldCapabilities; all.add(fc); } } - return fieldsCaps; + + var iterator = indexMappingHashToDuplicateCount.entrySet().iterator(); + while (iterator.hasNext()) { + var next = iterator.next(); + if (next.getValue() <= 1) { + iterator.remove(); + } else { + next.setValue(next.getValue() - 1); + } + } + + return new CollectedFieldCaps(fieldsCaps, indexMappingHashToDuplicateCount); } private static EsField createField( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index bf44c47ea9c4d..4ef6f36522b5d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.action.fieldcaps.IndexFieldCapabilities; import org.elasticsearch.action.fieldcaps.IndexFieldCapabilitiesBuilder; +import org.elasticsearch.common.hash.MessageDigests; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexMode; @@ -89,6 +90,7 @@ import org.elasticsearch.xpack.esql.session.IndexResolver; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.time.Period; import java.util.ArrayList; import java.util.List; @@ -3017,6 +3019,27 @@ public void testResolveInsist_multiIndexFieldPartiallyExistsWithMultiTypesNoKeyw assertThat(attr.unresolvedMessage(), is(expected)); } + public void testResolveInsist_multiIndexSameMapping_fieldIsMapped() { + assumeTrue("Requires UNMAPPED FIELDS", EsqlCapabilities.Cap.UNMAPPED_FIELDS.isEnabled()); + + IndexResolution resolution = IndexResolver.mergedMappings( + "foo, bar", + new FieldCapabilitiesResponse( + List.of( + fieldCapabilitiesIndexResponse("foo", messageResponseMap("long")), + fieldCapabilitiesIndexResponse("bar", messageResponseMap("long")) + ), + List.of() + ) + ); + var plan = analyze("FROM foo, bar | INSIST_🐔 message", analyzer(resolution, TEST_VERIFIER)); + var limit = as(plan, Limit.class); + var insist = as(limit.child(), Insist.class); + var attribute = (FieldAttribute) EsqlTestUtils.singleValue(insist.output()); + assertThat(attribute.name(), is("message")); + assertThat(attribute.dataType(), is(DataType.LONG)); + } + public void testResolveInsist_multiIndexFieldPartiallyExistsWithMultiTypesWithKeyword_createsAnInvalidMappedField() { assumeTrue("Requires UNMAPPED FIELDS", EsqlCapabilities.Cap.UNMAPPED_FIELDS.isEnabled()); @@ -3472,7 +3495,11 @@ private static FieldCapabilitiesIndexResponse fieldCapabilitiesIndexResponse( String indexName, Map fields ) { - return new FieldCapabilitiesIndexResponse(indexName, indexName, fields, false, IndexMode.STANDARD); + String indexMappingHash = new String( + MessageDigests.sha256().digest(fields.toString().getBytes(StandardCharsets.UTF_8)), + StandardCharsets.UTF_8 + ); + return new FieldCapabilitiesIndexResponse(indexName, indexMappingHash, fields, false, IndexMode.STANDARD); } private static Map messageResponseMap(String date) {