Skip to content

Commit 4b4ee57

Browse files
authored
Move plan attribute resolution to its own component (elastic#131830)
1 parent 8bd0bef commit 4b4ee57

File tree

3 files changed

+349
-366
lines changed

3 files changed

+349
-366
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 4 additions & 281 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import org.elasticsearch.action.support.SubscribableListener;
1717
import org.elasticsearch.common.collect.Iterators;
1818
import org.elasticsearch.common.lucene.BytesRefs;
19-
import org.elasticsearch.common.regex.Regex;
2019
import org.elasticsearch.compute.data.Block;
2120
import org.elasticsearch.compute.data.BlockUtils;
2221
import org.elasticsearch.compute.data.Page;
@@ -44,23 +43,12 @@
4443
import org.elasticsearch.xpack.esql.analysis.EnrichResolution;
4544
import org.elasticsearch.xpack.esql.analysis.PreAnalyzer;
4645
import org.elasticsearch.xpack.esql.analysis.Verifier;
47-
import org.elasticsearch.xpack.esql.core.expression.Alias;
4846
import org.elasticsearch.xpack.esql.core.expression.Attribute;
49-
import org.elasticsearch.xpack.esql.core.expression.AttributeSet;
50-
import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute;
51-
import org.elasticsearch.xpack.esql.core.expression.Expressions;
5247
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
53-
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
54-
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
5548
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
56-
import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute;
57-
import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar;
5849
import org.elasticsearch.xpack.esql.core.tree.Source;
5950
import org.elasticsearch.xpack.esql.core.type.DataType;
60-
import org.elasticsearch.xpack.esql.core.util.Holder;
6151
import org.elasticsearch.xpack.esql.enrich.EnrichPolicyResolver;
62-
import org.elasticsearch.xpack.esql.enrich.ResolvedEnrichPolicy;
63-
import org.elasticsearch.xpack.esql.expression.UnresolvedNamePattern;
6452
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
6553
import org.elasticsearch.xpack.esql.index.EsIndex;
6654
import org.elasticsearch.xpack.esql.index.IndexResolution;
@@ -74,30 +62,10 @@
7462
import org.elasticsearch.xpack.esql.parser.EsqlParser;
7563
import org.elasticsearch.xpack.esql.parser.QueryParams;
7664
import org.elasticsearch.xpack.esql.plan.IndexPattern;
77-
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
78-
import org.elasticsearch.xpack.esql.plan.logical.Drop;
79-
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
80-
import org.elasticsearch.xpack.esql.plan.logical.Eval;
8165
import org.elasticsearch.xpack.esql.plan.logical.Explain;
82-
import org.elasticsearch.xpack.esql.plan.logical.Filter;
83-
import org.elasticsearch.xpack.esql.plan.logical.Fork;
84-
import org.elasticsearch.xpack.esql.plan.logical.InlineStats;
85-
import org.elasticsearch.xpack.esql.plan.logical.Insist;
86-
import org.elasticsearch.xpack.esql.plan.logical.Keep;
87-
import org.elasticsearch.xpack.esql.plan.logical.Limit;
8866
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
89-
import org.elasticsearch.xpack.esql.plan.logical.MvExpand;
90-
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
91-
import org.elasticsearch.xpack.esql.plan.logical.Project;
92-
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
93-
import org.elasticsearch.xpack.esql.plan.logical.Rename;
94-
import org.elasticsearch.xpack.esql.plan.logical.TopN;
95-
import org.elasticsearch.xpack.esql.plan.logical.UnresolvedRelation;
96-
import org.elasticsearch.xpack.esql.plan.logical.inference.Completion;
9767
import org.elasticsearch.xpack.esql.plan.logical.inference.InferencePlan;
9868
import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
99-
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes;
100-
import org.elasticsearch.xpack.esql.plan.logical.join.LookupJoin;
10169
import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;
10270
import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier;
10371
import org.elasticsearch.xpack.esql.plan.physical.EstimatesRowSize;
@@ -113,7 +81,6 @@
11381
import java.util.ArrayList;
11482
import java.util.Collection;
11583
import java.util.HashMap;
116-
import java.util.HashSet;
11784
import java.util.List;
11885
import java.util.Map;
11986
import java.util.Set;
@@ -122,7 +89,6 @@
12289

12390
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
12491
import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;
125-
import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD;
12692
import static org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin.firstSubPlan;
12793

12894
public class EsqlSession {
@@ -777,17 +743,7 @@ private static void analyzeAndMaybeRetry(
777743
}
778744

779745
private static void resolveFieldNames(LogicalPlan parsed, EnrichResolution enrichResolution, ActionListener<PreAnalysisResult> l) {
780-
try {
781-
// we need the match_fields names from enrich policies and THEN, with an updated list of fields, we call field_caps API
782-
var enrichMatchFields = enrichResolution.resolvedEnrichPolicies()
783-
.stream()
784-
.map(ResolvedEnrichPolicy::matchField)
785-
.collect(Collectors.toSet());
786-
// get the field names from the parsed plan combined with the ENRICH match fields from the ENRICH policy
787-
l.onResponse(fieldNames(parsed, enrichMatchFields, new PreAnalysisResult(enrichResolution)));
788-
} catch (Exception ex) {
789-
l.onFailure(ex);
790-
}
746+
ActionListener.completeWith(l, () -> FieldNameUtils.resolveFieldNames(parsed, enrichResolution));
791747
}
792748

793749
private void resolveInferences(
@@ -798,207 +754,6 @@ private void resolveInferences(
798754
inferenceRunner.resolveInferenceIds(inferencePlans, l.map(preAnalysisResult::withInferenceResolution));
799755
}
800756

801-
static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicyMatchFields, PreAnalysisResult result) {
802-
List<LogicalPlan> inlinestats = parsed.collect(InlineStats.class::isInstance);
803-
Set<Aggregate> inlinestatsAggs = new HashSet<>();
804-
for (var i : inlinestats) {
805-
inlinestatsAggs.add(((InlineStats) i).aggregate());
806-
}
807-
808-
if (false == parsed.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs))) {
809-
// no explicit columns selection, for example "from employees"
810-
// also, inlinestats only adds columns to the existent output, its Aggregate shouldn't interfere with potentially using "*"
811-
return result.withFieldNames(IndexResolver.ALL_FIELDS);
812-
}
813-
814-
// TODO: Improve field resolution for FORK - right now we request all fields
815-
if (parsed.anyMatch(p -> p instanceof Fork)) {
816-
return result.withFieldNames(IndexResolver.ALL_FIELDS);
817-
}
818-
819-
Holder<Boolean> projectAll = new Holder<>(false);
820-
parsed.forEachExpressionDown(UnresolvedStar.class, us -> {// explicit "*" fields selection
821-
if (projectAll.get()) {
822-
return;
823-
}
824-
projectAll.set(true);
825-
});
826-
827-
if (projectAll.get()) {
828-
return result.withFieldNames(IndexResolver.ALL_FIELDS);
829-
}
830-
831-
var referencesBuilder = AttributeSet.builder();
832-
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some
833-
// attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the
834-
// wildcards.
835-
//
836-
// E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
837-
// "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for
838-
//
839-
// NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder
840-
// Except in KEEP and DROP.
841-
var keepRefs = AttributeSet.builder();
842-
var dropWildcardRefs = AttributeSet.builder();
843-
// fields required to request for lookup joins to work
844-
var joinRefs = AttributeSet.builder();
845-
// lookup indices where we request "*" because we may require all their fields
846-
Set<String> wildcardJoinIndices = new java.util.HashSet<>();
847-
848-
boolean[] canRemoveAliases = new boolean[] { true };
849-
850-
parsed.forEachDown(p -> {// go over each plan top-down
851-
if (p instanceof RegexExtract re) { // for Grok and Dissect
852-
// keep the inputs needed by Grok/Dissect
853-
referencesBuilder.addAll(re.input().references());
854-
} else if (p instanceof Enrich enrich) {
855-
AttributeSet enrichFieldRefs = Expressions.references(enrich.enrichFields());
856-
AttributeSet.Builder enrichRefs = enrichFieldRefs.combine(enrich.matchField().references()).asBuilder();
857-
// Enrich adds an EmptyAttribute if no match field is specified
858-
// The exact name of the field will be added later as part of enrichPolicyMatchFields Set
859-
enrichRefs.removeIf(attr -> attr instanceof EmptyAttribute);
860-
referencesBuilder.addAll(enrichRefs);
861-
} else if (p instanceof LookupJoin join) {
862-
if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) {
863-
joinRefs.addAll(usingJoinType.columns());
864-
}
865-
if (keepRefs.isEmpty()) {
866-
// No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution
867-
wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern());
868-
} else {
869-
// Keep commands can reference the join columns with names that shadow aliases, so we block their removal
870-
joinRefs.addAll(keepRefs);
871-
}
872-
} else {
873-
referencesBuilder.addAll(p.references());
874-
if (p instanceof UnresolvedRelation ur && ur.indexMode() == IndexMode.TIME_SERIES) {
875-
// METRICS aggs generally rely on @timestamp without the user having to mention it.
876-
referencesBuilder.add(new UnresolvedAttribute(ur.source(), MetadataAttribute.TIMESTAMP_FIELD));
877-
}
878-
// special handling for UnresolvedPattern (which is not an UnresolvedAttribute)
879-
p.forEachExpression(UnresolvedNamePattern.class, up -> {
880-
var ua = new UnresolvedAttribute(up.source(), up.name());
881-
referencesBuilder.add(ua);
882-
if (p instanceof Keep) {
883-
keepRefs.add(ua);
884-
} else if (p instanceof Drop) {
885-
dropWildcardRefs.add(ua);
886-
} else {
887-
throw new IllegalStateException("Only KEEP and DROP should allow wildcards");
888-
}
889-
});
890-
if (p instanceof Keep) {
891-
keepRefs.addAll(p.references());
892-
}
893-
}
894-
895-
// If the current node in the tree is of type JOIN (lookup join, inlinestats) or ENRICH or other type of
896-
// command that we may add in the future which can override already defined Aliases with EVAL
897-
// (for example
898-
//
899-
// from test
900-
// | eval ip = 123
901-
// | enrich ips_policy ON hostname
902-
// | rename ip AS my_ip
903-
//
904-
// and ips_policy enriches the results with the same name ip field),
905-
// these aliases should be kept in the list of fields.
906-
if (canRemoveAliases[0] && p.anyMatch(EsqlSession::couldOverrideAliases)) {
907-
canRemoveAliases[0] = false;
908-
}
909-
if (canRemoveAliases[0]) {
910-
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
911-
// for example "from test | eval x = salary | stats max = max(x) by gender"
912-
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
913-
// also remove other down-the-tree references to the extracted fields from "grok" and "dissect"
914-
AttributeSet planRefs = p.references();
915-
Set<String> fieldNames = planRefs.names();
916-
p.forEachExpressionDown(NamedExpression.class, ne -> {
917-
if ((ne instanceof Alias || ne instanceof ReferenceAttribute) == false) {
918-
return;
919-
}
920-
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
921-
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
922-
if (fieldNames.contains(ne.name())) {
923-
return;
924-
}
925-
referencesBuilder.removeIf(
926-
attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))
927-
);
928-
});
929-
}
930-
});
931-
932-
// Add JOIN ON column references afterward to avoid Alias removal
933-
referencesBuilder.addAll(joinRefs);
934-
// If any JOIN commands need wildcard field-caps calls, persist the index names
935-
if (wildcardJoinIndices.isEmpty() == false) {
936-
result = result.withWildcardJoinIndices(wildcardJoinIndices);
937-
}
938-
939-
// remove valid metadata attributes because they will be filtered out by the IndexResolver anyway
940-
// otherwise, in some edge cases, we will fail to ask for "*" (all fields) instead
941-
referencesBuilder.removeIf(a -> a instanceof MetadataAttribute || MetadataAttribute.isSupported(a.name()));
942-
Set<String> fieldNames = referencesBuilder.build().names();
943-
944-
if (fieldNames.isEmpty() && enrichPolicyMatchFields.isEmpty()) {
945-
// there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index
946-
return result.withFieldNames(IndexResolver.INDEX_METADATA_FIELD);
947-
} else {
948-
fieldNames.addAll(subfields(fieldNames));
949-
fieldNames.addAll(enrichPolicyMatchFields);
950-
fieldNames.addAll(subfields(enrichPolicyMatchFields));
951-
return result.withFieldNames(fieldNames);
952-
}
953-
}
954-
955-
/**
956-
* Indicates whether the given plan gives an exact list of fields that we need to collect from field_caps.
957-
*/
958-
private static boolean shouldCollectReferencedFields(LogicalPlan plan, Set<Aggregate> inlinestatsAggs) {
959-
return plan instanceof Project || (plan instanceof Aggregate agg && inlinestatsAggs.contains(agg) == false);
960-
}
961-
962-
/**
963-
* Could a plan "accidentally" override aliases?
964-
* Examples are JOIN and ENRICH, that _could_ produce fields with the same
965-
* name of an existing alias, based on their index mapping.
966-
* Here we just have to consider commands where this information is not available before index resolution,
967-
* eg. EVAL, GROK, DISSECT can override an alias, but we know it in advance, ie. we don't need to resolve indices to know.
968-
*/
969-
private static boolean couldOverrideAliases(LogicalPlan p) {
970-
return (p instanceof Aggregate
971-
|| p instanceof Completion
972-
|| p instanceof Drop
973-
|| p instanceof Eval
974-
|| p instanceof Filter
975-
|| p instanceof Fork
976-
|| p instanceof InlineStats
977-
|| p instanceof Insist
978-
|| p instanceof Keep
979-
|| p instanceof Limit
980-
|| p instanceof MvExpand
981-
|| p instanceof OrderBy
982-
|| p instanceof Project
983-
|| p instanceof RegexExtract
984-
|| p instanceof Rename
985-
|| p instanceof TopN
986-
|| p instanceof UnresolvedRelation) == false;
987-
}
988-
989-
private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {
990-
boolean isPattern = Regex.isSimpleMatchPattern(attr.name());
991-
if (skipIfPattern && isPattern) {
992-
return false;
993-
}
994-
var name = attr.name();
995-
return isPattern ? Regex.simpleMatch(name, other) : name.equals(other);
996-
}
997-
998-
private static Set<String> subfields(Set<String> names) {
999-
return names.stream().filter(name -> name.endsWith(WILDCARD) == false).map(name -> name + ".*").collect(Collectors.toSet());
1000-
}
1001-
1002757
private PhysicalPlan logicalPlanToPhysicalPlan(LogicalPlan optimizedPlan, EsqlQueryRequest request) {
1003758
PhysicalPlan physicalPlan = optimizedPhysicalPlan(optimizedPlan);
1004759
physicalPlan = physicalPlan.transformUp(FragmentExec.class, f -> {
@@ -1046,27 +801,17 @@ public PhysicalPlan optimizedPhysicalPlan(LogicalPlan optimizedPlan) {
1046801
return plan;
1047802
}
1048803

1049-
record PreAnalysisResult(
804+
public record PreAnalysisResult(
1050805
IndexResolution indices,
1051806
Map<String, IndexResolution> lookupIndices,
1052807
EnrichResolution enrichResolution,
1053808
Set<String> fieldNames,
1054809
Set<String> wildcardJoinIndices,
1055810
InferenceResolution inferenceResolution
1056811
) {
1057-
PreAnalysisResult(EnrichResolution newEnrichResolution) {
1058-
this(null, new HashMap<>(), newEnrichResolution, Set.of(), Set.of(), InferenceResolution.EMPTY);
1059-
}
1060812

1061-
PreAnalysisResult withEnrichResolution(EnrichResolution newEnrichResolution) {
1062-
return new PreAnalysisResult(
1063-
indices(),
1064-
lookupIndices(),
1065-
newEnrichResolution,
1066-
fieldNames(),
1067-
wildcardJoinIndices(),
1068-
inferenceResolution()
1069-
);
813+
public PreAnalysisResult(EnrichResolution enrichResolution, Set<String> fieldNames, Set<String> wildcardJoinIndices) {
814+
this(null, new HashMap<>(), enrichResolution, fieldNames, wildcardJoinIndices, InferenceResolution.EMPTY);
1070815
}
1071816

1072817
PreAnalysisResult withInferenceResolution(InferenceResolution newInferenceResolution) {
@@ -1095,27 +840,5 @@ PreAnalysisResult addLookupIndexResolution(String index, IndexResolution newInde
1095840
lookupIndices.put(index, newIndexResolution);
1096841
return this;
1097842
}
1098-
1099-
PreAnalysisResult withFieldNames(Set<String> newFields) {
1100-
return new PreAnalysisResult(
1101-
indices(),
1102-
lookupIndices(),
1103-
enrichResolution(),
1104-
newFields,
1105-
wildcardJoinIndices(),
1106-
inferenceResolution()
1107-
);
1108-
}
1109-
1110-
public PreAnalysisResult withWildcardJoinIndices(Set<String> wildcardJoinIndices) {
1111-
return new PreAnalysisResult(
1112-
indices(),
1113-
lookupIndices(),
1114-
enrichResolution(),
1115-
fieldNames(),
1116-
wildcardJoinIndices,
1117-
inferenceResolution()
1118-
);
1119-
}
1120843
}
1121844
}

0 commit comments

Comments
 (0)