1010import org .elasticsearch .common .util .Maps ;
1111import org .elasticsearch .index .IndexMode ;
1212import org .elasticsearch .xpack .esql .core .expression .Alias ;
13+ import org .elasticsearch .xpack .esql .core .expression .Attribute ;
1314import org .elasticsearch .xpack .esql .core .expression .AttributeSet ;
1415import org .elasticsearch .xpack .esql .core .expression .FieldAttribute ;
1516import org .elasticsearch .xpack .esql .core .expression .Literal ;
1617import org .elasticsearch .xpack .esql .core .expression .NamedExpression ;
1718import org .elasticsearch .xpack .esql .core .type .DataType ;
1819import org .elasticsearch .xpack .esql .core .type .PotentiallyUnmappedKeywordEsField ;
1920import org .elasticsearch .xpack .esql .optimizer .LocalLogicalOptimizerContext ;
20- import org .elasticsearch .xpack .esql .plan .logical .Aggregate ;
2121import org .elasticsearch .xpack .esql .plan .logical .EsRelation ;
2222import org .elasticsearch .xpack .esql .plan .logical .Eval ;
2323import org .elasticsearch .xpack .esql .plan .logical .Filter ;
2626import org .elasticsearch .xpack .esql .plan .logical .Project ;
2727import org .elasticsearch .xpack .esql .plan .logical .RegexExtract ;
2828import org .elasticsearch .xpack .esql .plan .logical .TopN ;
29- import org .elasticsearch .xpack .esql .plan .logical .join .Join ;
30- import org .elasticsearch .xpack .esql .plan .logical .local .LocalRelation ;
3129import org .elasticsearch .xpack .esql .rule .ParameterizedRule ;
32- import org .elasticsearch .xpack .esql .stats .SearchStats ;
3330
3431import java .util .ArrayList ;
3532import java .util .List ;
3633import java .util .Map ;
34+ import java .util .function .Predicate ;
3735
3836/**
3937 * Look for any fields used in the plan that are missing locally and replace them with null.
@@ -45,82 +43,83 @@ public class ReplaceMissingFieldWithNull extends ParameterizedRule<LogicalPlan,
4543 public LogicalPlan apply (LogicalPlan plan , LocalLogicalOptimizerContext localLogicalOptimizerContext ) {
4644 var lookupFieldsBuilder = AttributeSet .builder ();
4745 plan .forEachUp (EsRelation .class , esRelation -> {
46+ // Looking only for indices in LOOKUP mode is correct: during parsing, we assign the expected mode and even if a lookup index
47+ // is used in the FROM command, it will not be marked with LOOKUP mode there - but STANDARD.
48+ // It seems like we could instead just look for JOINs and walk down their right hand side to find lookup fields - but this does
49+ // not work as this rule also gets called just on the right hand side of a JOIN, which means that we don't always know that
50+ // we're inside the right (or left) branch of a JOIN node. (See PlannerUtils.localPlan - this looks for FragmentExecs and
51+ // performs local logical optimization of the fragments; the right hand side of a LookupJoinExec can be a FragmentExec.)
4852 if (esRelation .indexMode () == IndexMode .LOOKUP ) {
4953 lookupFieldsBuilder .addAll (esRelation .output ());
5054 }
5155 });
56+ AttributeSet lookupFields = lookupFieldsBuilder .build ();
5257
53- return plan .transformUp (p -> missingToNull (p , localLogicalOptimizerContext .searchStats (), lookupFieldsBuilder .build ()));
54- }
55-
56- private LogicalPlan missingToNull (LogicalPlan plan , SearchStats stats , AttributeSet lookupFields ) {
57- if (plan instanceof EsRelation || plan instanceof LocalRelation ) {
58- return plan ;
59- }
58+ // Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead.
59+ // Also retain fields from lookup indices because we do not have stats for these.
60+ Predicate <FieldAttribute > shouldBeRetained = f -> f .field () instanceof PotentiallyUnmappedKeywordEsField
61+ || (localLogicalOptimizerContext .searchStats ().exists (f .fieldName ()) || lookupFields .contains (f ));
6062
61- if (plan instanceof Aggregate a ) {
62- // don't do anything (for now)
63- return a ;
64- }
65- // keep the aliased name
66- else if (plan instanceof Project project ) {
67- var projections = project .projections ();
68- List <NamedExpression > newProjections = new ArrayList <>(projections .size ());
69- Map <DataType , Alias > nullLiteral = Maps .newLinkedHashMapWithExpectedSize (DataType .types ().size ());
70- AttributeSet joinAttributes = joinAttributes (project );
63+ return plan .transformUp (p -> missingToNull (p , shouldBeRetained ));
64+ }
7165
72- for (NamedExpression projection : projections ) {
73- // Do not use the attribute name, this can deviate from the field name for union types.
74- if (projection instanceof FieldAttribute f
75- && stats .exists (f .fieldName ()) == false
76- && joinAttributes .contains (f ) == false
77- && f .field () instanceof PotentiallyUnmappedKeywordEsField == false ) {
78- // TODO: Should do a searchStats lookup for join attributes instead of just ignoring them here
79- // See TransportSearchShardsAction
66+ private LogicalPlan missingToNull (LogicalPlan plan , Predicate <FieldAttribute > shouldBeRetained ) {
67+ if (plan instanceof EsRelation relation ) {
68+ // Remove missing fields from the EsRelation because this is not where we will obtain them from; replace them by an Eval right
69+ // after, instead. This allows us to safely re-use the attribute ids of the corresponding FieldAttributes.
70+ // This means that an EsRelation[field1, field2, field3] where field1 and field 3 are missing will be replaced by
71+ // Project[field1, field2, field3] <- keeps the ordering intact
72+ // \_Eval[field1 = null, field3 = null]
73+ // \_EsRelation[field2]
74+ List <Attribute > relationOutput = relation .output ();
75+ Map <DataType , Alias > nullLiterals = Maps .newLinkedHashMapWithExpectedSize (DataType .types ().size ());
76+ List <NamedExpression > newProjections = new ArrayList <>(relationOutput .size ());
77+ for (int i = 0 , size = relationOutput .size (); i < size ; i ++) {
78+ Attribute attr = relationOutput .get (i );
79+ NamedExpression projection ;
80+ if (attr instanceof FieldAttribute f && (shouldBeRetained .test (f ) == false )) {
8081 DataType dt = f .dataType ();
81- Alias nullAlias = nullLiteral .get (f . dataType () );
82+ Alias nullAlias = nullLiterals .get (dt );
8283 // save the first field as null (per datatype)
8384 if (nullAlias == null ) {
85+ // Keep the same id so downstream query plans don't need updating
86+ // NOTE: THIS IS BRITTLE AND CAN LEAD TO BUGS.
87+ // In case some optimizer rule or so inserts a plan node that requires the field BEFORE the Eval that we're adding
88+ // on top of the EsRelation, this can trigger a field extraction in the physical optimizer phase, causing wrong
89+ // layouts due to a duplicate name id.
90+ // If someone reaches here AGAIN when debugging e.g. ClassCastExceptions NPEs from wrong layouts, we should probably
91+ // give up on this approach and instead insert EvalExecs in InsertFieldExtraction.
8492 Alias alias = new Alias (f .source (), f .name (), Literal .of (f , null ), f .id ());
85- nullLiteral .put (dt , alias );
93+ nullLiterals .put (dt , alias );
8694 projection = alias .toAttribute ();
8795 }
88- // otherwise point to it
96+ // otherwise point to it since this avoids creating field copies
8997 else {
90- // since avoids creating field copies
9198 projection = new Alias (f .source (), f .name (), nullAlias .toAttribute (), f .id ());
9299 }
100+ } else {
101+ projection = attr ;
93102 }
94-
95103 newProjections .add (projection );
96104 }
97- // add the first found field as null
98- if (nullLiteral .size () > 0 ) {
99- plan = new Eval (project .source (), project .child (), new ArrayList <>(nullLiteral .values ()));
100- plan = new Project (project .source (), plan , newProjections );
105+
106+ if (nullLiterals .size () == 0 ) {
107+ return plan ;
101108 }
102- } else if (plan instanceof Eval
109+
110+ Eval eval = new Eval (plan .source (), relation , new ArrayList <>(nullLiterals .values ()));
111+ // This projection is redundant if there's another projection downstream (and no commands depend on the order until we hit it).
112+ return new Project (plan .source (), eval , newProjections );
113+ }
114+
115+ if (plan instanceof Eval
103116 || plan instanceof Filter
104117 || plan instanceof OrderBy
105118 || plan instanceof RegexExtract
106119 || plan instanceof TopN ) {
107- plan = plan .transformExpressionsOnlyUp (
108- FieldAttribute .class ,
109- // Do not use the attribute name, this can deviate from the field name for union types.
110- // Also skip fields from lookup indices because we do not have stats for these.
111- // TODO: We do have stats for lookup indices in case they are being used in the FROM clause; this can be refined.
112- f -> f .field () instanceof PotentiallyUnmappedKeywordEsField || (stats .exists (f .fieldName ()) || lookupFields .contains (f ))
113- ? f
114- : Literal .of (f , null )
115- );
116- }
120+ return plan .transformExpressionsOnlyUp (FieldAttribute .class , f -> shouldBeRetained .test (f ) ? f : Literal .of (f , null ));
121+ }
117122
118123 return plan ;
119124 }
120-
121- private static AttributeSet joinAttributes (Project project ) {
122- var attributesBuilder = AttributeSet .builder ();
123- project .forEachDown (Join .class , j -> j .right ().forEachDown (EsRelation .class , p -> attributesBuilder .addAll (p .output ())));
124- return attributesBuilder .build ();
125- }
126125}
0 commit comments