@@ -42,10 +42,14 @@ public class ReplaceMissingFieldWithNull extends ParameterizedRule<LogicalPlan,
4242 @ Override
4343 public LogicalPlan apply (LogicalPlan plan , LocalLogicalOptimizerContext localLogicalOptimizerContext ) {
4444 // Fields from lookup indices don't need to be present on the node, and our search stats don't include them, anyway. Ignore them.
45- // TODO: this prevents this rule from working when the main index is a lookup index. Unfortunately, this rule can get applied
46- // without knowing that we're in a join's right hand side in PlannerUtils.localPlan, which applies the optimizer to fragments.
4745 AttributeSet lookupFields = new AttributeSet ();
4846 plan .forEachUp (EsRelation .class , esRelation -> {
47+ // Looking only for indices in LOOKUP mode is correct: during parsing, we assign the expected mode and even if a lookup index
48+ // is used in the FROM command, it will not be marked with LOOKUP mode there - but STANDARD.
49+ // It seems like we could instead just look for JOINs and walk down their right hand side to find lookup fields - but this does
50+ // not work as this rule also gets called just on the right hand side of a JOIN, which means that we don't always know that
51+ // we're inside the right (or left) branch of a JOIN node. (See PlannerUtils.localPlan - this looks for FragmentExecs and
52+ // performs local logical optimization of the fragments; the right hand side of a LookupJoinExec can be a FragmentExec.)
4953 if (esRelation .indexMode () == IndexMode .LOOKUP ) {
5054 lookupFields .addAll (esRelation .output ());
5155 }
@@ -79,6 +83,12 @@ private LogicalPlan missingToNull(LogicalPlan plan, Predicate<FieldAttribute> sh
7983 // save the first field as null (per datatype)
8084 if (nullAlias == null ) {
8185 // Keep the same id so downstream query plans don't need updating
86+ // NOTE: THIS IS BRITTLE AND CAN LEAD TO BUGS.
87+ // In case some optimizer rule or so inserts a plan node that requires the field BEFORE the Eval that we're adding
88+ // on top of the EsRelation, this can trigger a field extraction in the physical optimizer phase, causing wrong
89+ // layouts due to a duplicate name id.
90+ // If someone reaches here AGAIN when debugging e.g. ClassCastExceptions NPEs from wrong layouts, we should probably
91+ // give up on this approach and instead insert EvalExecs in InsertFieldExtraction.
8292 Alias alias = new Alias (f .source (), f .name (), Literal .of (f , null ), f .id ());
8393 nullLiterals .put (dt , alias );
8494 projection = alias .toAttribute ();
0 commit comments