77
88import java .util .ArrayList ;
99import java .util .Collection ;
10- import java .util .LinkedHashMap ;
10+ import java .util .HashSet ;
1111import java .util .List ;
12- import java .util .Map ;
12+ import java .util .Set ;
1313import java .util .function .Function ;
1414import lombok .RequiredArgsConstructor ;
1515import org .apache .calcite .rex .RexNode ;
1616import org .apache .calcite .sql .SqlKind ;
1717import org .apache .commons .lang3 .tuple .Pair ;
18+ import org .apache .logging .log4j .LogManager ;
19+ import org .apache .logging .log4j .Logger ;
1820import org .opensearch .sql .ast .expression .Field ;
1921import org .opensearch .sql .ast .tree .AddTotals ;
2022import org .opensearch .sql .ast .tree .FillNull ;
2527import org .opensearch .sql .ast .tree .UnresolvedPlan ;
2628
2729/**
28- * Pre-materializes MAP dotted paths for symbol-based commands (Category A) before the command's own
29- * visitor logic runs. When a command like {@code rename} or {@code fillnull} references a dotted
30- * path such as {@code doc.user.name}, this class resolves it to an {@code ITEM()} expression and
31- * projects it as a flat named column so the command's string-matching logic can find it.
32- *
33- * <p>Called from {@link CalciteRelNodeVisitor#visitChildren} after children are visited. The
34- * pipeline is: {@code extractFieldOperands → resolveMapPaths → projectMapPaths}.
30+ * Resolves MAP dotted paths (e.g. {@code doc.user.name}) referenced by a PPL command and projects
31+ * them as flat named columns. Each dotted path that resolves to an {@code ITEM()} expression is
32+ * added to the current row type so downstream command logic can reference it by name.
3533 */
3634@ RequiredArgsConstructor
3735public class MapPathPreMaterializer {
3836
37+ private static final Logger log = LogManager .getLogger (MapPathPreMaterializer .class );
38+
3939 /** Visitor used to resolve field expressions to Calcite {@link RexNode}. */
4040 private final CalciteRexNodeVisitor rexVisitor ;
4141
4242 /**
43- * Materializes MAP dotted paths referenced by the given command. For each field operand that
44- * resolves to an {@code ITEM()} access on a MAP column, projects it as a flat named column via
45- * {@code relBuilder.projectPlus()}.
43+ * Resolves and projects MAP dotted paths referenced by the given command as flat named columns.
4644 *
4745 * @param plan the AST command being visited
4846 * @param context the current plan context with relBuilder state
@@ -54,7 +52,7 @@ public void materializePaths(UnresolvedPlan plan, CalcitePlanContext context) {
5452
5553 List <Field > fields = extractFieldOperands (plan );
5654 if (!fields .isEmpty ()) {
57- projectMapPaths ( resolveMapPaths ( fields , context ) , context );
55+ doMaterializeMapPaths ( fields , context );
5856 }
5957 }
6058
@@ -63,41 +61,30 @@ private List<Field> extractFieldOperands(UnresolvedPlan node) {
6361 case Rename rename -> toFields (rename .getRenameList (), m -> m .getOrigin ());
6462 case FillNull fillNull -> toFields (fillNull .getReplacementPairs (), Pair ::getLeft );
6563 case Replace replace -> toFields (replace .getFieldList ());
64+ // Only fields-exclusion needs pre-materialization; inclusion resolves paths via visitProject
6665 case Project project -> project .isExcluded () ? toFields (project .getProjectList ()) : List .of ();
6766 case AddTotals addTotals -> toFields (addTotals .getFieldList ());
6867 case MvCombine mvCombine -> List .of (mvCombine .getField ());
6968 default -> List .of ();
7069 };
7170 }
7271
73- private Map <String , RexNode > resolveMapPaths (List <Field > fields , CalcitePlanContext context ) {
74- Map <String , RexNode > paths = new LinkedHashMap <>();
72+ private void doMaterializeMapPaths (List <Field > fields , CalcitePlanContext context ) {
73+ Set <String > existingFields =
74+ new HashSet <>(context .relBuilder .peek ().getRowType ().getFieldNames ());
75+ List <RexNode > newColumns = new ArrayList <>();
7576 for (Field f : fields ) {
7677 try {
7778 RexNode resolved = rexVisitor .analyze (f , context );
78- if (resolved .getKind () == SqlKind .ITEM ) {
79- paths .put (f .getField ().toString (), resolved );
79+ String name = f .getField ().toString ();
80+ if (resolved .getKind () == SqlKind .ITEM && !existingFields .contains (name )) {
81+ newColumns .add (context .relBuilder .alias (resolved , name ));
8082 }
8183 } catch (RuntimeException e ) {
82- // Field cannot be resolved (e.g., not in schema) — skip silently
84+ log . debug ( "Skipping field resolution for '{}': {}" , f . getField (), e . getMessage (), e );
8385 }
8486 }
85- return paths ;
86- }
8787
88- private void projectMapPaths (Map <String , RexNode > mapPaths , CalcitePlanContext context ) {
89- if (mapPaths .isEmpty ()) {
90- return ;
91- }
92-
93- List <String > existingFields = context .relBuilder .peek ().getRowType ().getFieldNames ();
94- List <RexNode > newColumns = new ArrayList <>();
95- mapPaths .forEach (
96- (name , itemAccess ) -> {
97- if (!existingFields .contains (name )) {
98- newColumns .add (context .relBuilder .alias (itemAccess , name ));
99- }
100- });
10188 if (!newColumns .isEmpty ()) {
10289 context .relBuilder .projectPlus (newColumns );
10390 }
0 commit comments