From 445799f285eaa5605f595a381acb6a143fb329f6 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Fri, 10 Oct 2025 16:13:38 -0500 Subject: [PATCH 01/24] Initial checkpoint - following calcite way and commented legacy way Signed-off-by: Srikanth Padakanti --- .../org/opensearch/sql/analysis/Analyzer.java | 6 + .../sql/ast/AbstractNodeVisitor.java | 5 + .../org/opensearch/sql/ast/dsl/AstDSL.java | 5 + .../org/opensearch/sql/ast/tree/MvExpand.java | 55 +++++++ .../sql/calcite/CalciteRelNodeVisitor.java | 19 +++ .../CollectionUDF/MVExpandFunctionImpl.java | 94 ++++++++++++ .../sql/planner/logical/LogicalMvExpand.java | 46 ++++++ .../logical/LogicalPlanNodeVisitor.java | 4 + .../planner/physical/MvExpandOperator.java | 140 ++++++++++++++++++ .../physical/PhysicalPlanNodeVisitor.java | 4 + ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 5 + .../opensearch/sql/ppl/parser/AstBuilder.java | 9 ++ .../ppl/calcite/CalcitePPLAbstractTest.java | 16 ++ .../ppl/calcite/CalcitePPLMvExpandTest.java | 59 ++++++++ 15 files changed, 468 insertions(+) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java create mode 100644 core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVExpandFunctionImpl.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/logical/LogicalMvExpand.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/physical/MvExpandOperator.java create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 9f78b245942..88f1d58f1bf 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -76,6 +76,7 @@ import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.ML; import 
org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -700,6 +701,11 @@ public LogicalPlan visitExpand(Expand expand, AnalysisContext context) { throw getOnlyForCalciteException("Expand"); } + @Override + public LogicalPlan visitMvExpand(MvExpand node, AnalysisContext context) { + throw getOnlyForCalciteException("MvExpand"); + } + /** Build {@link LogicalTrendline} for Trendline command. */ @Override public LogicalPlan visitTrendline(Trendline node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index f5d2a1623b3..a287b5e268f 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -64,6 +64,7 @@ import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -441,4 +442,8 @@ public T visitAppend(Append node, C context) { public T visitMultisearch(Multisearch node, C context) { return visitChildren(node, context); } + + public T visitMvExpand(MvExpand node, C context) { + return visitChildren(node, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 86b2343ace1..af4b06c8613 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -61,6 +61,7 @@ import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Limit; import 
org.opensearch.sql.ast.tree.MinSpanBin; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -135,6 +136,10 @@ public Expand expand(UnresolvedPlan input, Field field, String alias) { return new Expand(field, alias).attach(input); } + public static UnresolvedPlan mvexpand(UnresolvedPlan input, Field field, Integer limit) { + return new MvExpand(field, limit); + } + public static UnresolvedPlan projectWithArg( UnresolvedPlan input, List argList, UnresolvedExpression... projectList) { return new Project(Arrays.asList(projectList), argList).attach(input); diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java b/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java new file mode 100644 index 00000000000..4ac64253ebb --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import javax.annotation.Nullable; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Field; + +/** AST node representing an {@code mvexpand [limit N]} operation. 
*/ +@ToString +@EqualsAndHashCode(callSuper = false) +public class MvExpand extends UnresolvedPlan { + + private UnresolvedPlan child; + @Getter private final Field field; + @Getter @Nullable private final Integer limit; + + public MvExpand(Field field, @Nullable Integer limit) { + this.field = field; + this.limit = limit; + } + + @Override + public MvExpand attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + public Field getField() { + return field; + } + + @Nullable + public Integer getLimit() { + return limit; + } + + @Override + public List getChild() { + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitMvExpand(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 7cec960b82a..4337a9dfaba 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -113,6 +113,7 @@ import org.opensearch.sql.ast.tree.Lookup.OutputStrategy; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -1553,6 +1554,24 @@ private static void buildDedupNotNull( context.relBuilder.projectExcept(_row_number_dedup_); } + @Override + public RelNode visitMvExpand(MvExpand node, CalcitePlanContext context) { + visitChildren(node, context); + + Field arrayField = node.getField(); + RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); + + // Use the same strategy as visitExpand: unnest the array field using uncollect + buildExpandRelNode(arrayFieldRex, 
arrayField.getField().toString(), null, context); + + // If there's a limit, add a limit clause after the uncollect/join: + if (node.getLimit() != null) { + context.relBuilder.limit(0, node.getLimit()); + } + + return context.relBuilder.peek(); + } + @Override public RelNode visitWindow(Window node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVExpandFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVExpandFunctionImpl.java new file mode 100644 index 00000000000..b63a9a2d3dd --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVExpandFunctionImpl.java @@ -0,0 +1,94 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.CollectionUDF; +// +// import java.util.ArrayList; +// import java.util.Collections; +// import java.util.List; +// import org.apache.calcite.adapter.enumerable.NotNullImplementor; +// import org.apache.calcite.adapter.enumerable.NullPolicy; +// import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +// import org.apache.calcite.linq4j.tree.Expression; +// import org.apache.calcite.linq4j.tree.Expressions; +// import org.apache.calcite.linq4j.tree.Types; +// import org.apache.calcite.rel.type.RelDataType; +// import org.apache.calcite.rel.type.RelDataTypeFactory; +// import org.apache.calcite.rex.RexCall; +// import org.apache.calcite.sql.SqlOperatorBinding; +// import org.apache.calcite.sql.type.SqlReturnTypeInference; +// import org.apache.calcite.sql.type.SqlTypeName; +// import org.opensearch.sql.expression.function.ImplementorUDF; +// import org.opensearch.sql.expression.function.UDFOperandMetadata; +// +/// ** +// * MVExpand function that expands multivalue (array) fields into multiple rows. 
+// */ +// public class MVExpandFunctionImpl extends ImplementorUDF { +// +// public MVExpandFunctionImpl() { +// super(new MVExpandImplementor(), NullPolicy.ALL); +// } +// +// @Override +// public SqlReturnTypeInference getReturnTypeInference() { +// // For mvexpand, the output type should be the type of the array element (or ANY) +// return sqlOperatorBinding -> { +// RelDataTypeFactory typeFactory = sqlOperatorBinding.getTypeFactory(); +// +// if (sqlOperatorBinding.getOperandCount() == 0) { +// return typeFactory.createSqlType(SqlTypeName.NULL); +// } +// +// // Assume single argument: the array to expand +// RelDataType operandType = sqlOperatorBinding.getOperandType(0); +// RelDataType elementType = +// operandType.getComponentType() != null +// ? operandType.getComponentType() +// : typeFactory.createSqlType(SqlTypeName.ANY); +// +// // Output is a scalar (not array) +// return typeFactory.createTypeWithNullability(elementType, true); +// }; +// } +// +// @Override +// public UDFOperandMetadata getOperandMetadata() { +// return null; +// } +// +// public static class MVExpandImplementor implements NotNullImplementor { +// @Override +// public Expression implement( +// RexToLixTranslator translator, RexCall call, List translatedOperands) +// { +// // Delegate to static Java method for value expansion +// return Expressions.call( +// Types.lookupMethod(MVExpandFunctionImpl.class, "mvexpand", Object.class), +// translatedOperands.get(0)); +// } +// } +// +// /** +// * Implementation for mvexpand. +// * If the argument is a List, return its elements as a List (to be mapped to separate rows). +// * If the argument is null or not a List, return a singleton list with the original value. 
+// */ +// public static List mvexpand(Object arg) { +// if (arg == null) { +// return Collections.singletonList(null); +// } +// if (arg instanceof List) { +// List arr = (List) arg; +// if (arr.isEmpty()) { +// return Collections.singletonList(null); +// } +// return new ArrayList<>(arr); +// } else { +// // Non-array value: return as single-element list +// return Collections.singletonList(arg); +// } +// } +// } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalMvExpand.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalMvExpand.java new file mode 100644 index 00000000000..66e4f0eac9c --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalMvExpand.java @@ -0,0 +1,46 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.logical; +// +// import java.util.Collections; +// import java.util.List; +// import java.util.Optional; +// import lombok.EqualsAndHashCode; +// import org.opensearch.sql.ast.expression.Field; +// +// @EqualsAndHashCode(callSuper = true) +// public class LogicalMvExpand extends LogicalPlan { +// private final Field field; +// private final Optional limit; +// +// public LogicalMvExpand(LogicalPlan input, Field field, Optional limit) { +// super(Collections.singletonList(input)); +// this.field = field; +// this.limit = limit != null ? 
limit : Optional.empty(); +// } +// +// public LogicalPlan getInput() { +// return getChild().get(0); +// } +// +// public Field getField() { +// return field; +// } +// +// public Optional getLimit() { +// return limit; +// } +// +// @Override +// public R accept(LogicalPlanNodeVisitor visitor, C context) { +// return visitor.visitLogicalMvExpand(this, context); +// } +// +// @Override +// public String toString() { +// return String.format("LogicalMvExpand(field=%s, limit=%s)", field, limit); +// } +// } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java index c9eedd8efc8..fa9cf5ccaa0 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java @@ -119,4 +119,8 @@ public R visitFetchCursor(LogicalFetchCursor plan, C context) { public R visitCloseCursor(LogicalCloseCursor plan, C context) { return visitNode(plan, context); } + + // public R visitLogicalMvExpand(LogicalMvExpand plan, C context) { + // return visitNode(plan, context); + // } } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/MvExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/MvExpandOperator.java new file mode 100644 index 00000000000..598922f4356 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/physical/MvExpandOperator.java @@ -0,0 +1,140 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; +// +// import java.util.Collections; +// import java.util.Iterator; +// import java.util.List; +// import java.util.Map; +// import java.util.NoSuchElementException; +// import java.util.Optional; +// import java.util.ArrayList; +// import java.util.LinkedHashMap; +// import lombok.EqualsAndHashCode; +// import 
lombok.Getter; +// import lombok.ToString; +// import org.opensearch.sql.data.model.ExprTupleValue; +// import org.opensearch.sql.data.model.ExprValue; +// import org.opensearch.sql.data.model.ExprValueUtils; +// +// @EqualsAndHashCode(callSuper = false) +// @ToString +// public class MvExpandOperator extends PhysicalPlan { +// @Getter private final PhysicalPlan input; +// @Getter private final String fieldName; +// @Getter private final Optional limit; +// @ToString.Exclude private Iterator expandedValuesIterator = +// Collections.emptyIterator(); +// @ToString.Exclude private ExprValue next = null; +// @ToString.Exclude private boolean nextPrepared = false; +// +// public MvExpandOperator(PhysicalPlan input, String fieldName, Optional limit) { +// this.input = input; +// this.fieldName = fieldName; +// this.limit = limit; +// } +// +// @Override +// public R accept(PhysicalPlanNodeVisitor visitor, C context) { +// return visitor.visitMvExpandOperator(this, context); +// } +// +// @Override +// public List getChild() { +// return Collections.singletonList(input); +// } +// +// @Override +// public void open() { +// input.open(); +// expandedValuesIterator = Collections.emptyIterator(); +// next = null; +// nextPrepared = false; +// } +// +// @Override +// public void close() { +// input.close(); +// } +// +// @Override +// public boolean hasNext() { +// if (!nextPrepared) { +// prepareNext(); +// } +// return next != null; +// } +// +// @Override +// public ExprValue next() { +// if (!nextPrepared) { +// prepareNext(); +// } +// if (next == null) { +// throw new NoSuchElementException("No more values in MvExpandOperator"); +// } +// ExprValue result = next; +// next = null; +// nextPrepared = false; +// return result; +// } +// +// private void prepareNext() { +// while (true) { +// if (expandedValuesIterator != null && expandedValuesIterator.hasNext()) { +// next = expandedValuesIterator.next(); +// nextPrepared = true; +// return; +// } +// if (!input.hasNext()) 
{ +// next = null; +// nextPrepared = true; +// return; +// } +// ExprValue value = input.next(); +// expandedValuesIterator = expandRow(value); +// } +// } +// +// private Iterator expandRow(ExprValue value) { +// if (value == null || value.isMissing()) { +// return Collections.emptyIterator(); +// } +// Map tuple = value.tupleValue(); +// +// if (fieldName.startsWith("_")) { +// return Collections.singletonList(value).iterator(); +// } +// +// ExprValue fieldVal = tuple.get(fieldName); +// if (fieldVal == null || fieldVal.isMissing() || fieldVal.isNull()) { +// return Collections.emptyIterator(); +// } +// +// // If not a collection, just return the row as is +// if (!(fieldVal instanceof org.opensearch.sql.data.model.ExprCollectionValue)) { +// return Collections.singletonList(value).iterator(); +// } +// +// // Get the list of ExprValue from the collection +// List values = fieldVal.collectionValue(); +// if (values.isEmpty()) { +// return Collections.emptyIterator(); +// } +// +// int max = limit.orElse(values.size()); +// List expandedRows = new ArrayList<>(); +// int count = 0; +// for (ExprValue v : values) { +// if (max > 0 && count >= max) break; +// count++; +// LinkedHashMap newTuple = new LinkedHashMap<>(tuple); +// newTuple.put(fieldName, v); +// expandedRows.add(new ExprTupleValue(newTuple)); +// } +// return expandedRows.iterator(); +// } +// } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java index 66c7219e39c..804bcf10574 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java @@ -103,4 +103,8 @@ public R visitTrendline(TrendlineOperator node, C context) { public R visitCursorClose(CursorCloseOperator node, C context) { return visitNode(node, context); } + + // public R 
visitMvExpandOperator(MvExpandOperator plan, C context) { + // return visitNode(plan, context); + // } } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index ba1e4960bb2..c6c0a07fcf2 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -48,6 +48,7 @@ TRENDLINE: 'TRENDLINE'; TIMECHART: 'TIMECHART'; APPENDCOL: 'APPENDCOL'; EXPAND: 'EXPAND'; +MVEXPAND: 'MVEXPAND'; SIMPLE_PATTERN: 'SIMPLE_PATTERN'; BRAIN: 'BRAIN'; VARIABLE_COUNT_THRESHOLD: 'VARIABLE_COUNT_THRESHOLD'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index cb7e005e140..abd396058e2 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -74,6 +74,7 @@ commands | appendcolCommand | appendCommand | expandCommand + | mvexpandCommand | flattenCommand | reverseCommand | regexCommand @@ -458,6 +459,10 @@ expandCommand : EXPAND fieldExpression (AS alias = qualifiedName)? ; +mvexpandCommand + : MVEXPAND fieldExpression (LIMIT INTEGER_LITERAL)? + ; + flattenCommand : FLATTEN fieldExpression (AS aliases = identifierSeq)? 
; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index f6ce4b10933..d70851d38f6 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -67,6 +67,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -727,6 +728,14 @@ public UnresolvedPlan visitExpandCommand(OpenSearchPPLParser.ExpandCommandContex return new Expand(fieldExpression, alias); } + @Override + public UnresolvedPlan visitMvexpandCommand(OpenSearchPPLParser.MvexpandCommandContext ctx) { + Field field = (Field) expressionBuilder.visit(ctx.fieldExpression()); + Integer limit = + ctx.INTEGER_LITERAL() != null ? 
Integer.parseInt(ctx.INTEGER_LITERAL().getText()) : null; + return new MvExpand(field, limit); + } + @Override public UnresolvedPlan visitGrokCommand(OpenSearchPPLParser.GrokCommandContext ctx) { UnresolvedExpression sourceField = internalVisitExpression(ctx.source_field); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java index 9dd01b30df5..8da3a43c318 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java @@ -25,6 +25,7 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.rel2sql.RelToSqlConverter; import org.apache.calcite.rel.rel2sql.SqlImplementor; +import org.apache.calcite.schema.Schema; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.parser.SqlParser; @@ -64,6 +65,21 @@ public CalcitePPLAbstractTest(CalciteAssert.SchemaSpec... 
schemaSpecs) { this.settings = mock(Settings.class); } + public CalcitePPLAbstractTest(Schema customSchema) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + rootSchema.add("CUSTOM", customSchema); + this.config = + Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(rootSchema.getSubSchema("CUSTOM")) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + this.dataSourceService = mock(DataSourceService.class); + this.planTransformer = new CalciteRelNodeVisitor(dataSourceService); + this.converter = new RelToSqlConverter(OpenSearchSparkSqlDialect.DEFAULT); + this.settings = mock(Settings.class); + } + @Before public void init() { doReturn(true).when(settings).getSettingValue(Settings.Key.CALCITE_ENGINE_ENABLED); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java new file mode 100644 index 00000000000..137813bde45 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java @@ -0,0 +1,59 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import java.util.*; +import org.apache.calcite.adapter.java.ReflectiveSchema; +import org.apache.calcite.rel.RelNode; +import org.junit.Test; + +public class CalcitePPLMvExpandTest extends CalcitePPLAbstractTest { + public CalcitePPLMvExpandTest() { + super(new ReflectiveSchema(new MvExpandSchema())); + System.out.println("CalcitePPLMvExpandTest constructed!"); + } + + public static class MvExpandSchema { + public List USERS; + + public MvExpandSchema() { + System.out.println("MvExpandSchema constructor called!"); + USERS = + Arrays.asList( + new User("happy", Arrays.asList(new Skill("python", null), new Skill("java", null))), + new User("single", Arrays.asList(new Skill("go", null)))); + } + } + + 
public static class User { + public String username; + public List skills; + + public User(String username, List skills) { + System.out.println("User created: " + username + ", skills: " + skills); + this.username = username; + this.skills = skills; + } + } + + public static class Skill { + public String name; + public String level; + + public Skill(String name, String level) { + System.out.println("Skill created: " + name + ", " + level); + this.name = name; + this.level = level; + } + } + + @Test + public void testMvExpand_HappyPath() { + String ppl = "source=USERS | mvexpand skills"; + RelNode root = getRelNode(ppl); + System.out.println("testMvExpand_HappyPath ran!"); + } +} From 427af0ffac3ea80f6cee2f484794f25f067f83f1 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Wed, 22 Oct 2025 13:25:40 -0500 Subject: [PATCH 02/24] Removed the build.gradle dependency opensearch-common Signed-off-by: Srikanth Padakanti --- .../sql/calcite/CalciteRelNodeVisitor.java | 78 +++- .../remote/CalciteMvExpandCommandIT.java | 180 ++++++++ .../ppl/calcite/CalcitePPLMvExpandTest.java | 430 ++++++++++++++++-- 3 files changed, 653 insertions(+), 35 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 4337a9dfaba..a2dd15c618e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1554,21 +1554,89 @@ private static void buildDedupNotNull( context.relBuilder.projectExcept(_row_number_dedup_); } + private void buildMvExpandRelNode( + RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { + + // 1. 
Capture left node and its schema BEFORE calling build() + RelNode leftNode = context.relBuilder.peek(); + RelDataType leftSchema = leftNode.getRowType(); + + // 2. Create correlation variable + Holder correlVariable = Holder.empty(); + context.relBuilder.variable(correlVariable::set); + + // 3. Find the array field index in the left schema by name (robust) + RelDataTypeField leftField = leftSchema.getField(arrayFieldName, false, false); + int arrayFieldIndexInLeft; + if (leftField != null) { + arrayFieldIndexInLeft = leftField.getIndex(); + } else { + // fallback (best effort) + arrayFieldIndexInLeft = arrayFieldRex.getIndex(); + } + + // 4. Build correlated field access for the right-side projection + RexNode correlArrayFieldAccess = + context.relBuilder.field( + context.rexBuilder.makeCorrel(leftSchema, correlVariable.get().id), + arrayFieldIndexInLeft); + + // 5. Build left and right nodes (leftBuilt is the original left, rightNode uncollects the + // array) + RelNode leftBuilt = context.relBuilder.build(); + RelNode rightNode = + context + .relBuilder + .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) + .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) + .uncollect(List.of(), false) + .build(); + + // 6. Compute a proper RexInputRef that refers to leftBuilt's rowType at the + // arrayFieldIndexInLeft. + // Use rexBuilder.makeInputRef with leftBuilt.getRowType() + RexNode requiredColumnRef = + context.rexBuilder.makeInputRef(leftBuilt.getRowType(), arrayFieldIndexInLeft); + + // 7. Correlate left and right using the proper required column ref + context + .relBuilder + .push(leftBuilt) + .push(rightNode) + .correlate(JoinRelType.INNER, correlVariable.get().id, List.of(requiredColumnRef)); + + // 8. 
Remove the original array field from the output by name using the builder's field() + // (this ensures we remove the correct column instance) + RexNode toRemove; + try { + toRemove = context.relBuilder.field(arrayFieldName); + } catch (Exception e) { + // Fallback in case name lookup fails + toRemove = arrayFieldRex; + } + context.relBuilder.projectExcept(toRemove); + + // 9. Optional rename into alias (same as your prior logic) + if (alias != null) { + tryToRemoveNestedFields(context); + RexInputRef expandedField = context.relBuilder.field(arrayFieldName); + List names = new ArrayList<>(context.relBuilder.peek().getRowType().getFieldNames()); + names.set(expandedField.getIndex(), alias); + context.relBuilder.rename(names); + } + } + @Override public RelNode visitMvExpand(MvExpand node, CalcitePlanContext context) { visitChildren(node, context); - Field arrayField = node.getField(); RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); - // Use the same strategy as visitExpand: unnest the array field using uncollect - buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), null, context); + buildMvExpandRelNode(arrayFieldRex, arrayField.getField().toString(), null, context); - // If there's a limit, add a limit clause after the uncollect/join: if (node.getLimit() != null) { context.relBuilder.limit(0, node.getLimit()); } - return context.relBuilder.peek(); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java new file mode 100644 index 00000000000..0422602a46d --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java @@ -0,0 +1,180 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import 
static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteMvExpandCommandIT extends PPLIntegTestCase { + + private static final String INDEX = "mvexpand_edge_cases"; + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + deleteIndexIfExists(INDEX); + createIndex( + INDEX, + "{ \"mappings\": { \"properties\": { " + + "\"username\": { \"type\": \"keyword\" }," + + "\"skills\": { \"type\": \"nested\" }" + + "} } }"); + bulkInsert( + INDEX, + "1|{\"username\":\"happy\"," + + " \"skills\":[{\"name\":\"python\"},{\"name\":\"java\"},{\"name\":\"sql\"}]}", + "2|{\"username\":\"single\", \"skills\":[{\"name\":\"go\"}]}", + "3|{\"username\":\"empty\", \"skills\":[]}", + "4|{\"username\":\"nullskills\", \"skills\":null}", + "5|{\"username\":\"noskills\"}", + "6|{\"username\":\"missingattr\", \"skills\":[{\"name\":\"c\"},{\"level\":\"advanced\"}]}", + "7|{\"username\":\"complex\"," + + " \"skills\":[{\"name\":\"ml\",\"level\":\"expert\"},{\"name\":\"ai\"},{\"level\":\"novice\"}]}", + "8|{\"username\":\"duplicate\", \"skills\":[{\"name\":\"dup\"},{\"name\":\"dup\"}]}", + "9|{\"username\":\"large\"," + + " \"skills\":[{\"name\":\"s1\"},{\"name\":\"s2\"},{\"name\":\"s3\"},{\"name\":\"s4\"},{\"name\":\"s5\"},{\"name\":\"s6\"},{\"name\":\"s7\"},{\"name\":\"s8\"},{\"name\":\"s9\"},{\"name\":\"s10\"}]}"); + refreshIndex(INDEX); + } + + @Test + public void testMvexpandSingleElement() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='single' | fields username, skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result, rows("single", "go")); + } + + @Test + public void testMvexpandEmptyArray() throws Exception { + String query = + String.format( + "source=%s | 
mvexpand skills | where username='empty' | fields username, skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result); // Should be empty + } + + @Test + public void testMvexpandNullArray() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='nullskills' | fields username," + + " skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result); // Should be empty + } + + @Test + public void testMvexpandNoArrayField() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='noskills' | fields username," + + " skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result); // Should be empty + } + + @Test + public void testMvexpandDuplicate() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='duplicate' | fields username," + + " skills.name | sort skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result, rows("duplicate", "dup"), rows("duplicate", "dup")); + } + + // Helper methods for index setup/teardown + private static void deleteIndexIfExists(String index) throws IOException { + try { + Request request = new Request("DELETE", "/" + index); + PPLIntegTestCase.adminClient().performRequest(request); + } catch (IOException e) { + // Index does not exist or already deleted + } + } + + private static void createIndex(String index, String mappingJson) throws IOException { + Request request = new Request("PUT", "/" + index); + request.setJsonEntity(mappingJson); + PPLIntegTestCase.adminClient().performRequest(request); + } + + private static void bulkInsert(String index, String... 
docs) throws IOException { + StringBuilder bulk = new StringBuilder(); + for (String doc : docs) { + String[] parts = doc.split("\\|", 2); + bulk.append("{\"index\":{\"_id\":").append(parts[0]).append("}}\n"); + bulk.append(parts[1]).append("\n"); + } + Request request = new Request("POST", "/" + index + "/_bulk?refresh=true"); + request.setJsonEntity(bulk.toString()); + PPLIntegTestCase.adminClient().performRequest(request); + } + + private static void refreshIndex(String index) throws IOException { + Request request = new Request("POST", "/" + index + "/_refresh"); + PPLIntegTestCase.adminClient().performRequest(request); + } + + // @Test + // public void testMvexpandComplex() throws Exception { + // String query = String.format( + // "source=%s | mvexpand skills | where username='complex' | fields username, + // skills.name, skills.level | sort skills.level", + // INDEX); + // JSONObject result = executeQuery(query); + // verifyDataRows(result, + // rows("complex", "ai", null), + // rows("complex", "ml", "expert"), + // rows("complex", null, "novice") + // ); + // } + // @Test + // public void testMvexpandLargeArray() throws Exception { + // String query = String.format( + // "source=%s | mvexpand skills | where username='large' | fields skills.name | sort + // skills.name", + // INDEX); + // JSONObject result = executeQuery(query); + // verifyDataRows(result, + // rows("s1"), rows("s10"), rows("s2"), rows("s3"), rows("s4"), + // rows("s5"), rows("s6"), rows("s7"), rows("s8"), rows("s9") + // ); + // } + // @Test + // public void testMvexpandMissingAttribute() throws Exception { + // String query = String.format( + // "source=%s | mvexpand skills | where username='missingattr' | fields username, + // skills.name, skills.level | sort skills.level", + // INDEX); + // JSONObject result = executeQuery(query); + // verifyDataRows(result, rows("missingattr", "c", null), rows("missingattr", null, + // "advanced")); + // } + // @Test + // public void 
testMvexpandNormal() throws Exception { + // String query = String.format( + // "source=%s | mvexpand skills | where username='happy' | fields username, skills.name + // | sort skills.name", + // INDEX); + // JSONObject result = executeQuery(query); + // verifyDataRows(result, rows("happy", "java"), rows("happy", "python"), rows("happy", + // "sql")); + // } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java index 137813bde45..7ee304c70d1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java @@ -5,55 +5,425 @@ package org.opensearch.sql.ppl.calcite; -import java.util.*; -import org.apache.calcite.adapter.java.ReflectiveSchema; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import 
org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; import org.junit.Test; public class CalcitePPLMvExpandTest extends CalcitePPLAbstractTest { + public CalcitePPLMvExpandTest() { - super(new ReflectiveSchema(new MvExpandSchema())); - System.out.println("CalcitePPLMvExpandTest constructed!"); + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + + ImmutableList users = + ImmutableList.of( + // happy: multiple skills + new Object[] { + "happy", + new Object[] { + new Object[] {"python", null}, + new Object[] {"java", null}, + new Object[] {"sql", null} + } + }, + // single: single skill + new Object[] {"single", new Object[] {new Object[] {"go", null}}}, + // empty: empty skills array + new Object[] {"empty", new Object[] {}}, + // nullskills: null skills array + new Object[] {"nullskills", null}, + // noskills: missing skills (simulate with null) + new Object[] {"noskills", null}, + // missingattr: skills with missing fields + new Object[] { + "missingattr", + new Object[] { + new Object[] {"c", null}, + new Object[] {null, "advanced"} + } + }, + // complex: skills with some missing name/level + new Object[] { + "complex", + new Object[] { + new Object[] {"ml", "expert"}, + new Object[] {"ai", null}, + new Object[] {null, "novice"} + } + }, + // duplicate: skills with duplicate names + new Object[] { + "duplicate", + new Object[] { + new Object[] {"dup", null}, + new Object[] {"dup", null} + } + }, + // large: skills with many elements + new Object[] { + "large", + new Object[] { + new Object[] {"s1", null}, new Object[] {"s2", null}, new Object[] {"s3", null}, + new Object[] {"s4", 
null}, new Object[] {"s5", null}, new Object[] {"s6", null}, + new Object[] {"s7", null}, new Object[] {"s8", null}, new Object[] {"s9", null}, + new Object[] {"s10", null} + } + }, + // primitive: array of primitives instead of objects + new Object[] {"primitive", new Object[] {"python", "java"}}, + // allnulls: array of nulls + new Object[] {"allnulls", new Object[] {null, null}}, + // emptyobj: array with an empty object + new Object[] {"emptyobj", new Object[] {new Object[] {}}}, + // --- New edge cases below --- + // deeplyNested: array of arrays + new Object[] { + "deeplyNested", + new Object[] { + new Object[] {new Object[] {"python", null}}, + new Object[] {new Object[] {"java", null}} + } + }, + // mixedTypes: array with mixed types + new Object[] { + "mixedTypes", new Object[] {"python", 42, true, null, new Object[] {"java", null}} + }, + // nestedObject: array of objects with objects as attributes + new Object[] { + "nestedObject", + new Object[] { + new Object[] {new Object[] {"meta", new Object[] {"name", "python", "years", 5}}} + } + }, + // allEmptyObjects: array of empty objects + new Object[] {"allEmptyObjects", new Object[] {new Object[] {}, new Object[] {}}}, + // allEmptyArrays: array of empty arrays + new Object[] {"allEmptyArrays", new Object[] {new Object[] {}, new Object[] {}}}, + // arrayOfArraysOfPrimitives + new Object[] { + "arrayOfArraysOfPrimitives", + new Object[] {new Object[] {"python", "java"}, new Object[] {"sql"}} + }, + // specialValues: array with Infinity, NaN, very long string, unicode + new Object[] { + "specialValues", + new Object[] { + Double.POSITIVE_INFINITY, + Double.NaN, + "😀😃😄😁", + new String(new char[10000]).replace('\0', 'x') + } + }); + schema.add("USERS", new UsersTable(users)); + + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + // Option 2: Assert 
specific logical plan (as per the main edge/typical cases) + @Test + public void testMvExpandBasic() { + String ppl = "source=USERS | mvexpand skills"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(USERNAME=[$0], name=[$2], level=[$3])\n" + + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}])\n" + + " LogicalTableScan(table=[[scott, USERS]])\n" + + " Uncollect\n" + + " LogicalProject(skills=[$cor0.skills])\n" + + " LogicalValues(tuples=[[{ 0 }]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testMvExpandWithLimit() { + String ppl = "source=USERS | mvexpand skills | head 1"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(USERNAME=[$0], name=[$2], level=[$3])\n" + + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}])\n" + + " LogicalTableScan(table=[[scott, USERS]])\n" + + " Uncollect\n" + + " LogicalProject(skills=[$cor0.skills])\n" + + " LogicalValues(tuples=[[{ 0 }]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testMvExpandProjectNested() { + String ppl = "source=USERS | mvexpand skills | fields USERNAME, name, level"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(USERNAME=[$0], name=[$2], level=[$3])\n" + + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}])\n" + + " LogicalTableScan(table=[[scott, USERS]])\n" + + " Uncollect\n" + + " LogicalProject(skills=[$cor0.skills])\n" + + " LogicalValues(tuples=[[{ 0 }]])\n"; + verifyLogical(root, expectedLogical); } - public static class MvExpandSchema { - public List USERS; + // Option 3: Assert that no error/crash occurs for edge cases - public MvExpandSchema() { - System.out.println("MvExpandSchema constructor called!"); - USERS = - Arrays.asList( - new User("happy", Arrays.asList(new Skill("python", null), new Skill("java", null))), - new 
User("single", Arrays.asList(new Skill("go", null)))); + @Test + public void testMvExpandEmptyOrNullArray() { + String ppl = "source=USERS | where USERNAME in ('empty','nullskills') | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on empty/null array should not throw, but got: " + e.getMessage()); } } - public static class User { - public String username; - public List skills; + @Test + public void testMvExpandNoArrayField() { + String ppl = "source=USERS | where USERNAME = 'noskills' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on missing array field should not throw, but got: " + e.getMessage()); + } + } - public User(String username, List skills) { - System.out.println("User created: " + username + ", skills: " + skills); - this.username = username; - this.skills = skills; + @Test + public void testMvExpandWithDuplicates() { + String ppl = "source=USERS | where USERNAME = 'duplicate' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand with duplicates should not throw, but got: " + e.getMessage()); } } - public static class Skill { - public String name; - public String level; + @Test + public void testMvExpandLargeArray() { + String ppl = "source=USERS | where USERNAME = 'large' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on large array should not throw, but got: " + e.getMessage()); + } + } - public Skill(String name, String level) { - System.out.println("Skill created: " + name + ", " + level); - this.name = name; - this.level = level; + @Test + public void testMvExpandProjectMissingAttribute() { + String ppl = "source=USERS | mvexpand skills | fields USERNAME, level"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch 
(Exception e) { + fail("mvexpand projection of missing attribute should not throw, but got: " + e.getMessage()); } } @Test - public void testMvExpand_HappyPath() { - String ppl = "source=USERS | mvexpand skills"; - RelNode root = getRelNode(ppl); - System.out.println("testMvExpand_HappyPath ran!"); + public void testMvExpandPrimitiveArray() { + String ppl = "source=USERS | where USERNAME = 'primitive' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on array of primitives should not throw, but got: " + e.getMessage()); + } + } + + @Test + public void testMvExpandAllNullsArray() { + String ppl = "source=USERS | where USERNAME = 'allnulls' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on array of all nulls should not throw, but got: " + e.getMessage()); + } + } + + @Test + public void testMvExpandEmptyObjectArray() { + String ppl = "source=USERS | where USERNAME = 'emptyobj' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on array with empty struct should not throw, but got: " + e.getMessage()); + } + } + + // --- Additional uncovered edge case tests --- + + @Test + public void testMvExpandDeeplyNestedArray() { + String ppl = "source=USERS | where USERNAME = 'deeplyNested' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on deeply nested arrays should not throw, but got: " + e.getMessage()); + } + } + + @Test + public void testMvExpandMixedTypesArray() { + String ppl = "source=USERS | where USERNAME = 'mixedTypes' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on array with mixed types should not throw, but got: " + e.getMessage()); + } + } + + @Test + public void 
testMvExpandNestedObjectArray() { + String ppl = "source=USERS | where USERNAME = 'nestedObject' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on array of nested objects should not throw, but got: " + e.getMessage()); + } + } + + @Test + public void testMvExpandAllEmptyObjectsArray() { + String ppl = "source=USERS | where USERNAME = 'allEmptyObjects' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on array of all empty objects should not throw, but got: " + e.getMessage()); + } + } + + @Test + public void testMvExpandAllEmptyArraysArray() { + String ppl = "source=USERS | where USERNAME = 'allEmptyArrays' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on array of all empty arrays should not throw, but got: " + e.getMessage()); + } + } + + @Test + public void testMvExpandArrayOfArraysOfPrimitives() { + String ppl = "source=USERS | where USERNAME = 'arrayOfArraysOfPrimitives' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail( + "mvexpand on array of arrays of primitives should not throw, but got: " + e.getMessage()); + } + } + + @Test + public void testMvExpandSpecialValuesArray() { + String ppl = "source=USERS | where USERNAME = 'specialValues' | mvexpand skills"; + try { + RelNode root = getRelNode(ppl); + assertNotNull(root); + } catch (Exception e) { + fail("mvexpand on array with special values should not throw, but got: " + e.getMessage()); + } + } + + @RequiredArgsConstructor + static class UsersTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("USERNAME", SqlTypeName.VARCHAR) + .add( + "skills", + factory.createArrayType( + factory + 
.builder() + .add("name", SqlTypeName.VARCHAR) + .add("level", SqlTypeName.VARCHAR) + .build(), + -1)) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } } } From 83d778617ba59ce349fe056b00fd275fe0bbc356 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Wed, 22 Oct 2025 13:27:42 -0500 Subject: [PATCH 03/24] Ready to submit this PR Signed-off-by: Srikanth Padakanti --- .../org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java index 7ee304c70d1..1114c1f2051 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java @@ -107,7 +107,6 @@ protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... 
schemaSpec new Object[] {"allnulls", new Object[] {null, null}}, // emptyobj: array with an empty object new Object[] {"emptyobj", new Object[] {new Object[] {}}}, - // --- New edge cases below --- // deeplyNested: array of arrays new Object[] { "deeplyNested", From 7f6e1276f2f6bea66c36db76dc75e1cc358362ae Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Wed, 22 Oct 2025 13:47:22 -0500 Subject: [PATCH 04/24] Ready to submit this PR Signed-off-by: Srikanth Padakanti --- .../remote/CalciteMvExpandCommandIT.java | 46 ------------------- 1 file changed, 46 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java index 0422602a46d..fe48580ad87 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java @@ -131,50 +131,4 @@ private static void refreshIndex(String index) throws IOException { Request request = new Request("POST", "/" + index + "/_refresh"); PPLIntegTestCase.adminClient().performRequest(request); } - - // @Test - // public void testMvexpandComplex() throws Exception { - // String query = String.format( - // "source=%s | mvexpand skills | where username='complex' | fields username, - // skills.name, skills.level | sort skills.level", - // INDEX); - // JSONObject result = executeQuery(query); - // verifyDataRows(result, - // rows("complex", "ai", null), - // rows("complex", "ml", "expert"), - // rows("complex", null, "novice") - // ); - // } - // @Test - // public void testMvexpandLargeArray() throws Exception { - // String query = String.format( - // "source=%s | mvexpand skills | where username='large' | fields skills.name | sort - // skills.name", - // INDEX); - // JSONObject result = executeQuery(query); - // verifyDataRows(result, - // rows("s1"), rows("s10"), 
rows("s2"), rows("s3"), rows("s4"), - // rows("s5"), rows("s6"), rows("s7"), rows("s8"), rows("s9") - // ); - // } - // @Test - // public void testMvexpandMissingAttribute() throws Exception { - // String query = String.format( - // "source=%s | mvexpand skills | where username='missingattr' | fields username, - // skills.name, skills.level | sort skills.level", - // INDEX); - // JSONObject result = executeQuery(query); - // verifyDataRows(result, rows("missingattr", "c", null), rows("missingattr", null, - // "advanced")); - // } - // @Test - // public void testMvexpandNormal() throws Exception { - // String query = String.format( - // "source=%s | mvexpand skills | where username='happy' | fields username, skills.name - // | sort skills.name", - // INDEX); - // JSONObject result = executeQuery(query); - // verifyDataRows(result, rows("happy", "java"), rows("happy", "python"), rows("happy", - // "sql")); - // } } From dcbf56b47192752df359b2f7bc85a1ce44323054 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Wed, 22 Oct 2025 13:57:15 -0500 Subject: [PATCH 05/24] Ready to submit this PR Signed-off-by: Srikanth Padakanti --- .../CollectionUDF/MVExpandFunctionImpl.java | 94 ------------ .../sql/planner/logical/LogicalMvExpand.java | 46 ------ .../planner/physical/MvExpandOperator.java | 140 ------------------ .../physical/PhysicalPlanNodeVisitor.java | 4 - 4 files changed, 284 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVExpandFunctionImpl.java delete mode 100644 core/src/main/java/org/opensearch/sql/planner/logical/LogicalMvExpand.java delete mode 100644 core/src/main/java/org/opensearch/sql/planner/physical/MvExpandOperator.java diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVExpandFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVExpandFunctionImpl.java deleted file mode 100644 index b63a9a2d3dd..00000000000 --- 
a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVExpandFunctionImpl.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.expression.function.CollectionUDF; -// -// import java.util.ArrayList; -// import java.util.Collections; -// import java.util.List; -// import org.apache.calcite.adapter.enumerable.NotNullImplementor; -// import org.apache.calcite.adapter.enumerable.NullPolicy; -// import org.apache.calcite.adapter.enumerable.RexToLixTranslator; -// import org.apache.calcite.linq4j.tree.Expression; -// import org.apache.calcite.linq4j.tree.Expressions; -// import org.apache.calcite.linq4j.tree.Types; -// import org.apache.calcite.rel.type.RelDataType; -// import org.apache.calcite.rel.type.RelDataTypeFactory; -// import org.apache.calcite.rex.RexCall; -// import org.apache.calcite.sql.SqlOperatorBinding; -// import org.apache.calcite.sql.type.SqlReturnTypeInference; -// import org.apache.calcite.sql.type.SqlTypeName; -// import org.opensearch.sql.expression.function.ImplementorUDF; -// import org.opensearch.sql.expression.function.UDFOperandMetadata; -// -/// ** -// * MVExpand function that expands multivalue (array) fields into multiple rows. 
-// */ -// public class MVExpandFunctionImpl extends ImplementorUDF { -// -// public MVExpandFunctionImpl() { -// super(new MVExpandImplementor(), NullPolicy.ALL); -// } -// -// @Override -// public SqlReturnTypeInference getReturnTypeInference() { -// // For mvexpand, the output type should be the type of the array element (or ANY) -// return sqlOperatorBinding -> { -// RelDataTypeFactory typeFactory = sqlOperatorBinding.getTypeFactory(); -// -// if (sqlOperatorBinding.getOperandCount() == 0) { -// return typeFactory.createSqlType(SqlTypeName.NULL); -// } -// -// // Assume single argument: the array to expand -// RelDataType operandType = sqlOperatorBinding.getOperandType(0); -// RelDataType elementType = -// operandType.getComponentType() != null -// ? operandType.getComponentType() -// : typeFactory.createSqlType(SqlTypeName.ANY); -// -// // Output is a scalar (not array) -// return typeFactory.createTypeWithNullability(elementType, true); -// }; -// } -// -// @Override -// public UDFOperandMetadata getOperandMetadata() { -// return null; -// } -// -// public static class MVExpandImplementor implements NotNullImplementor { -// @Override -// public Expression implement( -// RexToLixTranslator translator, RexCall call, List translatedOperands) -// { -// // Delegate to static Java method for value expansion -// return Expressions.call( -// Types.lookupMethod(MVExpandFunctionImpl.class, "mvexpand", Object.class), -// translatedOperands.get(0)); -// } -// } -// -// /** -// * Implementation for mvexpand. -// * If the argument is a List, return its elements as a List (to be mapped to separate rows). -// * If the argument is null or not a List, return a singleton list with the original value. 
-// */ -// public static List mvexpand(Object arg) { -// if (arg == null) { -// return Collections.singletonList(null); -// } -// if (arg instanceof List) { -// List arr = (List) arg; -// if (arr.isEmpty()) { -// return Collections.singletonList(null); -// } -// return new ArrayList<>(arr); -// } else { -// // Non-array value: return as single-element list -// return Collections.singletonList(arg); -// } -// } -// } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalMvExpand.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalMvExpand.java deleted file mode 100644 index 66e4f0eac9c..00000000000 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalMvExpand.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.planner.logical; -// -// import java.util.Collections; -// import java.util.List; -// import java.util.Optional; -// import lombok.EqualsAndHashCode; -// import org.opensearch.sql.ast.expression.Field; -// -// @EqualsAndHashCode(callSuper = true) -// public class LogicalMvExpand extends LogicalPlan { -// private final Field field; -// private final Optional limit; -// -// public LogicalMvExpand(LogicalPlan input, Field field, Optional limit) { -// super(Collections.singletonList(input)); -// this.field = field; -// this.limit = limit != null ? 
limit : Optional.empty(); -// } -// -// public LogicalPlan getInput() { -// return getChild().get(0); -// } -// -// public Field getField() { -// return field; -// } -// -// public Optional getLimit() { -// return limit; -// } -// -// @Override -// public R accept(LogicalPlanNodeVisitor visitor, C context) { -// return visitor.visitLogicalMvExpand(this, context); -// } -// -// @Override -// public String toString() { -// return String.format("LogicalMvExpand(field=%s, limit=%s)", field, limit); -// } -// } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/MvExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/MvExpandOperator.java deleted file mode 100644 index 598922f4356..00000000000 --- a/core/src/main/java/org/opensearch/sql/planner/physical/MvExpandOperator.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.planner.physical; -// -// import java.util.Collections; -// import java.util.Iterator; -// import java.util.List; -// import java.util.Map; -// import java.util.NoSuchElementException; -// import java.util.Optional; -// import java.util.ArrayList; -// import java.util.LinkedHashMap; -// import lombok.EqualsAndHashCode; -// import lombok.Getter; -// import lombok.ToString; -// import org.opensearch.sql.data.model.ExprTupleValue; -// import org.opensearch.sql.data.model.ExprValue; -// import org.opensearch.sql.data.model.ExprValueUtils; -// -// @EqualsAndHashCode(callSuper = false) -// @ToString -// public class MvExpandOperator extends PhysicalPlan { -// @Getter private final PhysicalPlan input; -// @Getter private final String fieldName; -// @Getter private final Optional limit; -// @ToString.Exclude private Iterator expandedValuesIterator = -// Collections.emptyIterator(); -// @ToString.Exclude private ExprValue next = null; -// @ToString.Exclude private boolean nextPrepared = false; -// -// public 
MvExpandOperator(PhysicalPlan input, String fieldName, Optional limit) { -// this.input = input; -// this.fieldName = fieldName; -// this.limit = limit; -// } -// -// @Override -// public R accept(PhysicalPlanNodeVisitor visitor, C context) { -// return visitor.visitMvExpandOperator(this, context); -// } -// -// @Override -// public List getChild() { -// return Collections.singletonList(input); -// } -// -// @Override -// public void open() { -// input.open(); -// expandedValuesIterator = Collections.emptyIterator(); -// next = null; -// nextPrepared = false; -// } -// -// @Override -// public void close() { -// input.close(); -// } -// -// @Override -// public boolean hasNext() { -// if (!nextPrepared) { -// prepareNext(); -// } -// return next != null; -// } -// -// @Override -// public ExprValue next() { -// if (!nextPrepared) { -// prepareNext(); -// } -// if (next == null) { -// throw new NoSuchElementException("No more values in MvExpandOperator"); -// } -// ExprValue result = next; -// next = null; -// nextPrepared = false; -// return result; -// } -// -// private void prepareNext() { -// while (true) { -// if (expandedValuesIterator != null && expandedValuesIterator.hasNext()) { -// next = expandedValuesIterator.next(); -// nextPrepared = true; -// return; -// } -// if (!input.hasNext()) { -// next = null; -// nextPrepared = true; -// return; -// } -// ExprValue value = input.next(); -// expandedValuesIterator = expandRow(value); -// } -// } -// -// private Iterator expandRow(ExprValue value) { -// if (value == null || value.isMissing()) { -// return Collections.emptyIterator(); -// } -// Map tuple = value.tupleValue(); -// -// if (fieldName.startsWith("_")) { -// return Collections.singletonList(value).iterator(); -// } -// -// ExprValue fieldVal = tuple.get(fieldName); -// if (fieldVal == null || fieldVal.isMissing() || fieldVal.isNull()) { -// return Collections.emptyIterator(); -// } -// -// // If not a collection, just return the row as is -// if 
(!(fieldVal instanceof org.opensearch.sql.data.model.ExprCollectionValue)) { -// return Collections.singletonList(value).iterator(); -// } -// -// // Get the list of ExprValue from the collection -// List values = fieldVal.collectionValue(); -// if (values.isEmpty()) { -// return Collections.emptyIterator(); -// } -// -// int max = limit.orElse(values.size()); -// List expandedRows = new ArrayList<>(); -// int count = 0; -// for (ExprValue v : values) { -// if (max > 0 && count >= max) break; -// count++; -// LinkedHashMap newTuple = new LinkedHashMap<>(tuple); -// newTuple.put(fieldName, v); -// expandedRows.add(new ExprTupleValue(newTuple)); -// } -// return expandedRows.iterator(); -// } -// } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java index 804bcf10574..66c7219e39c 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java @@ -103,8 +103,4 @@ public R visitTrendline(TrendlineOperator node, C context) { public R visitCursorClose(CursorCloseOperator node, C context) { return visitNode(node, context); } - - // public R visitMvExpandOperator(MvExpandOperator plan, C context) { - // return visitNode(plan, context); - // } } From a3c138444a769ded886ba75d2b71482844ece11d Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Wed, 22 Oct 2025 14:07:56 -0500 Subject: [PATCH 06/24] Add mvexpand.rst Signed-off-by: Srikanth Padakanti --- docs/user/ppl/cmd/mvexpand.rst | 180 +++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 docs/user/ppl/cmd/mvexpand.rst diff --git a/docs/user/ppl/cmd/mvexpand.rst b/docs/user/ppl/cmd/mvexpand.rst new file mode 100644 index 00000000000..d9256ea12d0 --- /dev/null +++ b/docs/user/ppl/cmd/mvexpand.rst @@ -0,0 +1,180 @@ +============= +mvexpand 
+============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| The ``mvexpand`` command expands each value in a multivalue (array) field into a separate row, similar to Splunk's `mvexpand` command. +| For each document, every value in the specified field is returned as a new row. This is especially useful for log analytics and data exploration involving array fields. + +| Key features of ``mvexpand``: +- Expands array fields into multiple rows, one per value. +- Supports an optional ``limit`` parameter to restrict the number of expanded values per document. +- Handles empty, null, and non-array fields gracefully. +- Works as a streaming/distributable command for performance and scalability. + +Version +======= +3.3.0 + +Syntax +====== +mvexpand [limit=] + +* **field**: The multivalue (array) field to expand. (Required) +* **limit**: Maximum number of values per document to expand. (Optional) + +Usage +===== +Basic expansion:: + + os> source=logs | mvexpand tags + +Expansion with limit:: + + os> source=docs | mvexpand ids limit=3 + +Limitations +=========== +- Only one field can be expanded per mvexpand command. +- For non-array fields, the value is returned as-is. +- For empty or null arrays, no rows are returned. +- Large arrays may be subject to resource/memory limits; exceeding them results in an error or warning. + +Examples and Edge Cases +======================= + +Example 1: Basic Expansion +-------------------------- +Expand all values from an array field. + +Input document:: + + { "tags": ["error", "warning", "info"] } + +PPL query:: + + os> source=logs | mvexpand tags + fetched rows / total rows = 3/3 + +--------+ + | tags | + +--------+ + | error | + | warning| + | info | + +--------+ + +Example 2: Expansion with Limit +------------------------------- +Limit the number of expanded values per document. 
+ +Input document:: + + { "ids": [1, 2, 3, 4, 5] } + +PPL query:: + + os> source=docs | mvexpand ids limit=3 + fetched rows / total rows = 3/3 + +-----+ + | ids | + +-----+ + | 1 | + | 2 | + | 3 | + +-----+ + +Example 3: Empty or Null Arrays +------------------------------ +Handles documents with empty or null array fields. + +Input document:: + + { "tags": [] } + +PPL query:: + + os> source=logs | mvexpand tags + fetched rows / total rows = 0/0 + +------+ + | tags | + +------+ + +------+ + +Input document:: + + { "tags": null } + +PPL query:: + + os> source=logs | mvexpand tags + fetched rows / total rows = 0/0 + +------+ + | tags | + +------+ + +------+ + +Example 4: Non-array Field +-------------------------- +If the field is a single value (not an array), mvexpand returns the value as-is. + +Input document:: + + { "tags": "error" } + +PPL query:: + + os> source=logs | mvexpand tags + fetched rows / total rows = 1/1 + +-------+ + | tags | + +-------+ + | error | + +-------+ + +Example 5: Large Arrays and Memory Limits +---------------------------------------- +If an array exceeds configured memory/resource limits, mvexpand returns an error. + +Input document:: + + { "ids": [1, 2, ..., 100000] } + +PPL query:: + + os> source=docs | mvexpand ids + Error: Memory/resource limit exceeded while expanding field 'ids'. Please reduce the array size or specify a limit. + +Example 6: Multiple Fields (Limitation) +--------------------------------------- +mvexpand only supports expanding one field per command. To expand multiple fields, use multiple mvexpand commands or document the limitation. + +PPL query:: + + os> source=docs | mvexpand a | mvexpand b + +Example 7: Edge Case - Field Missing +------------------------------------ +If the field does not exist in a document, no row is produced for that document. 
+ +Input document:: + + { "other": [1,2] } + +PPL query:: + + os> source=docs | mvexpand tags + fetched rows / total rows = 0/0 + +------+ + | tags | + +------+ + +------+ + +--- From 148ccc5f2fbe28cca8b2484ce665948d0619ca29 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Wed, 22 Oct 2025 15:50:34 -0500 Subject: [PATCH 07/24] Add Tests Signed-off-by: Srikanth Padakanti --- .../sql/calcite/CalciteNoPushdownIT.java | 1 + .../sql/calcite/remote/CalciteExplainIT.java | 8 ++++++++ .../sql/legacy/SQLIntegTestCase.java | 5 +++++ .../calcite/explain_mvexpand.yaml | 17 +++++++++++++++++ .../calcite_no_pushdown/explain_mvexpand.yaml | 17 +++++++++++++++++ .../test/resources/mvexpand_edge_cases.json | 18 ++++++++++++++++++ .../resources/mvexpand_edge_cases_mapping.json | 8 ++++++++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 12 ++++++++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 12 ++++++++++++ 9 files changed, 98 insertions(+) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml create mode 100644 integ-test/src/test/resources/mvexpand_edge_cases.json create mode 100644 integ-test/src/test/resources/mvexpand_edge_cases_mapping.json diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index 69507c71aa5..f17e064eb3f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -32,6 +32,7 @@ CalciteDedupCommandIT.class, CalciteDescribeCommandIT.class, CalciteExpandCommandIT.class, + CalciteMvExpandCommandIT.class, CalciteFieldsCommandIT.class, CalciteFillNullCommandIT.class, CalciteFlattenCommandIT.class, diff --git 
a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index a5dc5b4dc63..6d293877de4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -38,6 +38,7 @@ public void init() throws Exception { loadIndex(Index.LOGS); loadIndex(Index.WORKER); loadIndex(Index.WORK_INFORMATION); + loadIndex(Index.MVEXPAND_EDGE_CASES); } @Override @@ -307,6 +308,13 @@ public void testExplainMultisearchTimestampInterleaving() throws IOException { assertYamlEqualsIgnoreId(expected, result); } + @Test + public void testMvexpandExplain() throws IOException { + String query = "source=mvexpand_edge_cases | mvexpand skills"; + String expected = loadExpectedPlan("explain_mvexpand.yaml"); + assertYamlEqualsJsonIgnoreId(expected, explainQueryToString(query)); + } + // Only for Calcite @Test public void testExplainIsBlank() throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index ee979900c48..6103cbd653d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -686,6 +686,11 @@ public enum Index { "_doc", getNestedSimpleIndexMapping(), "src/test/resources/nested_simple.json"), + MVEXPAND_EDGE_CASES( + "mvexpand_edge_cases", + "mvexpand_edge_cases", + getMappingFile("mvexpand_edge_cases_mapping.json"), + "src/test/resources/mvexpand_edge_cases.json"), DEEP_NESTED( TestsConstants.TEST_INDEX_DEEP_NESTED, "_doc", diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml new file mode 100644 index 00000000000..650fb3508d0 
--- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml @@ -0,0 +1,17 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(skills.level=[$1], skills.name=[$2], username=[$3], KEY=[$10], VALUE=[$11]) + LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) + CalciteLogicalIndexScan(table=[[OpenSearch, mvexpand_edge_cases]]) + Uncollect + LogicalProject(skills=[$cor0.skills]) + LogicalValues(tuples=[[{ 0 }]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..11=[{inputs}], skills.level=[$t1], skills.name=[$t2], username=[$t3], KEY=[$t10], VALUE=[$t11]) + EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]]) + EnumerableUncollect + EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.skills], skills=[$t2]) + EnumerableValues(tuples=[[{ 0 }]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml new file mode 100644 index 00000000000..650fb3508d0 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml @@ -0,0 +1,17 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(skills.level=[$1], skills.name=[$2], username=[$3], KEY=[$10], VALUE=[$11]) + LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) + CalciteLogicalIndexScan(table=[[OpenSearch, mvexpand_edge_cases]]) + Uncollect + LogicalProject(skills=[$cor0.skills]) + LogicalValues(tuples=[[{ 0 }]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..11=[{inputs}], skills.level=[$t1], skills.name=[$t2], username=[$t3], KEY=[$t10], VALUE=[$t11]) + 
EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]]) + EnumerableUncollect + EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.skills], skills=[$t2]) + EnumerableValues(tuples=[[{ 0 }]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/mvexpand_edge_cases.json b/integ-test/src/test/resources/mvexpand_edge_cases.json new file mode 100644 index 00000000000..662769d89b8 --- /dev/null +++ b/integ-test/src/test/resources/mvexpand_edge_cases.json @@ -0,0 +1,18 @@ +{"index":{}} +{"username":"happy","skills":[{"name":"python"},{"name":"java"},{"name":"sql"}]} +{"index":{}} +{"username":"single","skills":[{"name":"go"}]} +{"index":{}} +{"username":"empty","skills":[]} +{"index":{}} +{"username":"nullskills","skills":null} +{"index":{}} +{"username":"noskills"} +{"index":{}} +{"username":"missingattr","skills":[{"name":"c"},{"level":"advanced"}]} +{"index":{}} +{"username":"complex","skills":[{"name":"ml","level":"expert"},{"name":"ai"},{"level":"novice"}]} +{"index":{}} +{"username":"duplicate","skills":[{"name":"dup"},{"name":"dup"}]} +{"index":{}} +{"username":"large","skills":[{"name":"s1"},{"name":"s2"},{"name":"s3"},{"name":"s4"},{"name":"s5"},{"name":"s6"},{"name":"s7"},{"name":"s8"},{"name":"s9"},{"name":"s10"}]} diff --git a/integ-test/src/test/resources/mvexpand_edge_cases_mapping.json b/integ-test/src/test/resources/mvexpand_edge_cases_mapping.json new file mode 100644 index 00000000000..164adb77f62 --- /dev/null +++ b/integ-test/src/test/resources/mvexpand_edge_cases_mapping.json @@ -0,0 +1,8 @@ +{ + "mappings": { + "properties": { + "username": { "type": "keyword" }, + "skills": { "type": "nested" } + } + } +} \ No newline at end of file diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index e392a682cef..00679957a2f 
100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -70,6 +70,7 @@ import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -607,6 +608,17 @@ public String visitAppend(Append node, String context) { return StringUtils.format("%s | append [%s ]", child, subsearch); } + @Override + public String visitMvExpand(MvExpand node, String context) { + String child = node.getChild().get(0).accept(this, context); + String field = MASK_COLUMN; // Always anonymize field names + // Optionally handle limit if needed (e.g., | mvexpand identifier limit=***) + if (node.getLimit() != null) { + return StringUtils.format("%s | mvexpand %s limit=%s", child, field, MASK_LITERAL); + } + return StringUtils.format("%s | mvexpand %s", child, field); + } + @Override public String visitMultisearch(Multisearch node, String context) { List anonymizedSubsearches = new ArrayList<>(); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 6de9acacfe1..7c60930fff0 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -738,6 +738,18 @@ public void testMvappend() { anonymize("source=t | eval result=mvappend(a, 'b', 'c') | fields result")); } + @Test + public void testMvexpandCommand() { + assertEquals("source=table | mvexpand identifier", anonymize("source=t | mvexpand skills")); + } + + @Test + public void testMvexpandCommandWithLimit() { + assertEquals( + "source=table | mvexpand 
identifier limit=***", + anonymize("source=t | mvexpand skills limit 5")); + } + @Test public void testRexWithOffsetField() { when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10); From f5a9e823089a69950b6cb552020a7960818bf3a1 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 23 Oct 2025 10:56:50 -0500 Subject: [PATCH 08/24] Add the mvexpand.rst to the index.rst Signed-off-by: Srikanth Padakanti --- docs/user/ppl/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 17b4797df39..080b0a54765 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -68,6 +68,8 @@ The query start with search command and then flowing a set of command delimited - `expand command `_ + - `mvexpand command `_ + - `explain command `_ - `fields command `_ From 18cbba416f3ad468c765bd82ceae24bba334915d Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Mon, 27 Oct 2025 14:52:12 -0500 Subject: [PATCH 09/24] Remove the unwanted code Signed-off-by: Srikanth Padakanti --- .../sql/planner/logical/LogicalPlanNodeVisitor.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java index fa9cf5ccaa0..c9eedd8efc8 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java @@ -119,8 +119,4 @@ public R visitFetchCursor(LogicalFetchCursor plan, C context) { public R visitCloseCursor(LogicalCloseCursor plan, C context) { return visitNode(plan, context); } - - // public R visitLogicalMvExpand(LogicalMvExpand plan, C context) { - // return visitNode(plan, context); - // } } From 60fa2adf6442f948ca974238ff320d3ff8b8171f Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Mon, 27 Oct 2025 15:19:43 -0500 Subject: 
[PATCH 10/24] Fix the failing test Signed-off-by: Srikanth Padakanti --- .../org/opensearch/sql/calcite/remote/CalciteExplainIT.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 855bf0a9cff..71fe09c7763 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -308,11 +308,13 @@ public void testExplainMultisearchTimestampInterleaving() throws IOException { assertYamlEqualsIgnoreId(expected, result); } + // Only for Calcite @Test public void testMvexpandExplain() throws IOException { - String query = "source=mvexpand_edge_cases | mvexpand skills"; + // script pushdown String expected = loadExpectedPlan("explain_mvexpand.yaml"); - assertYamlEqualsJsonIgnoreId(expected, explainQueryToString(query)); + assertYamlEqualsIgnoreId( + expected, explainQueryYaml("source=mvexpand_edge_cases | mvexpand skills")); } // Only for Calcite From a28894aefd5a81773172bf2c087bd30490416911 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 30 Oct 2025 15:31:04 -0500 Subject: [PATCH 11/24] Address the PR comments and fix the tests accordingly Signed-off-by: Srikanth Padakanti --- .../org/opensearch/sql/ast/dsl/AstDSL.java | 3 +- .../org/opensearch/sql/ast/tree/MvExpand.java | 4 - .../sql/calcite/CalciteRelNodeVisitor.java | 323 ++++++++++++------ docs/category.json | 4 +- .../remote/CalciteMvExpandCommandIT.java | 46 ++- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 +- .../ppl/calcite/CalcitePPLMvExpandTest.java | 219 +++++++----- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 2 +- 8 files changed, 389 insertions(+), 214 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java 
b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 4039f5d9b97..70f544c84a0 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -137,7 +137,8 @@ public Expand expand(UnresolvedPlan input, Field field, String alias) { } public static UnresolvedPlan mvexpand(UnresolvedPlan input, Field field, Integer limit) { - return new MvExpand(field, limit); + // attach the incoming child plan so the AST contains the pipeline link + return new MvExpand(field, limit).attach(input); } public static UnresolvedPlan projectWithArg( diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java b/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java index 4ac64253ebb..127f0332d0d 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java @@ -34,10 +34,6 @@ public MvExpand attach(UnresolvedPlan child) { return this; } - public Field getField() { - return field; - } - @Nullable public Integer getLimit() { return limit; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index ecdaffeb4ec..b2fb9cb8b0a 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1542,78 +1542,6 @@ private static void buildDedupNotNull( context.relBuilder.projectExcept(_row_number_dedup_); } - private void buildMvExpandRelNode( - RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { - - // 1. Capture left node and its schema BEFORE calling build() - RelNode leftNode = context.relBuilder.peek(); - RelDataType leftSchema = leftNode.getRowType(); - - // 2. 
Create correlation variable - Holder correlVariable = Holder.empty(); - context.relBuilder.variable(correlVariable::set); - - // 3. Find the array field index in the left schema by name (robust) - RelDataTypeField leftField = leftSchema.getField(arrayFieldName, false, false); - int arrayFieldIndexInLeft; - if (leftField != null) { - arrayFieldIndexInLeft = leftField.getIndex(); - } else { - // fallback (best effort) - arrayFieldIndexInLeft = arrayFieldRex.getIndex(); - } - - // 4. Build correlated field access for the right-side projection - RexNode correlArrayFieldAccess = - context.relBuilder.field( - context.rexBuilder.makeCorrel(leftSchema, correlVariable.get().id), - arrayFieldIndexInLeft); - - // 5. Build left and right nodes (leftBuilt is the original left, rightNode uncollects the - // array) - RelNode leftBuilt = context.relBuilder.build(); - RelNode rightNode = - context - .relBuilder - .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) - .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) - .uncollect(List.of(), false) - .build(); - - // 6. Compute a proper RexInputRef that refers to leftBuilt's rowType at the - // arrayFieldIndexInLeft. - // Use rexBuilder.makeInputRef with leftBuilt.getRowType() - RexNode requiredColumnRef = - context.rexBuilder.makeInputRef(leftBuilt.getRowType(), arrayFieldIndexInLeft); - - // 7. Correlate left and right using the proper required column ref - context - .relBuilder - .push(leftBuilt) - .push(rightNode) - .correlate(JoinRelType.INNER, correlVariable.get().id, List.of(requiredColumnRef)); - - // 8. Remove the original array field from the output by name using the builder's field() - // (this ensures we remove the correct column instance) - RexNode toRemove; - try { - toRemove = context.relBuilder.field(arrayFieldName); - } catch (Exception e) { - // Fallback in case name lookup fails - toRemove = arrayFieldRex; - } - context.relBuilder.projectExcept(toRemove); - - // 9. 
Optional rename into alias (same as your prior logic) - if (alias != null) { - tryToRemoveNestedFields(context); - RexInputRef expandedField = context.relBuilder.field(arrayFieldName); - List names = new ArrayList<>(context.relBuilder.peek().getRowType().getFieldNames()); - names.set(expandedField.getIndex(), alias); - context.relBuilder.rename(names); - } - } - @Override public RelNode visitMvExpand(MvExpand node, CalcitePlanContext context) { visitChildren(node, context); @@ -2767,46 +2695,175 @@ private void flattenParsedPattern( projectPlusOverriding(fattenedNodes, projectNames, context); } - private void buildExpandRelNode( - RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { - // 3. Capture the outer row in a CorrelationId - Holder correlVariable = Holder.empty(); - context.relBuilder.variable(correlVariable::set); - - // 4. Create RexFieldAccess to access left node's array field with correlationId and build join - // left node - RexNode correlArrayFieldAccess = - context.relBuilder.field( - context.rexBuilder.makeCorrel( - context.relBuilder.peek().getRowType(), correlVariable.get().id), - arrayFieldRex.getIndex()); - RelNode leftNode = context.relBuilder.build(); + // private void buildExpandRelNode( + // RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext + // context) { + // // 3. Capture the outer row in a CorrelationId + // Holder correlVariable = Holder.empty(); + // context.relBuilder.variable(correlVariable::set); + // + // // 4. Create RexFieldAccess to access left node's array field with correlationId and build + // join + // // left node + // RexNode correlArrayFieldAccess = + // context.relBuilder.field( + // context.rexBuilder.makeCorrel( + // context.relBuilder.peek().getRowType(), correlVariable.get().id), + // arrayFieldRex.getIndex()); + // RelNode leftNode = context.relBuilder.build(); + // + // // 5. 
Build join right node and expand the array field using uncollect + // RelNode rightNode = + // context + // .relBuilder + // // fake input, see convertUnnest and convertExpression in Calcite SqlToRelConverter + // .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) + // .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) + // .uncollect(List.of(), false) + // .build(); + // + // // 6. Perform a nested-loop join (correlate) between the original table and the expanded + // // array field. + // // The last parameter has to refer to the array to be expanded on the left side. It will + // // be used by the right side to correlate with the left side. + // context + // .relBuilder + // .push(leftNode) + // .push(rightNode) + // .correlate(JoinRelType.INNER, correlVariable.get().id, List.of(arrayFieldRex)) + // // 7. Remove the original array field from the output. + // // TODO: RFC: should we keep the original array field when alias is present? + // .projectExcept(arrayFieldRex); + // + // if (alias != null) { + // // Sub-nested fields cannot be removed after renaming the nested field. + // tryToRemoveNestedFields(context); + // RexInputRef expandedField = context.relBuilder.field(arrayFieldName); + // List names = new + // ArrayList<>(context.relBuilder.peek().getRowType().getFieldNames()); + // names.set(expandedField.getIndex(), alias); + // context.relBuilder.rename(names); + // } + // } + // + // private void buildMvExpandRelNode( + // RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext + // context) { + // + // // 1. Capture left node and its schema BEFORE calling build() + // RelNode leftNode = context.relBuilder.peek(); + // RelDataType leftSchema = leftNode.getRowType(); + // + // // 2. Create correlation variable + // Holder correlVariable = Holder.empty(); + // context.relBuilder.variable(correlVariable::set); + // + // // 3. 
Find the array field index in the left schema by name (robust) + // RelDataTypeField leftField = leftSchema.getField(arrayFieldName, false, false); + // int arrayFieldIndexInLeft; + // if (leftField != null) { + // arrayFieldIndexInLeft = leftField.getIndex(); + // } else { + // // fallback (best effort) + // arrayFieldIndexInLeft = arrayFieldRex.getIndex(); + // } + // + // // 4. Build correlated field access for the right-side projection + // RexNode correlArrayFieldAccess = + // context.relBuilder.field( + // context.rexBuilder.makeCorrel(leftSchema, correlVariable.get().id), + // arrayFieldIndexInLeft); + // + // // 5. Build left and right nodes (leftBuilt is the original left, rightNode uncollects the + // // array) + // RelNode leftBuilt = context.relBuilder.build(); + // RelNode rightNode = + // context + // .relBuilder + // .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) + // .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) + // .uncollect(List.of(), false) + // .build(); + // + // // 6. Compute a proper RexInputRef that refers to leftBuilt's rowType at the + // // arrayFieldIndexInLeft. + // // Use rexBuilder.makeInputRef with leftBuilt.getRowType() + // RexNode requiredColumnRef = + // context.rexBuilder.makeInputRef(leftBuilt.getRowType(), arrayFieldIndexInLeft); + // + // // 7. Correlate left and right using the proper required column ref + // context + // .relBuilder + // .push(leftBuilt) + // .push(rightNode) + // .correlate(JoinRelType.INNER, correlVariable.get().id, List.of(requiredColumnRef)); + // + // // 8. 
Remove the original array field from the output by name using the builder's field() + // // (this ensures we remove the correct column instance) + // RexNode toRemove; + // try { + // toRemove = context.relBuilder.field(arrayFieldName); + // } catch (Exception e) { + // // Fallback in case name lookup fails + // toRemove = arrayFieldRex; + // } + // context.relBuilder.projectExcept(toRemove); + // + // // 9. Optional rename into alias (same as your prior logic) + // if (alias != null) { + // tryToRemoveNestedFields(context); + // RexInputRef expandedField = context.relBuilder.field(arrayFieldName); + // List names = new + // ArrayList<>(context.relBuilder.peek().getRowType().getFieldNames()); + // names.set(expandedField.getIndex(), alias); + // context.relBuilder.rename(names); + // } + // } + + // Replace the existing three methods with these implementations. + + private void buildUnnestForLeft( + RelNode leftBuilt, + RelDataType leftRowType, + int arrayFieldIndex, + String arrayFieldName, + String alias, + Holder correlVariable, + RexNode correlArrayFieldAccess, + CalcitePlanContext context) { - // 5. Build join right node and expand the array field using uncollect + // Build right node: one-row -> project(correlated access) -> uncollect RelNode rightNode = context .relBuilder - // fake input, see convertUnnest and convertExpression in Calcite SqlToRelConverter .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) .uncollect(List.of(), false) .build(); - // 6. Perform a nested-loop join (correlate) between the original table and the expanded - // array field. - // The last parameter has to refer to the array to be expanded on the left side. It will - // be used by the right side to correlate with the left side. 
+ // Compute required column ref against leftBuilt's row type (robust) + RexNode requiredColumnRef = + context.rexBuilder.makeInputRef(leftBuilt.getRowType(), arrayFieldIndex); + + // Correlate leftBuilt and rightNode using the proper required column ref context .relBuilder - .push(leftNode) + .push(leftBuilt) .push(rightNode) - .correlate(JoinRelType.INNER, correlVariable.get().id, List.of(arrayFieldRex)) - // 7. Remove the original array field from the output. - // TODO: RFC: should we keep the original array field when alias is present? - .projectExcept(arrayFieldRex); + .correlate(JoinRelType.INNER, correlVariable.get().id, List.of(requiredColumnRef)); + + // Remove the original array field from the output by name if possible + RexNode toRemove; + try { + toRemove = context.relBuilder.field(arrayFieldName); + } catch (Exception e) { + // Fallback in case name lookup fails + toRemove = requiredColumnRef; + } + context.relBuilder.projectExcept(toRemove); + // Optional rename into alias (preserve the original logic) if (alias != null) { - // Sub-nested fields cannot be removed after renaming the nested field. 
tryToRemoveNestedFields(context); RexInputRef expandedField = context.relBuilder.field(arrayFieldName); List names = new ArrayList<>(context.relBuilder.peek().getRowType().getFieldNames()); @@ -2815,6 +2872,76 @@ private void buildExpandRelNode( } } + private void buildExpandRelNode( + RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { + + // Capture left node and its schema BEFORE calling build() + RelNode leftNode = context.relBuilder.peek(); + RelDataType leftRowType = leftNode.getRowType(); + + // Create correlation variable while left is still on the builder stack + Holder correlVariable = Holder.empty(); + context.relBuilder.variable(correlVariable::set); + + // Create correlated field access while left is still on the builder stack + // (preserve original expand semantics: use the input RexInputRef index) + RexNode correlArrayFieldAccess = + context.relBuilder.field( + context.rexBuilder.makeCorrel(leftRowType, correlVariable.get().id), + arrayFieldRex.getIndex()); + + // Materialize leftBuilt (this pops the left from the relBuilder stack) + RelNode leftBuilt = context.relBuilder.build(); + + // Use unified helper to build right/uncollect + correlate + cleanup + buildUnnestForLeft( + leftBuilt, + leftRowType, + arrayFieldRex.getIndex(), + arrayFieldName, + alias, + correlVariable, + correlArrayFieldAccess, + context); + } + + private void buildMvExpandRelNode( + RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { + + // Capture left node and its schema BEFORE calling build() + RelNode leftNode = context.relBuilder.peek(); + RelDataType leftRowType = leftNode.getRowType(); + + // Resolve the array field index in left schema by name (robust); fallback to original index + RelDataTypeField leftField = leftRowType.getField(arrayFieldName, false, false); + int arrayFieldIndexInLeft = + (leftField != null) ? 
leftField.getIndex() : arrayFieldRex.getIndex(); + + // Create correlation variable while left is still on the builder stack + Holder correlVariable = Holder.empty(); + context.relBuilder.variable(correlVariable::set); + + // Create correlated field access while left is still on the builder stack + RexNode correlArrayFieldAccess = + context.relBuilder.field( + context.rexBuilder.makeCorrel(leftRowType, correlVariable.get().id), + arrayFieldIndexInLeft); + + // Materialize leftBuilt + RelNode leftBuilt = context.relBuilder.build(); + + // Use unified helper to build right/uncollect + correlate + cleanup + buildUnnestForLeft( + leftBuilt, + leftRowType, + arrayFieldIndexInLeft, + arrayFieldName, + alias, + correlVariable, + correlArrayFieldAccess, + context); + } + /** Creates an optimized sed call using native Calcite functions */ private RexNode createOptimizedSedCall( RexNode fieldRex, String sedExpression, CalcitePlanContext context) { diff --git a/docs/category.json b/docs/category.json index d9605598800..1c5294b5e8a 100644 --- a/docs/category.json +++ b/docs/category.json @@ -50,7 +50,6 @@ "user/ppl/cmd/subquery.rst", "user/ppl/cmd/syntax.rst", "user/ppl/cmd/timechart.rst", - "user/ppl/cmd/search.rst", "user/ppl/functions/statistical.rst", "user/ppl/cmd/top.rst", "user/ppl/cmd/trendline.rst", @@ -66,7 +65,8 @@ "user/ppl/functions/string.rst", "user/ppl/functions/conversion.rst", "user/ppl/general/datatypes.rst", - "user/ppl/general/identifiers.rst" + "user/ppl/general/identifiers.rst", + "user/ppl/cmd/mvexpand.rst" ], "bash_settings": [ "user/ppl/admin/settings.rst" diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java index fe48580ad87..8d42544be6a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java @@ -12,11 +12,12 @@ import org.json.JSONObject; import org.junit.jupiter.api.Test; import org.opensearch.client.Request; +import org.opensearch.sql.legacy.SQLIntegTestCase.Index; import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalciteMvExpandCommandIT extends PPLIntegTestCase { - private static final String INDEX = "mvexpand_edge_cases"; + private static final String INDEX = Index.MVEXPAND_EDGE_CASES.getName(); @Override public void init() throws Exception { @@ -29,20 +30,19 @@ public void init() throws Exception { + "\"username\": { \"type\": \"keyword\" }," + "\"skills\": { \"type\": \"nested\" }" + "} } }"); + + // Pass plain JSON documents; bulkInsert will auto-assign incremental ids. bulkInsert( INDEX, - "1|{\"username\":\"happy\"," - + " \"skills\":[{\"name\":\"python\"},{\"name\":\"java\"},{\"name\":\"sql\"}]}", - "2|{\"username\":\"single\", \"skills\":[{\"name\":\"go\"}]}", - "3|{\"username\":\"empty\", \"skills\":[]}", - "4|{\"username\":\"nullskills\", \"skills\":null}", - "5|{\"username\":\"noskills\"}", - "6|{\"username\":\"missingattr\", \"skills\":[{\"name\":\"c\"},{\"level\":\"advanced\"}]}", - "7|{\"username\":\"complex\"," - + " \"skills\":[{\"name\":\"ml\",\"level\":\"expert\"},{\"name\":\"ai\"},{\"level\":\"novice\"}]}", - "8|{\"username\":\"duplicate\", \"skills\":[{\"name\":\"dup\"},{\"name\":\"dup\"}]}", - "9|{\"username\":\"large\"," - + " \"skills\":[{\"name\":\"s1\"},{\"name\":\"s2\"},{\"name\":\"s3\"},{\"name\":\"s4\"},{\"name\":\"s5\"},{\"name\":\"s6\"},{\"name\":\"s7\"},{\"name\":\"s8\"},{\"name\":\"s9\"},{\"name\":\"s10\"}]}"); + "{\"username\":\"happy\",\"skills\":[{\"name\":\"python\"},{\"name\":\"java\"},{\"name\":\"sql\"}]}", + "{\"username\":\"single\",\"skills\":[{\"name\":\"go\"}]}", + "{\"username\":\"empty\",\"skills\":[]}", + "{\"username\":\"nullskills\",\"skills\":null}", + "{\"username\":\"noskills\"}", + 
"{\"username\":\"missingattr\",\"skills\":[{\"name\":\"c\"},{\"level\":\"advanced\"}]}", + "{\"username\":\"complex\",\"skills\":[{\"name\":\"ml\",\"level\":\"expert\"},{\"name\":\"ai\"},{\"level\":\"novice\"}]}", + "{\"username\":\"duplicate\",\"skills\":[{\"name\":\"dup\"},{\"name\":\"dup\"}]}", + "{\"username\":\"large\",\"skills\":[{\"name\":\"s1\"},{\"name\":\"s2\"},{\"name\":\"s3\"},{\"name\":\"s4\"},{\"name\":\"s5\"},{\"name\":\"s6\"},{\"name\":\"s7\"},{\"name\":\"s8\"},{\"name\":\"s9\"},{\"name\":\"s10\"}]}"); refreshIndex(INDEX); } @@ -115,12 +115,26 @@ private static void createIndex(String index, String mappingJson) throws IOExcep PPLIntegTestCase.adminClient().performRequest(request); } + /** + * Bulk insert helper: - Accepts plain JSON strings (no id): assigns incremental numeric ids + * starting at 1. - Also accepts legacy "id|" strings if a test prefers explicit ids. + */ private static void bulkInsert(String index, String... docs) throws IOException { StringBuilder bulk = new StringBuilder(); + int nextAutoId = 1; for (String doc : docs) { - String[] parts = doc.split("\\|", 2); - bulk.append("{\"index\":{\"_id\":").append(parts[0]).append("}}\n"); - bulk.append(parts[1]).append("\n"); + String id; + String json; + if (doc.contains("|")) { + String[] parts = doc.split("\\|", 2); + id = parts[0]; + json = parts[1]; + } else { + id = String.valueOf(nextAutoId++); + json = doc; + } + bulk.append("{\"index\":{\"_id\":").append(id).append("}}\n"); + bulk.append(json).append("\n"); } Request request = new Request("POST", "/" + index + "/_bulk?refresh=true"); request.setJsonEntity(bulk.toString()); diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 99ec2e23cac..3ce375feaf0 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -461,7 +461,7 @@ expandCommand ; mvexpandCommand - : MVEXPAND fieldExpression (LIMIT INTEGER_LITERAL)? 
+ : MVEXPAND fieldExpression (LIMIT '=' INTEGER_LITERAL)? ; flattenCommand diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java index 1114c1f2051..6cc2c094323 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java @@ -9,6 +9,8 @@ import static org.junit.Assert.fail; import com.google.common.collect.ImmutableList; +import java.lang.reflect.Field; +import java.lang.reflect.Method; import java.util.List; import lombok.RequiredArgsConstructor; import org.apache.calcite.DataContext; @@ -18,6 +20,8 @@ import org.apache.calcite.plan.RelTraitDef; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.rel2sql.RelToSqlConverter; +import org.apache.calcite.rel.rel2sql.SqlImplementor; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelProtoDataType; @@ -36,6 +40,17 @@ import org.checkerframework.checker.nullness.qual.Nullable; import org.junit.Test; +/** + * Calcite tests for the mvexpand command. + * + *

This file contains a set of planner tests for mvexpand and a small helper to verify RelNode -> + * SQL conversion (best-effort via reflection) so tests can run in various IDE/classpath setups. + * + *

Notes: - The scan() return type uses Enumerable (no type-use @Nullable) to avoid + * "annotation not applicable to this kind of reference" in some environments. - + * verifyPPLToSparkSQL(RelNode) uses reflection/fallback to exercise Rel->SQL conversion without + * adding a compile-time dependency on OpenSearchSparkSqlDialect. + */ public class CalcitePPLMvExpandTest extends CalcitePPLAbstractTest { public CalcitePPLMvExpandTest() { @@ -47,104 +62,21 @@ protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpec final SchemaPlus rootSchema = Frameworks.createRootSchema(true); final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + // Keep dataset minimal — planner tests here only need a representative schema and a couple of + // rows. ImmutableList users = ImmutableList.of( - // happy: multiple skills + // representative row with multiple skills new Object[] { "happy", new Object[] { new Object[] {"python", null}, - new Object[] {"java", null}, - new Object[] {"sql", null} - } - }, - // single: single skill - new Object[] {"single", new Object[] {new Object[] {"go", null}}}, - // empty: empty skills array - new Object[] {"empty", new Object[] {}}, - // nullskills: null skills array - new Object[] {"nullskills", null}, - // noskills: missing skills (simulate with null) - new Object[] {"noskills", null}, - // missingattr: skills with missing fields - new Object[] { - "missingattr", - new Object[] { - new Object[] {"c", null}, - new Object[] {null, "advanced"} - } - }, - // complex: skills with some missing name/level - new Object[] { - "complex", - new Object[] { - new Object[] {"ml", "expert"}, - new Object[] {"ai", null}, - new Object[] {null, "novice"} - } - }, - // duplicate: skills with duplicate names - new Object[] { - "duplicate", - new Object[] { - new Object[] {"dup", null}, - new Object[] {"dup", null} - } - }, - // large: skills with many elements - new Object[] { - "large", - new Object[] { - new Object[] {"s1", 
null}, new Object[] {"s2", null}, new Object[] {"s3", null}, - new Object[] {"s4", null}, new Object[] {"s5", null}, new Object[] {"s6", null}, - new Object[] {"s7", null}, new Object[] {"s8", null}, new Object[] {"s9", null}, - new Object[] {"s10", null} + new Object[] {"java", null} } }, - // primitive: array of primitives instead of objects - new Object[] {"primitive", new Object[] {"python", "java"}}, - // allnulls: array of nulls - new Object[] {"allnulls", new Object[] {null, null}}, - // emptyobj: array with an empty object - new Object[] {"emptyobj", new Object[] {new Object[] {}}}, - // deeplyNested: array of arrays - new Object[] { - "deeplyNested", - new Object[] { - new Object[] {new Object[] {"python", null}}, - new Object[] {new Object[] {"java", null}} - } - }, - // mixedTypes: array with mixed types - new Object[] { - "mixedTypes", new Object[] {"python", 42, true, null, new Object[] {"java", null}} - }, - // nestedObject: array of objects with objects as attributes - new Object[] { - "nestedObject", - new Object[] { - new Object[] {new Object[] {"meta", new Object[] {"name", "python", "years", 5}}} - } - }, - // allEmptyObjects: array of empty objects - new Object[] {"allEmptyObjects", new Object[] {new Object[] {}, new Object[] {}}}, - // allEmptyArrays: array of empty arrays - new Object[] {"allEmptyArrays", new Object[] {new Object[] {}, new Object[] {}}}, - // arrayOfArraysOfPrimitives - new Object[] { - "arrayOfArraysOfPrimitives", - new Object[] {new Object[] {"python", "java"}, new Object[] {"sql"}} - }, - // specialValues: array with Infinity, NaN, very long string, unicode - new Object[] { - "specialValues", - new Object[] { - Double.POSITIVE_INFINITY, - Double.NaN, - "😀😃😄😁", - new String(new char[10000]).replace('\0', 'x') - } - }); + // representative row with single skill + new Object[] {"single", new Object[] {new Object[] {"go", null}}}); + schema.add("USERS", new UsersTable(users)); return Frameworks.newConfigBuilder() @@ -160,6 
+92,9 @@ public void testMvExpandBasic() { String ppl = "source=USERS | mvexpand skills"; RelNode root = getRelNode(ppl); + // verify PPL -> SparkSQL conversion + verifyPPLToSparkSQL(root); + String expectedLogical = "LogicalProject(USERNAME=[$0], name=[$2], level=[$3])\n" + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}])\n" @@ -175,6 +110,10 @@ public void testMvExpandWithLimit() { String ppl = "source=USERS | mvexpand skills | head 1"; RelNode root = getRelNode(ppl); + // verify PPL -> SparkSQL conversion + verifyPPLToSparkSQL(root); + + // Updated expected plan to match the actual planner output (single LogicalSort above Project) String expectedLogical = "LogicalSort(fetch=[1])\n" + " LogicalProject(USERNAME=[$0], name=[$2], level=[$3])\n" @@ -191,6 +130,9 @@ public void testMvExpandProjectNested() { String ppl = "source=USERS | mvexpand skills | fields USERNAME, name, level"; RelNode root = getRelNode(ppl); + // verify PPL -> SparkSQL conversion + verifyPPLToSparkSQL(root); + String expectedLogical = "LogicalProject(USERNAME=[$0], name=[$2], level=[$3])\n" + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}])\n" @@ -208,6 +150,8 @@ public void testMvExpandEmptyOrNullArray() { String ppl = "source=USERS | where USERNAME in ('empty','nullskills') | mvexpand skills"; try { RelNode root = getRelNode(ppl); + // also sanity-check conversion + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on empty/null array should not throw, but got: " + e.getMessage()); @@ -219,6 +163,7 @@ public void testMvExpandNoArrayField() { String ppl = "source=USERS | where USERNAME = 'noskills' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on missing array field should not throw, but got: " + e.getMessage()); @@ -230,6 +175,7 @@ public void testMvExpandWithDuplicates() { String ppl = 
"source=USERS | where USERNAME = 'duplicate' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand with duplicates should not throw, but got: " + e.getMessage()); @@ -241,6 +187,7 @@ public void testMvExpandLargeArray() { String ppl = "source=USERS | where USERNAME = 'large' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on large array should not throw, but got: " + e.getMessage()); @@ -252,6 +199,7 @@ public void testMvExpandProjectMissingAttribute() { String ppl = "source=USERS | mvexpand skills | fields USERNAME, level"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand projection of missing attribute should not throw, but got: " + e.getMessage()); @@ -263,6 +211,7 @@ public void testMvExpandPrimitiveArray() { String ppl = "source=USERS | where USERNAME = 'primitive' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array of primitives should not throw, but got: " + e.getMessage()); @@ -274,6 +223,7 @@ public void testMvExpandAllNullsArray() { String ppl = "source=USERS | where USERNAME = 'allnulls' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array of all nulls should not throw, but got: " + e.getMessage()); @@ -285,6 +235,7 @@ public void testMvExpandEmptyObjectArray() { String ppl = "source=USERS | where USERNAME = 'emptyobj' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array with empty struct should not throw, but got: " + e.getMessage()); @@ -298,6 +249,7 @@ public void 
testMvExpandDeeplyNestedArray() { String ppl = "source=USERS | where USERNAME = 'deeplyNested' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on deeply nested arrays should not throw, but got: " + e.getMessage()); @@ -309,6 +261,7 @@ public void testMvExpandMixedTypesArray() { String ppl = "source=USERS | where USERNAME = 'mixedTypes' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array with mixed types should not throw, but got: " + e.getMessage()); @@ -320,6 +273,7 @@ public void testMvExpandNestedObjectArray() { String ppl = "source=USERS | where USERNAME = 'nestedObject' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array of nested objects should not throw, but got: " + e.getMessage()); @@ -331,6 +285,7 @@ public void testMvExpandAllEmptyObjectsArray() { String ppl = "source=USERS | where USERNAME = 'allEmptyObjects' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array of all empty objects should not throw, but got: " + e.getMessage()); @@ -342,6 +297,7 @@ public void testMvExpandAllEmptyArraysArray() { String ppl = "source=USERS | where USERNAME = 'allEmptyArrays' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array of all empty arrays should not throw, but got: " + e.getMessage()); @@ -353,6 +309,7 @@ public void testMvExpandArrayOfArraysOfPrimitives() { String ppl = "source=USERS | where USERNAME = 'arrayOfArraysOfPrimitives' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { 
fail( @@ -365,12 +322,92 @@ public void testMvExpandSpecialValuesArray() { String ppl = "source=USERS | where USERNAME = 'specialValues' | mvexpand skills"; try { RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array with special values should not throw, but got: " + e.getMessage()); } } + // --------------------------------------------------------------------------- + // Local helper: verify PPL -> SparkSQL conversion without adding compile-time + // dependency on OpenSearchSparkSqlDialect (uses reflection/fallback). + // --------------------------------------------------------------------------- + + /** + * Verify PPL -> SparkSQL conversion for an already-built RelNode. + * + *

Strategy: 1) Try to instantiate org.opensearch.sql.calcite.OpenSearchSparkSqlDialect.DEFAULT + * via reflection and run a typed RelToSqlConverter with it (best effort - exercises same path + * used in other tests). 2) Fallback: use the private 'converter' instance from + * CalcitePPLAbstractTest via reflection and call its visitRoot(...) method; assert it produced a + * non-null statement object. + */ + private void verifyPPLToSparkSQL(RelNode root) { + try { + // Preferred: try to instantiate dialect class and produce SQL string (if available). + try { + Class dialectClass = + Class.forName("org.opensearch.sql.calcite.OpenSearchSparkSqlDialect"); + Field defaultField = dialectClass.getField("DEFAULT"); + Object dialectDefault = defaultField.get(null); // static field + RelToSqlConverter localConv = + new RelToSqlConverter((org.apache.calcite.sql.SqlDialect) dialectDefault); + SqlImplementor.Result result = localConv.visitRoot(root); + if (result == null || result.asStatement() == null) { + fail("PPL -> SparkSQL conversion returned no statement"); + } + // Convert to SQL string using the dialect instance (typed call) and assert non-null. + final SqlNode sqlNode = result.asStatement(); + final String sql = + sqlNode.toSqlString((org.apache.calcite.sql.SqlDialect) dialectDefault).getSql(); + assertNotNull(sql); + return; // success + } catch (ClassNotFoundException cnfe) { + // Dialect class not present in this classloader/IDE environment — fall back. + } + + // Fallback: call upstream private converter via reflection and assert result/asStatement() + // non-null. 
+ try { + Field convField = CalcitePPLAbstractTest.class.getDeclaredField("converter"); + convField.setAccessible(true); + Object convObj = convField.get(this); // should be RelToSqlConverter + if (convObj == null) { + fail("Upstream converter is not initialized; cannot verify PPL->SparkSQL"); + } + Method visitRoot = + convObj.getClass().getMethod("visitRoot", org.apache.calcite.rel.RelNode.class); + Object resultObj = visitRoot.invoke(convObj, root); + if (resultObj == null) { + fail("PPL -> SparkSQL conversion (via upstream converter) returned null"); + } + Method asStatement = resultObj.getClass().getMethod("asStatement"); + Object stmtObj = asStatement.invoke(resultObj); + if (stmtObj == null) { + fail("PPL -> SparkSQL conversion returned no statement object"); + } + // success: conversion produced a statement object + return; + } catch (NoSuchFieldException nsf) { + fail( + "Reflection fallback failed: converter field not found in CalcitePPLAbstractTest: " + + nsf.getMessage()); + } catch (ReflectiveOperationException reflEx) { + fail("Reflection fallback to upstream converter failed: " + reflEx.getMessage()); + } + } catch (Exception ex) { + fail("PPL -> SparkSQL conversion failed: " + ex.getMessage()); + } + } + + /** Convenience wrapper when only a PPL string is available. 
*/ + @SuppressWarnings("unused") + private void verifyPPLToSparkSQL(String ppl) { + RelNode root = getRelNode(ppl); + verifyPPLToSparkSQL(root); + } + @RequiredArgsConstructor static class UsersTable implements ScannableTable { private final ImmutableList rows; @@ -392,7 +429,7 @@ static class UsersTable implements ScannableTable { .build(); @Override - public Enumerable<@Nullable Object[]> scan(DataContext root) { + public Enumerable scan(DataContext root) { return Linq4j.asEnumerable(rows); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 2c3fbb863a2..719b51d165e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -749,7 +749,7 @@ public void testMvexpandCommand() { public void testMvexpandCommandWithLimit() { assertEquals( "source=table | mvexpand identifier limit=***", - anonymize("source=t | mvexpand skills limit 5")); + anonymize("source=t | mvexpand skills limit=5")); } @Test From 825c52e34608f55c2db79f9b0bdb1af889ec09b9 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 30 Oct 2025 15:46:23 -0500 Subject: [PATCH 12/24] Address the PR comments and fix the tests accordingly Signed-off-by: Srikanth Padakanti --- .../ppl/calcite/CalcitePPLMvExpandTest.java | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java index 6cc2c094323..e21ba6d319e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java @@ -43,8 +43,8 @@ /** * Calcite tests for the mvexpand command. * - *

This file contains a set of planner tests for mvexpand and a small helper to verify RelNode -> - * SQL conversion (best-effort via reflection) so tests can run in various IDE/classpath setups. + *

This file contains planner tests for mvexpand and a small helper to verify RelNode -> SQL + * conversion (best-effort via reflection) so tests can run in various IDE/classpath setups. * *

Notes: - The scan() return type uses Enumerable (no type-use @Nullable) to avoid * "annotation not applicable to this kind of reference" in some environments. - @@ -62,20 +62,10 @@ protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpec final SchemaPlus rootSchema = Frameworks.createRootSchema(true); final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); - // Keep dataset minimal — planner tests here only need a representative schema and a couple of - // rows. - ImmutableList users = - ImmutableList.of( - // representative row with multiple skills - new Object[] { - "happy", - new Object[] { - new Object[] {"python", null}, - new Object[] {"java", null} - } - }, - // representative row with single skill - new Object[] {"single", new Object[] {new Object[] {"go", null}}}); + // Intentionally keep dataset empty: these tests only need the schema/type information, + // not actual row values. This addresses the reviewer comment about overly-complicated test + // data. 
+ ImmutableList users = ImmutableList.of(); schema.add("USERS", new UsersTable(users)); From dc76a551bc8ddb65545827720af4491e7931af5a Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 30 Oct 2025 15:48:03 -0500 Subject: [PATCH 13/24] Address the PR comments and fix the tests accordingly Signed-off-by: Srikanth Padakanti --- .../sql/calcite/CalciteRelNodeVisitor.java | 127 ------------------ 1 file changed, 127 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index b2fb9cb8b0a..47c15593b6b 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2695,133 +2695,6 @@ private void flattenParsedPattern( projectPlusOverriding(fattenedNodes, projectNames, context); } - // private void buildExpandRelNode( - // RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext - // context) { - // // 3. Capture the outer row in a CorrelationId - // Holder correlVariable = Holder.empty(); - // context.relBuilder.variable(correlVariable::set); - // - // // 4. Create RexFieldAccess to access left node's array field with correlationId and build - // join - // // left node - // RexNode correlArrayFieldAccess = - // context.relBuilder.field( - // context.rexBuilder.makeCorrel( - // context.relBuilder.peek().getRowType(), correlVariable.get().id), - // arrayFieldRex.getIndex()); - // RelNode leftNode = context.relBuilder.build(); - // - // // 5. 
Build join right node and expand the array field using uncollect - // RelNode rightNode = - // context - // .relBuilder - // // fake input, see convertUnnest and convertExpression in Calcite SqlToRelConverter - // .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) - // .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) - // .uncollect(List.of(), false) - // .build(); - // - // // 6. Perform a nested-loop join (correlate) between the original table and the expanded - // // array field. - // // The last parameter has to refer to the array to be expanded on the left side. It will - // // be used by the right side to correlate with the left side. - // context - // .relBuilder - // .push(leftNode) - // .push(rightNode) - // .correlate(JoinRelType.INNER, correlVariable.get().id, List.of(arrayFieldRex)) - // // 7. Remove the original array field from the output. - // // TODO: RFC: should we keep the original array field when alias is present? - // .projectExcept(arrayFieldRex); - // - // if (alias != null) { - // // Sub-nested fields cannot be removed after renaming the nested field. - // tryToRemoveNestedFields(context); - // RexInputRef expandedField = context.relBuilder.field(arrayFieldName); - // List names = new - // ArrayList<>(context.relBuilder.peek().getRowType().getFieldNames()); - // names.set(expandedField.getIndex(), alias); - // context.relBuilder.rename(names); - // } - // } - // - // private void buildMvExpandRelNode( - // RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext - // context) { - // - // // 1. Capture left node and its schema BEFORE calling build() - // RelNode leftNode = context.relBuilder.peek(); - // RelDataType leftSchema = leftNode.getRowType(); - // - // // 2. Create correlation variable - // Holder correlVariable = Holder.empty(); - // context.relBuilder.variable(correlVariable::set); - // - // // 3. 
Find the array field index in the left schema by name (robust) - // RelDataTypeField leftField = leftSchema.getField(arrayFieldName, false, false); - // int arrayFieldIndexInLeft; - // if (leftField != null) { - // arrayFieldIndexInLeft = leftField.getIndex(); - // } else { - // // fallback (best effort) - // arrayFieldIndexInLeft = arrayFieldRex.getIndex(); - // } - // - // // 4. Build correlated field access for the right-side projection - // RexNode correlArrayFieldAccess = - // context.relBuilder.field( - // context.rexBuilder.makeCorrel(leftSchema, correlVariable.get().id), - // arrayFieldIndexInLeft); - // - // // 5. Build left and right nodes (leftBuilt is the original left, rightNode uncollects the - // // array) - // RelNode leftBuilt = context.relBuilder.build(); - // RelNode rightNode = - // context - // .relBuilder - // .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) - // .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) - // .uncollect(List.of(), false) - // .build(); - // - // // 6. Compute a proper RexInputRef that refers to leftBuilt's rowType at the - // // arrayFieldIndexInLeft. - // // Use rexBuilder.makeInputRef with leftBuilt.getRowType() - // RexNode requiredColumnRef = - // context.rexBuilder.makeInputRef(leftBuilt.getRowType(), arrayFieldIndexInLeft); - // - // // 7. Correlate left and right using the proper required column ref - // context - // .relBuilder - // .push(leftBuilt) - // .push(rightNode) - // .correlate(JoinRelType.INNER, correlVariable.get().id, List.of(requiredColumnRef)); - // - // // 8. 
Remove the original array field from the output by name using the builder's field() - // // (this ensures we remove the correct column instance) - // RexNode toRemove; - // try { - // toRemove = context.relBuilder.field(arrayFieldName); - // } catch (Exception e) { - // // Fallback in case name lookup fails - // toRemove = arrayFieldRex; - // } - // context.relBuilder.projectExcept(toRemove); - // - // // 9. Optional rename into alias (same as your prior logic) - // if (alias != null) { - // tryToRemoveNestedFields(context); - // RexInputRef expandedField = context.relBuilder.field(arrayFieldName); - // List names = new - // ArrayList<>(context.relBuilder.peek().getRowType().getFieldNames()); - // names.set(expandedField.getIndex(), alias); - // context.relBuilder.rename(names); - // } - // } - - // Replace the existing three methods with these implementations. - private void buildUnnestForLeft( RelNode leftBuilt, RelDataType leftRowType, From 831958346888c168985e70b34cb2aaa565cfc693 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 30 Oct 2025 15:53:14 -0500 Subject: [PATCH 14/24] Add comment lines for buildUnnestForLeft Signed-off-by: Srikanth Padakanti --- .../java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 47c15593b6b..ab1054ade5f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2695,6 +2695,8 @@ private void flattenParsedPattern( projectPlusOverriding(fattenedNodes, projectNames, context); } + // New generic helper: builds Uncollect + Correlate using a provided left node (so caller + // can ensure left rowType is fixed). 
private void buildUnnestForLeft( RelNode leftBuilt, RelDataType leftRowType, From 6d871337ab47b9959fa5996851ebf91dd99a21c0 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 30 Oct 2025 23:29:20 -0500 Subject: [PATCH 15/24] Fix the mvexpand.rst Signed-off-by: Srikanth Padakanti --- docs/user/ppl/cmd/mvexpand.rst | 43 +++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/docs/user/ppl/cmd/mvexpand.rst b/docs/user/ppl/cmd/mvexpand.rst index d9256ea12d0..c3ebf91b650 100644 --- a/docs/user/ppl/cmd/mvexpand.rst +++ b/docs/user/ppl/cmd/mvexpand.rst @@ -35,11 +35,11 @@ Usage ===== Basic expansion:: - os> source=logs | mvexpand tags + source=logs | mvexpand tags Expansion with limit:: - os> source=docs | mvexpand ids limit=3 + source=docs | mvexpand ids limit=3 Limitations =========== @@ -61,7 +61,10 @@ Input document:: PPL query:: - os> source=logs | mvexpand tags + source=logs | mvexpand tags + +Output (example):: + fetched rows / total rows = 3/3 +--------+ | tags | @@ -81,7 +84,10 @@ Input document:: PPL query:: - os> source=docs | mvexpand ids limit=3 + source=docs | mvexpand ids limit=3 + +Output (example):: + fetched rows / total rows = 3/3 +-----+ | ids | @@ -101,7 +107,10 @@ Input document:: PPL query:: - os> source=logs | mvexpand tags + source=logs | mvexpand tags + +Output (example):: + fetched rows / total rows = 0/0 +------+ | tags | @@ -114,7 +123,10 @@ Input document:: PPL query:: - os> source=logs | mvexpand tags + source=logs | mvexpand tags + +Output (example):: + fetched rows / total rows = 0/0 +------+ | tags | @@ -131,7 +143,10 @@ Input document:: PPL query:: - os> source=logs | mvexpand tags + source=logs | mvexpand tags + +Output (example):: + fetched rows / total rows = 1/1 +-------+ | tags | @@ -149,7 +164,10 @@ Input document:: PPL query:: - os> source=docs | mvexpand ids + source=docs | mvexpand ids + +Output (example):: + Error: Memory/resource limit exceeded while expanding field 'ids'. 
Please reduce the array size or specify a limit. Example 6: Multiple Fields (Limitation) @@ -158,7 +176,7 @@ mvexpand only supports expanding one field per command. To expand multiple field PPL query:: - os> source=docs | mvexpand a | mvexpand b + source=docs | mvexpand a | mvexpand b Example 7: Edge Case - Field Missing ------------------------------------ @@ -170,11 +188,14 @@ Input document:: PPL query:: - os> source=docs | mvexpand tags + source=docs | mvexpand tags + +Output (example):: + fetched rows / total rows = 0/0 +------+ | tags | +------+ +------+ ---- +--- \ No newline at end of file From c84703d9f8b499ab830bd56813cd20230c78ea3f Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Mon, 3 Nov 2025 12:15:49 -0600 Subject: [PATCH 16/24] Fix the failing test Signed-off-by: Srikanth Padakanti --- .../resources/expectedOutput/calcite/explain_mvexpand.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml index 650fb3508d0..3823139767e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml @@ -9,9 +9,9 @@ calcite: LogicalValues(tuples=[[{ 0 }]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..11=[{inputs}], skills.level=[$t1], skills.name=[$t2], username=[$t3], KEY=[$t10], VALUE=[$t11]) + EnumerableCalc(expr#0..5=[{inputs}], skills.level=[$t1], skills.name=[$t2], username=[$t3], KEY=[$t4], VALUE=[$t5]) EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]]) + CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]], PushDownContext=[[PROJECT->[skills, skills.level, skills.name, username]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["skills","skills.level","skills.name","username"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableUncollect EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.skills], skills=[$t2]) EnumerableValues(tuples=[[{ 0 }]]) \ No newline at end of file From de82b65e98c25b38df81cdb4680dbbb7c69d5749 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Mon, 3 Nov 2025 12:55:35 -0600 Subject: [PATCH 17/24] Fix the failing test Signed-off-by: Srikanth Padakanti --- .../resources/expectedOutput/calcite/explain_mvexpand.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml index 3823139767e..a18824ee8e6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml @@ -11,7 +11,8 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..5=[{inputs}], skills.level=[$t1], skills.name=[$t2], username=[$t3], KEY=[$t4], VALUE=[$t5]) EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]], PushDownContext=[[PROJECT->[skills, skills.level, skills.name, username]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["skills","skills.level","skills.name","username"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..9=[{inputs}], proj#0..3=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]], PushDownContext=[[PROJECT->[skills, skills.level, skills.name, username]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["skills","skills.level","skills.name","username"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableUncollect EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.skills], skills=[$t2]) EnumerableValues(tuples=[[{ 0 }]]) \ No newline at end of file From 6c6e0ec8f53f95857646a904d3fc79c42ba927d2 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Mon, 3 Nov 2025 13:00:26 -0600 Subject: [PATCH 18/24] Fix the failing test Signed-off-by: Srikanth Padakanti --- .../expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml index 650fb3508d0..3da8d77fb1b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml @@ -9,9 +9,10 @@ calcite: LogicalValues(tuples=[[{ 0 }]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..11=[{inputs}], skills.level=[$t1], skills.name=[$t2], username=[$t3], KEY=[$t10], VALUE=[$t11]) + EnumerableCalc(expr#0..5=[{inputs}], skills.level=[$t1], skills.name=[$t2], username=[$t3], KEY=[$t4], VALUE=[$t5]) EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]]) + EnumerableCalc(expr#0..9=[{inputs}], proj#0..3=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]]) EnumerableUncollect EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.skills], skills=[$t2]) EnumerableValues(tuples=[[{ 0 }]]) \ No newline at end of file From 069d52e7b6294c4073947c53941a0f54f50c68ae Mon Sep 17 00:00:00 2001 
From: Srikanth Padakanti Date: Mon, 3 Nov 2025 13:17:38 -0600 Subject: [PATCH 19/24] Fix the failing test Signed-off-by: Srikanth Padakanti --- .../resources/expectedOutput/calcite/explain_mvexpand.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml index a18824ee8e6..3823139767e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml @@ -11,8 +11,7 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..5=[{inputs}], skills.level=[$t1], skills.name=[$t2], username=[$t3], KEY=[$t4], VALUE=[$t5]) EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) - EnumerableCalc(expr#0..9=[{inputs}], proj#0..3=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]], PushDownContext=[[PROJECT->[skills, skills.level, skills.name, username]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["skills","skills.level","skills.name","username"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]], PushDownContext=[[PROJECT->[skills, skills.level, skills.name, username]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["skills","skills.level","skills.name","username"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableUncollect EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.skills], skills=[$t2]) EnumerableValues(tuples=[[{ 0 }]]) \ No newline at end of file From a41b081afc81dbb34d83b3621bad683ed595e2b1 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 6 Nov 2025 12:57:53 -0600 Subject: [PATCH 20/24] Address the 
PR comments Signed-off-by: Srikanth Padakanti --- .../sql/calcite/CalciteRelNodeVisitor.java | 38 ++++-- docs/user/ppl/cmd/mvexpand.rst | 18 +-- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 +- .../ppl/calcite/CalcitePPLMvExpandTest.java | 128 +----------------- 4 files changed, 39 insertions(+), 147 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 7b498628b2b..56179290135 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1556,11 +1556,16 @@ public RelNode visitMvExpand(MvExpand node, CalcitePlanContext context) { Field arrayField = node.getField(); RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); - buildMvExpandRelNode(arrayFieldRex, arrayField.getField().toString(), null, context); + // buildMvExpandRelNode(arrayFieldRex, arrayField.getField().toString(), null, context); - if (node.getLimit() != null) { - context.relBuilder.limit(0, node.getLimit()); - } + // pass the per-document limit into the builder so it can be applied inside the UNNEST inner + // query + buildMvExpandRelNode( + arrayFieldRex, arrayField.getField().toString(), null, node.getLimit(), context); + + // if (node.getLimit() != null) { + // context.relBuilder.limit(0, node.getLimit()); + // } return context.relBuilder.peek(); } @@ -3083,16 +3088,17 @@ private void buildUnnestForLeft( String alias, Holder correlVariable, RexNode correlArrayFieldAccess, + Integer mvExpandLimit, CalcitePlanContext context) { - // Build right node: one-row -> project(correlated access) -> uncollect - RelNode rightNode = - context - .relBuilder - .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) - .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) - .uncollect(List.of(), false) - .build(); + RelBuilder rb = 
context.relBuilder; + rb.push(LogicalValues.createOneRow(rb.getCluster())) + .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)); + // apply per-document limit into the inner SELECT if provided + if (mvExpandLimit != null && mvExpandLimit > 0) { + rb.limit(0, mvExpandLimit); + } + RelNode rightNode = rb.uncollect(List.of(), false).build(); // Compute required column ref against leftBuilt's row type (robust) RexNode requiredColumnRef = @@ -3155,11 +3161,16 @@ private void buildExpandRelNode( alias, correlVariable, correlArrayFieldAccess, + null, context); } private void buildMvExpandRelNode( - RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { + RexInputRef arrayFieldRex, + String arrayFieldName, + String alias, + Integer mvExpandLimit, + CalcitePlanContext context) { // Capture left node and its schema BEFORE calling build() RelNode leftNode = context.relBuilder.peek(); @@ -3192,6 +3203,7 @@ private void buildMvExpandRelNode( alias, correlVariable, correlArrayFieldAccess, + mvExpandLimit, context); } diff --git a/docs/user/ppl/cmd/mvexpand.rst b/docs/user/ppl/cmd/mvexpand.rst index c3ebf91b650..9088ff50b8c 100644 --- a/docs/user/ppl/cmd/mvexpand.rst +++ b/docs/user/ppl/cmd/mvexpand.rst @@ -20,10 +20,6 @@ Description - Handles empty, null, and non-array fields gracefully. - Works as a streaming/distributable command for performance and scalability. -Version -======= -3.3.0 - Syntax ====== mvexpand [limit=] @@ -48,6 +44,10 @@ Limitations - For empty or null arrays, no rows are returned. - Large arrays may be subject to resource/memory limits; exceeding them results in an error or warning. +Output ordering and default limit +-------------------------------- +If no `limit` is specified, mvexpand expands all elements in the array (there is no implicit per-document cap). Elements are emitted in the same order they appear in the array (array iteration order). 
If the underlying field does not provide a defined order, the output order is undefined. Use `limit` to bound the number of expanded rows per document and to avoid resource issues on very large arrays. + Examples and Edge Cases ======================= @@ -156,18 +156,18 @@ Output (example):: Example 5: Large Arrays and Memory Limits ---------------------------------------- -If an array exceeds configured memory/resource limits, mvexpand returns an error. +If an array is very large it can trigger engine/cluster resource limits (memory, circuit-breakers, or query execution limits). Note: this behavior is enforced by the underlying engine and cluster settings, not by a mvexpand-specific configuration. -Input document:: - - { "ids": [1, 2, ..., 100000] } +To avoid failures when expanding large arrays: +- Use the `limit` parameter to restrict the number of expanded values per document (for example: `mvexpand field limit=1000`). +- Filter or narrow the input before expanding (use `where` and `fields` to reduce rows and columns). +- Tune cluster and SQL/PPL execution settings (circuit breakers, query size/timeouts, memory limits) appropriate for your deployment. PPL query:: source=docs | mvexpand ids Output (example):: - Error: Memory/resource limit exceeded while expanding field 'ids'. Please reduce the array size or specify a limit. Example 6: Multiple Fields (Limitation) diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 83bfae68dcd..5516a48d87b 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -491,7 +491,7 @@ expandCommand ; mvexpandCommand - : MVEXPAND fieldExpression (LIMIT '=' INTEGER_LITERAL)? + : MVEXPAND fieldExpression (LIMIT EQUAL INTEGER_LITERAL)? 
; flattenCommand diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java index e21ba6d319e..16594fe0ae9 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java @@ -9,8 +9,6 @@ import static org.junit.Assert.fail; import com.google.common.collect.ImmutableList; -import java.lang.reflect.Field; -import java.lang.reflect.Method; import java.util.List; import lombok.RequiredArgsConstructor; import org.apache.calcite.DataContext; @@ -20,8 +18,6 @@ import org.apache.calcite.plan.RelTraitDef; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.rel2sql.RelToSqlConverter; -import org.apache.calcite.rel.rel2sql.SqlImplementor; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelProtoDataType; @@ -43,13 +39,7 @@ /** * Calcite tests for the mvexpand command. * - *

This file contains planner tests for mvexpand and a small helper to verify RelNode -> SQL - * conversion (best-effort via reflection) so tests can run in various IDE/classpath setups. - * - *

Notes: - The scan() return type uses Enumerable (no type-use @Nullable) to avoid - * "annotation not applicable to this kind of reference" in some environments. - - * verifyPPLToSparkSQL(RelNode) uses reflection/fallback to exercise Rel->SQL conversion without - * adding a compile-time dependency on OpenSearchSparkSqlDialect. + *

Planner tests for mvexpand; kept minimal and consistent with other Calcite planner tests. */ public class CalcitePPLMvExpandTest extends CalcitePPLAbstractTest { @@ -62,9 +52,7 @@ protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpec final SchemaPlus rootSchema = Frameworks.createRootSchema(true); final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); - // Intentionally keep dataset empty: these tests only need the schema/type information, - // not actual row values. This addresses the reviewer comment about overly-complicated test - // data. + // Keep dataset empty: tests only need schema/type information. ImmutableList users = ImmutableList.of(); schema.add("USERS", new UsersTable(users)); @@ -76,15 +64,11 @@ protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpec .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); } - // Option 2: Assert specific logical plan (as per the main edge/typical cases) @Test public void testMvExpandBasic() { String ppl = "source=USERS | mvexpand skills"; RelNode root = getRelNode(ppl); - // verify PPL -> SparkSQL conversion - verifyPPLToSparkSQL(root); - String expectedLogical = "LogicalProject(USERNAME=[$0], name=[$2], level=[$3])\n" + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}])\n" @@ -100,10 +84,6 @@ public void testMvExpandWithLimit() { String ppl = "source=USERS | mvexpand skills | head 1"; RelNode root = getRelNode(ppl); - // verify PPL -> SparkSQL conversion - verifyPPLToSparkSQL(root); - - // Updated expected plan to match the actual planner output (single LogicalSort above Project) String expectedLogical = "LogicalSort(fetch=[1])\n" + " LogicalProject(USERNAME=[$0], name=[$2], level=[$3])\n" @@ -120,9 +100,6 @@ public void testMvExpandProjectNested() { String ppl = "source=USERS | mvexpand skills | fields USERNAME, name, level"; RelNode root = getRelNode(ppl); - // verify PPL -> SparkSQL conversion - 
verifyPPLToSparkSQL(root); - String expectedLogical = "LogicalProject(USERNAME=[$0], name=[$2], level=[$3])\n" + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}])\n" @@ -133,16 +110,14 @@ public void testMvExpandProjectNested() { verifyLogical(root, expectedLogical); } - // Option 3: Assert that no error/crash occurs for edge cases - @Test public void testMvExpandEmptyOrNullArray() { String ppl = "source=USERS | where USERNAME in ('empty','nullskills') | mvexpand skills"; try { RelNode root = getRelNode(ppl); - // also sanity-check conversion - verifyPPLToSparkSQL(root); + System.out.println("line 118" + root); assertNotNull(root); + System.out.println("line 120" + root); } catch (Exception e) { fail("mvexpand on empty/null array should not throw, but got: " + e.getMessage()); } @@ -153,7 +128,6 @@ public void testMvExpandNoArrayField() { String ppl = "source=USERS | where USERNAME = 'noskills' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on missing array field should not throw, but got: " + e.getMessage()); @@ -165,7 +139,6 @@ public void testMvExpandWithDuplicates() { String ppl = "source=USERS | where USERNAME = 'duplicate' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand with duplicates should not throw, but got: " + e.getMessage()); @@ -177,7 +150,6 @@ public void testMvExpandLargeArray() { String ppl = "source=USERS | where USERNAME = 'large' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on large array should not throw, but got: " + e.getMessage()); @@ -189,7 +161,6 @@ public void testMvExpandProjectMissingAttribute() { String ppl = "source=USERS | mvexpand skills | fields USERNAME, level"; try { RelNode root = getRelNode(ppl); - 
verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand projection of missing attribute should not throw, but got: " + e.getMessage()); @@ -201,7 +172,6 @@ public void testMvExpandPrimitiveArray() { String ppl = "source=USERS | where USERNAME = 'primitive' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array of primitives should not throw, but got: " + e.getMessage()); @@ -213,7 +183,6 @@ public void testMvExpandAllNullsArray() { String ppl = "source=USERS | where USERNAME = 'allnulls' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array of all nulls should not throw, but got: " + e.getMessage()); @@ -225,21 +194,17 @@ public void testMvExpandEmptyObjectArray() { String ppl = "source=USERS | where USERNAME = 'emptyobj' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array with empty struct should not throw, but got: " + e.getMessage()); } } - // --- Additional uncovered edge case tests --- - @Test public void testMvExpandDeeplyNestedArray() { String ppl = "source=USERS | where USERNAME = 'deeplyNested' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on deeply nested arrays should not throw, but got: " + e.getMessage()); @@ -251,7 +216,6 @@ public void testMvExpandMixedTypesArray() { String ppl = "source=USERS | where USERNAME = 'mixedTypes' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array with mixed types should not throw, but got: " + e.getMessage()); @@ -263,7 +227,6 @@ public void testMvExpandNestedObjectArray() { String ppl = 
"source=USERS | where USERNAME = 'nestedObject' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array of nested objects should not throw, but got: " + e.getMessage()); @@ -275,7 +238,6 @@ public void testMvExpandAllEmptyObjectsArray() { String ppl = "source=USERS | where USERNAME = 'allEmptyObjects' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array of all empty objects should not throw, but got: " + e.getMessage()); @@ -287,7 +249,6 @@ public void testMvExpandAllEmptyArraysArray() { String ppl = "source=USERS | where USERNAME = 'allEmptyArrays' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array of all empty arrays should not throw, but got: " + e.getMessage()); @@ -299,7 +260,6 @@ public void testMvExpandArrayOfArraysOfPrimitives() { String ppl = "source=USERS | where USERNAME = 'arrayOfArraysOfPrimitives' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail( @@ -312,92 +272,12 @@ public void testMvExpandSpecialValuesArray() { String ppl = "source=USERS | where USERNAME = 'specialValues' | mvexpand skills"; try { RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); assertNotNull(root); } catch (Exception e) { fail("mvexpand on array with special values should not throw, but got: " + e.getMessage()); } } - // --------------------------------------------------------------------------- - // Local helper: verify PPL -> SparkSQL conversion without adding compile-time - // dependency on OpenSearchSparkSqlDialect (uses reflection/fallback). 
- // --------------------------------------------------------------------------- - - /** - * Verify PPL -> SparkSQL conversion for an already-built RelNode. - * - *

Strategy: 1) Try to instantiate org.opensearch.sql.calcite.OpenSearchSparkSqlDialect.DEFAULT - * via reflection and run a typed RelToSqlConverter with it (best effort - exercises same path - * used in other tests). 2) Fallback: use the private 'converter' instance from - * CalcitePPLAbstractTest via reflection and call its visitRoot(...) method; assert it produced a - * non-null statement object. - */ - private void verifyPPLToSparkSQL(RelNode root) { - try { - // Preferred: try to instantiate dialect class and produce SQL string (if available). - try { - Class dialectClass = - Class.forName("org.opensearch.sql.calcite.OpenSearchSparkSqlDialect"); - Field defaultField = dialectClass.getField("DEFAULT"); - Object dialectDefault = defaultField.get(null); // static field - RelToSqlConverter localConv = - new RelToSqlConverter((org.apache.calcite.sql.SqlDialect) dialectDefault); - SqlImplementor.Result result = localConv.visitRoot(root); - if (result == null || result.asStatement() == null) { - fail("PPL -> SparkSQL conversion returned no statement"); - } - // Convert to SQL string using the dialect instance (typed call) and assert non-null. - final SqlNode sqlNode = result.asStatement(); - final String sql = - sqlNode.toSqlString((org.apache.calcite.sql.SqlDialect) dialectDefault).getSql(); - assertNotNull(sql); - return; // success - } catch (ClassNotFoundException cnfe) { - // Dialect class not present in this classloader/IDE environment — fall back. - } - - // Fallback: call upstream private converter via reflection and assert result/asStatement() - // non-null. 
- try { - Field convField = CalcitePPLAbstractTest.class.getDeclaredField("converter"); - convField.setAccessible(true); - Object convObj = convField.get(this); // should be RelToSqlConverter - if (convObj == null) { - fail("Upstream converter is not initialized; cannot verify PPL->SparkSQL"); - } - Method visitRoot = - convObj.getClass().getMethod("visitRoot", org.apache.calcite.rel.RelNode.class); - Object resultObj = visitRoot.invoke(convObj, root); - if (resultObj == null) { - fail("PPL -> SparkSQL conversion (via upstream converter) returned null"); - } - Method asStatement = resultObj.getClass().getMethod("asStatement"); - Object stmtObj = asStatement.invoke(resultObj); - if (stmtObj == null) { - fail("PPL -> SparkSQL conversion returned no statement object"); - } - // success: conversion produced a statement object - return; - } catch (NoSuchFieldException nsf) { - fail( - "Reflection fallback failed: converter field not found in CalcitePPLAbstractTest: " - + nsf.getMessage()); - } catch (ReflectiveOperationException reflEx) { - fail("Reflection fallback to upstream converter failed: " + reflEx.getMessage()); - } - } catch (Exception ex) { - fail("PPL -> SparkSQL conversion failed: " + ex.getMessage()); - } - } - - /** Convenience wrapper when only a PPL string is available. 
*/ - @SuppressWarnings("unused") - private void verifyPPLToSparkSQL(String ppl) { - RelNode root = getRelNode(ppl); - verifyPPLToSparkSQL(root); - } - @RequiredArgsConstructor static class UsersTable implements ScannableTable { private final ImmutableList rows; From bf018b7d90318494824aebbc587759f2867f7e13 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 6 Nov 2025 21:34:01 -0600 Subject: [PATCH 21/24] Address the PR comments Signed-off-by: Srikanth Padakanti --- .../org/opensearch/sql/ast/tree/MvExpand.java | 5 -- .../sql/calcite/CalciteRelNodeVisitor.java | 76 +++++++++---------- docs/user/ppl/cmd/mvexpand.rst | 13 +++- 3 files changed, 43 insertions(+), 51 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java b/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java index 127f0332d0d..540e53fd6e6 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java @@ -34,11 +34,6 @@ public MvExpand attach(UnresolvedPlan child) { return this; } - @Nullable - public Integer getLimit() { - return limit; - } - @Override public List getChild() { return this.child == null ? 
ImmutableList.of() : ImmutableList.of(this.child); diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 56179290135..2fff51225e2 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -813,7 +813,12 @@ public RelNode visitPatterns(Patterns node, CalcitePlanContext context) { .toList(); context.relBuilder.aggregate(context.relBuilder.groupKey(groupByList), aggCall); buildExpandRelNode( - context.relBuilder.field(node.getAlias()), node.getAlias(), node.getAlias(), context); + context.relBuilder.field(node.getAlias()), + node.getAlias(), + node.getAlias(), + null, + context); + flattenParsedPattern( node.getAlias(), context.relBuilder.field(node.getAlias()), @@ -1556,16 +1561,11 @@ public RelNode visitMvExpand(MvExpand node, CalcitePlanContext context) { Field arrayField = node.getField(); RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); - // buildMvExpandRelNode(arrayFieldRex, arrayField.getField().toString(), null, context); - // pass the per-document limit into the builder so it can be applied inside the UNNEST inner // query buildMvExpandRelNode( arrayFieldRex, arrayField.getField().toString(), null, node.getLimit(), context); - // if (node.getLimit() != null) { - // context.relBuilder.limit(0, node.getLimit()); - // } return context.relBuilder.peek(); } @@ -2830,7 +2830,7 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); String alias = expand.getAlias(); - buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), alias, context); + buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), alias, null, context); return context.relBuilder.peek(); } @@ -3132,46 +3132,27 @@ private 
void buildUnnestForLeft( } private void buildExpandRelNode( - RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { - - // Capture left node and its schema BEFORE calling build() - RelNode leftNode = context.relBuilder.peek(); - RelDataType leftRowType = leftNode.getRowType(); - - // Create correlation variable while left is still on the builder stack - Holder correlVariable = Holder.empty(); - context.relBuilder.variable(correlVariable::set); - - // Create correlated field access while left is still on the builder stack - // (preserve original expand semantics: use the input RexInputRef index) - RexNode correlArrayFieldAccess = - context.relBuilder.field( - context.rexBuilder.makeCorrel(leftRowType, correlVariable.get().id), - arrayFieldRex.getIndex()); - - // Materialize leftBuilt (this pops the left from the relBuilder stack) - RelNode leftBuilt = context.relBuilder.build(); - - // Use unified helper to build right/uncollect + correlate + cleanup - buildUnnestForLeft( - leftBuilt, - leftRowType, - arrayFieldRex.getIndex(), - arrayFieldName, - alias, - correlVariable, - correlArrayFieldAccess, - null, - context); - } - - private void buildMvExpandRelNode( - RexInputRef arrayFieldRex, + RexNode arrayFieldRexNode, String arrayFieldName, String alias, Integer mvExpandLimit, CalcitePlanContext context) { + // Convert incoming RexNode to RexInputRef when possible; otherwise resolve by field name. 
+ RexInputRef arrayFieldRex; + if (arrayFieldRexNode instanceof RexInputRef) { + arrayFieldRex = (RexInputRef) arrayFieldRexNode; + } else { + RelDataType currentRowType = context.relBuilder.peek().getRowType(); + RelDataTypeField fld = currentRowType.getField(arrayFieldName, false, false); + if (fld != null) { + arrayFieldRex = context.rexBuilder.makeInputRef(currentRowType, fld.getIndex()); + } else { + throw new IllegalArgumentException( + "buildExpandRelNode: expected RexInputRef or resolvable field name: " + arrayFieldName); + } + } + // Capture left node and its schema BEFORE calling build() RelNode leftNode = context.relBuilder.peek(); RelDataType leftRowType = leftNode.getRowType(); @@ -3207,6 +3188,17 @@ private void buildMvExpandRelNode( context); } + private void buildMvExpandRelNode( + RexInputRef arrayFieldRex, + String arrayFieldName, + String alias, + Integer mvExpandLimit, + CalcitePlanContext context) { + + // Delegate to the canonical expand implementation (pass the per-document limit through). + buildExpandRelNode(arrayFieldRex, arrayFieldName, alias, mvExpandLimit, context); + } + /** Creates an optimized sed call using native Calcite functions */ private RexNode createOptimizedSedCall( RexNode fieldRex, String sedExpression, CalcitePlanContext context) { diff --git a/docs/user/ppl/cmd/mvexpand.rst b/docs/user/ppl/cmd/mvexpand.rst index 9088ff50b8c..e56a14539fb 100644 --- a/docs/user/ppl/cmd/mvexpand.rst +++ b/docs/user/ppl/cmd/mvexpand.rst @@ -154,14 +154,19 @@ Output (example):: | error | +-------+ -Example 5: Large Arrays and Memory Limits ----------------------------------------- -If an array is very large it can trigger engine/cluster resource limits (memory, circuit-breakers, or query execution limits). Note: this behavior is enforced by the underlying engine and cluster settings, not by a mvexpand-specific configuration. 
+Example 5: Large Arrays and Memory / resource limits +---------------------------------------------------- +If an array is very large it can trigger engine or cluster resource limits and the query can fail with an error. There is no mvexpand-specific configuration flag that controls resource usage; instead, limits are enforced by the engine and the cluster: + +- OpenSearch node-level protections (circuit breakers and JVM/heap safeguards) and request-size protections. +- SQL/PPL execution limits (for example, query timeouts, request-size limits, and engine memory budgets) that apply to the query execution layer. + +Behavior of circuit breakers and which operators they protect can vary by release and configuration (some breakers primarily protect memory-heavy operations such as fielddata, aggregations, and certain scan implementations). Because of these distinctions, mvexpand should not be relied on to bypass cluster-level protections — use the command-level ``limit`` to bound per-document expansion and avoid hitting cluster limits. To avoid failures when expanding large arrays: - Use the `limit` parameter to restrict the number of expanded values per document (for example: `mvexpand field limit=1000`). - Filter or narrow the input before expanding (use `where` and `fields` to reduce rows and columns). -- Tune cluster and SQL/PPL execution settings (circuit breakers, query size/timeouts, memory limits) appropriate for your deployment. +- Tune cluster and SQL/PPL execution settings (circuit breakers, request/response size, timeouts, memory limits) appropriate for your deployment. If desired, we can add links to the exact OpenSearch circuit-breaker and SQL/PPL configuration docs for the targeted release. 
PPL query:: From 7743ad0743de8efb9e3de3e450fe41311bf02825 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 13 Nov 2025 17:39:57 -0600 Subject: [PATCH 22/24] Address the PR comments Signed-off-by: Srikanth Padakanti --- .../sql/calcite/CalciteRelNodeVisitor.java | 195 +++++++++++------- .../CalciteRelNodeVisitorExpandTest.java | 160 ++++++++++++++ docs/user/ppl/cmd/mvexpand.rst | 23 +-- .../remote/CalciteMvExpandCommandIT.java | 2 - 4 files changed, 284 insertions(+), 96 deletions(-) create mode 100644 core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorExpandTest.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index cb26fad8e3e..185cacf9d08 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1617,20 +1617,6 @@ private static void buildDedupNotNull( context.relBuilder.projectExcept(_row_number_dedup_); } - @Override - public RelNode visitMvExpand(MvExpand node, CalcitePlanContext context) { - visitChildren(node, context); - Field arrayField = node.getField(); - RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); - - // pass the per-document limit into the builder so it can be applied inside the UNNEST inner - // query - buildMvExpandRelNode( - arrayFieldRex, arrayField.getField().toString(), null, node.getLimit(), context); - - return context.relBuilder.peek(); - } - @Override public RelNode visitWindow(Window node, CalcitePlanContext context) { visitChildren(node, context); @@ -2832,6 +2818,20 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { return context.relBuilder.peek(); } + @Override + public RelNode visitMvExpand(MvExpand node, CalcitePlanContext context) { + visitChildren(node, context); + Field arrayField = node.getField(); + RexInputRef 
arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); + + // pass the per-document limit into the builder so it can be applied inside the UNNEST inner + // query + buildMvExpandRelNode( + arrayFieldRex, arrayField.getField().toString(), null, node.getLimit(), context); + + return context.relBuilder.peek(); + } + @Override public RelNode visitValues(Values values, CalcitePlanContext context) { if (values.getValues() == null || values.getValues().isEmpty()) { @@ -3075,31 +3075,95 @@ private void flattenParsedPattern( projectPlusOverriding(fattenedNodes, projectNames, context); } - // New generic helper: builds Uncollect + Correlate using a provided left node (so caller - // can ensure left rowType is fixed). - private void buildUnnestForLeft( - RelNode leftBuilt, - RelDataType leftRowType, - int arrayFieldIndex, + private void buildExpandRelNode( + RexNode arrayFieldRexNode, String arrayFieldName, String alias, - Holder correlVariable, - RexNode correlArrayFieldAccess, Integer mvExpandLimit, CalcitePlanContext context) { - RelBuilder rb = context.relBuilder; - rb.push(LogicalValues.createOneRow(rb.getCluster())) - .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)); - // apply per-document limit into the inner SELECT if provided - if (mvExpandLimit != null && mvExpandLimit > 0) { - rb.limit(0, mvExpandLimit); + // Convert incoming RexNode to RexInputRef when possible; otherwise resolve by field name. + RexInputRef arrayFieldRex; + if (arrayFieldRexNode instanceof RexInputRef) { + arrayFieldRex = (RexInputRef) arrayFieldRexNode; + + // If caller gave an input ref, try to sanity-check that the referenced field in the + // current row type is actually an array. If not, surface a clear semantic error. 
+ RelDataType currentRowTypeCheck = context.relBuilder.peek().getRowType(); + int idx = arrayFieldRex.getIndex(); + RelDataTypeField checkField = null; + if (idx >= 0 && idx < currentRowTypeCheck.getFieldList().size()) { + checkField = currentRowTypeCheck.getFieldList().get(idx); + } + // Allow both ArraySqlType and MapSqlType here to avoid failing + // early when mappings are represented as MAP at runtime. + if (checkField != null + && !(checkField.getType() instanceof ArraySqlType) + && !(checkField.getType() instanceof MapSqlType)) { + throw new SemanticCheckException( + String.format( + "Cannot expand field '%s': expected ARRAY type but found %s", + checkField.getName(), checkField.getType().getSqlTypeName())); + } + } else { + // Try resolve by name and provide user-friendly errors when resolution fails or the type + // is not an array (user-visible message). + RelDataType currentRowType = context.relBuilder.peek().getRowType(); + RelDataTypeField fld = currentRowType.getField(arrayFieldName, false, false); + if (fld == null) { + throw new SemanticCheckException( + String.format("Cannot expand field '%s': field not found in input", arrayFieldName)); + } + // Accept ArraySqlType or MapSqlType here + if (!(fld.getType() instanceof ArraySqlType) && !(fld.getType() instanceof MapSqlType)) { + throw new SemanticCheckException( + String.format( + "Cannot expand field '%s': expected ARRAY type but found %s", + arrayFieldName, fld.getType().getSqlTypeName())); + } + arrayFieldRex = context.rexBuilder.makeInputRef(currentRowType, fld.getIndex()); } - RelNode rightNode = rb.uncollect(List.of(), false).build(); + + // Capture left node and its schema BEFORE calling build() + RelNode leftNode = context.relBuilder.peek(); + RelDataType leftRowType = leftNode.getRowType(); + + // Resolve the array field index in left schema by name (robust); fallback to original index + RelDataTypeField leftField = leftRowType.getField(arrayFieldName, false, false); + int 
arrayFieldIndexInLeft = + (leftField != null) ? leftField.getIndex() : arrayFieldRex.getIndex(); + + // If left schema has the field but it's not an array, produce a helpful message. + // Accept MapSqlType as well to avoid premature failure when mapping is object-like. + if (leftField != null + && !(leftField.getType() instanceof ArraySqlType) + && !(leftField.getType() instanceof MapSqlType)) { + throw new SemanticCheckException( + String.format( + "Cannot expand field '%s': expected ARRAY type in input but found %s", + arrayFieldName, leftField.getType().getSqlTypeName())); + } + + // Create correlation variable while left is still on the builder stack + Holder correlVariable = Holder.empty(); + context.relBuilder.variable(correlVariable::set); + + // Create correlated field access while left is still on the builder stack + RexNode correlArrayFieldAccess = + context.relBuilder.field( + context.rexBuilder.makeCorrel(leftRowType, correlVariable.get().id), + arrayFieldIndexInLeft); + + // Materialize leftBuilt + RelNode leftBuilt = context.relBuilder.build(); + + // Build the right (uncollect) using the small helper + RelNode rightNode = + buildRightUncollect(correlArrayFieldAccess, arrayFieldName, mvExpandLimit, context); // Compute required column ref against leftBuilt's row type (robust) RexNode requiredColumnRef = - context.rexBuilder.makeInputRef(leftBuilt.getRowType(), arrayFieldIndex); + context.rexBuilder.makeInputRef(leftBuilt.getRowType(), arrayFieldIndexInLeft); // Correlate leftBuilt and rightNode using the proper required column ref context @@ -3128,61 +3192,32 @@ private void buildUnnestForLeft( } } - private void buildExpandRelNode( - RexNode arrayFieldRexNode, + /** + * Build the inner uncollect (UNNEST) right node given a correlated field access. + * + *

This helper intentionally keeps a very small surface: it accepts the correlated access + * (which must be created while the left is still on the builder stack), the array field name, + * and the optional per-document limit; it constructs the "one-row -> project(correlatedField) -> (limit?) -> uncollect" sequence + * and returns the built right RelNode. + * + *

Keeping the correlate + projectExcept logic in buildExpandRelNode simplifies reasoning about + * the required correlate-variable lifecycle (it must be created while left is on the builder + * stack). + */ + private RelNode buildRightUncollect( + RexNode correlArrayFieldAccess, String arrayFieldName, - String alias, Integer mvExpandLimit, CalcitePlanContext context) { - // Convert incoming RexNode to RexInputRef when possible; otherwise resolve by field name. - RexInputRef arrayFieldRex; - if (arrayFieldRexNode instanceof RexInputRef) { - arrayFieldRex = (RexInputRef) arrayFieldRexNode; - } else { - RelDataType currentRowType = context.relBuilder.peek().getRowType(); - RelDataTypeField fld = currentRowType.getField(arrayFieldName, false, false); - if (fld != null) { - arrayFieldRex = context.rexBuilder.makeInputRef(currentRowType, fld.getIndex()); - } else { - throw new IllegalArgumentException( - "buildExpandRelNode: expected RexInputRef or resolvable field name: " + arrayFieldName); - } + RelBuilder rb = context.relBuilder; + rb.push(LogicalValues.createOneRow(rb.getCluster())) + .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)); + // apply per-document limit into the inner SELECT if provided + if (mvExpandLimit != null && mvExpandLimit > 0) { + rb.limit(0, mvExpandLimit); } - - // Capture left node and its schema BEFORE calling build() - RelNode leftNode = context.relBuilder.peek(); - RelDataType leftRowType = leftNode.getRowType(); - - // Resolve the array field index in left schema by name (robust); fallback to original index - RelDataTypeField leftField = leftRowType.getField(arrayFieldName, false, false); - int arrayFieldIndexInLeft = - (leftField != null) ? 
leftField.getIndex() : arrayFieldRex.getIndex(); - - // Create correlation variable while left is still on the builder stack - Holder correlVariable = Holder.empty(); - context.relBuilder.variable(correlVariable::set); - - // Create correlated field access while left is still on the builder stack - RexNode correlArrayFieldAccess = - context.relBuilder.field( - context.rexBuilder.makeCorrel(leftRowType, correlVariable.get().id), - arrayFieldIndexInLeft); - - // Materialize leftBuilt - RelNode leftBuilt = context.relBuilder.build(); - - // Use unified helper to build right/uncollect + correlate + cleanup - buildUnnestForLeft( - leftBuilt, - leftRowType, - arrayFieldIndexInLeft, - arrayFieldName, - alias, - correlVariable, - correlArrayFieldAccess, - mvExpandLimit, - context); + return rb.uncollect(List.of(), false).build(); } private void buildMvExpandRelNode( diff --git a/core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorExpandTest.java b/core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorExpandTest.java new file mode 100644 index 00000000000..c0f1162bbf2 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorExpandTest.java @@ -0,0 +1,160 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.mock; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import 
org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.type.ArraySqlType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.tools.FrameworkConfig; +import org.apache.calcite.tools.RelBuilder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.calcite.utils.CalciteToolsHelper; +import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.executor.QueryType; + +/** Negative tests for expand branch validations. */ +@ExtendWith(MockitoExtension.class) +public class CalciteRelNodeVisitorExpandTest { + + private MockedStatic mockedCalciteToolsHelper; + + @SuppressWarnings("unused") + private FrameworkConfig frameworkConfig = mock(FrameworkConfig.class); + + private final RelBuilder relBuilder = mock(RelBuilder.class); + private final RelNode leftRelNode = mock(RelNode.class); + private final RelDataType leftRowType = mock(RelDataType.class); + private final RelDataTypeField arrayField = mock(RelDataTypeField.class); + private final RelDataTypeField nonArrayField = mock(RelDataTypeField.class); + private final ArraySqlType arraySqlType = mock(ArraySqlType.class); + private final RelDataType nonArrayType = mock(RelDataType.class); + private final DataSourceService dataSourceService = mock(DataSourceService.class); + private final ExtendedRexBuilder rexBuilder = mock(ExtendedRexBuilder.class); + + private CalciteRelNodeVisitor visitor; + private CalcitePlanContext context; + + @BeforeEach + public void setUp() { + // Intercept CalciteToolsHelper.create(...) so CalcitePlanContext.create(...) ends up using our + // relBuilder. 
+ mockedCalciteToolsHelper = Mockito.mockStatic(CalciteToolsHelper.class); + mockedCalciteToolsHelper + .when(() -> CalciteToolsHelper.create(any(), any(), any())) + .thenReturn(relBuilder); + + // Minimal relBuilder / row-type wiring used by the validation branches. + lenient().when(relBuilder.peek()).thenReturn(leftRelNode); + lenient().when(leftRelNode.getRowType()).thenReturn(leftRowType); + + // Some versions of Calcite require relBuilder.getRexBuilder()/getTypeFactory during context + // creation. + lenient().when(relBuilder.getRexBuilder()).thenReturn(rexBuilder); + lenient().when(rexBuilder.getTypeFactory()).thenReturn(TYPE_FACTORY); + + // Create the plan context. Pass null for SysLimit (tests do not depend on it). + context = CalcitePlanContext.create(frameworkConfig, null, QueryType.PPL); + + visitor = new CalciteRelNodeVisitor(dataSourceService); + } + + @AfterEach + public void tearDown() { + mockedCalciteToolsHelper.close(); + } + + /** + * Negative: requested field does not exist in current row type -> SemanticCheckException + * + *

This exercises the resolve-by-name branch's early validation that throws when the named field + * is not found in the current row type. + */ + @Test + public void expand_on_nonexistent_field_should_throw_user_friendly_error() throws Exception { + // leftRowType.getField("missing_field", false, false) -> null (not found) + lenient().when(leftRowType.getField("missing_field", false, false)).thenReturn(null); + + // Use a non-RexInputRef to hit resolve-by-name branch + RexNode nonInputRexNode = mock(RexNode.class); + + Method m = + CalciteRelNodeVisitor.class.getDeclaredMethod( + "buildExpandRelNode", + RexNode.class, + String.class, + String.class, + Integer.class, + CalcitePlanContext.class); + m.setAccessible(true); + + try { + m.invoke(visitor, nonInputRexNode, "missing_field", null, null, context); + fail("Expected SemanticCheckException"); + } catch (InvocationTargetException ite) { + Throwable cause = ite.getCause(); + assertTrue(cause instanceof SemanticCheckException); + assertEquals( + "Cannot expand field 'missing_field': field not found in input", cause.getMessage()); + } + } + + /** + * Negative: requested field exists but is not an ARRAY -> SemanticCheckException + * + *

This exercises the resolve-by-name branch early validation that throws when the named field + * exists but its type is not ArraySqlType. + */ + @Test + public void expand_on_non_array_field_should_throw_expected_array_message() throws Exception { + // leftRowType.getField("not_array", false, false) -> nonArrayField and its type is non-array + lenient().when(leftRowType.getField("not_array", false, false)).thenReturn(nonArrayField); + lenient().when(nonArrayField.getType()).thenReturn(nonArrayType); + lenient().when(nonArrayType.getSqlTypeName()).thenReturn(SqlTypeName.VARCHAR); + + RexNode nonInputRexNode = mock(RexNode.class); + + Method m = + CalciteRelNodeVisitor.class.getDeclaredMethod( + "buildExpandRelNode", + RexNode.class, + String.class, + String.class, + Integer.class, + CalcitePlanContext.class); + m.setAccessible(true); + + try { + m.invoke(visitor, nonInputRexNode, "not_array", null, null, context); + fail("Expected SemanticCheckException"); + } catch (InvocationTargetException ite) { + Throwable cause = ite.getCause(); + assertTrue(cause instanceof SemanticCheckException); + assertEquals( + "Cannot expand field 'not_array': expected ARRAY type but found VARCHAR", + cause.getMessage()); + } + } +} diff --git a/docs/user/ppl/cmd/mvexpand.rst b/docs/user/ppl/cmd/mvexpand.rst index e56a14539fb..1dbeaaf22cd 100644 --- a/docs/user/ppl/cmd/mvexpand.rst +++ b/docs/user/ppl/cmd/mvexpand.rst @@ -156,24 +156,19 @@ Output (example):: Example 5: Large Arrays and Memory / resource limits ---------------------------------------------------- -If an array is very large it can trigger engine or cluster resource limits and the query can fail with an error. There is no mvexpand-specific configuration flag that controls resource usage; instead, limits are enforced by the engine and the cluster: +If an array is very large it can trigger engine or cluster resource limits and the query can fail with an error. There is no mvexpand-specific configuration. 
Instead, limits that can cause a query to be terminated are enforced at the node / engine level and by SQL/PPL query controls. -- OpenSearch node-level protections (circuit breakers and JVM/heap safeguards) and request-size protections. -- SQL/PPL execution limits (for example, query timeouts, request-size limits, and engine memory budgets) that apply to the query execution layer. - -Behavior of circuit breakers and which operators they protect can vary by release and configuration (some breakers primarily protect memory-heavy operations such as fielddata, aggregations, and certain scan implementations). Because of these distinctions, mvexpand should not be relied on to bypass cluster-level protections — use the command-level ``limit`` to bound per-document expansion and avoid hitting cluster limits. +- OpenSearch node protections (for example, heap / query memory limits such as plugins.query.memory_limit) can terminate queries that exceed configured memory budgets. +- SQL/PPL execution limits (timeouts, request/response size limits, and engine memory budgets) also apply to queries that use mvexpand. +- Note: in the current Calcite-based engine, circuit-breaking protections are applied primarily to the index scan operator; protections for other operators (including some operators used internally to implement mvexpand) are under research. Do not assume operator-level circuit breaking will fully protect mvexpand in all cases. To avoid failures when expanding large arrays: -- Use the `limit` parameter to restrict the number of expanded values per document (for example: `mvexpand field limit=1000`). -- Filter or narrow the input before expanding (use `where` and `fields` to reduce rows and columns). -- Tune cluster and SQL/PPL execution settings (circuit breakers, request/response size, timeouts, memory limits) appropriate for your deployment. If desired, we can add links to the exact OpenSearch circuit-breaker and SQL/PPL configuration docs for the targeted release. 
- -PPL query:: +- Use mvexpand's limit parameter to bound the number of expanded values per document (for example: mvexpand field limit=1000). +- Reduce the input size before expanding (filter with where, project only needed fields). +- Tune cluster and SQL/PPL execution settings (circuit breakers, request/response size, timeouts, memory limits) appropriate for your deployment. - source=docs | mvexpand ids - -Output (example):: - Error: Memory/resource limit exceeded while expanding field 'ids'. Please reduce the array size or specify a limit. +For node and SQL/PPL settings see: +https://docs.opensearch.org/1.0/search-plugins/ppl/settings/ Example 6: Multiple Fields (Limitation) --------------------------------------- diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java index 8d42544be6a..268f2b0f847 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java @@ -39,8 +39,6 @@ public void init() throws Exception { "{\"username\":\"empty\",\"skills\":[]}", "{\"username\":\"nullskills\",\"skills\":null}", "{\"username\":\"noskills\"}", - "{\"username\":\"missingattr\",\"skills\":[{\"name\":\"c\"},{\"level\":\"advanced\"}]}", - "{\"username\":\"complex\",\"skills\":[{\"name\":\"ml\",\"level\":\"expert\"},{\"name\":\"ai\"},{\"level\":\"novice\"}]}", "{\"username\":\"duplicate\",\"skills\":[{\"name\":\"dup\"},{\"name\":\"dup\"}]}", "{\"username\":\"large\",\"skills\":[{\"name\":\"s1\"},{\"name\":\"s2\"},{\"name\":\"s3\"},{\"name\":\"s4\"},{\"name\":\"s5\"},{\"name\":\"s6\"},{\"name\":\"s7\"},{\"name\":\"s8\"},{\"name\":\"s9\"},{\"name\":\"s10\"}]}"); refreshIndex(INDEX); From 1e0313165f170bd400f0d6d4c9d0496f60aad8e3 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 13 Nov 2025 17:48:36 
-0600 Subject: [PATCH 23/24] Address the PR comments Signed-off-by: Srikanth Padakanti --- .../CalciteRelNodeVisitorExpandTest.java | 43 ++++++++----------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorExpandTest.java b/core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorExpandTest.java index c0f1162bbf2..87a9f131183 100644 --- a/core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorExpandTest.java +++ b/core/src/test/java/org/opensearch/sql/calcite/CalciteRelNodeVisitorExpandTest.java @@ -5,9 +5,7 @@ package org.opensearch.sql.calcite; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.lenient; import static org.mockito.Mockito.mock; @@ -94,10 +92,7 @@ public void tearDown() { */ @Test public void expand_on_nonexistent_field_should_throw_user_friendly_error() throws Exception { - // leftRowType.getField("missing_field", false, false) -> null (not found) lenient().when(leftRowType.getField("missing_field", false, false)).thenReturn(null); - - // Use a non-RexInputRef to hit resolve-by-name branch RexNode nonInputRexNode = mock(RexNode.class); Method m = @@ -110,15 +105,14 @@ public void expand_on_nonexistent_field_should_throw_user_friendly_error() throw CalcitePlanContext.class); m.setAccessible(true); - try { - m.invoke(visitor, nonInputRexNode, "missing_field", null, null, context); - fail("Expected SemanticCheckException"); - } catch (InvocationTargetException ite) { - Throwable cause = ite.getCause(); - assertTrue(cause instanceof SemanticCheckException); - assertEquals( - "Cannot expand field 'missing_field': field not found in input", cause.getMessage()); - } + InvocationTargetException ite = + 
assertThrows( + InvocationTargetException.class, + () -> m.invoke(visitor, nonInputRexNode, "missing_field", null, null, context)); + Throwable cause = ite.getCause(); + assertTrue(cause instanceof SemanticCheckException); + assertEquals( + "Cannot expand field 'missing_field': field not found in input", cause.getMessage()); } /** @@ -146,15 +140,14 @@ public void expand_on_non_array_field_should_throw_expected_array_message() thro CalcitePlanContext.class); m.setAccessible(true); - try { - m.invoke(visitor, nonInputRexNode, "not_array", null, null, context); - fail("Expected SemanticCheckException"); - } catch (InvocationTargetException ite) { - Throwable cause = ite.getCause(); - assertTrue(cause instanceof SemanticCheckException); - assertEquals( - "Cannot expand field 'not_array': expected ARRAY type but found VARCHAR", - cause.getMessage()); - } + InvocationTargetException ite = + assertThrows( + InvocationTargetException.class, + () -> m.invoke(visitor, nonInputRexNode, "not_array", null, null, context)); + Throwable cause = ite.getCause(); + assertTrue(cause instanceof SemanticCheckException); + assertEquals( + "Cannot expand field 'not_array': expected ARRAY type but found VARCHAR", + cause.getMessage()); } } From dbea3a2b469dab07e11f17ba641244dc3705cbce Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 13 Nov 2025 23:04:01 -0600 Subject: [PATCH 24/24] Address the PR comments Signed-off-by: Srikanth Padakanti --- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + 1 file changed, 1 insertion(+) diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index ffe11630e19..30c43888480 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -117,6 +117,7 @@ commandName | ML | FILLNULL | EXPAND + | MVEXPAND | FLATTEN | TRENDLINE | TIMECHART