Skip to content

Commit dd52196

Browse files
authored
Support mvindex eval function (#4794)
1 parent a3c90a8 commit dd52196

File tree

9 files changed

+434
-0
lines changed

9 files changed

+434
-0
lines changed

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,13 @@ public enum BuiltinFunctionName {
6868
/** Collection functions */
6969
ARRAY(FunctionName.of("array")),
7070
ARRAY_LENGTH(FunctionName.of("array_length")),
71+
ARRAY_SLICE(FunctionName.of("array_slice"), true),
7172
MAP_APPEND(FunctionName.of("map_append"), true),
7273
MAP_CONCAT(FunctionName.of("map_concat"), true),
7374
MAP_REMOVE(FunctionName.of("map_remove"), true),
7475
MVAPPEND(FunctionName.of("mvappend")),
7576
MVJOIN(FunctionName.of("mvjoin")),
77+
MVINDEX(FunctionName.of("mvindex")),
7678
FORALL(FunctionName.of("forall")),
7779
EXISTS(FunctionName.of("exists")),
7880
FILTER(FunctionName.of("filter")),
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.expression.function.CollectionUDF;
7+
8+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDFUNCTION;
9+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_LENGTH;
10+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_SLICE;
11+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.IF;
12+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_ITEM;
13+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LESS;
14+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBTRACT;
15+
16+
import java.math.BigDecimal;
17+
import org.apache.calcite.rex.RexBuilder;
18+
import org.apache.calcite.rex.RexNode;
19+
import org.opensearch.sql.expression.function.PPLFuncImpTable;
20+
21+
/**
22+
* MVINDEX function implementation that returns a subset of a multivalue array.
23+
*
24+
* <p>Usage:
25+
*
26+
* <ul>
27+
* <li>mvindex(array, start) - returns single element at index (0-based)
28+
* <li>mvindex(array, start, end) - returns array slice from start to end (inclusive, 0-based)
29+
* </ul>
30+
*
31+
* <p>Supports negative indexing where -1 refers to the last element.
32+
*
33+
* <p>Implementation notes:
34+
*
35+
* <ul>
36+
* <li>Single element access uses Calcite's ITEM operator (1-based indexing)
37+
* <li>Range access uses Calcite's ARRAY_SLICE operator (0-based indexing with length parameter)
38+
* <li>Index conversion handles the difference between PPL's 0-based indexing and Calcite's
39+
* conventions
40+
* </ul>
41+
*/
42+
public class MVIndexFunctionImp implements PPLFuncImpTable.FunctionImp {
43+
44+
@Override
45+
public RexNode resolve(RexBuilder builder, RexNode... args) {
46+
RexNode array = args[0];
47+
RexNode startIdx = args[1];
48+
49+
// Use resolve to get array length instead of direct makeCall
50+
RexNode arrayLen = PPLFuncImpTable.INSTANCE.resolve(builder, ARRAY_LENGTH, array);
51+
52+
if (args.length == 2) {
53+
// Single element access using ITEM (1-based indexing)
54+
return resolveSingleElement(builder, array, startIdx, arrayLen);
55+
} else {
56+
// Range access using ARRAY_SLICE (0-based indexing)
57+
RexNode endIdx = args[2];
58+
return resolveRange(builder, array, startIdx, endIdx, arrayLen);
59+
}
60+
}
61+
62+
/**
63+
* Resolves single element access: mvindex(array, index)
64+
*
65+
* <p>Uses Calcite's ITEM operator which uses 1-based indexing. Converts PPL's 0-based index to
66+
* 1-based by adding 1.
67+
*/
68+
private RexNode resolveSingleElement(
69+
RexBuilder builder, RexNode array, RexNode startIdx, RexNode arrayLen) {
70+
// Convert 0-based PPL index to 1-based Calcite ITEM index
71+
RexNode zero = builder.makeExactLiteral(BigDecimal.ZERO);
72+
RexNode one = builder.makeExactLiteral(BigDecimal.ONE);
73+
74+
RexNode isNegative = PPLFuncImpTable.INSTANCE.resolve(builder, LESS, startIdx, zero);
75+
RexNode sumArrayLenStart =
76+
PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, arrayLen, startIdx);
77+
RexNode negativeCase =
78+
PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, sumArrayLenStart, one);
79+
RexNode positiveCase = PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, startIdx, one);
80+
81+
RexNode normalizedStart =
82+
PPLFuncImpTable.INSTANCE.resolve(builder, IF, isNegative, negativeCase, positiveCase);
83+
84+
return PPLFuncImpTable.INSTANCE.resolve(builder, INTERNAL_ITEM, array, normalizedStart);
85+
}
86+
87+
/**
88+
* Resolves range access: mvindex(array, start, end)
89+
*
90+
* <p>Uses Calcite's ARRAY_SLICE operator which uses 0-based indexing and a length parameter.
91+
* PPL's end index is inclusive, so length = (end - start) + 1.
92+
*/
93+
private RexNode resolveRange(
94+
RexBuilder builder, RexNode array, RexNode startIdx, RexNode endIdx, RexNode arrayLen) {
95+
// Normalize negative indices for ARRAY_SLICE (0-based)
96+
RexNode zero = builder.makeExactLiteral(BigDecimal.ZERO);
97+
RexNode one = builder.makeExactLiteral(BigDecimal.ONE);
98+
99+
RexNode isStartNegative = PPLFuncImpTable.INSTANCE.resolve(builder, LESS, startIdx, zero);
100+
RexNode startNegativeCase =
101+
PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, arrayLen, startIdx);
102+
RexNode normalizedStart =
103+
PPLFuncImpTable.INSTANCE.resolve(builder, IF, isStartNegative, startNegativeCase, startIdx);
104+
105+
RexNode isEndNegative = PPLFuncImpTable.INSTANCE.resolve(builder, LESS, endIdx, zero);
106+
RexNode endNegativeCase =
107+
PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, arrayLen, endIdx);
108+
RexNode normalizedEnd =
109+
PPLFuncImpTable.INSTANCE.resolve(builder, IF, isEndNegative, endNegativeCase, endIdx);
110+
111+
// Calculate length: (normalizedEnd - normalizedStart) + 1
112+
RexNode diff =
113+
PPLFuncImpTable.INSTANCE.resolve(builder, SUBTRACT, normalizedEnd, normalizedStart);
114+
RexNode length = PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, diff, one);
115+
116+
// Call ARRAY_SLICE(array, normalizedStart, length)
117+
return PPLFuncImpTable.INSTANCE.resolve(builder, ARRAY_SLICE, array, normalizedStart, length);
118+
}
119+
}

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import static org.opensearch.sql.expression.function.BuiltinFunctionName.AND;
1818
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY;
1919
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_LENGTH;
20+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_SLICE;
2021
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ASCII;
2122
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ASIN;
2223
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ATAN;
@@ -149,6 +150,7 @@
149150
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLYFUNCTION;
150151
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH;
151152
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVAPPEND;
153+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVINDEX;
152154
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVJOIN;
153155
import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT;
154156
import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOTEQUAL;
@@ -282,6 +284,7 @@
282284
import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils;
283285
import org.opensearch.sql.exception.ExpressionEvaluationException;
284286
import org.opensearch.sql.executor.QueryType;
287+
import org.opensearch.sql.expression.function.CollectionUDF.MVIndexFunctionImp;
285288

286289
public class PPLFuncImpTable {
287290
private static final Logger logger = LogManager.getLogger(PPLFuncImpTable.class);
@@ -972,12 +975,25 @@ void populate() {
972975
builder.makeCall(SqlLibraryOperators.ARRAY_JOIN, array, delimiter),
973976
PPLTypeChecker.family(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER));
974977

978+
// Register MVINDEX to use Calcite's ITEM/ARRAY_SLICE with index normalization
979+
register(
980+
MVINDEX,
981+
new MVIndexFunctionImp(),
982+
PPLTypeChecker.wrapComposite(
983+
(CompositeOperandTypeChecker)
984+
OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER)
985+
.or(
986+
OperandTypes.family(
987+
SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER)),
988+
false));
989+
975990
registerOperator(ARRAY, PPLBuiltinOperators.ARRAY);
976991
registerOperator(MVAPPEND, PPLBuiltinOperators.MVAPPEND);
977992
registerOperator(MAP_APPEND, PPLBuiltinOperators.MAP_APPEND);
978993
registerOperator(MAP_CONCAT, SqlLibraryOperators.MAP_CONCAT);
979994
registerOperator(MAP_REMOVE, PPLBuiltinOperators.MAP_REMOVE);
980995
registerOperator(ARRAY_LENGTH, SqlLibraryOperators.ARRAY_LENGTH);
996+
registerOperator(ARRAY_SLICE, SqlLibraryOperators.ARRAY_SLICE);
981997
registerOperator(FORALL, PPLBuiltinOperators.FORALL);
982998
registerOperator(EXISTS, PPLBuiltinOperators.EXISTS);
983999
registerOperator(FILTER, PPLBuiltinOperators.FILTER);

docs/user/ppl/functions/collection.rst

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,3 +301,58 @@ Example::
301301
|--------------|
302302
| [1,text,2.5] |
303303
+--------------+
304+
305+
MVINDEX
306+
-------
307+
308+
Description
309+
>>>>>>>>>>>
310+
311+
Usage: mvindex(array, start, [end]) returns a subset of the multivalue array using the start and optional end index values. Indexes are 0-based (first element is at index 0). Supports negative indexing where -1 refers to the last element. When only start is provided, returns a single element. When both start and end are provided, returns an array of elements from start to end (inclusive).
312+
313+
Argument type: array: ARRAY, start: INTEGER, end: INTEGER (optional)
314+
315+
Return type: ANY (single element) or ARRAY (range)
316+
317+
Example::
318+
319+
os> source=people | eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, 1) | fields result | head 1
320+
fetched rows / total rows = 1/1
321+
+--------+
322+
| result |
323+
|--------|
324+
| b |
325+
+--------+
326+
327+
os> source=people | eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, -1) | fields result | head 1
328+
fetched rows / total rows = 1/1
329+
+--------+
330+
| result |
331+
|--------|
332+
| e |
333+
+--------+
334+
335+
os> source=people | eval array = array(1, 2, 3, 4, 5), result = mvindex(array, 1, 3) | fields result | head 1
336+
fetched rows / total rows = 1/1
337+
+---------+
338+
| result |
339+
|---------|
340+
| [2,3,4] |
341+
+---------+
342+
343+
os> source=people | eval array = array(1, 2, 3, 4, 5), result = mvindex(array, -3, -1) | fields result | head 1
344+
fetched rows / total rows = 1/1
345+
+---------+
346+
| result |
347+
|---------|
348+
| [3,4,5] |
349+
+---------+
350+
351+
os> source=people | eval array = array('alex', 'celestino', 'claudia', 'david'), result = mvindex(array, 0, 2) | fields result | head 1
352+
fetched rows / total rows = 1/1
353+
+--------------------------+
354+
| result |
355+
|--------------------------|
356+
| [alex,celestino,claudia] |
357+
+--------------------------+
358+

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,4 +370,123 @@ public void testMvjoinWithMultipleRealFields() throws IOException {
370370
firstRow.getString(0) + " | " + firstRow.getString(1) + " | " + firstRow.getString(2),
371371
firstRow.getString(3));
372372
}
373+
374+
@Test
375+
public void testMvindexSingleElementPositive() throws IOException {
376+
JSONObject actual =
377+
executeQuery(
378+
String.format(
379+
"source=%s | eval arr = array('a', 'b', 'c', 'd', 'e'), result = mvindex(arr, 1)"
380+
+ " | head 1 | fields result",
381+
TEST_INDEX_BANK));
382+
383+
verifySchema(actual, schema("result", "string"));
384+
verifyDataRows(actual, rows("b"));
385+
}
386+
387+
@Test
388+
public void testMvindexSingleElementNegative() throws IOException {
389+
JSONObject actual =
390+
executeQuery(
391+
String.format(
392+
"source=%s | eval arr = array('a', 'b', 'c', 'd', 'e'), result = mvindex(arr, -1)"
393+
+ " | head 1 | fields result",
394+
TEST_INDEX_BANK));
395+
396+
verifySchema(actual, schema("result", "string"));
397+
verifyDataRows(actual, rows("e"));
398+
}
399+
400+
@Test
401+
public void testMvindexSingleElementNegativeMiddle() throws IOException {
402+
JSONObject actual =
403+
executeQuery(
404+
String.format(
405+
"source=%s | eval arr = array('a', 'b', 'c', 'd', 'e'), result = mvindex(arr, -3)"
406+
+ " | head 1 | fields result",
407+
TEST_INDEX_BANK));
408+
409+
verifySchema(actual, schema("result", "string"));
410+
verifyDataRows(actual, rows("c"));
411+
}
412+
413+
@Test
414+
public void testMvindexRangePositive() throws IOException {
415+
JSONObject actual =
416+
executeQuery(
417+
String.format(
418+
"source=%s | eval arr = array(1, 2, 3, 4, 5), result = mvindex(arr, 1, 3) | head"
419+
+ " 1 | fields result",
420+
TEST_INDEX_BANK));
421+
422+
verifySchema(actual, schema("result", "array"));
423+
verifyDataRows(actual, rows(List.of(2, 3, 4)));
424+
}
425+
426+
@Test
427+
public void testMvindexRangeNegative() throws IOException {
428+
JSONObject actual =
429+
executeQuery(
430+
String.format(
431+
"source=%s | eval arr = array(1, 2, 3, 4, 5), result = mvindex(arr, -3, -1) |"
432+
+ " head 1 | fields result",
433+
TEST_INDEX_BANK));
434+
435+
verifySchema(actual, schema("result", "array"));
436+
verifyDataRows(actual, rows(List.of(3, 4, 5)));
437+
}
438+
439+
@Test
440+
public void testMvindexRangeMixed() throws IOException {
441+
JSONObject actual =
442+
executeQuery(
443+
String.format(
444+
"source=%s | eval arr = array(1, 2, 3, 4, 5), result = mvindex(arr, -4, 2) | head"
445+
+ " 1 | fields result",
446+
TEST_INDEX_BANK));
447+
448+
verifySchema(actual, schema("result", "array"));
449+
verifyDataRows(actual, rows(List.of(2, 3)));
450+
}
451+
452+
@Test
453+
public void testMvindexRangeFirstThree() throws IOException {
454+
JSONObject actual =
455+
executeQuery(
456+
String.format(
457+
"source=%s | eval arr = array('alex', 'celestino', 'claudia', 'david'), result ="
458+
+ " mvindex(arr, 0, 2) | head 1 | fields result",
459+
TEST_INDEX_BANK));
460+
461+
verifySchema(actual, schema("result", "array"));
462+
verifyDataRows(actual, rows(List.of("alex", "celestino", "claudia")));
463+
}
464+
465+
@Test
466+
public void testMvindexRangeLastThree() throws IOException {
467+
JSONObject actual =
468+
executeQuery(
469+
String.format(
470+
"source=%s | eval arr = array('buttercup', 'dash', 'flutter', 'honey', 'ivory',"
471+
+ " 'minty', 'pinky', 'rarity'), result = mvindex(arr, -3, -1) | head 1 |"
472+
+ " fields result",
473+
TEST_INDEX_BANK));
474+
475+
verifySchema(actual, schema("result", "array"));
476+
verifyDataRows(actual, rows(List.of("minty", "pinky", "rarity")));
477+
}
478+
479+
@Test
480+
public void testMvindexRangeSingleElement() throws IOException {
481+
// When start == end, should return single element in array
482+
JSONObject actual =
483+
executeQuery(
484+
String.format(
485+
"source=%s | eval arr = array(1, 2, 3, 4, 5), result = mvindex(arr, 2, 2) | head"
486+
+ " 1 | fields result",
487+
TEST_INDEX_BANK));
488+
489+
verifySchema(actual, schema("result", "array"));
490+
verifyDataRows(actual, rows(List.of(3)));
491+
}
373492
}

ppl/src/main/antlr/OpenSearchPPLLexer.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,7 @@ ARRAY: 'ARRAY';
441441
ARRAY_LENGTH: 'ARRAY_LENGTH';
442442
MVAPPEND: 'MVAPPEND';
443443
MVJOIN: 'MVJOIN';
444+
MVINDEX: 'MVINDEX';
444445
FORALL: 'FORALL';
445446
FILTER: 'FILTER';
446447
TRANSFORM: 'TRANSFORM';

ppl/src/main/antlr/OpenSearchPPLParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,6 +1094,7 @@ collectionFunctionName
10941094
| ARRAY_LENGTH
10951095
| MVAPPEND
10961096
| MVJOIN
1097+
| MVINDEX
10971098
| FORALL
10981099
| EXISTS
10991100
| FILTER

0 commit comments

Comments
 (0)