Skip to content

Commit 7088e08

Browse files
authored
Add non-numeric field support for max/min functions (#4281)
* add non-numeric support for max/min Signed-off-by: Ritvi Bhatt <[email protected]> * fix mixed field behavior Signed-off-by: Ritvi Bhatt <[email protected]> * update doc Signed-off-by: Ritvi Bhatt <[email protected]> * update doc Signed-off-by: Ritvi Bhatt <[email protected]> * update formatting Signed-off-by: Ritvi Bhatt <[email protected]> * add tests Signed-off-by: Ritvi Bhatt <[email protected]> * fix formatting Signed-off-by: Ritvi Bhatt <[email protected]> * empty Signed-off-by: Ritvi Bhatt <[email protected]> * fix formatting Signed-off-by: Ritvi Bhatt <[email protected]> * fix Signed-off-by: Ritvi Bhatt <[email protected]> * support ip type max/min Signed-off-by: Ritvi Bhatt <[email protected]> * fix formatting Signed-off-by: Ritvi Bhatt <[email protected]> * use tophitsparser Signed-off-by: Ritvi Bhatt <[email protected]> * remove v2 explain Signed-off-by: Ritvi Bhatt <[email protected]> * check for numeric fields for native max/min Signed-off-by: Ritvi Bhatt <[email protected]> * change names Signed-off-by: Ritvi Bhatt <[email protected]> * fix type checking Signed-off-by: Ritvi Bhatt <[email protected]> --------- Signed-off-by: Ritvi Bhatt <[email protected]> Signed-off-by: ritvibhatt <[email protected]>
1 parent 0af7429 commit 7088e08

File tree

9 files changed

+210
-6
lines changed

9 files changed

+210
-6
lines changed

docs/user/ppl/cmd/stats.rst

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,10 @@ Description
182182

183183
Usage: MAX(expr). Returns the maximum value of expr.
184184

185+
For non-numeric fields, values are sorted lexicographically.
186+
187+
Note: Non-numeric field support requires Calcite to be enabled (see `Configuration`_ section above). Available since version 3.3.0.
188+
185189
Example::
186190

187191
os> source=accounts | stats max(age);
@@ -192,6 +196,16 @@ Example::
192196
| 36 |
193197
+----------+
194198

199+
Example with text field::
200+
201+
os> source=accounts | stats max(firstname);
202+
fetched rows / total rows = 1/1
203+
+----------------+
204+
| max(firstname) |
205+
|----------------|
206+
| Nanette |
207+
+----------------+
208+
195209
MIN
196210
---
197211

@@ -200,6 +214,10 @@ Description
200214

201215
Usage: MIN(expr). Returns the minimum value of expr.
202216

217+
For non-numeric fields, values are sorted lexicographically.
218+
219+
Note: Non-numeric field support requires Calcite to be enabled (see `Configuration`_ section above). Available since version 3.3.0.
220+
203221
Example::
204222

205223
os> source=accounts | stats min(age);
@@ -210,6 +228,16 @@ Example::
210228
| 28 |
211229
+----------+
212230

231+
Example with text field::
232+
233+
os> source=accounts | stats min(firstname);
234+
fetched rows / total rows = 1/1
235+
+----------------+
236+
| min(firstname) |
237+
|----------------|
238+
| Amber |
239+
+----------------+
240+
213241
VAR_SAMP
214242
--------
215243

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,22 @@ public void testPushdownLimitIntoAggregation() throws IOException {
621621
+ " head 100 | head 10 from 10 "));
622622
}
623623

624+
@Test
625+
public void testExplainMaxOnStringField() throws IOException {
626+
String expected = loadExpectedPlan("explain_max_string_field.json");
627+
assertJsonEqualsIgnoreId(
628+
expected,
629+
explainQueryToString("source=opensearch-sql_test_index_account | stats max(firstname)"));
630+
}
631+
632+
@Test
633+
public void testExplainMinOnStringField() throws IOException {
634+
String expected = loadExpectedPlan("explain_min_string_field.json");
635+
assertJsonEqualsIgnoreId(
636+
expected,
637+
explainQueryToString("source=opensearch-sql_test_index_account | stats min(firstname)"));
638+
}
639+
624640
@Test
625641
public void testExplainSortOnMetricsNoBucketNullable() throws IOException {
626642
// TODO enhancement later: https://github.com/opensearch-project/sql/issues/4282

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,4 +1176,20 @@ public void testMedian() throws IOException {
11761176
verifySchema(actual, schema("median(balance)", "bigint"));
11771177
verifyDataRows(actual, rows(32838));
11781178
}
1179+
1180+
@Test
1181+
public void testStatsMaxOnStringField() throws IOException {
1182+
JSONObject actual =
1183+
executeQuery(String.format("source=%s | stats max(firstname)", TEST_INDEX_BANK));
1184+
verifySchema(actual, schema("max(firstname)", "string"));
1185+
verifyDataRows(actual, rows("Virginia"));
1186+
}
1187+
1188+
@Test
1189+
public void testStatsMinOnStringField() throws IOException {
1190+
JSONObject actual =
1191+
executeQuery(String.format("source=%s | stats min(firstname)", TEST_INDEX_BANK));
1192+
verifySchema(actual, schema("min(firstname)", "string"));
1193+
verifyDataRows(actual, rows("Amber JOHnny"));
1194+
}
11791195
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], max(firstname)=[MAX($0)])\n LogicalProject(firstname=[$1])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},max(firstname)=MAX($0))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"max(firstname)\":{\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false,\"_source\":{\"includes\":[\"firstname\"],\"excludes\":[]},\"sort\":[{\"firstname.keyword\":{\"order\":\"desc\"}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], min(firstname)=[MIN($0)])\n LogicalProject(firstname=[$1])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},min(firstname)=MIN($0))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"min(firstname)\":{\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false,\"_source\":{\"includes\":[\"firstname\"],\"excludes\":[]},\"sort\":[{\"firstname.keyword\":{\"order\":\"asc\"}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], max(firstname)=[MAX($0)])\n LogicalProject(firstname=[$1])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableLimit(fetch=[10000])\n EnumerableAggregate(group=[{}], max(firstname)=[MAX($1)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], min(firstname)=[MIN($0)])\n LogicalProject(firstname=[$1])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableLimit(fetch=[10000])\n EnumerableAggregate(group=[{}], min(firstname)=[MIN($1)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n"
5+
}
6+
}

opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@
6969
import org.opensearch.sql.ast.expression.Argument;
7070
import org.opensearch.sql.ast.expression.SpanUnit;
7171
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory;
72+
import org.opensearch.sql.data.type.ExprCoreType;
7273
import org.opensearch.sql.data.type.ExprType;
7374
import org.opensearch.sql.expression.function.BuiltinFunctionName;
75+
import org.opensearch.sql.opensearch.data.type.OpenSearchDataType;
7476
import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression;
7577
import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser;
7678
import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser;
@@ -298,12 +300,46 @@ private static Pair<AggregationBuilder, MetricParser> createRegularAggregation(
298300
helper.build(
299301
!args.isEmpty() ? args.getFirst() : null, AggregationBuilders.count(aggFieldName)),
300302
new SingleValueParser(aggFieldName));
301-
case MIN -> Pair.of(
302-
helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)),
303-
new SingleValueParser(aggFieldName));
304-
case MAX -> Pair.of(
305-
helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)),
306-
new SingleValueParser(aggFieldName));
303+
case MIN -> {
304+
String fieldName = helper.inferNamedField(args.getFirst()).getRootName();
305+
ExprType fieldType = helper.fieldTypes.get(fieldName);
306+
307+
if (supportsMaxMinAggregation(fieldType)) {
308+
yield Pair.of(
309+
helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)),
310+
new SingleValueParser(aggFieldName));
311+
} else {
312+
yield Pair.of(
313+
AggregationBuilders.topHits(aggFieldName)
314+
.fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null)
315+
.size(1)
316+
.from(0)
317+
.sort(
318+
helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(),
319+
SortOrder.ASC),
320+
new TopHitsParser(aggFieldName, true));
321+
}
322+
}
323+
case MAX -> {
324+
String fieldName = helper.inferNamedField(args.getFirst()).getRootName();
325+
ExprType fieldType = helper.fieldTypes.get(fieldName);
326+
327+
if (supportsMaxMinAggregation(fieldType)) {
328+
yield Pair.of(
329+
helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)),
330+
new SingleValueParser(aggFieldName));
331+
} else {
332+
yield Pair.of(
333+
AggregationBuilders.topHits(aggFieldName)
334+
.fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null)
335+
.size(1)
336+
.from(0)
337+
.sort(
338+
helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(),
339+
SortOrder.DESC),
340+
new TopHitsParser(aggFieldName, true));
341+
}
342+
}
307343
case VAR_SAMP -> Pair.of(
308344
helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)),
309345
new StatsParser(ExtendedStats::getVarianceSampling, aggFieldName));
@@ -383,6 +419,18 @@ yield switch (functionName) {
383419
};
384420
}
385421

422+
private static boolean supportsMaxMinAggregation(ExprType fieldType) {
423+
ExprType coreType =
424+
(fieldType instanceof OpenSearchDataType)
425+
? ((OpenSearchDataType) fieldType).getExprType()
426+
: fieldType;
427+
428+
return ExprCoreType.numberTypes().contains(coreType)
429+
|| coreType == ExprCoreType.DATE
430+
|| coreType == ExprCoreType.TIME
431+
|| coreType == ExprCoreType.TIMESTAMP;
432+
}
433+
386434
private static ValuesSourceAggregationBuilder<?> createBucketAggregation(
387435
Integer group, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) {
388436
return createBucket(group, project, helper);

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,4 +756,76 @@ public void testMedian() {
756756
"SELECT `percentile_approx`(`SAL`, 50.0, DECIMAL) `median(SAL)`\n" + "FROM `scott`.`EMP`";
757757
verifyPPLToSparkSQL(root, expectedSparkSql);
758758
}
759+
760+
@Test
761+
public void testMaxOnStringField() {
762+
String ppl = "source=EMP | stats max(ENAME) as max_name";
763+
RelNode root = getRelNode(ppl);
764+
765+
String expectedLogical =
766+
"LogicalAggregate(group=[{}], max_name=[MAX($0)])\n"
767+
+ " LogicalProject(ENAME=[$1])\n"
768+
+ " LogicalTableScan(table=[[scott, EMP]])\n";
769+
verifyLogical(root, expectedLogical);
770+
771+
String expectedResult = "max_name=WARD\n";
772+
verifyResult(root, expectedResult);
773+
774+
String expectedSparkSql = "SELECT MAX(`ENAME`) `max_name`\nFROM `scott`.`EMP`";
775+
verifyPPLToSparkSQL(root, expectedSparkSql);
776+
}
777+
778+
@Test
779+
public void testMinOnStringField() {
780+
String ppl = "source=EMP | stats min(ENAME) as min_name";
781+
RelNode root = getRelNode(ppl);
782+
783+
String expectedLogical =
784+
"LogicalAggregate(group=[{}], min_name=[MIN($0)])\n"
785+
+ " LogicalProject(ENAME=[$1])\n"
786+
+ " LogicalTableScan(table=[[scott, EMP]])\n";
787+
verifyLogical(root, expectedLogical);
788+
789+
String expectedResult = "min_name=ADAMS\n";
790+
verifyResult(root, expectedResult);
791+
792+
String expectedSparkSql = "SELECT MIN(`ENAME`) `min_name`\nFROM `scott`.`EMP`";
793+
verifyPPLToSparkSQL(root, expectedSparkSql);
794+
}
795+
796+
@Test
797+
public void testMaxOnTimeField() {
798+
String ppl = "source=EMP | stats max(HIREDATE) as max_hire_date";
799+
RelNode root = getRelNode(ppl);
800+
801+
String expectedLogical =
802+
"LogicalAggregate(group=[{}], max_hire_date=[MAX($0)])\n"
803+
+ " LogicalProject(HIREDATE=[$4])\n"
804+
+ " LogicalTableScan(table=[[scott, EMP]])\n";
805+
verifyLogical(root, expectedLogical);
806+
807+
String expectedResult = "max_hire_date=1987-05-23\n";
808+
verifyResult(root, expectedResult);
809+
810+
String expectedSparkSql = "SELECT MAX(`HIREDATE`) `max_hire_date`\nFROM `scott`.`EMP`";
811+
verifyPPLToSparkSQL(root, expectedSparkSql);
812+
}
813+
814+
@Test
815+
public void testMinOnTimeField() {
816+
String ppl = "source=EMP | stats min(HIREDATE) as min_hire_date";
817+
RelNode root = getRelNode(ppl);
818+
819+
String expectedLogical =
820+
"LogicalAggregate(group=[{}], min_hire_date=[MIN($0)])\n"
821+
+ " LogicalProject(HIREDATE=[$4])\n"
822+
+ " LogicalTableScan(table=[[scott, EMP]])\n";
823+
verifyLogical(root, expectedLogical);
824+
825+
String expectedResult = "min_hire_date=1980-12-17\n";
826+
verifyResult(root, expectedResult);
827+
828+
String expectedSparkSql = "SELECT MIN(`HIREDATE`) `min_hire_date`\nFROM `scott`.`EMP`";
829+
verifyPPLToSparkSQL(root, expectedSparkSql);
830+
}
759831
}

0 commit comments

Comments
 (0)