Skip to content

Commit 20f2234

Browse files
authored
Fix binning udf resolution / Add type coercion support for binning UDFs (#4742)
* Fix binning udf resolution Signed-off-by: Kai Huang <[email protected]> * spotless fix Signed-off-by: Kai Huang <[email protected]> * update tes Signed-off-by: Kai Huang <[email protected]> * add comments Signed-off-by: Kai Huang <[email protected]> * removal Signed-off-by: Kai Huang <[email protected]> * update yaml test Signed-off-by: Kai Huang <[email protected]> * rerun Signed-off-by: Kai Huang <[email protected]> * update support for type coercion Signed-off-by: Kai Huang <[email protected]> * update doc Signed-off-by: Kai Huang <[email protected]> * enhance error handling and doc Signed-off-by: Kai Huang <[email protected]> * fixes Signed-off-by: Kai Huang <[email protected]> * fixes Signed-off-by: Kai Huang <[email protected]> * fixes Signed-off-by: Kai Huang <[email protected]> * Fix tests Signed-off-by: Kai Huang <[email protected]> --------- Signed-off-by: Kai Huang <[email protected]>
1 parent 2057dfc commit 20f2234

File tree

22 files changed

+500
-288
lines changed

22 files changed

+500
-288
lines changed

core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -338,12 +338,12 @@ public static boolean isUserDefinedType(RelDataType type) {
338338
}
339339

340340
/**
341-
* Checks if the RelDataType represents a numeric field. Supports both standard SQL numeric types
342-
* (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL) and OpenSearch UDT numeric
343-
* types.
341+
* Checks if the RelDataType represents a numeric type. Supports standard SQL numeric types
342+
* (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL), OpenSearch UDT numeric
343+
* types, and string types (VARCHAR, CHAR).
344344
*
345345
* @param fieldType the RelDataType to check
346-
* @return true if the type is numeric, false otherwise
346+
* @return true if the type is numeric or string, false otherwise
347347
*/
348348
public static boolean isNumericType(RelDataType fieldType) {
349349
// Check standard SQL numeric types
@@ -359,6 +359,11 @@ public static boolean isNumericType(RelDataType fieldType) {
359359
return true;
360360
}
361361

362+
// Check string types (VARCHAR, CHAR)
363+
if (sqlType == SqlTypeName.VARCHAR || sqlType == SqlTypeName.CHAR) {
364+
return true;
365+
}
366+
362367
// Check for OpenSearch UDT numeric types
363368
if (isUserDefinedType(fieldType)) {
364369
AbstractExprRelDataType<?> exprType = (AbstractExprRelDataType<?>) fieldType;

core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,65 @@ private PPLOperandTypes() {}
134134
SqlTypeFamily.NUMERIC,
135135
SqlTypeFamily.NUMERIC,
136136
SqlTypeFamily.NUMERIC));
137+
138+
public static final UDFOperandMetadata WIDTH_BUCKET_OPERAND =
139+
UDFOperandMetadata.wrap(
140+
(CompositeOperandTypeChecker)
141+
// 1. Numeric fields: bin age span=10
142+
OperandTypes.family(
143+
SqlTypeFamily.NUMERIC,
144+
SqlTypeFamily.INTEGER,
145+
SqlTypeFamily.NUMERIC,
146+
SqlTypeFamily.NUMERIC)
147+
// 2. Timestamp fields with OpenSearch type system
148+
// Used in: Production + Integration tests (CalciteBinCommandIT)
149+
.or(
150+
OperandTypes.family(
151+
SqlTypeFamily.TIMESTAMP,
152+
SqlTypeFamily.INTEGER,
153+
SqlTypeFamily.CHARACTER, // TIMESTAMP - TIMESTAMP = INTERVAL (as STRING)
154+
SqlTypeFamily.TIMESTAMP))
155+
// 3. Timestamp fields with Calcite SCOTT schema
156+
// Used in: Unit tests (CalcitePPLBinTest)
157+
.or(
158+
OperandTypes.family(
159+
SqlTypeFamily.TIMESTAMP,
160+
SqlTypeFamily.INTEGER,
161+
SqlTypeFamily.TIMESTAMP, // TIMESTAMP - TIMESTAMP = TIMESTAMP
162+
SqlTypeFamily.TIMESTAMP))
163+
// DATE field with OpenSearch type system
164+
// Used in: Production + Integration tests (CalciteBinCommandIT)
165+
.or(
166+
OperandTypes.family(
167+
SqlTypeFamily.DATE,
168+
SqlTypeFamily.INTEGER,
169+
SqlTypeFamily.CHARACTER, // DATE - DATE = INTERVAL (as STRING)
170+
SqlTypeFamily.DATE))
171+
// DATE field with Calcite SCOTT schema
172+
// Used in: Unit tests (CalcitePPLBinTest)
173+
.or(
174+
OperandTypes.family(
175+
SqlTypeFamily.DATE,
176+
SqlTypeFamily.INTEGER,
177+
SqlTypeFamily.DATE, // DATE - DATE = DATE
178+
SqlTypeFamily.DATE))
179+
// TIME field with OpenSearch type system
180+
// Used in: Production + Integration tests (CalciteBinCommandIT)
181+
.or(
182+
OperandTypes.family(
183+
SqlTypeFamily.TIME,
184+
SqlTypeFamily.INTEGER,
185+
SqlTypeFamily.CHARACTER, // TIME - TIME = INTERVAL (as STRING)
186+
SqlTypeFamily.TIME))
187+
// TIME field with Calcite SCOTT schema
188+
// Used in: Unit tests (CalcitePPLBinTest)
189+
.or(
190+
OperandTypes.family(
191+
SqlTypeFamily.TIME,
192+
SqlTypeFamily.INTEGER,
193+
SqlTypeFamily.TIME, // TIME - TIME = TIME
194+
SqlTypeFamily.TIME)));
195+
137196
public static final UDFOperandMetadata NUMERIC_NUMERIC_NUMERIC_NUMERIC_NUMERIC =
138197
UDFOperandMetadata.wrap(
139198
OperandTypes.family(

core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,7 @@
1111
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory;
1212
import org.opensearch.sql.exception.SemanticCheckException;
1313

14-
/**
15-
* Represents a validated field that supports binning operations. The existence of this class
16-
* guarantees that validation has been run - the field is either numeric or time-based.
17-
*
18-
* <p>This design encodes validation in the type system, preventing downstream code from forgetting
19-
* to validate or running validation multiple times.
20-
*/
14+
/** Represents a field that supports binning operations. */
2115
@Getter
2216
public class BinnableField {
2317
private final RexNode fieldExpr;
@@ -27,13 +21,12 @@ public class BinnableField {
2721
private final boolean isNumeric;
2822

2923
/**
30-
* Creates a validated BinnableField. Throws SemanticCheckException if the field is neither
31-
* numeric nor time-based.
24+
* Creates a BinnableField. Validates that the field type is compatible with binning operations.
3225
*
3326
* @param fieldExpr The Rex expression for the field
3427
* @param fieldType The relational data type of the field
3528
* @param fieldName The name of the field (for error messages)
36-
* @throws SemanticCheckException if the field is neither numeric nor time-based
29+
* @throws SemanticCheckException if the field type is not supported for binning
3730
*/
3831
public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) {
3932
this.fieldExpr = fieldExpr;
@@ -43,13 +36,10 @@ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName)
4336
this.isTimeBased = OpenSearchTypeFactory.isTimeBasedType(fieldType);
4437
this.isNumeric = OpenSearchTypeFactory.isNumericType(fieldType);
4538

46-
// Validation: field must be either numeric or time-based
39+
// Reject truly unsupported types (e.g., BOOLEAN, ARRAY, MAP)
4740
if (!isNumeric && !isTimeBased) {
4841
throw new SemanticCheckException(
49-
String.format(
50-
"Cannot apply binning: field '%s' is non-numeric and not time-related, expected"
51-
+ " numeric or time-related type",
52-
fieldName));
42+
String.format("Cannot apply binning to field '%s': unsupported type", fieldName));
5343
}
5444
}
5545

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
package org.opensearch.sql.calcite.utils.binning.handlers;
77

88
import org.apache.calcite.rex.RexNode;
9-
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
109
import org.opensearch.sql.ast.expression.Literal;
1110
import org.opensearch.sql.ast.tree.Bin;
1211
import org.opensearch.sql.ast.tree.CountBin;
@@ -16,7 +15,8 @@
1615
import org.opensearch.sql.calcite.utils.binning.BinFieldValidator;
1716
import org.opensearch.sql.calcite.utils.binning.BinHandler;
1817
import org.opensearch.sql.calcite.utils.binning.BinnableField;
19-
import org.opensearch.sql.expression.function.PPLBuiltinOperators;
18+
import org.opensearch.sql.expression.function.BuiltinFunctionName;
19+
import org.opensearch.sql.expression.function.PPLFuncImpTable;
2020

2121
/** Handler for bins-based (count) binning operations. */
2222
public class CountBinHandler implements BinHandler {
@@ -40,7 +40,9 @@ public RexNode createExpression(
4040
// Calculate data range using window functions
4141
RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex();
4242
RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex();
43-
RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue);
43+
RexNode dataRange =
44+
PPLFuncImpTable.INSTANCE.resolve(
45+
context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue);
4446

4547
// Convert start/end parameters
4648
RexNode startValue = convertParameter(countBin.getStart(), context);
@@ -49,8 +51,13 @@ public RexNode createExpression(
4951
// WIDTH_BUCKET(field_value, num_bins, data_range, max_value)
5052
RexNode numBins = context.relBuilder.literal(requestedBins);
5153

52-
return context.rexBuilder.makeCall(
53-
PPLBuiltinOperators.WIDTH_BUCKET, fieldExpr, numBins, dataRange, maxValue);
54+
return PPLFuncImpTable.INSTANCE.resolve(
55+
context.rexBuilder,
56+
BuiltinFunctionName.WIDTH_BUCKET,
57+
fieldExpr,
58+
numBins,
59+
dataRange,
60+
maxValue);
5461
}
5562

5663
private RexNode convertParameter(

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
import org.opensearch.sql.calcite.utils.binning.BinHandler;
1717
import org.opensearch.sql.calcite.utils.binning.BinnableField;
1818
import org.opensearch.sql.calcite.utils.binning.RangeFormatter;
19+
import org.opensearch.sql.expression.function.BuiltinFunctionName;
20+
import org.opensearch.sql.expression.function.PPLFuncImpTable;
1921

2022
/** Handler for default binning when no parameters are specified. */
2123
public class DefaultBinHandler implements BinHandler {
@@ -45,7 +47,9 @@ private RexNode createNumericDefaultBinning(RexNode fieldExpr, CalcitePlanContex
4547
// Calculate data range
4648
RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex();
4749
RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex();
48-
RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue);
50+
RexNode dataRange =
51+
PPLFuncImpTable.INSTANCE.resolve(
52+
context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue);
4953

5054
// Calculate magnitude-based width
5155
RexNode log10Range = context.relBuilder.call(SqlStdOperatorTable.LOG10, dataRange);
@@ -60,17 +64,21 @@ private RexNode createNumericDefaultBinning(RexNode fieldExpr, CalcitePlanContex
6064
// Calculate bin value
6165
RexNode binStartValue = calculateBinValue(fieldExpr, widthInt, context);
6266
RexNode binEndValue =
63-
context.relBuilder.call(SqlStdOperatorTable.PLUS, binStartValue, widthInt);
67+
PPLFuncImpTable.INSTANCE.resolve(
68+
context.rexBuilder, BuiltinFunctionName.ADD, binStartValue, widthInt);
6469

6570
return RangeFormatter.createRangeString(binStartValue, binEndValue, context);
6671
}
6772

6873
private RexNode calculateBinValue(RexNode fieldExpr, RexNode width, CalcitePlanContext context) {
6974

70-
RexNode divided = context.relBuilder.call(SqlStdOperatorTable.DIVIDE, fieldExpr, width);
75+
RexNode divided =
76+
PPLFuncImpTable.INSTANCE.resolve(
77+
context.rexBuilder, BuiltinFunctionName.DIVIDE, fieldExpr, width);
7178

7279
RexNode floored = context.relBuilder.call(SqlStdOperatorTable.FLOOR, divided);
7380

74-
return context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, floored, width);
81+
return PPLFuncImpTable.INSTANCE.resolve(
82+
context.rexBuilder, BuiltinFunctionName.MULTIPLY, floored, width);
7583
}
7684
}

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import org.opensearch.sql.calcite.utils.binning.BinConstants;
1212
import org.opensearch.sql.calcite.utils.binning.RangeFormatter;
1313
import org.opensearch.sql.calcite.utils.binning.SpanInfo;
14+
import org.opensearch.sql.expression.function.BuiltinFunctionName;
15+
import org.opensearch.sql.expression.function.PPLFuncImpTable;
1416

1517
/** Helper for creating logarithmic span expressions. */
1618
public class LogSpanHelper {
@@ -31,14 +33,19 @@ public RexNode createLogSpanExpression(
3133
RexNode adjustedField = fieldExpr;
3234
if (coefficient != 1.0) {
3335
adjustedField =
34-
context.relBuilder.call(
35-
SqlStdOperatorTable.DIVIDE, fieldExpr, context.relBuilder.literal(coefficient));
36+
PPLFuncImpTable.INSTANCE.resolve(
37+
context.rexBuilder,
38+
BuiltinFunctionName.DIVIDE,
39+
fieldExpr,
40+
context.relBuilder.literal(coefficient));
3641
}
3742

3843
// Calculate log_base(adjusted_field)
3944
RexNode lnField = context.relBuilder.call(SqlStdOperatorTable.LN, adjustedField);
4045
RexNode lnBase = context.relBuilder.literal(Math.log(base));
41-
RexNode logValue = context.relBuilder.call(SqlStdOperatorTable.DIVIDE, lnField, lnBase);
46+
RexNode logValue =
47+
PPLFuncImpTable.INSTANCE.resolve(
48+
context.rexBuilder, BuiltinFunctionName.DIVIDE, lnField, lnBase);
4249

4350
// Get bin number
4451
RexNode binNumber = context.relBuilder.call(SqlStdOperatorTable.FLOOR, logValue);
@@ -49,15 +56,20 @@ public RexNode createLogSpanExpression(
4956

5057
RexNode basePowerBin = context.relBuilder.call(SqlStdOperatorTable.POWER, baseNode, binNumber);
5158
RexNode lowerBound =
52-
context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, coefficientNode, basePowerBin);
59+
PPLFuncImpTable.INSTANCE.resolve(
60+
context.rexBuilder, BuiltinFunctionName.MULTIPLY, coefficientNode, basePowerBin);
5361

5462
RexNode binPlusOne =
55-
context.relBuilder.call(
56-
SqlStdOperatorTable.PLUS, binNumber, context.relBuilder.literal(1.0));
63+
PPLFuncImpTable.INSTANCE.resolve(
64+
context.rexBuilder,
65+
BuiltinFunctionName.ADD,
66+
binNumber,
67+
context.relBuilder.literal(1.0));
5768
RexNode basePowerBinPlusOne =
5869
context.relBuilder.call(SqlStdOperatorTable.POWER, baseNode, binPlusOne);
5970
RexNode upperBound =
60-
context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, coefficientNode, basePowerBinPlusOne);
71+
PPLFuncImpTable.INSTANCE.resolve(
72+
context.rexBuilder, BuiltinFunctionName.MULTIPLY, coefficientNode, basePowerBinPlusOne);
6173

6274
// Create range string
6375
RexNode rangeStr = RangeFormatter.createRangeString(lowerBound, upperBound, context);

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
import org.apache.calcite.rex.RexLiteral;
1111
import org.apache.calcite.rex.RexNode;
12-
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
1312
import org.opensearch.sql.ast.expression.Literal;
1413
import org.opensearch.sql.ast.tree.Bin;
1514
import org.opensearch.sql.ast.tree.MinSpanBin;
@@ -18,7 +17,8 @@
1817
import org.opensearch.sql.calcite.utils.binning.BinFieldValidator;
1918
import org.opensearch.sql.calcite.utils.binning.BinHandler;
2019
import org.opensearch.sql.calcite.utils.binning.BinnableField;
21-
import org.opensearch.sql.expression.function.PPLBuiltinOperators;
20+
import org.opensearch.sql.expression.function.BuiltinFunctionName;
21+
import org.opensearch.sql.expression.function.PPLFuncImpTable;
2222

2323
/** Handler for minspan-based binning operations. */
2424
public class MinSpanBinHandler implements BinHandler {
@@ -51,7 +51,9 @@ public RexNode createExpression(
5151
// Calculate data range using window functions
5252
RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex();
5353
RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex();
54-
RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue);
54+
RexNode dataRange =
55+
PPLFuncImpTable.INSTANCE.resolve(
56+
context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue);
5557

5658
// Convert start/end parameters
5759
RexNode startValue = convertParameter(minSpanBin.getStart(), context);
@@ -60,8 +62,13 @@ public RexNode createExpression(
6062
// MINSPAN_BUCKET(field_value, min_span, data_range, max_value)
6163
RexNode minSpanParam = context.relBuilder.literal(minspan);
6264

63-
return context.rexBuilder.makeCall(
64-
PPLBuiltinOperators.MINSPAN_BUCKET, fieldExpr, minSpanParam, dataRange, maxValue);
65+
return PPLFuncImpTable.INSTANCE.resolve(
66+
context.rexBuilder,
67+
BuiltinFunctionName.MINSPAN_BUCKET,
68+
fieldExpr,
69+
minSpanParam,
70+
dataRange,
71+
maxValue);
6572
}
6673

6774
private RexNode convertParameter(

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88
import org.apache.calcite.rex.RexNode;
99
import org.opensearch.sql.calcite.CalcitePlanContext;
10-
import org.opensearch.sql.expression.function.PPLBuiltinOperators;
10+
import org.opensearch.sql.expression.function.BuiltinFunctionName;
11+
import org.opensearch.sql.expression.function.PPLFuncImpTable;
1112

1213
/** Helper for creating numeric span expressions. */
1314
public class NumericSpanHelper {
@@ -32,6 +33,7 @@ private RexNode createExpression(
3233
RexNode fieldExpr, RexNode spanValue, CalcitePlanContext context) {
3334

3435
// SPAN_BUCKET(field_value, span_value)
35-
return context.rexBuilder.makeCall(PPLBuiltinOperators.SPAN_BUCKET, fieldExpr, spanValue);
36+
return PPLFuncImpTable.INSTANCE.resolve(
37+
context.rexBuilder, BuiltinFunctionName.SPAN_BUCKET, fieldExpr, spanValue);
3638
}
3739
}

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
import org.opensearch.sql.calcite.utils.binning.BinFieldValidator;
1414
import org.opensearch.sql.calcite.utils.binning.BinHandler;
1515
import org.opensearch.sql.calcite.utils.binning.BinnableField;
16-
import org.opensearch.sql.expression.function.PPLBuiltinOperators;
16+
import org.opensearch.sql.expression.function.BuiltinFunctionName;
17+
import org.opensearch.sql.expression.function.PPLFuncImpTable;
1718

1819
/** Handler for range-based binning (start/end parameters only). */
1920
public class RangeBinHandler implements BinHandler {
@@ -43,8 +44,14 @@ public RexNode createExpression(
4344
RexNode endParam = convertParameter(rangeBin.getEnd(), context, visitor);
4445

4546
// Use RANGE_BUCKET with data bounds and user parameters
46-
return context.rexBuilder.makeCall(
47-
PPLBuiltinOperators.RANGE_BUCKET, fieldExpr, dataMin, dataMax, startParam, endParam);
47+
return PPLFuncImpTable.INSTANCE.resolve(
48+
context.rexBuilder,
49+
BuiltinFunctionName.RANGE_BUCKET,
50+
fieldExpr,
51+
dataMin,
52+
dataMax,
53+
startParam,
54+
endParam);
4855
}
4956

5057
private RexNode convertParameter(

0 commit comments

Comments
 (0)