Skip to content

Commit 5792e98

Browse files
committed
fix docs and null handling
Signed-off-by: Ritvi Bhatt <ribhatt@amazon.com>
1 parent df7873a commit 5792e98

File tree

5 files changed

+392
-156
lines changed

5 files changed

+392
-156
lines changed

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2483,6 +2483,11 @@ public RelNode visitCluster(
24832483
org.opensearch.sql.ast.tree.Cluster node, CalcitePlanContext context) {
24842484
visitChildren(node, context);
24852485

2486+
// Filter out rows where the source field is null before clustering.
2487+
RexNode sourceFieldRex = rexVisitor.analyze(node.getSourceField(), context);
2488+
context.relBuilder.filter(
2489+
context.rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, sourceFieldRex));
2490+
24862491
// Resolve clustering as a window function over all rows (unbounded frame).
24872492
// The window function buffers all rows, runs the greedy clustering algorithm,
24882493
// and returns an array of cluster labels (one per input row, in order).

core/src/main/java/org/opensearch/sql/calcite/udf/udaf/ClusterLabelAggFunction.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import java.util.ArrayList;
99
import java.util.List;
10-
import java.util.Objects;
1110
import org.opensearch.sql.calcite.udf.UserDefinedAggFunction;
1211
import org.opensearch.sql.common.antlr.SyntaxCheckException;
1312
import org.opensearch.sql.common.cluster.TextSimilarityClustering;
@@ -41,6 +40,12 @@ public Object result(Acc acc) {
4140

4241
@Override
4342
public Acc add(Acc acc, Object... values) {
43+
// Handle case where Calcite calls generic method with null field value
44+
if (values.length == 1) {
45+
String field = (values[0] != null) ? values[0].toString() : null;
46+
return add(acc, field);
47+
}
48+
4449
throw new SyntaxCheckException(
4550
"Unsupported function signature for cluster aggregate. Valid parameters include (field:"
4651
+ " required string), (t: optional double threshold 0.0-1.0, default 0.8), (match:"
@@ -58,17 +63,17 @@ public Acc add(
5863
String delims,
5964
int bufferLimit,
6065
int maxClusters) {
61-
if (Objects.isNull(field)) {
62-
return acc;
63-
}
66+
// Process all rows, even when field is null - convert null to empty string
67+
// This ensures the result array matches input row count
68+
String processedField = (field != null) ? field : "";
6469

6570
this.threshold = threshold;
6671
this.matchMode = matchMode;
6772
this.delims = delims;
6873
this.bufferLimit = bufferLimit;
6974
this.maxClusters = maxClusters;
7075

71-
acc.evaluate(field);
76+
acc.evaluate(processedField);
7277

7378
if (bufferLimit > 0 && acc.bufferSize() == bufferLimit) {
7479
acc.partialMerge(threshold, matchMode, delims, maxClusters);

0 commit comments

Comments
 (0)