Skip to content

Commit ada2e34

Browse files
authored
[BugFix]Fix multisearch UDT type loss through UNION (#5145, #5146, #5147) (#5154)
* multisearch fix Signed-off-by: Kai Huang <ahkcs@amazon.com> * fix IT Signed-off-by: Kai Huang <ahkcs@amazon.com> * add UT Signed-off-by: Kai Huang <ahkcs@amazon.com> * Address PR review: simplify leastRestrictive and shorten test assertions Refactor leastRestrictive to use a single for-loop instead of two stream passes. Simplify multisearch integration tests by using 1-day intervals and count() aggregation to reduce verbose 36-row assertions to 5 rows. Signed-off-by: Kai Huang <ahkcs@amazon.com> * Address review: use unconditional assertions in leastRestrictive tests Signed-off-by: Kai Huang <ahkcs@amazon.com> --------- Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent fe95703 commit ada2e34

File tree

4 files changed

+259
-3
lines changed

4 files changed

+259
-3
lines changed

core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import org.apache.calcite.sql.SqlCollation;
4646
import org.apache.calcite.sql.type.SqlTypeName;
4747
import org.apache.calcite.sql.type.SqlTypeUtil;
48+
import org.checkerframework.checker.nullness.qual.Nullable;
4849
import org.opensearch.sql.calcite.type.AbstractExprRelDataType;
4950
import org.opensearch.sql.calcite.type.ExprBinaryType;
5051
import org.opensearch.sql.calcite.type.ExprDateType;
@@ -377,6 +378,43 @@ public static boolean isNumericType(RelDataType fieldType) {
377378
return false;
378379
}
379380

381+
/**
382+
* Preserves OpenSearch UDT types through set operations (UNION, INTERSECT, EXCEPT). When all
383+
* input types share the same {@link AbstractExprRelDataType} with the same {@link
384+
* AbstractExprRelDataType#getUdt() UDT}, the result retains the UDT wrapper instead of being
385+
* downgraded to the underlying SQL type (e.g., VARCHAR). This is critical for operations like
386+
* multisearch that use UNION ALL, where downstream operators (bin, span) rely on the UDT type to
387+
* determine how to process the field. When inputs include non-UDT types or different UDTs, this
388+
* method falls back to {@link super#leastRestrictive}.
389+
*
390+
* @param types the list of input {@link RelDataType} instances to find the least restrictive
391+
* common type for
392+
* @return the least restrictive {@link RelDataType} preserving the UDT wrapper when all inputs
393+
* share the same UDT, or {@code null} if no common type exists (as determined by {@link
394+
* super#leastRestrictive})
395+
*/
396+
@Override
397+
public @Nullable RelDataType leastRestrictive(List<RelDataType> types) {
398+
if (types.size() > 1) {
399+
RelDataType first = types.get(0);
400+
if (first instanceof AbstractExprRelDataType<?> firstUdt) {
401+
boolean anyNullable = false;
402+
for (RelDataType t : types) {
403+
if (t instanceof AbstractExprRelDataType<?> udt && udt.getUdt() == firstUdt.getUdt()) {
404+
anyNullable |= t.isNullable();
405+
} else {
406+
return super.leastRestrictive(types);
407+
}
408+
}
409+
if (anyNullable && !first.isNullable()) {
410+
return firstUdt.createWithNullability(this, true);
411+
}
412+
return first;
413+
}
414+
}
415+
return super.leastRestrictive(types);
416+
}
417+
380418
/**
381419
* Checks if the RelDataType represents a time-based field (timestamp, date, or time). Supports
382420
* both standard SQL time types (including TIMESTAMP, TIMESTAMP_WITH_LOCAL_TIME_ZONE, DATE, TIME,
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.calcite.utils;
7+
8+
import static org.junit.jupiter.api.Assertions.assertEquals;
9+
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
10+
import static org.junit.jupiter.api.Assertions.assertNotNull;
11+
import static org.junit.jupiter.api.Assertions.assertTrue;
12+
import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY;
13+
14+
import java.util.List;
15+
import org.apache.calcite.rel.type.RelDataType;
16+
import org.apache.calcite.sql.type.SqlTypeName;
17+
import org.junit.jupiter.api.Test;
18+
import org.opensearch.sql.calcite.type.AbstractExprRelDataType;
19+
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT;
20+
21+
public class OpenSearchTypeFactoryTest {
22+
23+
@Test
24+
public void testLeastRestrictivePreservesUdtWhenAllInputsSameUdt() {
25+
RelDataType ts1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP);
26+
RelDataType ts2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP);
27+
28+
RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(ts1, ts2));
29+
30+
assertNotNull(result);
31+
assertInstanceOf(AbstractExprRelDataType.class, result);
32+
assertEquals(ExprUDT.EXPR_TIMESTAMP, ((AbstractExprRelDataType<?>) result).getUdt());
33+
}
34+
35+
@Test
36+
public void testLeastRestrictivePreservesUdtForDateType() {
37+
RelDataType d1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE);
38+
RelDataType d2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE);
39+
40+
RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(d1, d2));
41+
42+
assertNotNull(result);
43+
assertInstanceOf(AbstractExprRelDataType.class, result);
44+
assertEquals(ExprUDT.EXPR_DATE, ((AbstractExprRelDataType<?>) result).getUdt());
45+
}
46+
47+
@Test
48+
public void testLeastRestrictivePreservesUdtForThreeInputs() {
49+
RelDataType ts1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP);
50+
RelDataType ts2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP);
51+
RelDataType ts3 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP);
52+
53+
RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(ts1, ts2, ts3));
54+
55+
assertNotNull(result);
56+
assertInstanceOf(AbstractExprRelDataType.class, result);
57+
assertEquals(ExprUDT.EXPR_TIMESTAMP, ((AbstractExprRelDataType<?>) result).getUdt());
58+
}
59+
60+
@Test
61+
public void testLeastRestrictiveReturnsNullableWhenAnyInputIsNullable() {
62+
RelDataType nonNullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, false);
63+
RelDataType nullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, true);
64+
65+
RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(nonNullable, nullable));
66+
67+
assertNotNull(result);
68+
assertInstanceOf(AbstractExprRelDataType.class, result);
69+
assertEquals(ExprUDT.EXPR_TIMESTAMP, ((AbstractExprRelDataType<?>) result).getUdt());
70+
assertTrue(result.isNullable());
71+
}
72+
73+
@Test
74+
public void testLeastRestrictiveReturnsNullableWhenFirstNullableSecondNot() {
75+
RelDataType nullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, true);
76+
RelDataType nonNullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, false);
77+
78+
RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(nullable, nonNullable));
79+
80+
assertNotNull(result);
81+
assertInstanceOf(AbstractExprRelDataType.class, result);
82+
assertTrue(result.isNullable());
83+
}
84+
85+
@Test
86+
public void testLeastRestrictiveFallsBackForMixedUdtAndNonUdt() {
87+
RelDataType udt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP);
88+
RelDataType plain = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR);
89+
90+
RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(udt, plain));
91+
92+
// Falls back to super.leastRestrictive — both backed by VARCHAR, so result is non-null
93+
assertNotNull(result);
94+
assertEquals(SqlTypeName.VARCHAR, result.getSqlTypeName());
95+
}
96+
97+
@Test
98+
public void testLeastRestrictiveFallsBackForDifferentUdts() {
99+
RelDataType timestamp = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP);
100+
RelDataType date = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE);
101+
102+
RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(timestamp, date));
103+
104+
// Different UDTs — falls back to super.leastRestrictive, both backed by VARCHAR
105+
assertNotNull(result);
106+
assertEquals(SqlTypeName.VARCHAR, result.getSqlTypeName());
107+
}
108+
109+
@Test
110+
public void testLeastRestrictiveDelegatesToSuperForSingleType() {
111+
RelDataType single = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER);
112+
113+
RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(single));
114+
115+
assertNotNull(result);
116+
assertEquals(SqlTypeName.INTEGER, result.getSqlTypeName());
117+
}
118+
119+
@Test
120+
public void testLeastRestrictiveDelegatesToSuperForPlainTypes() {
121+
RelDataType int1 = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER);
122+
RelDataType int2 = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER);
123+
124+
RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(int1, int2));
125+
126+
assertNotNull(result);
127+
assertEquals(SqlTypeName.INTEGER, result.getSqlTypeName());
128+
}
129+
}

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,10 @@ public void testMultisearchWithTimestampInterleaving() throws IOException {
152152

153153
verifySchema(
154154
result,
155-
schema("@timestamp", null, "string"),
155+
schema("@timestamp", null, "timestamp"),
156156
schema("category", null, "string"),
157157
schema("value", null, "int"),
158-
schema("timestamp", null, "string"));
158+
schema("timestamp", null, "timestamp"));
159159

160160
verifyDataRows(
161161
result,
@@ -344,6 +344,95 @@ public void testMultisearchCrossIndexFieldSelection() throws IOException {
344344
rows(null, null, "Times Square", 1002));
345345
}
346346

347+
// ========================================================================
348+
// Reproduction tests for GitHub issues #5145, #5146, #5147
349+
// ========================================================================
350+
351+
/** Reproduce #5145: multisearch without further processing should return all rows. */
352+
@Test
353+
public void testMultisearchWithoutFurtherProcessing() throws IOException {
354+
JSONObject result =
355+
executeQuery(
356+
"| multisearch [search source=opensearch-sql_test_index_time_data | where category ="
357+
+ " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data | where category"
358+
+ " = \\\"B\\\"]");
359+
360+
verifySchema(
361+
result,
362+
schema("@timestamp", null, "timestamp"),
363+
schema("category", null, "string"),
364+
schema("value", null, "int"),
365+
schema("timestamp", null, "timestamp"));
366+
367+
// category A has 26 rows, category B has 25 rows = 51 total
368+
assertEquals(51, result.getInt("total"));
369+
}
370+
371+
/** Reproduce #5146: span expression used after multisearch should work. */
372+
@Test
373+
public void testMultisearchWithSpanExpression() throws IOException {
374+
JSONObject result =
375+
executeQuery(
376+
"| multisearch [search source=opensearch-sql_test_index_time_data | where category ="
377+
+ " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data2 | where category"
378+
+ " = \\\"E\\\"] | stats count() by span(@timestamp, 1d)");
379+
380+
verifySchema(
381+
result,
382+
schema("count()", null, "bigint"),
383+
schema("span(@timestamp,1d)", null, "timestamp"));
384+
385+
// Category A: 26 rows spanning Jul 28 – Aug 1; Category E: 10 rows spanning Jul 30 – Aug 1
386+
verifyDataRows(
387+
result,
388+
rows(7L, "2025-07-28 00:00:00"),
389+
rows(6L, "2025-07-29 00:00:00"),
390+
rows(8L, "2025-07-30 00:00:00"),
391+
rows(12L, "2025-07-31 00:00:00"),
392+
rows(3L, "2025-08-01 00:00:00"));
393+
}
394+
395+
/** Reproduce #5147: bin command after multisearch should produce non-null @timestamp. */
396+
@Test
397+
public void testMultisearchBinTimestamp() throws IOException {
398+
JSONObject result =
399+
executeQuery(
400+
"| multisearch [search source=opensearch-sql_test_index_time_data | where category ="
401+
+ " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data2 | where category"
402+
+ " = \\\"E\\\"] | fields @timestamp, category, value | bin @timestamp span=1d");
403+
404+
verifySchema(
405+
result,
406+
schema("category", null, "string"),
407+
schema("value", null, "int"),
408+
schema("@timestamp", null, "timestamp"));
409+
410+
// bin floors @timestamp to 1-day boundaries; 26 A-rows + 10 E-rows = 36 total
411+
assertEquals(36, result.getInt("total"));
412+
}
413+
414+
/** Reproduce #5147 full pattern: bin + stats after multisearch. */
415+
@Test
416+
public void testMultisearchBinAndStats() throws IOException {
417+
JSONObject result =
418+
executeQuery(
419+
"| multisearch [search source=opensearch-sql_test_index_time_data | where category ="
420+
+ " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data2 | where category"
421+
+ " = \\\"E\\\"] | bin @timestamp span=1d | stats count() by @timestamp");
422+
423+
verifySchema(
424+
result, schema("count()", null, "bigint"), schema("@timestamp", null, "timestamp"));
425+
426+
// Category A: 26 rows spanning Jul 28 – Aug 1; Category E: 10 rows spanning Jul 30 – Aug 1
427+
verifyDataRows(
428+
result,
429+
rows(7L, "2025-07-28 00:00:00"),
430+
rows(6L, "2025-07-29 00:00:00"),
431+
rows(8L, "2025-07-30 00:00:00"),
432+
rows(12L, "2025-07-31 00:00:00"),
433+
rows(3L, "2025-08-01 00:00:00"));
434+
}
435+
347436
@Test
348437
public void testMultisearchTypeConflictWithStats() {
349438
Exception exception =

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ public void testAppendSchemaMergeWithTimestampUDT() throws IOException {
253253
schema("account_number", "bigint"),
254254
schema("firstname", "string"),
255255
schema("age", "int"),
256-
schema("birthdate", "string"));
256+
schema("birthdate", "timestamp"));
257257
verifyDataRows(actual, rows(32, null, 34, "2018-08-11 00:00:00"));
258258
}
259259

0 commit comments

Comments
 (0)