-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[CALCITE-4813] ANY_VALUE assumes that arguments should be comparable #4699
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -154,4 +154,81 @@ select * from table1 where j not in (select i from table2) or j = 3; | |
|
|
||
| !ok | ||
|
|
||
| # [CALCITE-4813] ANY_VALUE assumes that arguments should be comparable | ||
| select any_value(r) over(), s from(select array[f, s] r, s from (select 1 as f, 2 as s) t) t; | ||
| +--------+---+ | ||
| | EXPR$0 | S | | ||
| +--------+---+ | ||
| | [1, 2] | 2 | | ||
| +--------+---+ | ||
| (1 row) | ||
|
|
||
| !ok | ||
|
|
||
| select any_value(r) over(), s from(select map[f, s] r, s from (select 1 as f, 2 as s) t) t; | ||
| +--------+---+ | ||
| | EXPR$0 | S | | ||
| +--------+---+ | ||
| | {1=2} | 2 | | ||
| +--------+---+ | ||
| (1 row) | ||
|
|
||
| !ok | ||
|
|
||
| select any_value(r) over(), s from(select row(f, s) r, s from (select 1 as f, 2 as s) t) t; | ||
| +--------+---+ | ||
| | EXPR$0 | S | | ||
| +--------+---+ | ||
| | {1, 2} | 2 | | ||
| +--------+---+ | ||
| (1 row) | ||
|
|
||
| !ok | ||
|
|
||
|
|
||
| CREATE TABLE complex_t ( | ||
| a INTEGER ARRAY, | ||
| m MAP<VARCHAR, DOUBLE>, | ||
| r ROW(r1 VARCHAR, r2 INTEGER, r3 VARCHAR) | ||
| ); | ||
| (0 rows modified) | ||
|
|
||
| !update | ||
|
|
||
| INSERT INTO complex_t VALUES ( | ||
| ARRAY[1, 2, 3, 4, 5], | ||
| MAP['math', 95.5, 'science', 88.0, 'english', 92.3], | ||
| ROW('Alice Johnson', 30, 'a') | ||
| ), | ||
| ( | ||
| ARRAY[10, 20, 30, 40, 50, 60], | ||
| MAP['physics', 96.2, 'chemistry', 91.8, 'biology', 89.5, 'computer_science', 98.7], | ||
| ROW('Bob Smith', 25, 'b') | ||
| ), | ||
| ( | ||
| ARRAY[100, 200, 300], | ||
| MAP['leadership', 88.9, 'teamwork', 94.2, 'communication', 91.5, 'problem_solving', 97.8], | ||
| ROW('Charlie Chen', 35, 'c') | ||
| ); | ||
| (3 rows modified) | ||
|
|
||
| !update | ||
|
|
||
| select | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also has verified in duckdb and works well. |
||
| max(a) as max_a, | ||
| max(m) as max_m, | ||
| max(r) as max_r, | ||
| min(a) as min_a, | ||
| min(m) as min_m, | ||
| min(r) as min_r | ||
| from complex_t; | ||
| +-----------------+----------------------------------------------------------------------------------------------+-----------------------+-----------------+------------------------------------------------------------------------------------------+------------------------+ | ||
| | MAX_A | MAX_M | MAX_R | MIN_A | MIN_M | MIN_R | | ||
| +-----------------+----------------------------------------------------------------------------------------------+-----------------------+-----------------+------------------------------------------------------------------------------------------+------------------------+ | ||
| | [100, 200, 300] | {physics =96.2, chemistry =91.8, biology =89.5, computer_science=98.7} | {Charlie Chen, 35, c} | [1, 2, 3, 4, 5] | {leadership =88.9, teamwork =94.2, communication =91.5, problem_solving=97.8} | {Alice Johnson, 30, a} | | ||
| +-----------------+----------------------------------------------------------------------------------------------+-----------------------+-----------------+------------------------------------------------------------------------------------------+------------------------+ | ||
| (1 row) | ||
|
|
||
| !ok | ||
|
|
||
| # End blank.iq | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,8 +16,6 @@ | |
| */ | ||
| package org.apache.calcite.linq4j.function; | ||
|
|
||
| import com.google.common.collect.Lists; | ||
|
|
||
| import org.checkerframework.checker.nullness.qual.Nullable; | ||
| import org.checkerframework.framework.qual.DefaultQualifier; | ||
| import org.checkerframework.framework.qual.TypeUseLocation; | ||
|
|
@@ -31,6 +29,7 @@ | |
| import java.util.Collections; | ||
| import java.util.Comparator; | ||
| import java.util.HashMap; | ||
| import java.util.Iterator; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Objects; | ||
|
|
@@ -573,9 +572,7 @@ private static class NullsLastComparator | |
| } else if (o1 instanceof List && o2 instanceof List) { | ||
| return compareLists((List<?>) o1, (List<?>) o2); | ||
| } else if (o1 instanceof Object[] && o2 instanceof Object[]) { | ||
| final List<Object> list1 = Lists.newArrayList((Object[]) o1); | ||
| final List<Object> list2 = Lists.newArrayList((Object[]) o2); | ||
| return compareLists(list1, list2); | ||
| return compareObjectArrays((Object[]) o1, (Object[]) o2); | ||
| } else { | ||
| throw new IllegalArgumentException(); | ||
| } | ||
|
|
@@ -601,16 +598,17 @@ private static class NullsFirstReverseComparator | |
| } else if (o1 instanceof List && o2 instanceof List) { | ||
| return -compareLists((List<?>) o1, (List<?>) o2); | ||
| } else if (o1 instanceof Object[] && o2 instanceof Object[]) { | ||
| final List<Object> list1 = Lists.newArrayList((Object[]) o1); | ||
| final List<Object> list2 = Lists.newArrayList((Object[]) o2); | ||
| return -compareLists(list1, list2); | ||
| return -compareObjectArrays((Object[]) o1, (Object[]) o2); | ||
| } else { | ||
| throw new IllegalArgumentException(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| public static int compareLists(List<?> b0, List<?> b1) { | ||
| if (b0 == b1) { | ||
| return 0; | ||
| } | ||
| if (b0.isEmpty() && b1.isEmpty()) { | ||
| return 0; | ||
| } | ||
|
|
@@ -623,10 +621,46 @@ public static int compareLists(List<?> b0, List<?> b1) { | |
| return Integer.compare(b0.size(), b1.size()); | ||
| } | ||
|
|
||
| /** | ||
| * Compares two maps. | ||
| * | ||
| * <p>Since maps in Calcite are implemented using {@link java.util.LinkedHashMap}, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not crazy about this. Is the order of insertion of elements in a map deterministic? Probably not in general, e.g., if you use an aggregate to build the map. In our implementation the maps are actually sorted by key. What do other system say about this?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My understanding is that a LinkedHashMap can maintain the insertion order, allowing for comparisons based on that sequence. Initially, I sorted the data before comparing, but I found that this approach was inconsistent with DuckDB's behavior. The main issue is that I couldn’t find another database to compare results with. Honestly, I don’t have a strong preference for whether sorting should be done before comparison.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, linkedhashmap retains insertion order, but that order is not defined in general for SQL, e.g., You can leave it as it is, but the results are non-deterministic as defined.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Semantically, SQL is indeed nondeterministic, but in the current implementation of Calcite, it is deterministic. So I understand that executing (P.S.: There is currently an issue, though unrelated to our discussion, that I will also need to fix later.)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there are two separate issues: the order of insertion in a MAP is not defined (depending on how the map is created), and even if the order of insertion is defined, the order of elements in a MAP is not defined for comparisons. For the tested implementation both may be deterministic, but we have to be careful to explain that these orders may not be the same as in other implementations. Maybe that's all that's required: add a comment that for linkedhashmap the order is deterministic.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have already fixed this method, but I haven’t sorted it, and I have also improved the comments. What do you think? |
||
| * which guarantees insertion order, this method follows DuckDB's behavior by | ||
| * comparing entries in order. For each entry, it first compares the key and | ||
| * then the value. | ||
| */ | ||
| public static int compareMaps(Map<?, ?> b0, Map<?, ?> b1) { | ||
| if (b0 == b1) { | ||
| return 0; | ||
| } | ||
| final Iterator<? extends Map.Entry<?, ?>> i0 = b0.entrySet().iterator(); | ||
| final Iterator<? extends Map.Entry<?, ?>> i1 = b1.entrySet().iterator(); | ||
| while (i0.hasNext() && i1.hasNext()) { | ||
| Map.Entry<?, ?> e0 = i0.next(); | ||
| Map.Entry<?, ?> e1 = i1.next(); | ||
| int c = compareListItems(e0.getKey(), e1.getKey()); | ||
| if (c != 0) { | ||
| return c; | ||
| } | ||
| c = compareListItems(e0.getValue(), e1.getValue()); | ||
| if (c != 0) { | ||
| return c; | ||
| } | ||
| } | ||
| if (i0.hasNext()) { | ||
| return 1; | ||
| } | ||
| if (i1.hasNext()) { | ||
| return -1; | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| private static int compareListItems(@Nullable Object item0, @Nullable Object item1) { | ||
| if (item0 == null && item1 == null) { | ||
| if (item0 == item1) { | ||
| return 0; | ||
| } else if (item0 == null) { | ||
| } | ||
| if (item0 == null) { | ||
| return 1; | ||
| } else if (item1 == null) { | ||
| return -1; | ||
|
|
@@ -635,21 +669,32 @@ private static int compareListItems(@Nullable Object item0, @Nullable Object ite | |
| final List<?> b0ItemList = (List<?>) item0; | ||
| final List<?> b1ItemList = (List<?>) item1; | ||
| return compareLists(b0ItemList, b1ItemList); | ||
| } else if (item0 instanceof Map && item1 instanceof Map) { | ||
| return compareMaps((Map) item0, (Map) item1); | ||
| } else if (item0 instanceof Object[] && item1 instanceof Object[]) { | ||
| return compareObjectArrays((Object[]) item0, (Object[]) item1); | ||
| } else if (item0.getClass().equals(item1.getClass()) && item0 instanceof Comparable<?>) { | ||
| final Comparable b0Comparable = (Comparable) item0; | ||
| final Comparable b1Comparable = (Comparable) item1; | ||
| return b0Comparable.compareTo(b1Comparable); | ||
| } else { | ||
| throw new IllegalArgumentException("Item types do not match"); | ||
| throw new IllegalArgumentException("Item types do not match: " | ||
| + item0.getClass() + " vs " + item1.getClass()); | ||
| } | ||
| } | ||
|
|
||
| public static int compareObjectArrays(Object[] b0, Object[] b1) { | ||
| final List<Object> b0List = Lists.newArrayList(b0); | ||
| final List<Object> b1List = Lists.newArrayList(b1); | ||
| return compareLists(b0List, b1List); | ||
| public static int compareObjectArrays(@Nullable Object @Nullable [] b0, | ||
| @Nullable Object @Nullable [] b1) { | ||
| if (b0 == b1) { | ||
| return 0; | ||
| } | ||
| if (b0 == null) { | ||
| return 1; | ||
| } | ||
| if (b1 == null) { | ||
| return -1; | ||
| } | ||
| return compareLists(Arrays.asList(b0), Arrays.asList(b1)); | ||
| } | ||
|
|
||
| /** Nulls last reverse comparator. */ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because of the limitations of pgsql, it cannot support
any_value, so it was verified in duckdb and works well.