|
| 1 | +# Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +# or more contributor license agreements. See the NOTICE file |
| 3 | +# distributed with this work for additional information |
| 4 | +# regarding copyright ownership. The ASF licenses this file |
| 5 | +# to you under the Apache License, Version 2.0 (the |
| 6 | +# "License"); you may not use this file except in compliance |
| 7 | +# with the License. You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, |
| 12 | +# software distributed under the License is distributed on an |
| 13 | +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +# KIND, either express or implied. See the License for the |
| 15 | +# specific language governing permissions and limitations |
| 16 | +# under the License. |
| 17 | + |
| 18 | +# Test timestamp predicate pushdown behavior |
| 19 | +# |
| 20 | +# A column declared as TIMESTAMP via CREATE TABLE in DataFusion becomes a TimestampNs column |
| 21 | +# (nanosecond precision) in Iceberg, because DataFusion's default TIMESTAMP type has nanosecond precision. |
| 22 | +# |
| 23 | +# Nanosecond timestamp predicates are converted with Datum::timestamp_nanos() so that |
| 24 | +# full precision is preserved. This allows the predicates to be pushed down to Iceberg correctly. |
| 25 | + |
| 26 | +# Create test table with timestamp column |
| 27 | +statement ok |
| 28 | +CREATE TABLE default.default.test_timestamp_table (id INT NOT NULL, ts TIMESTAMP) |
| 29 | + |
| 30 | +# Insert five rows of test data with distinct timestamps. |
| 31 | +# CAST converts each string literal into a proper timestamp value. |
| 32 | +query I |
| 33 | +INSERT INTO default.default.test_timestamp_table |
| 34 | +VALUES |
| 35 | + (1, CAST('2023-01-01 00:00:00' AS TIMESTAMP)), |
| 36 | + (2, CAST('2023-01-05 12:30:00' AS TIMESTAMP)), |
| 37 | + (3, CAST('2023-01-10 15:45:30' AS TIMESTAMP)), |
| 38 | + (4, CAST('2023-01-15 09:00:00' AS TIMESTAMP)), |
| 39 | + (5, CAST('2023-01-20 18:20:10' AS TIMESTAMP)) |
| 40 | +---- |
| 41 | +5 |
| 42 | + |
| 43 | +# Verify timestamp equality predicate IS pushed down |
| 44 | +query TT |
| 45 | +EXPLAIN SELECT * FROM default.default.test_timestamp_table WHERE ts = CAST('2023-01-05 12:30:00' AS TIMESTAMP) |
| 46 | +---- |
| 47 | +logical_plan |
| 48 | +01)Filter: default.default.test_timestamp_table.ts = TimestampNanosecond(1672921800000000000, None) |
| 49 | +02)--TableScan: default.default.test_timestamp_table projection=[id, ts], partial_filters=[default.default.test_timestamp_table.ts = TimestampNanosecond(1672921800000000000, None)] |
| 50 | +physical_plan |
| 51 | +01)CoalesceBatchesExec: target_batch_size=8192 |
| 52 | +02)--FilterExec: ts@1 = 1672921800000000000 |
| 53 | +03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| 54 | +04)------CooperativeExec |
| 55 | +05)--------IcebergTableScan projection:[id,ts] predicate:[ts = 2023-01-05 12:30:00] |
| 56 | + |
| 57 | +# Verify timestamp equality filtering works |
| 58 | +query I? |
| 59 | +SELECT * FROM default.default.test_timestamp_table WHERE ts = CAST('2023-01-05 12:30:00' AS TIMESTAMP) |
| 60 | +---- |
| 61 | +2 2023-01-05T12:30:00 |
| 62 | + |
| 63 | +# Verify timestamp greater than predicate IS pushed down |
| 64 | +query TT |
| 65 | +EXPLAIN SELECT * FROM default.default.test_timestamp_table WHERE ts > CAST('2023-01-10 00:00:00' AS TIMESTAMP) |
| 66 | +---- |
| 67 | +logical_plan |
| 68 | +01)Filter: default.default.test_timestamp_table.ts > TimestampNanosecond(1673308800000000000, None) |
| 69 | +02)--TableScan: default.default.test_timestamp_table projection=[id, ts], partial_filters=[default.default.test_timestamp_table.ts > TimestampNanosecond(1673308800000000000, None)] |
| 70 | +physical_plan |
| 71 | +01)CoalesceBatchesExec: target_batch_size=8192 |
| 72 | +02)--FilterExec: ts@1 > 1673308800000000000 |
| 73 | +03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| 74 | +04)------CooperativeExec |
| 75 | +05)--------IcebergTableScan projection:[id,ts] predicate:[ts > 2023-01-10 00:00:00] |
| 76 | + |
| 77 | +# Verify timestamp greater than filtering |
| 78 | +query I? rowsort |
| 79 | +SELECT * FROM default.default.test_timestamp_table WHERE ts > CAST('2023-01-10 00:00:00' AS TIMESTAMP) |
| 80 | +---- |
| 81 | +3 2023-01-10T15:45:30 |
| 82 | +4 2023-01-15T09:00:00 |
| 83 | +5 2023-01-20T18:20:10 |
| 84 | + |
| 85 | +# Verify timestamp less-than-or-equal filtering; the row whose ts equals the bound is included |
| 86 | +query I? rowsort |
| 87 | +SELECT * FROM default.default.test_timestamp_table WHERE ts <= CAST('2023-01-05 12:30:00' AS TIMESTAMP) |
| 88 | +---- |
| 89 | +1 2023-01-01T00:00:00 |
| 90 | +2 2023-01-05T12:30:00 |
| 91 | + |
| 92 | +# Verify timestamp range predicate (AND of two comparisons) IS pushed down |
| 93 | +query TT |
| 94 | +EXPLAIN SELECT * FROM default.default.test_timestamp_table |
| 95 | +WHERE ts >= CAST('2023-01-05 00:00:00' AS TIMESTAMP) |
| 96 | + AND ts <= CAST('2023-01-15 23:59:59' AS TIMESTAMP) |
| 97 | +---- |
| 98 | +logical_plan |
| 99 | +01)Filter: default.default.test_timestamp_table.ts >= TimestampNanosecond(1672876800000000000, None) AND default.default.test_timestamp_table.ts <= TimestampNanosecond(1673827199000000000, None) |
| 100 | +02)--TableScan: default.default.test_timestamp_table projection=[id, ts], partial_filters=[default.default.test_timestamp_table.ts >= TimestampNanosecond(1672876800000000000, None), default.default.test_timestamp_table.ts <= TimestampNanosecond(1673827199000000000, None)] |
| 101 | +physical_plan |
| 102 | +01)CoalesceBatchesExec: target_batch_size=8192 |
| 103 | +02)--FilterExec: ts@1 >= 1672876800000000000 AND ts@1 <= 1673827199000000000 |
| 104 | +03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| 105 | +04)------CooperativeExec |
| 106 | +05)--------IcebergTableScan projection:[id,ts] predicate:[(ts >= 2023-01-05 00:00:00) AND (ts <= 2023-01-15 23:59:59)] |
| 107 | + |
| 108 | +# Test timestamp range predicate filtering |
| 109 | +query I? rowsort |
| 110 | +SELECT * FROM default.default.test_timestamp_table |
| 111 | +WHERE ts >= CAST('2023-01-05 00:00:00' AS TIMESTAMP) |
| 112 | + AND ts <= CAST('2023-01-15 23:59:59' AS TIMESTAMP) |
| 113 | +---- |
| 114 | +2 2023-01-05T12:30:00 |
| 115 | +3 2023-01-10T15:45:30 |
| 116 | +4 2023-01-15T09:00:00 |
| 117 | + |
| 118 | +# Verify a timestamp predicate combined with a non-timestamp predicate (id < 5) |
| 119 | +query I? rowsort |
| 120 | +SELECT * FROM default.default.test_timestamp_table |
| 121 | +WHERE ts >= CAST('2023-01-10 00:00:00' AS TIMESTAMP) AND id < 5 |
| 122 | +---- |
| 123 | +3 2023-01-10T15:45:30 |
| 124 | +4 2023-01-15T09:00:00 |
| 125 | + |
| 126 | +# Verify timestamp not-equal (!=) filtering; only the row with the matching ts is excluded |
| 127 | +query I? rowsort |
| 128 | +SELECT * FROM default.default.test_timestamp_table WHERE ts != CAST('2023-01-05 12:30:00' AS TIMESTAMP) |
| 129 | +---- |
| 130 | +1 2023-01-01T00:00:00 |
| 131 | +3 2023-01-10T15:45:30 |
| 132 | +4 2023-01-15T09:00:00 |
| 133 | +5 2023-01-20T18:20:10 |
| 134 | + |
| 135 | +# Test timestamp less than filtering |
| 136 | +query I? rowsort |
| 137 | +SELECT * FROM default.default.test_timestamp_table WHERE ts < CAST('2023-01-05 00:00:00' AS TIMESTAMP) |
| 138 | +---- |
| 139 | +1 2023-01-01T00:00:00 |
| 140 | + |
| 141 | +# Clean up: Drop the test table |
| 142 | +statement ok |
| 143 | +DROP TABLE default.default.test_timestamp_table |
| 144 | + |
| 145 | +# ============================================================================ |
| 146 | +# Test timestamp predicate pushdown with different precisions |
| 147 | +# ============================================================================ |
| 148 | + |
| 149 | +# Test with TIMESTAMP(6) - microsecond precision |
| 150 | +statement ok |
| 151 | +CREATE TABLE default.default.test_timestamp_micros (id INT NOT NULL, ts TIMESTAMP(6)) |
| 152 | + |
| 153 | +query I |
| 154 | +INSERT INTO default.default.test_timestamp_micros |
| 155 | +VALUES |
| 156 | + (1, CAST('2023-01-01 00:00:00' AS TIMESTAMP)), |
| 157 | + (2, CAST('2023-01-05 12:30:00' AS TIMESTAMP)) |
| 158 | +---- |
| 159 | +2 |
| 160 | + |
| 161 | +# Verify the microsecond timestamp predicate is pushed down; the TIMESTAMP literal shows up as TimestampMicrosecond in the plan |
| 162 | +query TT |
| 163 | +EXPLAIN SELECT * FROM default.default.test_timestamp_micros WHERE ts > CAST('2023-01-01 00:00:00' AS TIMESTAMP) |
| 164 | +---- |
| 165 | +logical_plan |
| 166 | +01)Filter: default.default.test_timestamp_micros.ts > TimestampMicrosecond(1672531200000000, None) |
| 167 | +02)--TableScan: default.default.test_timestamp_micros projection=[id, ts], partial_filters=[default.default.test_timestamp_micros.ts > TimestampMicrosecond(1672531200000000, None)] |
| 168 | +physical_plan |
| 169 | +01)CoalesceBatchesExec: target_batch_size=8192 |
| 170 | +02)--FilterExec: ts@1 > 1672531200000000 |
| 171 | +03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| 172 | +04)------CooperativeExec |
| 173 | +05)--------IcebergTableScan projection:[id,ts] predicate:[ts > 2023-01-01 00:00:00] |
| 174 | + |
| 175 | +query I? |
| 176 | +SELECT * FROM default.default.test_timestamp_micros WHERE ts > CAST('2023-01-01 00:00:00' AS TIMESTAMP) |
| 177 | +---- |
| 178 | +2 2023-01-05T12:30:00 |
| 179 | + |
| 180 | +statement ok |
| 181 | +DROP TABLE default.default.test_timestamp_micros |
| 182 | + |
| 183 | +# Test with TIMESTAMP(3) - millisecond precision |
| 184 | +# CREATE TABLE is expected to fail because Iceberg does not support millisecond-precision timestamps |
| 185 | +statement error DataFusion error: External error: DataInvalid => Unsupported Arrow data type: Timestamp\(ms\) |
| 186 | +CREATE TABLE default.default.test_timestamp_millis (id INT NOT NULL, ts TIMESTAMP(3)) |
| 187 | + |
| 188 | +# Test with TIMESTAMP(0) - second precision |
| 189 | +# CREATE TABLE is expected to fail because Iceberg does not support second-precision timestamps |
| 190 | +statement error DataFusion error: External error: DataInvalid => Unsupported Arrow data type: Timestamp\(s\) |
| 191 | +CREATE TABLE default.default.test_timestamp_seconds (id INT NOT NULL, ts TIMESTAMP(0)) |
| 192 | + |
0 commit comments