Skip to content

Commit 601811b

Browse files
committed
fix, add tests
1 parent 0449408 commit 601811b

File tree

2 files changed

+144
-2
lines changed

2 files changed

+144
-2
lines changed

datafusion/datasource-parquet/src/metadata.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ impl<'a> DFParquetMetadata<'a> {
218218
/// extracting ordering, as it only fetches the metadata once.
219219
///
220220
/// # Returns
221-
/// A tuple of (Statistics, Option<LexOrdering>) where the ordering is `None` if:
221+
/// A tuple of (Statistics, `Option<LexOrdering>`) where the ordering is `None` if:
222222
/// - No row groups have sorting_columns
223223
/// - Row groups have inconsistent sorting_columns
224224
/// - Sorting columns cannot be mapped to the Arrow schema
@@ -578,7 +578,7 @@ pub(crate) fn sort_expr_to_sorting_column(
578578
})
579579
}
580580

581-
/// Convert a LexOrdering to Vec<SortingColumn> for Parquet.
581+
/// Convert a LexOrdering to `Vec<SortingColumn>` for Parquet.
582582
///
583583
/// Returns `Err` if any expression is not a simple column reference.
584584
pub(crate) fn lex_ordering_to_sorting_columns(

datafusion/sqllogictest/test_files/parquet.slt

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,3 +928,145 @@ DROP TABLE sorted_inferred;
928928

929929
statement ok
930930
DROP TABLE ordering_src;
931+
932+
###################
933+
# Test: DESC ordering inference from Parquet sorting_columns metadata
934+
###################
935+
936+
statement ok
937+
CREATE TABLE ordering_desc_src AS VALUES (5, 50), (4, 40), (3, 30), (2, 20), (1, 10);
938+
939+
query I
940+
COPY (SELECT column1 as a, column2 as b FROM ordering_desc_src ORDER BY a DESC NULLS LAST)
941+
TO 'test_files/scratch/parquet/ordering_desc.parquet'
942+
STORED AS PARQUET;
943+
----
944+
5
945+
946+
statement ok
947+
CREATE EXTERNAL TABLE sorted_desc (a INT, b INT)
948+
STORED AS PARQUET
949+
LOCATION 'test_files/scratch/parquet/ordering_desc.parquet';
950+
951+
# Verify DESC ordering is inferred - no SortExec needed
952+
query TT
953+
EXPLAIN SELECT a, b FROM sorted_desc ORDER BY a DESC NULLS LAST;
954+
----
955+
logical_plan
956+
01)Sort: sorted_desc.a DESC NULLS LAST
957+
02)--TableScan: sorted_desc projection=[a, b]
958+
physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/ordering_desc.parquet]]}, projection=[a, b], output_ordering=[a@0 DESC NULLS LAST], file_type=parquet
959+
960+
statement ok
961+
DROP TABLE sorted_desc;
962+
963+
statement ok
964+
DROP TABLE ordering_desc_src;
965+
966+
###################
967+
# Test: Reverse ordering - file is DESC but query wants ASC
968+
# The scan is planned with reverse_row_groups=true (see the expected plan below), but SortExec is still needed
969+
###################
970+
971+
statement ok
972+
CREATE TABLE ordering_reverse_src AS VALUES (5, 50), (4, 40), (3, 30), (2, 20), (1, 10);
973+
974+
query I
975+
COPY (SELECT column1 as a, column2 as b FROM ordering_reverse_src ORDER BY a DESC NULLS LAST)
976+
TO 'test_files/scratch/parquet/ordering_reverse.parquet'
977+
STORED AS PARQUET;
978+
----
979+
5
980+
981+
statement ok
982+
CREATE EXTERNAL TABLE sorted_reverse (a INT, b INT)
983+
STORED AS PARQUET
984+
LOCATION 'test_files/scratch/parquet/ordering_reverse.parquet';
985+
986+
# Query wants ASC but file is DESC - the scan reports reverse_row_groups=true, and SortExec is preserved
987+
query TT
988+
EXPLAIN SELECT a, b FROM sorted_reverse ORDER BY a ASC NULLS FIRST;
989+
----
990+
logical_plan
991+
01)Sort: sorted_reverse.a ASC NULLS FIRST
992+
02)--TableScan: sorted_reverse projection=[a, b]
993+
physical_plan
994+
01)SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
995+
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/ordering_reverse.parquet]]}, projection=[a, b], file_type=parquet, reverse_row_groups=true
996+
997+
statement ok
998+
DROP TABLE sorted_reverse;
999+
1000+
statement ok
1001+
DROP TABLE ordering_reverse_src;
1002+
1003+
###################
1004+
# Test: Multi-column ordering inference from Parquet sorting_columns metadata
1005+
###################
1006+
1007+
statement ok
1008+
CREATE TABLE ordering_multi_src AS VALUES (1, 10, 100), (1, 20, 200), (2, 10, 300), (2, 20, 400);
1009+
1010+
query I
1011+
COPY (SELECT column1 as a, column2 as b, column3 as c FROM ordering_multi_src ORDER BY a ASC NULLS FIRST, b ASC NULLS FIRST)
1012+
TO 'test_files/scratch/parquet/ordering_multi.parquet'
1013+
STORED AS PARQUET;
1014+
----
1015+
4
1016+
1017+
statement ok
1018+
CREATE EXTERNAL TABLE sorted_multi (a INT, b INT, c INT)
1019+
STORED AS PARQUET
1020+
LOCATION 'test_files/scratch/parquet/ordering_multi.parquet';
1021+
1022+
# Verify multi-column ordering is inferred - no SortExec needed
1023+
query TT
1024+
EXPLAIN SELECT a, b, c FROM sorted_multi ORDER BY a ASC NULLS FIRST, b ASC NULLS FIRST;
1025+
----
1026+
logical_plan
1027+
01)Sort: sorted_multi.a ASC NULLS FIRST, sorted_multi.b ASC NULLS FIRST
1028+
02)--TableScan: sorted_multi projection=[a, b, c]
1029+
physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/ordering_multi.parquet]]}, projection=[a, b, c], output_ordering=[a@0 ASC, b@1 ASC], file_type=parquet
1030+
1031+
statement ok
1032+
DROP TABLE sorted_multi;
1033+
1034+
statement ok
1035+
DROP TABLE ordering_multi_src;
1036+
1037+
###################
1038+
# Test: No ordering should be inferred for files without ordering metadata
1039+
###################
1040+
1041+
statement ok
1042+
CREATE TABLE no_ordering_src AS VALUES (3, 30), (1, 10), (2, 20);
1043+
1044+
# Write Parquet WITHOUT ORDER BY - no sorting_columns metadata
1045+
query I
1046+
COPY (SELECT column1 as a, column2 as b FROM no_ordering_src)
1047+
TO 'test_files/scratch/parquet/no_ordering.parquet'
1048+
STORED AS PARQUET;
1049+
----
1050+
3
1051+
1052+
statement ok
1053+
CREATE EXTERNAL TABLE no_ordering (a INT, b INT)
1054+
STORED AS PARQUET
1055+
LOCATION 'test_files/scratch/parquet/no_ordering.parquet';
1056+
1057+
# Verify NO ordering is inferred - SortExec IS required
1058+
query TT
1059+
EXPLAIN SELECT a, b FROM no_ordering ORDER BY a ASC NULLS FIRST;
1060+
----
1061+
logical_plan
1062+
01)Sort: no_ordering.a ASC NULLS FIRST
1063+
02)--TableScan: no_ordering projection=[a, b]
1064+
physical_plan
1065+
01)SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
1066+
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/no_ordering.parquet]]}, projection=[a, b], file_type=parquet
1067+
1068+
statement ok
1069+
DROP TABLE no_ordering;
1070+
1071+
statement ok
1072+
DROP TABLE no_ordering_src;

0 commit comments

Comments
 (0)