@@ -75,10 +75,6 @@ message LogicalExprNodeCollection {
7575 repeated LogicalExprNode logical_expr_nodes = 1 ;
7676}
7777
78- message SortExprNodeCollection {
79- repeated SortExprNode sort_expr_nodes = 1 ;
80- }
81-
8278message ListingTableScanNode {
8379 reserved 1 ; // was string table_name
8480 TableReference table_name = 14 ;
@@ -94,9 +90,8 @@ message ListingTableScanNode {
9490 datafusion_common.CsvFormat csv = 10 ;
9591 datafusion_common.ParquetFormat parquet = 11 ;
9692 datafusion_common.AvroFormat avro = 12 ;
97- datafusion_common.NdJsonFormat json = 15 ;
9893 }
99- repeated SortExprNodeCollection file_sort_order = 13 ;
94+ repeated LogicalExprNodeCollection file_sort_order = 13 ;
10095}
10196
10297message ViewTableScanNode {
@@ -133,7 +128,7 @@ message SelectionNode {
133128
134129message SortNode {
135130 LogicalPlanNode input = 1 ;
136- repeated SortExprNode expr = 2 ;
131+ repeated LogicalExprNode expr = 2 ;
137132 // Maximum number of highest/lowest rows to fetch; negative means no limit
138133 int64 fetch = 3 ;
139134}
@@ -164,12 +159,12 @@ message CreateExternalTableNode {
164159 repeated string table_partition_cols = 5 ;
165160 bool if_not_exists = 6 ;
166161 string definition = 7 ;
167- repeated SortExprNodeCollection order_exprs = 10 ;
162+ repeated LogicalExprNodeCollection order_exprs = 10 ;
168163 bool unbounded = 11 ;
169164 map <string , string > options = 8 ;
170165 datafusion_common.Constraints constraints = 12 ;
171166 map <string , LogicalExprNode > column_defaults = 13 ;
172- }
167+ }
173168
174169message PrepareNode {
175170 string name = 1 ;
@@ -249,51 +244,35 @@ message DistinctNode {
249244message DistinctOnNode {
250245 repeated LogicalExprNode on_expr = 1 ;
251246 repeated LogicalExprNode select_expr = 2 ;
252- repeated SortExprNode sort_expr = 3 ;
247+ repeated LogicalExprNode sort_expr = 3 ;
253248 LogicalPlanNode input = 4 ;
254249}
255250
256251message CopyToNode {
257- LogicalPlanNode input = 1 ;
258- string output_url = 2 ;
259- bytes file_type = 3 ;
260- repeated string partition_by = 7 ;
252+ LogicalPlanNode input = 1 ;
253+ string output_url = 2 ;
254+ oneof format_options {
255+ datafusion_common.CsvOptions csv = 8 ;
256+ datafusion_common.JsonOptions json = 9 ;
257+ datafusion_common.TableParquetOptions parquet = 10 ;
258+ datafusion_common.AvroOptions avro = 11 ;
259+ datafusion_common.ArrowOptions arrow = 12 ;
260+ }
261+ repeated string partition_by = 7 ;
261262}
262263
263264message UnnestNode {
264- LogicalPlanNode input = 1 ;
265- repeated ColumnUnnestExec exec_columns = 2 ;
266- repeated ColumnUnnestListItem list_type_columns = 3 ;
267- repeated uint64 struct_type_columns = 4 ;
268- repeated uint64 dependency_indices = 5 ;
269- datafusion_common.DfSchema schema = 6 ;
270- UnnestOptions options = 7 ;
271- }
272- message ColumnUnnestListItem {
273- uint32 input_index = 1 ;
274- ColumnUnnestListRecursion recursion = 2 ;
275- }
276-
277- message ColumnUnnestListRecursions {
278- repeated ColumnUnnestListRecursion recursions = 2 ;
279- }
280-
281- message ColumnUnnestListRecursion {
282- datafusion_common.Column output_column = 1 ;
283- uint32 depth = 2 ;
284- }
285-
286- message ColumnUnnestExec {
287- datafusion_common.Column column = 1 ;
288- oneof UnnestType {
289- ColumnUnnestListRecursions list = 2 ;
290- datafusion_common.EmptyMessage struct = 3 ;
291- datafusion_common.EmptyMessage inferred = 4 ;
292- }
265+ LogicalPlanNode input = 1 ;
266+ repeated datafusion_common.Column exec_columns = 2 ;
267+ repeated uint64 list_type_columns = 3 ;
268+ repeated uint64 struct_type_columns = 4 ;
269+ repeated uint64 dependency_indices = 5 ;
270+ datafusion_common.DfSchema schema = 6 ;
271+ UnnestOptions options = 7 ;
293272}
294273
295274message UnnestOptions {
296- bool preserve_nulls = 1 ;
275+ bool preserve_nulls = 1 ;
297276}
298277
299278message UnionNode {
@@ -337,6 +316,8 @@ message LogicalExprNode {
337316 // binary expressions
338317 BinaryExprNode binary_expr = 4 ;
339318
319+ // aggregate expressions
320+ AggregateExprNode aggregate_expr = 5 ;
340321
341322 // null checks
342323 IsNull is_null_expr = 6 ;
@@ -346,6 +327,7 @@ message LogicalExprNode {
346327 BetweenNode between = 9 ;
347328 CaseNode case_ = 10 ;
348329 CastNode cast = 11 ;
330+ SortExprNode sort = 12 ;
349331 NegativeNode negative = 13 ;
350332 InListNode in_list = 14 ;
351333 Wildcard wildcard = 15 ;
@@ -387,7 +369,7 @@ message LogicalExprNode {
387369}
388370
389371message Wildcard {
390- TableReference qualifier = 1 ;
372+ string qualifier = 1 ;
391373}
392374
393375message PlaceholderNode {
@@ -489,14 +471,57 @@ message InListNode {
489471 bool negated = 3 ;
490472}
491473
474+ enum AggregateFunction {
475+ MIN = 0 ;
476+ MAX = 1 ;
477+ SUM = 2 ;
478+ AVG = 3 ;
479+ COUNT = 4 ;
480+ APPROX_DISTINCT = 5 ;
481+ ARRAY_AGG = 6 ;
482+ // VARIANCE = 7;
483+ VARIANCE_POP = 8 ;
484+ // COVARIANCE = 9;
485+ // COVARIANCE_POP = 10;
486+ STDDEV = 11 ;
487+ STDDEV_POP = 12 ;
488+ CORRELATION = 13 ;
489+ APPROX_PERCENTILE_CONT = 14 ;
490+ APPROX_MEDIAN = 15 ;
491+ APPROX_PERCENTILE_CONT_WITH_WEIGHT = 16 ;
492+ GROUPING = 17 ;
493+ // MEDIAN = 18;
494+ BIT_AND = 19 ;
495+ BIT_OR = 20 ;
496+ BIT_XOR = 21 ;
497+ BOOL_AND = 22 ;
498+ BOOL_OR = 23 ;
499+ REGR_SLOPE = 26 ;
500+ REGR_INTERCEPT = 27 ;
501+ REGR_COUNT = 28 ;
502+ REGR_R2 = 29 ;
503+ REGR_AVGX = 30 ;
504+ REGR_AVGY = 31 ;
505+ REGR_SXX = 32 ;
506+ REGR_SYY = 33 ;
507+ REGR_SXY = 34 ;
508+ STRING_AGG = 35 ;
509+ NTH_VALUE_AGG = 36 ;
510+ }
511+
512+ message AggregateExprNode {
513+ AggregateFunction aggr_function = 1 ;
514+ repeated LogicalExprNode expr = 2 ;
515+ bool distinct = 3 ;
516+ LogicalExprNode filter = 4 ;
517+ repeated LogicalExprNode order_by = 5 ;
518+ }
492519
493520message AggregateUDFExprNode {
494521 string fun_name = 1 ;
495522 repeated LogicalExprNode args = 2 ;
496- bool distinct = 5 ;
497523 LogicalExprNode filter = 3 ;
498- repeated SortExprNode order_by = 4 ;
499- optional bytes fun_definition = 6 ;
524+ repeated LogicalExprNode order_by = 4 ;
500525}
501526
502527message ScalarUDFExprNode {
@@ -506,8 +531,7 @@ message ScalarUDFExprNode {
506531}
507532
508533enum BuiltInWindowFunction {
509- UNSPECIFIED = 0 ; // https://protobuf.dev/programming-guides/dos-donts/#unspecified-enum
510- // ROW_NUMBER = 0;
534+ ROW_NUMBER = 0 ;
511535 RANK = 1 ;
512536 DENSE_RANK = 2 ;
513537 PERCENT_RANK = 3 ;
@@ -522,16 +546,16 @@ enum BuiltInWindowFunction {
522546
523547message WindowExprNode {
524548 oneof window_function {
549+ AggregateFunction aggr_function = 1 ;
525550 BuiltInWindowFunction built_in_function = 2 ;
526551 string udaf = 3 ;
527552 string udwf = 9 ;
528553 }
529554 LogicalExprNode expr = 4 ;
530555 repeated LogicalExprNode partition_by = 5 ;
531- repeated SortExprNode order_by = 6 ;
556+ repeated LogicalExprNode order_by = 6 ;
532557 // repeated LogicalExprNode filter = 7;
533558 WindowFrame window_frame = 8 ;
534- optional bytes fun_definition = 10 ;
535559}
536560
537561message BetweenNode {
@@ -650,11 +674,9 @@ message PlanType {
650674 datafusion_common.EmptyMessage FinalLogicalPlan = 3 ;
651675 datafusion_common.EmptyMessage InitialPhysicalPlan = 4 ;
652676 datafusion_common.EmptyMessage InitialPhysicalPlanWithStats = 9 ;
653- datafusion_common.EmptyMessage InitialPhysicalPlanWithSchema = 11 ;
654677 OptimizedPhysicalPlanType OptimizedPhysicalPlan = 5 ;
655678 datafusion_common.EmptyMessage FinalPhysicalPlan = 6 ;
656679 datafusion_common.EmptyMessage FinalPhysicalPlanWithStats = 10 ;
657- datafusion_common.EmptyMessage FinalPhysicalPlanWithSchema = 12 ;
658680 }
659681}
660682
@@ -715,11 +737,10 @@ message PhysicalPlanNode {
715737 AnalyzeExecNode analyze = 23 ;
716738 JsonSinkExecNode json_sink = 24 ;
717739 SymmetricHashJoinExecNode symmetric_hash_join = 25 ;
718- InterleaveExecNode interleave = 26 ;
740+ InterleaveExecNode interleave = 26 ;
719741 PlaceholderRowExecNode placeholder_row = 27 ;
720742 CsvSinkExecNode csv_sink = 28 ;
721743 ParquetSinkExecNode parquet_sink = 29 ;
722- UnnestExecNode unnest = 30 ;
723744 }
724745}
725746
@@ -731,21 +752,13 @@ message PartitionColumn {
731752
732753message FileSinkConfig {
733754 reserved 6 ; // writer_mode
734- reserved 8 ; // was `overwrite` which has been superseded by `insert_op`
735755
736756 string object_store_url = 1 ;
737757 repeated PartitionedFile file_groups = 2 ;
738758 repeated string table_paths = 3 ;
739759 datafusion_common.Schema output_schema = 4 ;
740760 repeated PartitionColumn table_partition_cols = 5 ;
741- bool keep_partition_by_columns = 9 ;
742- InsertOp insert_op = 10 ;
743- }
744-
745- enum InsertOp {
746- Append = 0 ;
747- Overwrite = 1 ;
748- Replace = 2 ;
761+ bool overwrite = 8 ;
749762}
750763
751764message JsonSink {
@@ -784,19 +797,6 @@ message ParquetSinkExecNode {
784797 PhysicalSortExprNodeCollection sort_order = 4 ;
785798}
786799
787- message UnnestExecNode {
788- PhysicalPlanNode input = 1 ;
789- datafusion_common.Schema schema = 2 ;
790- repeated ListUnnest list_type_columns = 3 ;
791- repeated uint64 struct_type_columns = 4 ;
792- UnnestOptions options = 5 ;
793- }
794-
795- message ListUnnest {
796- uint32 index_in_input_schema = 1 ;
797- uint32 depth = 2 ;
798- }
799-
800800message PhysicalExtensionNode {
801801 bytes node = 1 ;
802802 repeated PhysicalPlanNode inputs = 2 ;
@@ -838,8 +838,6 @@ message PhysicalExprNode {
838838 // was PhysicalDateTimeIntervalExprNode date_time_interval_expr = 17;
839839
840840 PhysicalLikeExprNode like_expr = 18 ;
841-
842- PhysicalExtensionExprNode extension = 19 ;
843841 }
844842}
845843
@@ -852,17 +850,17 @@ message PhysicalScalarUdfNode {
852850
853851message PhysicalAggregateExprNode {
854852 oneof AggregateFunction {
853+ AggregateFunction aggr_function = 1 ;
855854 string user_defined_aggr_function = 4 ;
856855 }
857856 repeated PhysicalExprNode expr = 2 ;
858857 repeated PhysicalSortExprNode ordering_req = 5 ;
859858 bool distinct = 3 ;
860- bool ignore_nulls = 6 ;
861- optional bytes fun_definition = 7 ;
862859}
863860
864861message PhysicalWindowExprNode {
865862 oneof window_function {
863+ AggregateFunction aggr_function = 1 ;
866864 BuiltInWindowFunction built_in_function = 2 ;
867865 string user_defined_aggr_function = 3 ;
868866 }
@@ -871,7 +869,6 @@ message PhysicalWindowExprNode {
871869 repeated PhysicalSortExprNode order_by = 6 ;
872870 WindowFrame window_frame = 7 ;
873871 string name = 8 ;
874- optional bytes fun_definition = 9 ;
875872}
876873
877874message PhysicalIsNull {
@@ -947,16 +944,10 @@ message PhysicalNegativeNode {
947944 PhysicalExprNode expr = 1 ;
948945}
949946
950- message PhysicalExtensionExprNode {
951- bytes expr = 1 ;
952- repeated PhysicalExprNode inputs = 2 ;
953- }
954-
955947message FilterExecNode {
956948 PhysicalPlanNode input = 1 ;
957949 PhysicalExprNode expr = 2 ;
958950 uint32 default_filter_selectivity = 3 ;
959- repeated uint32 projection = 9 ;
960951}
961952
962953message FileGroup {
@@ -1003,10 +994,6 @@ message CsvScanExecNode {
1003994 oneof optional_escape {
1004995 string escape = 5 ;
1005996 }
1006- oneof optional_comment {
1007- string comment = 6 ;
1008- }
1009- bool newlines_in_values = 7 ;
1010997}
1011998
1012999message AvroScanExecNode {
@@ -1187,7 +1174,6 @@ message NestedLoopJoinExecNode {
11871174message CoalesceBatchesExecNode {
11881175 PhysicalPlanNode input = 1 ;
11891176 uint32 target_batch_size = 2 ;
1190- optional uint32 fetch = 3 ;
11911177}
11921178
11931179message CoalescePartitionsExecNode {
0 commit comments