Skip to content

Commit 167bbf9

Browse files
authored
Update DataFusion to 43 (#1125)
* Update DataFusions to 43 * Fmt * Debug * Remove comment * Update proto * Update common proto as well
1 parent 80c2c56 commit 167bbf9

File tree

13 files changed

+119
-117
lines changed

13 files changed

+119
-117
lines changed

Cargo.toml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,10 @@ arrow-flight = { version = "53", features = ["flight-sql-experimental"] }
2626
clap = { version = "4.5", features = ["derive", "cargo"] }
2727
configure_me = { version = "0.4.0" }
2828
configure_me_codegen = { version = "0.4.4" }
29-
# bump directly to datafusion v43 to avoid the serde bug on v42 (https://github.com/apache/datafusion/pull/12626)
30-
datafusion = "42.0.0"
31-
datafusion-cli = "42.0.0"
32-
datafusion-proto = "42.0.0"
33-
datafusion-proto-common = "42.0.0"
29+
datafusion = "43.0.0"
30+
datafusion-cli = "43.0.0"
31+
datafusion-proto = "43.0.0"
32+
datafusion-proto-common = "43.0.0"
3433
object_store = "0.11"
3534
prost = "0.13"
3635
prost-types = "0.13"

ballista/client/tests/context_setup.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ mod standalone {
380380
}
381381
}
382382

383-
#[derive(Default)]
383+
#[derive(Debug, Default)]
384384
struct BadPlanner {}
385385

386386
#[async_trait::async_trait]

ballista/core/proto/datafusion.proto

Lines changed: 80 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,6 @@ message LogicalExprNodeCollection {
7575
repeated LogicalExprNode logical_expr_nodes = 1;
7676
}
7777

78-
message SortExprNodeCollection {
79-
repeated SortExprNode sort_expr_nodes = 1;
80-
}
81-
8278
message ListingTableScanNode {
8379
reserved 1; // was string table_name
8480
TableReference table_name = 14;
@@ -94,9 +90,8 @@ message ListingTableScanNode {
9490
datafusion_common.CsvFormat csv = 10;
9591
datafusion_common.ParquetFormat parquet = 11;
9692
datafusion_common.AvroFormat avro = 12;
97-
datafusion_common.NdJsonFormat json = 15;
9893
}
99-
repeated SortExprNodeCollection file_sort_order = 13;
94+
repeated LogicalExprNodeCollection file_sort_order = 13;
10095
}
10196

10297
message ViewTableScanNode {
@@ -133,7 +128,7 @@ message SelectionNode {
133128

134129
message SortNode {
135130
LogicalPlanNode input = 1;
136-
repeated SortExprNode expr = 2;
131+
repeated LogicalExprNode expr = 2;
137132
// Maximum number of highest/lowest rows to fetch; negative means no limit
138133
int64 fetch = 3;
139134
}
@@ -164,12 +159,12 @@ message CreateExternalTableNode {
164159
repeated string table_partition_cols = 5;
165160
bool if_not_exists = 6;
166161
string definition = 7;
167-
repeated SortExprNodeCollection order_exprs = 10;
162+
repeated LogicalExprNodeCollection order_exprs = 10;
168163
bool unbounded = 11;
169164
map<string, string> options = 8;
170165
datafusion_common.Constraints constraints = 12;
171166
map<string, LogicalExprNode> column_defaults = 13;
172-
}
167+
}
173168

174169
message PrepareNode {
175170
string name = 1;
@@ -249,51 +244,35 @@ message DistinctNode {
249244
message DistinctOnNode {
250245
repeated LogicalExprNode on_expr = 1;
251246
repeated LogicalExprNode select_expr = 2;
252-
repeated SortExprNode sort_expr = 3;
247+
repeated LogicalExprNode sort_expr = 3;
253248
LogicalPlanNode input = 4;
254249
}
255250

256251
message CopyToNode {
257-
LogicalPlanNode input = 1;
258-
string output_url = 2;
259-
bytes file_type = 3;
260-
repeated string partition_by = 7;
252+
LogicalPlanNode input = 1;
253+
string output_url = 2;
254+
oneof format_options {
255+
datafusion_common.CsvOptions csv = 8;
256+
datafusion_common.JsonOptions json = 9;
257+
datafusion_common.TableParquetOptions parquet = 10;
258+
datafusion_common.AvroOptions avro = 11;
259+
datafusion_common.ArrowOptions arrow = 12;
260+
}
261+
repeated string partition_by = 7;
261262
}
262263

263264
message UnnestNode {
264-
LogicalPlanNode input = 1;
265-
repeated ColumnUnnestExec exec_columns = 2;
266-
repeated ColumnUnnestListItem list_type_columns = 3;
267-
repeated uint64 struct_type_columns = 4;
268-
repeated uint64 dependency_indices = 5;
269-
datafusion_common.DfSchema schema = 6;
270-
UnnestOptions options = 7;
271-
}
272-
message ColumnUnnestListItem {
273-
uint32 input_index = 1;
274-
ColumnUnnestListRecursion recursion = 2;
275-
}
276-
277-
message ColumnUnnestListRecursions {
278-
repeated ColumnUnnestListRecursion recursions = 2;
279-
}
280-
281-
message ColumnUnnestListRecursion {
282-
datafusion_common.Column output_column = 1;
283-
uint32 depth = 2;
284-
}
285-
286-
message ColumnUnnestExec {
287-
datafusion_common.Column column = 1;
288-
oneof UnnestType {
289-
ColumnUnnestListRecursions list = 2;
290-
datafusion_common.EmptyMessage struct = 3;
291-
datafusion_common.EmptyMessage inferred = 4;
292-
}
265+
LogicalPlanNode input = 1;
266+
repeated datafusion_common.Column exec_columns = 2;
267+
repeated uint64 list_type_columns = 3;
268+
repeated uint64 struct_type_columns = 4;
269+
repeated uint64 dependency_indices = 5;
270+
datafusion_common.DfSchema schema = 6;
271+
UnnestOptions options = 7;
293272
}
294273

295274
message UnnestOptions {
296-
bool preserve_nulls = 1;
275+
bool preserve_nulls = 1;
297276
}
298277

299278
message UnionNode {
@@ -337,6 +316,8 @@ message LogicalExprNode {
337316
// binary expressions
338317
BinaryExprNode binary_expr = 4;
339318

319+
// aggregate expressions
320+
AggregateExprNode aggregate_expr = 5;
340321

341322
// null checks
342323
IsNull is_null_expr = 6;
@@ -346,6 +327,7 @@ message LogicalExprNode {
346327
BetweenNode between = 9;
347328
CaseNode case_ = 10;
348329
CastNode cast = 11;
330+
SortExprNode sort = 12;
349331
NegativeNode negative = 13;
350332
InListNode in_list = 14;
351333
Wildcard wildcard = 15;
@@ -387,7 +369,7 @@ message LogicalExprNode {
387369
}
388370

389371
message Wildcard {
390-
TableReference qualifier = 1;
372+
string qualifier = 1;
391373
}
392374

393375
message PlaceholderNode {
@@ -489,14 +471,57 @@ message InListNode {
489471
bool negated = 3;
490472
}
491473

474+
enum AggregateFunction {
475+
MIN = 0;
476+
MAX = 1;
477+
SUM = 2;
478+
AVG = 3;
479+
COUNT = 4;
480+
APPROX_DISTINCT = 5;
481+
ARRAY_AGG = 6;
482+
// VARIANCE = 7;
483+
VARIANCE_POP = 8;
484+
// COVARIANCE = 9;
485+
// COVARIANCE_POP = 10;
486+
STDDEV = 11;
487+
STDDEV_POP = 12;
488+
CORRELATION = 13;
489+
APPROX_PERCENTILE_CONT = 14;
490+
APPROX_MEDIAN = 15;
491+
APPROX_PERCENTILE_CONT_WITH_WEIGHT = 16;
492+
GROUPING = 17;
493+
// MEDIAN = 18;
494+
BIT_AND = 19;
495+
BIT_OR = 20;
496+
BIT_XOR = 21;
497+
BOOL_AND = 22;
498+
BOOL_OR = 23;
499+
REGR_SLOPE = 26;
500+
REGR_INTERCEPT = 27;
501+
REGR_COUNT = 28;
502+
REGR_R2 = 29;
503+
REGR_AVGX = 30;
504+
REGR_AVGY = 31;
505+
REGR_SXX = 32;
506+
REGR_SYY = 33;
507+
REGR_SXY = 34;
508+
STRING_AGG = 35;
509+
NTH_VALUE_AGG = 36;
510+
}
511+
512+
message AggregateExprNode {
513+
AggregateFunction aggr_function = 1;
514+
repeated LogicalExprNode expr = 2;
515+
bool distinct = 3;
516+
LogicalExprNode filter = 4;
517+
repeated LogicalExprNode order_by = 5;
518+
}
492519

493520
message AggregateUDFExprNode {
494521
string fun_name = 1;
495522
repeated LogicalExprNode args = 2;
496-
bool distinct = 5;
497523
LogicalExprNode filter = 3;
498-
repeated SortExprNode order_by = 4;
499-
optional bytes fun_definition = 6;
524+
repeated LogicalExprNode order_by = 4;
500525
}
501526

502527
message ScalarUDFExprNode {
@@ -506,8 +531,7 @@ message ScalarUDFExprNode {
506531
}
507532

508533
enum BuiltInWindowFunction {
509-
UNSPECIFIED = 0; // https://protobuf.dev/programming-guides/dos-donts/#unspecified-enum
510-
// ROW_NUMBER = 0;
534+
ROW_NUMBER = 0;
511535
RANK = 1;
512536
DENSE_RANK = 2;
513537
PERCENT_RANK = 3;
@@ -522,16 +546,16 @@ enum BuiltInWindowFunction {
522546

523547
message WindowExprNode {
524548
oneof window_function {
549+
AggregateFunction aggr_function = 1;
525550
BuiltInWindowFunction built_in_function = 2;
526551
string udaf = 3;
527552
string udwf = 9;
528553
}
529554
LogicalExprNode expr = 4;
530555
repeated LogicalExprNode partition_by = 5;
531-
repeated SortExprNode order_by = 6;
556+
repeated LogicalExprNode order_by = 6;
532557
// repeated LogicalExprNode filter = 7;
533558
WindowFrame window_frame = 8;
534-
optional bytes fun_definition = 10;
535559
}
536560

537561
message BetweenNode {
@@ -650,11 +674,9 @@ message PlanType {
650674
datafusion_common.EmptyMessage FinalLogicalPlan = 3;
651675
datafusion_common.EmptyMessage InitialPhysicalPlan = 4;
652676
datafusion_common.EmptyMessage InitialPhysicalPlanWithStats = 9;
653-
datafusion_common.EmptyMessage InitialPhysicalPlanWithSchema = 11;
654677
OptimizedPhysicalPlanType OptimizedPhysicalPlan = 5;
655678
datafusion_common.EmptyMessage FinalPhysicalPlan = 6;
656679
datafusion_common.EmptyMessage FinalPhysicalPlanWithStats = 10;
657-
datafusion_common.EmptyMessage FinalPhysicalPlanWithSchema = 12;
658680
}
659681
}
660682

@@ -715,11 +737,10 @@ message PhysicalPlanNode {
715737
AnalyzeExecNode analyze = 23;
716738
JsonSinkExecNode json_sink = 24;
717739
SymmetricHashJoinExecNode symmetric_hash_join = 25;
718-
InterleaveExecNode interleave = 26;
740+
InterleaveExecNode interleave = 26;
719741
PlaceholderRowExecNode placeholder_row = 27;
720742
CsvSinkExecNode csv_sink = 28;
721743
ParquetSinkExecNode parquet_sink = 29;
722-
UnnestExecNode unnest = 30;
723744
}
724745
}
725746

@@ -731,21 +752,13 @@ message PartitionColumn {
731752

732753
message FileSinkConfig {
733754
reserved 6; // writer_mode
734-
reserved 8; // was `overwrite` which has been superseded by `insert_op`
735755

736756
string object_store_url = 1;
737757
repeated PartitionedFile file_groups = 2;
738758
repeated string table_paths = 3;
739759
datafusion_common.Schema output_schema = 4;
740760
repeated PartitionColumn table_partition_cols = 5;
741-
bool keep_partition_by_columns = 9;
742-
InsertOp insert_op = 10;
743-
}
744-
745-
enum InsertOp {
746-
Append = 0;
747-
Overwrite = 1;
748-
Replace = 2;
761+
bool overwrite = 8;
749762
}
750763

751764
message JsonSink {
@@ -784,19 +797,6 @@ message ParquetSinkExecNode {
784797
PhysicalSortExprNodeCollection sort_order = 4;
785798
}
786799

787-
message UnnestExecNode {
788-
PhysicalPlanNode input = 1;
789-
datafusion_common.Schema schema = 2;
790-
repeated ListUnnest list_type_columns = 3;
791-
repeated uint64 struct_type_columns = 4;
792-
UnnestOptions options = 5;
793-
}
794-
795-
message ListUnnest {
796-
uint32 index_in_input_schema = 1;
797-
uint32 depth = 2;
798-
}
799-
800800
message PhysicalExtensionNode {
801801
bytes node = 1;
802802
repeated PhysicalPlanNode inputs = 2;
@@ -838,8 +838,6 @@ message PhysicalExprNode {
838838
// was PhysicalDateTimeIntervalExprNode date_time_interval_expr = 17;
839839

840840
PhysicalLikeExprNode like_expr = 18;
841-
842-
PhysicalExtensionExprNode extension = 19;
843841
}
844842
}
845843

@@ -852,17 +850,17 @@ message PhysicalScalarUdfNode {
852850

853851
message PhysicalAggregateExprNode {
854852
oneof AggregateFunction {
853+
AggregateFunction aggr_function = 1;
855854
string user_defined_aggr_function = 4;
856855
}
857856
repeated PhysicalExprNode expr = 2;
858857
repeated PhysicalSortExprNode ordering_req = 5;
859858
bool distinct = 3;
860-
bool ignore_nulls = 6;
861-
optional bytes fun_definition = 7;
862859
}
863860

864861
message PhysicalWindowExprNode {
865862
oneof window_function {
863+
AggregateFunction aggr_function = 1;
866864
BuiltInWindowFunction built_in_function = 2;
867865
string user_defined_aggr_function = 3;
868866
}
@@ -871,7 +869,6 @@ message PhysicalWindowExprNode {
871869
repeated PhysicalSortExprNode order_by = 6;
872870
WindowFrame window_frame = 7;
873871
string name = 8;
874-
optional bytes fun_definition = 9;
875872
}
876873

877874
message PhysicalIsNull {
@@ -947,16 +944,10 @@ message PhysicalNegativeNode {
947944
PhysicalExprNode expr = 1;
948945
}
949946

950-
message PhysicalExtensionExprNode {
951-
bytes expr = 1;
952-
repeated PhysicalExprNode inputs = 2;
953-
}
954-
955947
message FilterExecNode {
956948
PhysicalPlanNode input = 1;
957949
PhysicalExprNode expr = 2;
958950
uint32 default_filter_selectivity = 3;
959-
repeated uint32 projection = 9;
960951
}
961952

962953
message FileGroup {
@@ -1003,10 +994,6 @@ message CsvScanExecNode {
1003994
oneof optional_escape {
1004995
string escape = 5;
1005996
}
1006-
oneof optional_comment {
1007-
string comment = 6;
1008-
}
1009-
bool newlines_in_values = 7;
1010997
}
1011998

1012999
message AvroScanExecNode {
@@ -1187,7 +1174,6 @@ message NestedLoopJoinExecNode {
11871174
message CoalesceBatchesExecNode {
11881175
PhysicalPlanNode input = 1;
11891176
uint32 target_batch_size = 2;
1190-
optional uint32 fetch = 3;
11911177
}
11921178

11931179
message CoalescePartitionsExecNode {

0 commit comments

Comments
 (0)