Skip to content

Commit 7b5245f

Browse files
timsaucer and teh-cmc authored
Update DataFusion to version 49.0.2 (#11291)
### What

This PR updates the DataFusion dependency to version 49.0.2. With this PR, Python users must likewise update their `datafusion-python` version to 49.0.2.

---------

Co-authored-by: Clement Rey <[email protected]>
1 parent 22f2fc9 commit 7b5245f

File tree

12 files changed

+204
-142
lines changed

12 files changed

+204
-142
lines changed

Cargo.lock

Lines changed: 134 additions & 74 deletions
Large diffs are not rendered by default.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,10 +220,10 @@ convert_case = "0.6"
220220
criterion = "0.5"
221221
cros-codecs = "0.0.6"
222222
crossbeam = "0.8"
223-
datafusion = { version = "47", default-features = false, features = [
223+
datafusion = { version = "49.0.2", default-features = false, features = [
224224
"nested_expressions",
225225
] }
226-
datafusion-ffi = "47"
226+
datafusion-ffi = "49.0.2"
227227
directories = "6"
228228
document-features = "0.2.8"
229229
econtext = "0.2" # Prints error contexts on crashes

crates/store/re_datafusion/src/dataframe_query_common.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,8 @@ impl DataframeQueryTableProvider {
200200
}
201201
Expr::BinaryExpr(binary) => {
202202
if binary.op == Operator::NotEq
203-
&& let (Expr::Column(col), Expr::Literal(sv))
204-
| (Expr::Literal(sv), Expr::Column(col)) =
203+
&& let (Expr::Column(col), Expr::Literal(sv, _))
204+
| (Expr::Literal(sv, _), Expr::Column(col)) =
205205
(binary.left.as_ref(), binary.right.as_ref())
206206
&& sv.is_null()
207207
{

crates/store/re_datafusion/src/dataframe_query_provider.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,13 +227,16 @@ impl PartitionStreamExec {
227227
SortOptions::new(false, true),
228228
));
229229
}
230-
vec![LexOrdering::new(physical_ordering)]
230+
vec![
231+
LexOrdering::new(physical_ordering)
232+
.expect("LexOrdering should return Some since input is not empty"),
233+
]
231234
} else {
232235
vec![]
233236
};
234237

235238
let eq_properties =
236-
EquivalenceProperties::new_with_orderings(Arc::clone(&projected_schema), &orderings);
239+
EquivalenceProperties::new_with_orderings(Arc::clone(&projected_schema), orderings);
237240

238241
let partition_in_output_schema = projection.map(|p| p.contains(&0)).unwrap_or(false);
239242

crates/store/re_datafusion/src/dataframe_query_provider_wasm.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,13 @@ impl PartitionStreamExec {
224224
));
225225
}
226226

227-
let orderings = vec![LexOrdering::new(physical_ordering)];
227+
let orderings = vec![
228+
LexOrdering::new(physical_ordering)
229+
.expect("LexOrdering should return Some when non-empty vec is passed"),
230+
];
228231

229232
let eq_properties =
230-
EquivalenceProperties::new_with_orderings(Arc::clone(&projected_schema), &orderings);
233+
EquivalenceProperties::new_with_orderings(Arc::clone(&projected_schema), orderings);
231234

232235
let partition_in_output_schema = projection.map(|p| p.contains(&0)).unwrap_or(false);
233236

crates/viewer/re_dataframe_ui/src/filters/filter.rs

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@ use std::sync::Arc;
44

55
use arrow::array::{Array as _, ArrayRef, BooleanArray, ListArray, as_list_array};
66
use arrow::datatypes::{DataType, Field};
7+
use datafusion::common::ExprSchema as _;
78
use datafusion::common::{DFSchema, Result as DataFusionResult, exec_err};
89
use datafusion::logical_expr::{
910
ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, ScalarFunctionArgs, ScalarUDF,
1011
ScalarUDFImpl, Signature, TypeSignature, Volatility,
1112
};
12-
use datafusion::prelude::{Column, Expr, array_to_string, col, lit, lower};
13+
use datafusion::prelude::{Column, Expr, array_to_string, col, contains, lit, lower};
1314

1415
use super::{NonNullableBooleanFilter, NullableBooleanFilter};
1516

@@ -297,7 +298,7 @@ impl FilterOperation {
297298
}
298299
};
299300

300-
Ok(contains_patch(lower(operand), lower(lit(query_string))))
301+
Ok(contains(lower(operand), lower(lit(query_string))))
301302
}
302303

303304
Self::NullableBoolean(boolean_filter) => {
@@ -480,7 +481,7 @@ impl ScalarUDFImpl for FilterOperationUdf {
480481
}
481482
}
482483

483-
fn invoke_with_args(&self, args: ScalarFunctionArgs<'_>) -> DataFusionResult<ColumnarValue> {
484+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> DataFusionResult<ColumnarValue> {
484485
let ColumnarValue::Array(input_array) = &args.args[0] else {
485486
return exec_err!("FilterOperation expected array inputs, not scalar values");
486487
};
@@ -512,14 +513,3 @@ impl ScalarUDFImpl for FilterOperationUdf {
512513
}
513514
}
514515
}
515-
516-
// TODO(ab): this is a workaround for https://github.com/apache/datafusion/pull/16046. Next time we
517-
// update datafusion, this should break compilation. Remove this function and replace
518-
// `contains_patch` by `datafusion::prelude::contains` in the method above.
519-
fn contains_patch(arg1: Expr, arg2: Expr) -> Expr {
520-
// make sure we break compilation when we update datafusion
521-
#[cfg(debug_assertions)]
522-
let _ = datafusion::prelude::contains();
523-
524-
datafusion::functions::string::contains().call(<[_]>::into_vec(Box::new([arg1, arg2])))
525-
}

deny.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ all-features = true
2929
[advisories]
3030
version = 2
3131
ignore = [
32-
"RUSTSEC-2024-0384", # Waiting for https://github.com/console-rs/indicatif/pull/666
3332
"RUSTSEC-2024-0436", # https://rustsec.org/advisories/RUSTSEC-2024-0436 - paste is unmaintained - https://github.com/dtolnay/paste
3433
"RUSTSEC-2024-0014", # https://rustsec.org/advisories/RUSTSEC-2024-0014 - generational-arena is unmaintained
3534
]
@@ -54,6 +53,7 @@ deny = [
5453
skip = [
5554
{ name = "base64" }, # Too popular
5655
{ name = "block2" }, # Old version via rfd
56+
{ name = "bzip2" }, # Remove after https://github.com/apache/datafusion/pull/17509 closes
5757
{ name = "cargo-platform" }, # Older version used by ply-rs. It's build-time only!
5858
{ name = "cargo_metadata" }, # Older version used by ply-rs. It's small, and it's build-time only!
5959
{ name = "core-foundation" }, # Currently, e.g. `webbrowser` and `winit` use different versions.
@@ -64,6 +64,7 @@ skip = [
6464
{ name = "objc2-foundation" }, # `accesskit_macos` uses a different version than `arboard`
6565
{ name = "objc2" }, # `accesskit_macos` uses a different version than `arboard`
6666
{ name = "ordered-float" }, # Old version being used by parquet, but super small!
67+
{ name = "petgraph" }, # Remove after next release due to https://github.com/tokio-rs/prost/pull/1327
6768
{ name = "pollster" }, # rfd is still on 0.3
6869
{ name = "pulldown-cmark" }, # Build-dependency via `ply-rs` (!). TODO(emilk): use a better crate for .ply parsing
6970
{ name = "redox_syscall" }, # Plenty of versions in the wild
@@ -97,8 +98,8 @@ allow = [
9798
"OFL-1.1", # https://spdx.org/licenses/OFL-1.1.html
9899
"Ubuntu-font-1.0", # https://ubuntu.com/legal/font-licence
99100
"Unicode-3.0", # https://www.unicode.org/license.txt
100-
"Unicode-DFS-2016", # https://spdx.org/licenses/Unicode-DFS-2016.html
101101
"Zlib", # https://tldrlegal.com/license/zlib-libpng-license-(zlib)
102+
"bzip2-1.0.6", # https://github.com/trifectatechfoundation/libbzip2-rs/blob/v0.2.2/COPYING
102103
]
103104
exceptions = []
104105

@@ -112,7 +113,6 @@ name = "ring"
112113
expression = "MIT AND ISC AND OpenSSL"
113114
license-files = [{ path = "LICENSE", hash = 0xbd0eed23 }]
114115

115-
116116
[sources]
117117
unknown-registry = "deny"
118118
unknown-git = "deny"

docs/content/reference/migration/migration-0-26.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,9 @@ Use the new `timeout_sec` argument instead.
99
For non-blocking, use `timeout_sec=0`.
1010
Mostly you can just call `.flush()` with no arguments.
1111
That will block until either all writes finish or an error occurs (e.g. the gRPC connection is severed).
12+
13+
## Python DataFusion interface: update to 49.0.0
14+
The DataFusion FFI that we rely on for user defined functions and
15+
table providers requires users to upgrade their `datafusion-python`
16+
version to 49.0.0. This only impacts customers who use the
17+
DataFusion tables provided through the `CatalogClient`.

0 commit comments

Comments
 (0)