Skip to content

Commit db116b8

Browse files
authored
Merge branch 'main' into alamb/enable_pushdown
2 parents 254c2c4 + 81f7a87 commit db116b8

File tree

47 files changed

+2746
-273
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

47 files changed

+2746
-273
lines changed

.github/workflows/audit.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ jobs:
4242
steps:
4343
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
4444
- name: Install cargo-audit
45-
uses: taiki-e/install-action@710817a1645ef40daad5bcde7431ceccf6cc3528 # v2.67.13
45+
uses: taiki-e/install-action@650c5ca14212efbbf3e580844b04bdccf68dac31 # v2.67.18
4646
with:
4747
tool: cargo-audit
4848
- name: Run audit check

.github/workflows/rust.yml

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -45,10 +45,11 @@ jobs:
4545
# Check crate compiles and base cargo check passes
4646
linux-build-lib:
4747
name: linux build test
48-
runs-on: ubuntu-latest
48+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m7a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
4949
container:
5050
image: amd64/rust
5151
steps:
52+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
5253
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
5354
- name: Setup Rust toolchain
5455
uses: ./.github/actions/setup-builder
@@ -266,12 +267,13 @@ jobs:
266267
linux-test:
267268
name: cargo test (amd64)
268269
needs: linux-build-lib
269-
runs-on: ubuntu-latest
270+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m7a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
270271
container:
271272
image: amd64/rust
272273
volumes:
273274
- /usr/local:/host/usr/local
274275
steps:
276+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
275277
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
276278
with:
277279
submodules: true
@@ -421,7 +423,7 @@ jobs:
421423
sudo apt-get update -qq
422424
sudo apt-get install -y -qq clang
423425
- name: Setup wasm-pack
424-
uses: taiki-e/install-action@710817a1645ef40daad5bcde7431ceccf6cc3528 # v2.67.13
426+
uses: taiki-e/install-action@650c5ca14212efbbf3e580844b04bdccf68dac31 # v2.67.18
425427
with:
426428
tool: wasm-pack
427429
- name: Run tests with headless mode
@@ -733,7 +735,7 @@ jobs:
733735

734736
- name: Set up Node.js (required for prettier)
735737
# doc_prettier_check.sh uses npx to run prettier for Markdown formatting
736-
uses: actions/setup-node@v4
738+
uses: actions/setup-node@v6
737739
with:
738740
node-version: '18'
739741

@@ -756,7 +758,7 @@ jobs:
756758
- name: Setup Rust toolchain
757759
uses: ./.github/actions/setup-builder
758760
- name: Install cargo-msrv
759-
uses: taiki-e/install-action@710817a1645ef40daad5bcde7431ceccf6cc3528 # v2.67.13
761+
uses: taiki-e/install-action@650c5ca14212efbbf3e580844b04bdccf68dac31 # v2.67.18
760762
with:
761763
tool: cargo-msrv
762764

Cargo.lock

Lines changed: 14 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -152,14 +152,14 @@ datafusion-substrait = { path = "datafusion/substrait", version = "52.1.0" }
152152

153153
doc-comment = "0.3"
154154
env_logger = "0.11"
155-
flate2 = "1.1.8"
155+
flate2 = "1.1.9"
156156
futures = "0.3"
157157
glob = "0.3.0"
158158
half = { version = "2.7.0", default-features = false }
159159
hashbrown = { version = "0.16.1" }
160160
hex = { version = "0.4.3" }
161161
indexmap = "2.13.0"
162-
insta = { version = "1.46.1", features = ["glob", "filters"] }
162+
insta = { version = "1.46.3", features = ["glob", "filters"] }
163163
itertools = "0.14"
164164
liblzma = { version = "0.4.4", features = ["static"] }
165165
log = "^0.4"

benchmarks/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ mimalloc_extended = ["libmimalloc-sys/extended"]
4040

4141
[dependencies]
4242
arrow = { workspace = true }
43-
clap = { version = "4.5.53", features = ["derive"] }
43+
clap = { version = "4.5.56", features = ["derive"] }
4444
datafusion = { workspace = true, default-features = true }
4545
datafusion-common = { workspace = true, default-features = true }
4646
env_logger = { workspace = true }

datafusion-cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ async-trait = { workspace = true }
4040
aws-config = "1.8.12"
4141
aws-credential-types = "1.2.7"
4242
chrono = { workspace = true }
43-
clap = { version = "4.5.53", features = ["cargo", "derive"] }
43+
clap = { version = "4.5.56", features = ["cargo", "derive"] }
4444
datafusion = { workspace = true, features = [
4545
"avro",
4646
"compression",

datafusion/common/src/cast.rs

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,9 @@ use arrow::array::{
2525
BinaryViewArray, Decimal32Array, Decimal64Array, DurationMicrosecondArray,
2626
DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, Float16Array,
2727
Int8Array, Int16Array, LargeBinaryArray, LargeListViewArray, LargeStringArray,
28-
ListViewArray, StringViewArray, UInt16Array,
28+
ListViewArray, RunArray, StringViewArray, UInt16Array,
2929
};
30+
use arrow::datatypes::RunEndIndexType;
3031
use arrow::{
3132
array::{
3233
Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array,
@@ -334,3 +335,8 @@ pub fn as_list_view_array(array: &dyn Array) -> Result<&ListViewArray> {
334335
pub fn as_large_list_view_array(array: &dyn Array) -> Result<&LargeListViewArray> {
335336
Ok(downcast_value!(array, LargeListViewArray))
336337
}
338+
339+
// Downcast Array to RunArray
340+
pub fn as_run_array<T: RunEndIndexType>(array: &dyn Array) -> Result<&RunArray<T>> {
341+
Ok(downcast_value!(array, RunArray, T))
342+
}

datafusion/common/src/nested_struct.rs

Lines changed: 32 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ use std::{collections::HashSet, sync::Arc};
3131
///
3232
/// ## Field Matching Strategy
3333
/// - **By Name**: Source struct fields are matched to target fields by name (case-sensitive)
34-
/// - **By Position**: When there is no name overlap and the field counts match, fields are cast by index
34+
/// - **No Positional Mapping**: Structs with no overlapping field names are rejected
3535
/// - **Type Adaptation**: When a matching field is found, it is recursively cast to the target field's type
3636
/// - **Missing Fields**: Target fields not present in the source are filled with null values
3737
/// - **Extra Fields**: Source fields not present in the target are ignored
@@ -67,24 +67,16 @@ fn cast_struct_column(
6767
if let Some(source_struct) = source_col.as_any().downcast_ref::<StructArray>() {
6868
let source_fields = source_struct.fields();
6969
validate_struct_compatibility(source_fields, target_fields)?;
70-
let has_overlap = has_one_of_more_common_fields(source_fields, target_fields);
71-
7270
let mut fields: Vec<Arc<Field>> = Vec::with_capacity(target_fields.len());
7371
let mut arrays: Vec<ArrayRef> = Vec::with_capacity(target_fields.len());
7472
let num_rows = source_col.len();
7573

76-
// Iterate target fields and pick source child either by name (when fields overlap)
77-
// or by position (when there is no name overlap).
78-
for (index, target_child_field) in target_fields.iter().enumerate() {
74+
// Iterate target fields and pick source child by name when present.
75+
for target_child_field in target_fields.iter() {
7976
fields.push(Arc::clone(target_child_field));
8077

81-
// Determine the source child column: by name when overlapping names exist,
82-
// otherwise by position.
83-
let source_child_opt: Option<&ArrayRef> = if has_overlap {
84-
source_struct.column_by_name(target_child_field.name())
85-
} else {
86-
Some(source_struct.column(index))
87-
};
78+
let source_child_opt =
79+
source_struct.column_by_name(target_child_field.name());
8880

8981
match source_child_opt {
9082
Some(source_child_col) => {
@@ -230,20 +222,11 @@ pub fn validate_struct_compatibility(
230222
) -> Result<()> {
231223
let has_overlap = has_one_of_more_common_fields(source_fields, target_fields);
232224
if !has_overlap {
233-
if source_fields.len() != target_fields.len() {
234-
return _plan_err!(
235-
"Cannot cast struct with {} fields to {} fields without name overlap; positional mapping is ambiguous",
236-
source_fields.len(),
237-
target_fields.len()
238-
);
239-
}
240-
241-
for (source_field, target_field) in source_fields.iter().zip(target_fields.iter())
242-
{
243-
validate_field_compatibility(source_field, target_field)?;
244-
}
245-
246-
return Ok(());
225+
return _plan_err!(
226+
"Cannot cast struct with {} fields to {} fields because there is no field name overlap",
227+
source_fields.len(),
228+
target_fields.len()
229+
);
247230
}
248231

249232
// Check compatibility for each target field
@@ -323,7 +306,11 @@ fn validate_field_compatibility(
323306
Ok(())
324307
}
325308

326-
fn has_one_of_more_common_fields(
309+
/// Check if two field lists have at least one common field by name.
310+
///
311+
/// This is useful for validating struct compatibility when casting between structs,
312+
/// ensuring that source and target fields have overlapping names.
313+
pub fn has_one_of_more_common_fields(
327314
source_fields: &[FieldRef],
328315
target_fields: &[FieldRef],
329316
) -> bool {
@@ -546,7 +533,7 @@ mod tests {
546533
}
547534

548535
#[test]
549-
fn test_validate_struct_compatibility_positional_no_overlap_mismatch_len() {
536+
fn test_validate_struct_compatibility_no_overlap_mismatch_len() {
550537
let source_fields = vec![
551538
arc_field("left", DataType::Int32),
552539
arc_field("right", DataType::Int32),
@@ -556,7 +543,7 @@ mod tests {
556543
let result = validate_struct_compatibility(&source_fields, &target_fields);
557544
assert!(result.is_err());
558545
let error_msg = result.unwrap_err().to_string();
559-
assert!(error_msg.contains("positional mapping is ambiguous"));
546+
assert_contains!(error_msg, "no field name overlap");
560547
}
561548

562549
#[test]
@@ -665,21 +652,21 @@ mod tests {
665652
}
666653

667654
#[test]
668-
fn test_validate_struct_compatibility_positional_with_type_mismatch() {
669-
// Source struct: {left: Struct} - nested struct
670-
let source_fields =
671-
vec![arc_struct_field("left", vec![field("x", DataType::Int32)])];
655+
fn test_validate_struct_compatibility_no_overlap_equal_len() {
656+
let source_fields = vec![
657+
arc_field("left", DataType::Int32),
658+
arc_field("right", DataType::Utf8),
659+
];
672660

673-
// Target struct: {alpha: Int32} (no name overlap, incompatible type at position 0)
674-
let target_fields = vec![arc_field("alpha", DataType::Int32)];
661+
let target_fields = vec![
662+
arc_field("alpha", DataType::Int32),
663+
arc_field("beta", DataType::Utf8),
664+
];
675665

676666
let result = validate_struct_compatibility(&source_fields, &target_fields);
677667
assert!(result.is_err());
678668
let error_msg = result.unwrap_err().to_string();
679-
assert_contains!(
680-
error_msg,
681-
"Cannot cast struct field 'alpha' from type Struct(\"x\": Int32) to type Int32"
682-
);
669+
assert_contains!(error_msg, "no field name overlap");
683670
}
684671

685672
#[test]
@@ -948,7 +935,7 @@ mod tests {
948935
}
949936

950937
#[test]
951-
fn test_cast_struct_positional_when_no_overlap() {
938+
fn test_cast_struct_no_overlap_rejected() {
952939
let first = Arc::new(Int32Array::from(vec![Some(10), Some(20)])) as ArrayRef;
953940
let second =
954941
Arc::new(StringArray::from(vec![Some("alpha"), Some("beta")])) as ArrayRef;
@@ -964,17 +951,10 @@ mod tests {
964951
vec![field("a", DataType::Int64), field("b", DataType::Utf8)],
965952
);
966953

967-
let result =
968-
cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS).unwrap();
969-
let struct_array = result.as_any().downcast_ref::<StructArray>().unwrap();
970-
971-
let a_col = get_column_as!(&struct_array, "a", Int64Array);
972-
assert_eq!(a_col.value(0), 10);
973-
assert_eq!(a_col.value(1), 20);
974-
975-
let b_col = get_column_as!(&struct_array, "b", StringArray);
976-
assert_eq!(b_col.value(0), "alpha");
977-
assert_eq!(b_col.value(1), "beta");
954+
let result = cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS);
955+
assert!(result.is_err());
956+
let error_msg = result.unwrap_err().to_string();
957+
assert_contains!(error_msg, "no field name overlap");
978958
}
979959

980960
#[test]

0 commit comments

Comments
 (0)