16 changes: 9 additions & 7 deletions .github/workflows/audit.yml
@@ -23,25 +23,27 @@ concurrency:

on:
push:
branches:
- main
paths:
- "**/Cargo.toml"
- "**/Cargo.lock"
branches:
- main

pull_request:
paths:
- "**/Cargo.toml"
- "**/Cargo.lock"

merge_group:

jobs:
security_audit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Install cargo-audit
run: cargo install cargo-audit
uses: taiki-e/install-action@f535147c22906d77695e11cb199e764aa610a4fc # v2.62.46
with:
tool: cargo-audit
- name: Run audit check
# Ignored until https://github.com/apache/datafusion/issues/15571
# ignored py03 warning until arrow 55 upgrade
run: cargo audit --ignore RUSTSEC-2024-0370 --ignore RUSTSEC-2025-0020 --ignore RUSTSEC-2025-0047
run: cargo audit --ignore RUSTSEC-2025-0111
29 changes: 28 additions & 1 deletion .github/workflows/rust.yml
@@ -266,7 +266,21 @@ jobs:
runs-on: ubuntu-latest
container:
image: amd64/rust
volumes:
- /usr/local:/host/usr/local
steps:
- name: Remove unnecessary preinstalled software
run: |
echo "Disk space before cleanup:"
df -h
# remove tool cache: about 8.5GB (github has host /opt/hostedtoolcache mounted as /__t)
rm -rf /__t/* || true
# remove Haskell runtime: about 6.3GB (host /usr/local/.ghcup)
rm -rf /host/usr/local/.ghcup || true
# remove Android library: about 7.8GB (host /usr/local/lib/android)
rm -rf /host/usr/local/lib/android || true
echo "Disk space after cleanup:"
df -h
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
submodules: true
@@ -347,6 +361,19 @@ jobs:
with:
save-if: ${{ github.ref_name == 'main' }}
shared-key: "amd-ci-linux-test-example"
- name: Remove unnecessary preinstalled software
run: |
echo "Disk space before cleanup:"
df -h
apt-get clean
rm -rf /__t/CodeQL
rm -rf /__t/PyPy
rm -rf /__t/Java_Temurin-Hotspot_jdk
rm -rf /__t/Python
rm -rf /__t/go
rm -rf /__t/Ruby
echo "Disk space after cleanup:"
df -h
- name: Run examples
run: |
# test datafusion-sql examples
@@ -444,7 +471,7 @@ jobs:
export RUST_MIN_STACK=20971520
export TPCH_DATA=`realpath datafusion/sqllogictest/test_files/tpch/data`
cargo test plan_q --package datafusion-benchmarks --profile ci --features=ci -- --test-threads=1
INCLUDE_TPCH=true cargo test --features backtrace --profile ci --package datafusion-sqllogictest --test sqllogictests
INCLUDE_TPCH=true cargo test --features backtrace,parquet_encryption --profile ci --package datafusion-sqllogictest --test sqllogictests
- name: Verify Working Directory Clean
run: git diff --exit-code

1 change: 1 addition & 0 deletions Cargo.toml
@@ -198,6 +198,7 @@ rpath = false
strip = false # Retain debug info for flamegraphs

[profile.ci]
debug = false
inherits = "dev"
incremental = false

2 changes: 1 addition & 1 deletion datafusion-testing
2 changes: 1 addition & 1 deletion datafusion/common/src/scalar/mod.rs
@@ -2387,7 +2387,7 @@ impl ScalarValue {
Arc::new(array)
}
// explicitly enumerate unsupported types so newly added
// types must be aknowledged, Time32 and Time64 types are
// types must be acknowledged, Time32 and Time64 types are
// not supported if the TimeUnit is not valid (Time32 can
// only be used with Second and Millisecond, Time64 only
// with Microsecond and Nanosecond)
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/listing/table.rs
@@ -1131,7 +1131,7 @@ impl ListingTable {
}
}

// Expressions can be used for parttion pruning if they can be evaluated using
// Expressions can be used for partition pruning if they can be evaluated using
// only the partition columns and there are partition columns.
fn can_be_evaluated_for_partition_pruning(
partition_column_names: &[&str],
@@ -1234,7 +1234,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() {
Arc::new(CoalesceBatchesExec::new(hash_join, 8192)) as Arc<dyn ExecutionPlan>;
// Top-level CoalescePartitionsExec
let cp = Arc::new(CoalescePartitionsExec::new(cb)) as Arc<dyn ExecutionPlan>;
// Add a sort for determistic output
// Add a sort for deterministic output
let plan = Arc::new(SortExec::new(
LexOrdering::new(vec![PhysicalSortExpr::new(
col("a", &probe_side_schema).unwrap(),
6 changes: 6 additions & 0 deletions datafusion/datasource-parquet/src/opener.rs
@@ -98,6 +98,7 @@ pub(super) struct ParquetOpener {
/// Coerce INT96 timestamps to specific TimeUnit
pub coerce_int96: Option<TimeUnit>,
/// Optional parquet FileDecryptionProperties
#[cfg(feature = "parquet_encryption")]
pub file_decryption_properties: Option<Arc<FileDecryptionProperties>>,
/// Rewrite expressions in the context of the file schema
pub(crate) expr_adapter_factory: Option<Arc<dyn PhysicalExprAdapterFactory>>,
@@ -151,9 +152,11 @@ impl FileOpener for ParquetOpener {
let mut predicate_file_schema = Arc::clone(&self.logical_file_schema);

let enable_page_index = self.enable_page_index;
#[cfg(feature = "parquet_encryption")]
let encryption_context = self.get_encryption_context();

Ok(Box::pin(async move {
#[cfg(feature = "parquet_encryption")]
let file_decryption_properties = encryption_context
.get_file_decryption_properties(&file_location)
.await?;
@@ -502,6 +505,7 @@
}

#[derive(Default)]
#[cfg_attr(not(feature = "parquet_encryption"), allow(dead_code))]
struct EncryptionContext {
#[cfg(feature = "parquet_encryption")]
file_decryption_properties: Option<Arc<FileDecryptionProperties>>,
@@ -544,6 +548,7 @@ impl EncryptionContext {
}

#[cfg(not(feature = "parquet_encryption"))]
#[allow(dead_code)]
impl EncryptionContext {
async fn get_file_decryption_properties(
&self,
@@ -563,6 +568,7 @@ impl ParquetOpener {
}

#[cfg(not(feature = "parquet_encryption"))]
#[allow(dead_code)]
fn get_encryption_context(&self) -> EncryptionContext {
EncryptionContext::default()
}
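The opener.rs hunks above gate everything encryption-related behind the `parquet_encryption` Cargo feature and mark the stubs compiled without the feature with `allow(dead_code)` so they do not trip warnings in CI. A minimal, self-contained sketch of that pattern (assuming a crate that declares a `parquet_encryption` feature; the field type and method name are placeholders, not DataFusion's actual API):

```rust
// Sketch only: assumes Cargo.toml declares a `parquet_encryption` feature.

#[derive(Default)]
#[cfg_attr(not(feature = "parquet_encryption"), allow(dead_code))]
struct EncryptionContext {
    // The field only exists when the feature is compiled in.
    #[cfg(feature = "parquet_encryption")]
    file_decryption_properties: Option<Vec<u8>>,
}

#[cfg(feature = "parquet_encryption")]
impl EncryptionContext {
    fn decryption_properties(&self) -> Option<&[u8]> {
        self.file_decryption_properties.as_deref()
    }
}

// Stub with the same signature so call sites compile without the feature;
// `allow(dead_code)` keeps it from triggering warnings-as-errors.
#[cfg(not(feature = "parquet_encryption"))]
#[allow(dead_code)]
impl EncryptionContext {
    fn decryption_properties(&self) -> Option<&[u8]> {
        None
    }
}

fn main() {
    let ctx = EncryptionContext::default();
    println!("decryption keys present: {}", ctx.decryption_properties().is_some());
}
```

Either configuration builds: with the feature off, the field and the real impl simply do not exist, and callers go through the stub.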
3 changes: 3 additions & 0 deletions datafusion/datasource-parquet/src/source.rs
@@ -52,6 +52,7 @@ use datafusion_physical_plan::metrics::Count;
use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet;
use datafusion_physical_plan::DisplayFormatType;

#[cfg(feature = "parquet_encryption")]
use datafusion_common::encryption::map_config_decryption_to_decryption;
#[cfg(feature = "parquet_encryption")]
use datafusion_execution::parquet_encryption::EncryptionFactory;
@@ -541,6 +542,7 @@ impl FileSource for ParquetSource {
Arc::new(DefaultParquetFileReaderFactory::new(object_store)) as _
});

#[cfg(feature = "parquet_encryption")]
let file_decryption_properties = self
.table_parquet_options()
.crypto
@@ -576,6 +578,7 @@ impl FileSource for ParquetSource {
enable_row_group_stats_pruning: self.table_parquet_options.global.pruning,
schema_adapter_factory,
coerce_int96,
#[cfg(feature = "parquet_encryption")]
file_decryption_properties,
expr_adapter_factory,
#[cfg(feature = "parquet_encryption")]
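The source.rs hunks apply the same feature gate at the construction site: a `#[cfg(...)]` attribute on a struct-literal field removes both the field and its initializer when the feature is off. A rough sketch of that technique (the `Opener` struct and its fields are illustrative, not the real ParquetOpener/ParquetSource types):

```rust
// Sketch only: assumes a `parquet_encryption` feature is declared.

struct Opener {
    batch_size: usize,
    #[cfg(feature = "parquet_encryption")]
    file_decryption_properties: Option<String>,
}

fn build_opener() -> Opener {
    // The binding is gated too, so no unused-variable lint fires when the
    // feature is disabled.
    #[cfg(feature = "parquet_encryption")]
    let file_decryption_properties = Some("key material".to_string());

    Opener {
        batch_size: 8192,
        // Attributes are allowed on struct-literal fields: this field and
        // its initializer are compiled out together when the feature is off.
        #[cfg(feature = "parquet_encryption")]
        file_decryption_properties,
    }
}

fn main() {
    let opener = build_opener();
    println!("batch_size = {}", opener.batch_size);
    #[cfg(feature = "parquet_encryption")]
    println!("keys: {:?}", opener.file_decryption_properties);
}
```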
1 change: 0 additions & 1 deletion datafusion/physical-expr/src/expressions/case.rs
@@ -1070,7 +1070,6 @@ mod tests {
.into_iter()
.collect();

//let valid_array = vec![true, false, false, true, false, tru
let null_buffer = Buffer::from([0b00101001u8]);
let load4 = load4
.into_data()
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/Cargo.toml
@@ -79,7 +79,7 @@ postgres = [
"tokio-postgres",
]
parquet_encryption = [
"datafusion/parquet_encryption"
"datafusion/parquet_encryption",
]

[dev-dependencies]
@@ -89,7 +89,7 @@ logical_plan
02)--TableScan: test_parquet projection=[id, value, name]
physical_plan
01)SortExec: TopK(fetch=3), expr=[value@1 DESC], preserve_partitioning=[false]
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/test_data.parquet]]}, projection=[id, value, name], file_type=parquet, predicate=DynamicFilter [ empty ]
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/test_data.parquet]]}, projection=[id, value, name], file_type=parquet, predicate=DynamicFilterPhysicalExpr [ true ]

# Disable TopK dynamic filter pushdown
statement ok
@@ -127,7 +127,7 @@ physical_plan
02)--CoalesceBatchesExec: target_batch_size=8192
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3]
04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet
05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilter [ empty ]
05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilterPhysicalExpr [ true ]

# Disable Join dynamic filter pushdown
statement ok
@@ -184,7 +184,7 @@ physical_plan
02)--CoalesceBatchesExec: target_batch_size=8192
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3]
04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet
05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilter [ empty ]
05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilterPhysicalExpr [ true ]

# Enable TopK, disable Join
statement ok
@@ -306,7 +306,7 @@ physical_plan
02)--CoalesceBatchesExec: target_batch_size=8192
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3]
04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet
05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilter [ empty ]
05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilterPhysicalExpr [ true ]

# Cleanup

8 changes: 4 additions & 4 deletions datafusion/sqllogictest/test_files/encrypted_parquet.slt
@@ -29,11 +29,11 @@ STORED AS PARQUET LOCATION 'test_files/scratch/encrypted_parquet/' OPTIONS (
-- Encryption properties
'format.crypto.file_encryption.encrypt_footer' 'true',
'format.crypto.file_encryption.footer_key_as_hex' '30313233343536373839303132333435', -- b"0123456789012345"
'format.crypto.file_encryption.column_key_as_hex::double_field' '31323334353637383930313233343530', -- b"1234567890123450"
'format.crypto.file_encryption.column_key_as_hex::float_field' '31323334353637383930313233343531', -- b"1234567890123451"
'format.crypto.file_encryption.column_key_as_hex::double_field' '31323334353637383930313233343530', -- b"1234567890123450"
'format.crypto.file_encryption.column_key_as_hex::float_field' '31323334353637383930313233343531', -- b"1234567890123451"
-- Decryption properties
'format.crypto.file_decryption.footer_key_as_hex' '30313233343536373839303132333435', -- b"0123456789012345"
'format.crypto.file_decryption.column_key_as_hex::double_field' '31323334353637383930313233343530', -- b"1234567890123450"
'format.crypto.file_decryption.footer_key_as_hex' '30313233343536373839303132333435', -- b"0123456789012345"
'format.crypto.file_decryption.column_key_as_hex::double_field' '31323334353637383930313233343530', -- b"1234567890123450"
'format.crypto.file_decryption.column_key_as_hex::float_field' '31323334353637383930313233343531', -- b"1234567890123451"
)

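The encrypted_parquet.slt options above pass AES keys as hex strings, and the inline comments map each hex value back to its 16-byte key. A small sketch (not a DataFusion API) of how a byte key corresponds to the `*_key_as_hex` value, with each byte becoming two lowercase hex digits:

```rust
// Sketch only: shows the encoding used for the `*_key_as_hex` options.

fn to_hex(bytes: &[u8]) -> String {
    bytes.iter().map(|b| format!("{:02x}", b)).collect()
}

fn main() {
    // Footer key from the test: b"0123456789012345"
    let footer_key = b"0123456789012345";
    assert_eq!(to_hex(footer_key), "30313233343536373839303132333435");

    // Column key for double_field: b"1234567890123450"
    let double_field_key = b"1234567890123450";
    assert_eq!(to_hex(double_field_key), "31323334353637383930313233343530");

    println!("footer key as hex: {}", to_hex(footer_key));
}
```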
2 changes: 1 addition & 1 deletion docs/source/user-guide/introduction.md
@@ -86,7 +86,7 @@ Here are some example systems built using DataFusion:
By using DataFusion, projects are freed to focus on their specific
features, and avoid reimplementing general (but still necessary)
features such as an expression representation, standard optimizations,
parellelized streaming execution plans, file format support, etc.
parallelized streaming execution plans, file format support, etc.

## Known Users

6 changes: 5 additions & 1 deletion typos.toml
@@ -34,6 +34,9 @@ alph = "alph"
wih = "wih"
Ded = "Ded"

# From SLT README
nteger = "nteger"

[files]
extend-exclude = [
"*.slt",
@@ -42,5 +45,6 @@ extend-exclude = [
"*.sql",
"dev/changelog/**",
"benchmarks/**",
"*.csv"
"*.csv",
"docs/source/contributor-guide/governance.md"
]