Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b1ddc24
fix: `Rows` `size` should use `capacity` and not `len` (#9044)
rluvaton Dec 27, 2025
8ed2b52
fix: integration / Archery test With other arrows container ran out o…
lyang24 Dec 27, 2025
de1686a
feat: support array indices in VariantPath dot notation (#9012)
foskey51 Dec 27, 2025
7f656ff
Minor: avoid some clones when reading parquet (#9048)
alamb Dec 27, 2025
814ee42
Add benchmarks for Utf8View scalars for zip (#8988)
mkleen Dec 28, 2025
2d6fc51
Add examples for min and max functions (#9062)
EduardAkhmetshin Dec 30, 2025
0991c76
[Variant] Unify the CastOptions usage in parquet-variant-compute (#8984)
klion26 Dec 30, 2025
9b16fb3
fix: don't generate nulls for `Decimal128` and `Decimal256` when fiel…
rluvaton Dec 30, 2025
5ddddbd
Minor: avoid clone in RunArray row decoding via buffer stealing (#9052)
lyang24 Dec 30, 2025
9213ffd
perf: improve performance of encoding `GenericByteArray` by 8% (#9054)
rluvaton Dec 30, 2025
843bee2
Fix headers and empty lines in code examples (#9064)
EduardAkhmetshin Dec 31, 2025
6afdfbb
docs: fix misleading reserve documentation (#9076)
WaterWhisperer Jan 1, 2026
44d4c90
chore: run validation when debug assertion enabled and not only for t…
rluvaton Jan 1, 2026
49c27d6
Add special implementation for zip for Utf8View/BinaryView scalars (#…
mkleen Jan 3, 2026
1ba902e
Fix `nullif` kernel (#9087)
alamb Jan 5, 2026
b8a2c1a
[parquet] Avoid a clone while resolving the read strategy (#9056)
alamb Jan 5, 2026
a9d6e92
[Variant] Move `ArrayVariantToArrowRowBuilder` to `variant_to_arrow` …
liamzwbao Jan 5, 2026
b1dfb69
Fix row slice bug in Union column decoding with many columns (#9000)
friendlymatthew Jan 6, 2026
068a7e4
Add `DataType::is_decimal` (#9100)
AdamGS Jan 6, 2026
2507946
Add `FlightInfo::with_endpoints` method (#9075)
lewiszlw Jan 6, 2026
10a976f
chore: increase row count and batch size for more deterministic tests…
Weijun-H Jan 7, 2026
2b179b8
feat(parquet): relax type compatibility check in parquet ArrowWriter (#…
gruuya Jan 7, 2026
721f373
Seal Array trait (#9092)
tustvold Jan 7, 2026
a8346be
Minor: make it clear cache array reader is not cloning arrays (#9057)
alamb Jan 7, 2026
28f66f9
Add Union encoding documentation (#9102)
EduardAkhmetshin Jan 7, 2026
9e822e0
Update version to `57.2.0`, add CHANGELOG (#9103)
alamb Jan 7, 2026
67e04e7
feat: change default behavior for Parquet `PageEncodingStats` to bitm…
WaterWhisperer Jan 8, 2026
37d5013
docs: Update release schedule in README.md (#9111)
alamb Jan 8, 2026
73bbfee
feat: add benchmarks for json parser (#9107)
Weijun-H Jan 8, 2026
964daec
chore: switch test from `bincode` to maintained `postcard` crate (RUS…
alamb Jan 8, 2026
96637fc
Speed up binary kernels (30% faster `and` and `or`), add `BooleanBuff…
alamb Jan 9, 2026
13c43c4
[Variant] Optimize the object header generation logic in ObjectBuilde…
klion26 Jan 9, 2026
98532ad
Merge remote-tracking branch 'apache/main' into arrow-6408
alamb Jan 9, 2026
34f62ef
Update null_if kernel to use Arc<[Buffer]>
alamb Jan 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 60 additions & 6 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,58 +78,112 @@ jobs:
run:
shell: bash
steps:
- name: Monitor disk usage - Initial
run: |
echo "=== Initial Disk Usage ==="
df -h /
echo ""

- name: Remove unnecessary preinstalled software
run: |
echo "=== Cleaning up host disk space ==="
echo "Disk space before cleanup:"
df -h /

# Clean apt cache
apt-get clean || true

# Remove GitHub Actions tool cache
rm -rf /__t/* || true

# Remove large packages from host filesystem (mounted at /host/)
rm -rf /host/usr/share/dotnet || true
rm -rf /host/usr/local/lib/android || true
rm -rf /host/usr/local/.ghcup || true
rm -rf /host/opt/hostedtoolcache/CodeQL || true

echo ""
echo "Disk space after cleanup:"
df -h /
echo ""

# This is necessary so that actions/checkout can find git
- name: Export conda path
run: echo "/opt/conda/envs/arrow/bin" >> $GITHUB_PATH
# This is necessary so that Rust can find cargo
- name: Export cargo path
run: echo "/root/.cargo/bin" >> $GITHUB_PATH
- name: Check rustup
run: which rustup
- name: Check cmake
run: which cmake

# Checkout repos (using shallow clones with fetch-depth: 1)
- name: Checkout Arrow
uses: actions/checkout@v6
with:
repository: apache/arrow
submodules: true
fetch-depth: 0
fetch-depth: 1
- name: Checkout Arrow Rust
uses: actions/checkout@v6
with:
path: rust
submodules: true
fetch-depth: 0
fetch-depth: 1
- name: Checkout Arrow .NET
uses: actions/checkout@v6
with:
repository: apache/arrow-dotnet
path: dotnet
fetch-depth: 1
- name: Checkout Arrow Go
uses: actions/checkout@v6
with:
repository: apache/arrow-go
path: go
fetch-depth: 1
- name: Checkout Arrow Java
uses: actions/checkout@v6
with:
repository: apache/arrow-java
path: java
fetch-depth: 1
- name: Checkout Arrow JavaScript
uses: actions/checkout@v6
with:
repository: apache/arrow-js
path: js
fetch-depth: 1
- name: Checkout Arrow nanoarrow
uses: actions/checkout@v6
with:
repository: apache/arrow-nanoarrow
path: nanoarrow
fetch-depth: 1

- name: Monitor disk usage - After checkouts
run: |
echo "=== After Checkouts ==="
df -h /
echo ""

- name: Build
run: conda run --no-capture-output ci/scripts/integration_arrow_build.sh $PWD /build

- name: Monitor disk usage - After build
if: always()
run: |
echo "=== After Build ==="
df -h /
echo ""

- name: Run
run: conda run --no-capture-output ci/scripts/integration_arrow.sh $PWD /build

- name: Monitor disk usage - After tests
if: always()
run: |
echo "=== After Tests ==="
df -h /
echo ""

# test FFI against the C-Data interface exposed by pyarrow
pyarrow-integration-test:
name: Pyarrow C Data Interface
Expand Down
167 changes: 167 additions & 0 deletions CHANGELOG-old.md

Large diffs are not rendered by default.

292 changes: 147 additions & 145 deletions CHANGELOG.md

Large diffs are not rendered by default.

42 changes: 21 additions & 21 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ exclude = [
]

[workspace.package]
version = "57.1.0"
version = "57.2.0"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
Expand All @@ -85,26 +85,26 @@ edition = "2024"
rust-version = "1.85"

[workspace.dependencies]
arrow = { version = "57.1.0", path = "./arrow", default-features = false }
arrow-arith = { version = "57.1.0", path = "./arrow-arith" }
arrow-array = { version = "57.1.0", path = "./arrow-array" }
arrow-buffer = { version = "57.1.0", path = "./arrow-buffer" }
arrow-cast = { version = "57.1.0", path = "./arrow-cast" }
arrow-csv = { version = "57.1.0", path = "./arrow-csv" }
arrow-data = { version = "57.1.0", path = "./arrow-data" }
arrow-ipc = { version = "57.1.0", path = "./arrow-ipc" }
arrow-json = { version = "57.1.0", path = "./arrow-json" }
arrow-ord = { version = "57.1.0", path = "./arrow-ord" }
arrow-pyarrow = { version = "57.1.0", path = "./arrow-pyarrow" }
arrow-row = { version = "57.1.0", path = "./arrow-row" }
arrow-schema = { version = "57.1.0", path = "./arrow-schema" }
arrow-select = { version = "57.1.0", path = "./arrow-select" }
arrow-string = { version = "57.1.0", path = "./arrow-string" }
parquet = { version = "57.1.0", path = "./parquet", default-features = false }
parquet-geospatial = { version = "57.1.0", path = "./parquet-geospatial" }
parquet-variant = { version = "57.1.0", path = "./parquet-variant" }
parquet-variant-json = { version = "57.1.0", path = "./parquet-variant-json" }
parquet-variant-compute = { version = "57.1.0", path = "./parquet-variant-compute" }
arrow = { version = "57.2.0", path = "./arrow", default-features = false }
arrow-arith = { version = "57.2.0", path = "./arrow-arith" }
arrow-array = { version = "57.2.0", path = "./arrow-array" }
arrow-buffer = { version = "57.2.0", path = "./arrow-buffer" }
arrow-cast = { version = "57.2.0", path = "./arrow-cast" }
arrow-csv = { version = "57.2.0", path = "./arrow-csv" }
arrow-data = { version = "57.2.0", path = "./arrow-data" }
arrow-ipc = { version = "57.2.0", path = "./arrow-ipc" }
arrow-json = { version = "57.2.0", path = "./arrow-json" }
arrow-ord = { version = "57.2.0", path = "./arrow-ord" }
arrow-pyarrow = { version = "57.2.0", path = "./arrow-pyarrow" }
arrow-row = { version = "57.2.0", path = "./arrow-row" }
arrow-schema = { version = "57.2.0", path = "./arrow-schema" }
arrow-select = { version = "57.2.0", path = "./arrow-select" }
arrow-string = { version = "57.2.0", path = "./arrow-string" }
parquet = { version = "57.2.0", path = "./parquet", default-features = false }
parquet-geospatial = { version = "57.2.0", path = "./parquet-geospatial" }
parquet-variant = { version = "57.2.0", path = "./parquet-variant" }
parquet-variant-json = { version = "57.2.0", path = "./parquet-variant-json" }
parquet-variant-compute = { version = "57.2.0", path = "./parquet-variant-compute" }

chrono = { version = "0.4.40", default-features = false, features = ["clock"] }

Expand Down
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,17 @@ Planned Release Schedule

| Approximate Date | Version | Notes |
| ---------------- | ---------- | --------------------------------------- |
| October 2025 | [`57.0.0`] | Major, potentially breaking API changes |
| November 2025 | [`57.1.0`] | Minor, NO breaking API changes |
| December 2025 | [`57.2.0`] | Minor, NO breaking API changes |
| January 2026 | [`58.0.0`] | Major, potentially breaking API changes |
| February 2026 | [`58.1.0`] | Minor, NO breaking API changes |
| March 2026 | [`58.2.0`] | Minor, NO breaking API changes |
| April 2026 | [`59.0.0`] | Major, potentially breaking API changes |

[`57.0.0`]: https://github.com/apache/arrow-rs/issues/7835
[`57.1.0`]: https://github.com/apache/arrow-rs/milestone/3
[`57.2.0`]: https://github.com/apache/arrow-rs/milestone/5
[`58.0.0`]: https://github.com/apache/arrow-rs/milestone/6
[`58.1.0`]: https://github.com/apache/arrow-rs/issues/9108
[`58.2.0`]: https://github.com/apache/arrow-rs/issues/9109
[`59.0.0`]: https://github.com/apache/arrow-rs/issues/9110
[ticket #5368]: https://github.com/apache/arrow-rs/issues/5368
[semantic versioning]: https://semver.org/

Expand Down
22 changes: 20 additions & 2 deletions arrow-arith/src/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,10 +332,10 @@ fn aggregate<T: ArrowNativeTypeOp, P: ArrowPrimitiveType<Native = T>, A: Numeric

/// Returns the minimum value in the boolean array.
///
/// # Example
/// ```
/// # use arrow_array::BooleanArray;
/// # use arrow_arith::aggregate::min_boolean;
///
/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
/// assert_eq!(min_boolean(&a), Some(false))
/// ```
Expand Down Expand Up @@ -390,10 +390,10 @@ pub fn min_boolean(array: &BooleanArray) -> Option<bool> {

/// Returns the maximum value in the boolean array
///
/// # Example
/// ```
/// # use arrow_array::BooleanArray;
/// # use arrow_arith::aggregate::max_boolean;
///
/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
/// assert_eq!(max_boolean(&a), Some(true))
/// ```
Expand Down Expand Up @@ -809,6 +809,15 @@ where

/// Returns the minimum value in the array, according to the natural order.
/// For floating point arrays any NaN values are considered to be greater than any other non-null value
///
/// # Example
/// ```rust
/// # use arrow_array::Int32Array;
/// # use arrow_arith::aggregate::min;
/// let array = Int32Array::from(vec![8, 2, 4]);
/// let result = min(&array);
/// assert_eq!(result, Some(2));
/// ```
pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
where
T::Native: PartialOrd,
Expand All @@ -818,6 +827,15 @@ where

/// Returns the maximum value in the array, according to the natural order.
/// For floating point arrays any NaN values are considered to be greater than any other non-null value
///
/// # Example
/// ```rust
/// # use arrow_array::Int32Array;
/// # use arrow_arith::aggregate::max;
/// let array = Int32Array::from(vec![4, 8, 2]);
/// let result = max(&array);
/// assert_eq!(result, Some(8));
/// ```
pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
where
T::Native: PartialOrd,
Expand Down
24 changes: 13 additions & 11 deletions arrow-arith/src/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.

use arrow_array::*;
use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper};
use arrow_buffer::buffer::bitwise_quaternary_op_helper;
use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not};
use arrow_schema::ArrowError;

Expand Down Expand Up @@ -74,7 +74,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
// The final null bit is set only if:
// 1. left null bit is set, or
// 2. right data bit is false (because null AND false = false).
Some(bitwise_bin_op_helper(
Some(BooleanBuffer::from_bitwise_binary_op(
left_null_buffer.buffer(),
left_null_buffer.offset(),
right_values.inner(),
Expand All @@ -85,7 +85,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
}
(None, Some(right_null_buffer)) => {
// Same as above
Some(bitwise_bin_op_helper(
Some(BooleanBuffer::from_bitwise_binary_op(
right_null_buffer.buffer(),
right_null_buffer.offset(),
left_values.inner(),
Expand All @@ -100,7 +100,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
// d is right data bits.
// The final null bits are:
// (a | (c & !d)) & (c | (a & !b))
Some(bitwise_quaternary_op_helper(
let buffer = bitwise_quaternary_op_helper(
[
left_null_buffer.buffer(),
left_values.inner(),
Expand All @@ -115,10 +115,11 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
],
left.len(),
|a, b, c, d| (a | (c & !d)) & (c | (a & !b)),
))
);
Some(BooleanBuffer::new(buffer, 0, left.len()))
}
};
let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len())));
let nulls = buffer.map(NullBuffer::new);
Ok(BooleanArray::new(left_values & right_values, nulls))
}

Expand Down Expand Up @@ -169,7 +170,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
// The final null bit is set only if:
// 1. left null bit is set, or
// 2. right data bit is true (because null OR true = true).
Some(bitwise_bin_op_helper(
Some(BooleanBuffer::from_bitwise_binary_op(
left_nulls.buffer(),
left_nulls.offset(),
right_values.inner(),
Expand All @@ -180,7 +181,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
}
(None, Some(right_nulls)) => {
// Same as above
Some(bitwise_bin_op_helper(
Some(BooleanBuffer::from_bitwise_binary_op(
right_nulls.buffer(),
right_nulls.offset(),
left_values.inner(),
Expand All @@ -195,7 +196,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
// d is right data bits.
// The final null bits are:
// (a | (c & d)) & (c | (a & b))
Some(bitwise_quaternary_op_helper(
let buffer = bitwise_quaternary_op_helper(
[
left_nulls.buffer(),
left_values.inner(),
Expand All @@ -210,11 +211,12 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
],
left.len(),
|a, b, c, d| (a | (c & d)) & (c | (a & b)),
))
);
Some(BooleanBuffer::new(buffer, 0, left.len()))
}
};

let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len())));
let nulls = buffer.map(NullBuffer::new);
Ok(BooleanArray::new(left_values | right_values, nulls))
}

Expand Down
2 changes: 2 additions & 0 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ impl BooleanArray {
}
}

impl super::private::Sealed for BooleanArray {}

impl Array for BooleanArray {
fn as_any(&self) -> &dyn Any {
self
Expand Down
2 changes: 2 additions & 0 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,8 @@ impl<T: ByteArrayType> std::fmt::Debug for GenericByteArray<T> {
}
}

impl<T: ByteArrayType> super::private::Sealed for GenericByteArray<T> {}

impl<T: ByteArrayType> Array for GenericByteArray<T> {
fn as_any(&self) -> &dyn Any {
self
Expand Down
2 changes: 2 additions & 0 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,8 @@ impl<T: ByteViewType + ?Sized> Debug for GenericByteViewArray<T> {
}
}

impl<T: ByteViewType + ?Sized> super::private::Sealed for GenericByteViewArray<T> {}

impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
fn as_any(&self) -> &dyn Any {
self
Expand Down
4 changes: 4 additions & 0 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,8 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str> for DictionaryArray<T>
}
}

impl<T: ArrowDictionaryKeyType> super::private::Sealed for DictionaryArray<T> {}

impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
fn as_any(&self) -> &dyn Any {
self
Expand Down Expand Up @@ -856,6 +858,8 @@ impl<'a, K: ArrowDictionaryKeyType, V> TypedDictionaryArray<'a, K, V> {
}
}

impl<K: ArrowDictionaryKeyType, V: Sync> super::private::Sealed for TypedDictionaryArray<'_, K, V> {}

impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V> {
fn as_any(&self) -> &dyn Any {
self.dictionary
Expand Down
2 changes: 2 additions & 0 deletions arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,8 @@ impl std::fmt::Debug for FixedSizeBinaryArray {
}
}

impl super::private::Sealed for FixedSizeBinaryArray {}

impl Array for FixedSizeBinaryArray {
fn as_any(&self) -> &dyn Any {
self
Expand Down
Loading