Skip to content

Commit 7a89e7f

Browse files
authored
feat: Remove 'parquet' feature and some other dependency bloat (#4251)
This is a step in the right direction: it removes two of our slowest-to-build dependencies, neither of which is really needed. I re-implemented the cast logic we care about, and parquet was only there for an error variant. To measure the difference, I ran ```bash cargo clean; cargo build --release -p vortex --timings --all-features ``` Currently on develop, `vortex` has 413 dependencies, with the top 10 in build time being: <img width="521" height="405" alt="Screenshot 2025-08-15 at 10 58 51" src="https://github.com/user-attachments/assets/7ed61150-174b-4ec1-a916-82f06491df9d" /> With this PR it has 327 dependencies, and the top 10 are: <img width="527" height="435" alt="Screenshot 2025-08-15 at 11 03 50" src="https://github.com/user-attachments/assets/49d68b0d-19fd-437b-b346-7b90916c3d5a" /> `arrow-ord` and `arrow-select` do some heavier lifting that I think will be harder to replace, but we should also do that at some point. --------- Signed-off-by: Adam Gutglick <[email protected]>
1 parent d95d597 commit 7a89e7f

File tree

22 files changed

+45
-81
lines changed

22 files changed

+45
-81
lines changed

Cargo.lock

Lines changed: 1 addition & 23 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ moka = { version = "0.12.10", default-features = false }
121121
multiversion = "0.8.0"
122122
num-traits = "0.2.19"
123123
num_enum = { version = "0.7.3", default-features = false }
124-
object_store = { version = "0.12.3", features = ["aws"] }
124+
object_store = { version = "0.12.3", default-features = false }
125125
once_cell = "1.21"
126126
opentelemetry = "0.30.0"
127127
opentelemetry-otlp = "0.30.0"

bench-vortex/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ url = { workspace = true }
7272
uuid = { workspace = true, features = ["v4"] }
7373
vortex = { workspace = true, features = [
7474
"object_store",
75-
"parquet",
7675
"files",
7776
"tokio",
7877
"zstd",

encodings/decimal-byte-parts/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ version = { workspace = true }
1717
workspace = true
1818

1919
[dependencies]
20-
itertools = { workspace = true }
2120
num-traits = { workspace = true }
2221
prost = { workspace = true }
2322
vortex-array = { workspace = true }

encodings/pco/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ version = { workspace = true }
1717
workspace = true
1818

1919
[dependencies]
20-
half = { workspace = true }
2120
pco = { workspace = true }
2221
prost = { workspace = true }
2322
vortex-array = { workspace = true }

encodings/pco/src/array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use vortex_array::vtable::{
1818
};
1919
use vortex_array::{ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, ToCanonical, vtable};
2020
use vortex_buffer::{BufferMut, ByteBuffer, ByteBufferMut};
21-
use vortex_dtype::{DType, PType};
21+
use vortex_dtype::{DType, PType, half};
2222
use vortex_error::{VortexError, VortexResult, vortex_bail, vortex_err};
2323
use vortex_scalar::Scalar;
2424

encodings/sequence/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ rust-version = { workspace = true }
1414
version = { workspace = true }
1515

1616
[dependencies]
17-
arcref = { workspace = true }
1817
num-traits = { workspace = true }
1918
prost = { workspace = true }
2019
vortex-array = { workspace = true }

encodings/sparse/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ workspace = true
2020
itertools = { workspace = true }
2121
num-traits = { workspace = true }
2222
prost = { workspace = true }
23-
rstest_reuse = { workspace = true }
2423
vortex-array = { workspace = true }
2524
vortex-buffer = { workspace = true }
2625
vortex-dtype = { workspace = true }

vortex-array/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ arcref = { workspace = true }
2222
arrow-arith = { workspace = true }
2323
arrow-array = { workspace = true, features = ["ffi"] }
2424
arrow-buffer = { workspace = true }
25-
arrow-cast = { workspace = true }
2625
arrow-data = { workspace = true }
2726
arrow-ord = { workspace = true }
2827
arrow-schema = { workspace = true }
@@ -73,6 +72,7 @@ table-display = ["dep:tabled"]
7372
test-harness = ["dep:goldenfile", "dep:rstest", "dep:rstest_reuse"]
7473

7574
[dev-dependencies]
75+
arrow-cast = { workspace = true }
7676
divan = { workspace = true }
7777
rstest = { workspace = true }
7878
vortex-array = { path = ".", features = ["test-harness"] }

vortex-array/src/arrays/varbin/canonical.rs

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use std::sync::Arc;
5+
6+
use arrow_array::cast::AsArray;
7+
use arrow_array::{BinaryViewArray, StringViewArray};
48
use arrow_schema::DataType;
59
use vortex_dtype::DType;
610
use vortex_error::VortexResult;
@@ -17,11 +21,28 @@ impl CanonicalVTable<VarBinVTable> for VarBinVTable {
1721
let nullable = dtype.is_nullable();
1822

1923
let array_ref = array.to_array().into_arrow_preferred()?;
20-
let array = match dtype {
21-
DType::Utf8(_) => arrow_cast::cast(array_ref.as_ref(), &DataType::Utf8View)?,
22-
DType::Binary(_) => arrow_cast::cast(array_ref.as_ref(), &DataType::BinaryView)?,
2324

24-
_ => unreachable!("VarBinArray must have Utf8 or Binary dtype"),
25+
let array = match (&dtype, array_ref.data_type()) {
26+
(DType::Utf8(_), DataType::Utf8) => {
27+
Arc::new(StringViewArray::from(array_ref.as_string::<i32>()))
28+
as Arc<dyn arrow_array::Array>
29+
}
30+
(DType::Utf8(_), DataType::LargeUtf8) => {
31+
Arc::new(StringViewArray::from(array_ref.as_string::<i64>()))
32+
as Arc<dyn arrow_array::Array>
33+
}
34+
35+
(DType::Binary(_), DataType::Binary) => {
36+
Arc::new(BinaryViewArray::from(array_ref.as_binary::<i32>()))
37+
}
38+
(DType::Binary(_), DataType::LargeBinary) => {
39+
Arc::new(BinaryViewArray::from(array_ref.as_binary::<i64>()))
40+
}
41+
// If its already a view, no need to do anything
42+
(DType::Binary(_), DataType::BinaryView) | (DType::Utf8(_), DataType::Utf8View) => {
43+
array_ref
44+
}
45+
_ => unreachable!("VarBinArray must have Utf8 or Binary dtype, instead got: {dtype}",),
2546
};
2647
Ok(Canonical::VarBinView(
2748
ArrayRef::from_arrow(array.as_ref(), nullable).to_varbinview()?,

0 commit comments

Comments
 (0)