Skip to content

Commit 15668bb

Browse files
authored
Use split arrow crates for smaller dependency footprint (#362)
* Use split arrow crates for smaller dependency footprint
* simpler cargo.toml
1 parent 9fa70cc commit 15668bb

File tree

9 files changed

+24
-227
lines changed

9 files changed

+24
-227
lines changed

Cargo.lock

Lines changed: 3 additions & 202 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ name = "fastexcel"
2525
crate-type = ["cdylib"]
2626

2727
[dependencies]
28-
# There's a lot of stuff we don't want here, such as serde support
29-
arrow = { version = "^55.2.0", default-features = false, features = ["ffi"] }
28+
arrow-schema = "^55.2.0"
29+
arrow-array = { version = "^55.2.0", features = ["ffi"] }
30+
arrow-pyarrow = { version = "^55.2.0", optional = true }
3031
calamine = { version = "^0.30.0", features = ["dates"] }
3132
chrono = { version = "^0.4.41", default-features = false }
3233
log = "0.4.27"
@@ -42,7 +43,7 @@ rstest = { version = "^0.26.1", default-features = false }
4243
[features]
4344
default = ["extension-module", "pyarrow"]
4445
extension-module = ["pyo3/extension-module"]
45-
pyarrow = ["arrow/pyarrow"]
46+
pyarrow = ["dep:arrow-pyarrow"]
4647
# feature for tests only. This makes Python::with_gil auto-initialize Python
4748
# interpreters, which allows us to instantiate Python objects in tests
4849
# (see https://pyo3.rs/v0.22.3/features#auto-initialize)

src/arrow_capsule.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
use std::ffi::c_void;
22

3-
use arrow::array::Array;
4-
use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
5-
use arrow::record_batch::RecordBatch;
3+
use arrow_array::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
4+
use arrow_array::{Array, RecordBatch, StructArray};
5+
use arrow_schema::Schema;
66
use pyo3::exceptions::PyValueError;
77
use pyo3::ffi::PyCapsule_New;
88
use pyo3::prelude::*;
@@ -11,7 +11,7 @@ use pyo3::types::PyCapsule;
1111
/// Creates a PyCapsule containing an ArrowSchema
1212
pub fn schema_to_pycapsule<'py>(
1313
py: Python<'py>,
14-
schema: &arrow::datatypes::Schema,
14+
schema: &Schema,
1515
) -> PyResult<Bound<'py, PyCapsule>> {
1616
let schema_ptr = Box::into_raw(Box::new(FFI_ArrowSchema::try_from(schema).map_err(
1717
|e| PyValueError::new_err(format!("Failed to convert schema to FFI format: {}", e)),
@@ -78,7 +78,7 @@ pub fn record_batch_to_pycapsules<'py>(
7878
let schema_capsule = schema_to_pycapsule(py, record_batch.schema().as_ref())?;
7979

8080
// For record batches, we need to convert to a struct array
81-
let struct_array = arrow::array::StructArray::from(record_batch.clone());
81+
let struct_array = StructArray::from(record_batch.clone());
8282
let array_capsule = array_to_pycapsule(py, &struct_array)?;
8383

8484
Ok((schema_capsule, array_capsule))

src/data.rs

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
use std::sync::Arc;
22

3-
use arrow::{
4-
array::{Array, NullArray, RecordBatch},
5-
datatypes::{Field, Schema},
6-
};
3+
use arrow_array::{Array, ArrayRef, NullArray, RecordBatch};
4+
use arrow_schema::{Field, Schema};
75
use calamine::{Data as CalData, DataRef as CalDataRef, DataType, Range};
86

97
use crate::{
@@ -77,7 +75,7 @@ mod array_impls {
7775
use std::fmt::Debug;
7876
use std::sync::Arc;
7977

80-
use arrow::array::{
78+
use arrow_array::{
8179
Array, BooleanArray, Date32Array, DurationMillisecondArray, Float64Array, Int64Array,
8280
StringArray, TimestampMillisecondArray,
8381
};
@@ -570,10 +568,7 @@ pub(crate) fn record_batch_from_data_and_columns_with_errors(
570568
let dtype = *column_info.dtype();
571569

572570
let (array, new_cell_errors) = match dtype {
573-
DType::Null => (
574-
Arc::new(NullArray::new(limit - offset)) as Arc<dyn arrow::array::Array>,
575-
vec![],
576-
),
571+
DType::Null => (Arc::new(NullArray::new(limit - offset)) as ArrayRef, vec![]),
577572
DType::Int => create_int_array_with_errors(data, col_idx, offset, limit),
578573
DType::Float => create_float_array_with_errors(data, col_idx, offset, limit),
579574
DType::String => create_string_array_with_errors(data, col_idx, offset, limit),

src/types/dtype.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::{
55
sync::OnceLock,
66
};
77

8-
use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
8+
use arrow_schema::{DataType as ArrowDataType, TimeUnit};
99
use calamine::{CellErrorType, CellType, DataType, Range};
1010
use log::warn;
1111
use pyo3::{

src/types/python/excelreader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ use std::{
33
io::{BufReader, Cursor},
44
};
55

6+
use arrow_array::RecordBatch;
67
#[cfg(feature = "pyarrow")]
7-
use arrow::pyarrow::ToPyArrow;
8-
use arrow::record_batch::RecordBatch;
8+
use arrow_pyarrow::ToPyArrow;
99
use pyo3::{Bound, IntoPyObjectExt, PyAny, PyResult, Python, pyclass, pymethods};
1010

1111
use calamine::{

src/types/python/excelsheet/column_info.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::{fmt::Display, str::FromStr};
22

3-
use arrow::datatypes::Field;
3+
use arrow_schema::Field;
44
use calamine::DataType;
55
use pyo3::{PyResult, pyclass, pymethods};
66

src/types/python/excelsheet/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ use calamine::{CellType, Range, Sheet as CalamineSheet, SheetVisible as Calamine
55
use column_info::{AvailableColumns, ColumnInfoNoDtype};
66
use std::{cmp, collections::HashSet, fmt::Debug, str::FromStr};
77

8+
use arrow_array::RecordBatch;
89
#[cfg(feature = "pyarrow")]
9-
use arrow::pyarrow::ToPyArrow;
10-
use arrow::record_batch::RecordBatch;
10+
use arrow_pyarrow::ToPyArrow;
1111

1212
use pyo3::{
1313
Bound, IntoPyObject, IntoPyObjectExt, PyAny, PyObject, PyResult,

0 commit comments

Comments
 (0)