Skip to content

Commit 9e370ec

Browse files
committed
Try to fix Python bindings.
1 parent d743e8d commit 9e370ec

File tree

8 files changed

+254
-259
lines changed

8 files changed

+254
-259
lines changed

bindings/python/Cargo.lock

Lines changed: 187 additions & 193 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bindings/python/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,9 +31,9 @@ license = "Apache-2.0"
3131
crate-type = ["cdylib"]
3232

3333
[dependencies]
34-
arrow = { version = "56", features = ["pyarrow", "chrono-tz"] }
34+
arrow = { version = "57", features = ["pyarrow", "chrono-tz"] }
3535
iceberg = { path = "../../crates/iceberg" }
36-
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
36+
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
3737
iceberg-datafusion = { path = "../../crates/integrations/datafusion" }
38-
datafusion-ffi = { version = "50" }
38+
datafusion-ffi = { git = "https://github.com/apache/datafusion.git", rev = "f32984b2dbf9e5a193c20643ce624167295fbd61" }
3939
tokio = { version = "1.46.1", default-features = false }

bindings/python/src/transform.rs

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -24,54 +24,54 @@ use pyo3::prelude::*;
2424
use crate::error::to_py_err;
2525

2626
#[pyfunction]
27-
pub fn identity(py: Python, array: PyObject) -> PyResult<PyObject> {
27+
pub fn identity(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
2828
apply(py, array, Transform::Identity)
2929
}
3030

3131
#[pyfunction]
32-
pub fn void(py: Python, array: PyObject) -> PyResult<PyObject> {
32+
pub fn void(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
3333
apply(py, array, Transform::Void)
3434
}
3535

3636
#[pyfunction]
37-
pub fn year(py: Python, array: PyObject) -> PyResult<PyObject> {
37+
pub fn year(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
3838
apply(py, array, Transform::Year)
3939
}
4040

4141
#[pyfunction]
42-
pub fn month(py: Python, array: PyObject) -> PyResult<PyObject> {
42+
pub fn month(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
4343
apply(py, array, Transform::Month)
4444
}
4545

4646
#[pyfunction]
47-
pub fn day(py: Python, array: PyObject) -> PyResult<PyObject> {
47+
pub fn day(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
4848
apply(py, array, Transform::Day)
4949
}
5050

5151
#[pyfunction]
52-
pub fn hour(py: Python, array: PyObject) -> PyResult<PyObject> {
52+
pub fn hour(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
5353
apply(py, array, Transform::Hour)
5454
}
5555

5656
#[pyfunction]
57-
pub fn bucket(py: Python, array: PyObject, num_buckets: u32) -> PyResult<PyObject> {
57+
pub fn bucket(py: Python, array: Py<PyAny>, num_buckets: u32) -> PyResult<Py<PyAny>> {
5858
apply(py, array, Transform::Bucket(num_buckets))
5959
}
6060

6161
#[pyfunction]
62-
pub fn truncate(py: Python, array: PyObject, width: u32) -> PyResult<PyObject> {
62+
pub fn truncate(py: Python, array: Py<PyAny>, width: u32) -> PyResult<Py<PyAny>> {
6363
apply(py, array, Transform::Truncate(width))
6464
}
6565

66-
fn apply(py: Python, array: PyObject, transform: Transform) -> PyResult<PyObject> {
66+
fn apply(py: Python, array: Py<PyAny>, transform: Transform) -> PyResult<Py<PyAny>> {
6767
// import
6868
let array = ArrayData::from_pyarrow_bound(array.bind(py))?;
6969
let array = make_array(array);
7070
let transform_function = create_transform_function(&transform).map_err(to_py_err)?;
7171
let array = transform_function.transform(array).map_err(to_py_err)?;
7272
// export
7373
let array = array.into_data();
74-
array.to_pyarrow(py)
74+
Ok(array.to_pyarrow(py)?.unbind())
7575
}
7676

7777
pub fn register_module(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {

crates/iceberg/src/arrow/reader.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1783,7 +1783,7 @@ message schema {
17831783
assert_eq!(err.kind(), ErrorKind::DataInvalid);
17841784
assert_eq!(
17851785
err.to_string(),
1786-
"DataInvalid => Unsupported Arrow data type: Duration(Microsecond)".to_string()
1786+
"DataInvalid => Unsupported Arrow data type: Duration(µs)".to_string()
17871787
);
17881788

17891789
// Omitting field c2, we still get an error due to c3 being selected

crates/iceberg/src/inspect/manifests.rs

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -296,18 +296,18 @@ mod tests {
296296
check_record_batches(
297297
record_batch.try_collect::<Vec<_>>().await.unwrap(),
298298
expect![[r#"
299-
Field { name: "content", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "14"} },
300-
Field { name: "path", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} },
301-
Field { name: "length", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} },
302-
Field { name: "partition_spec_id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} },
303-
Field { name: "added_snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} },
304-
Field { name: "added_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} },
305-
Field { name: "existing_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} },
306-
Field { name: "deleted_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} },
307-
Field { name: "added_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "15"} },
308-
Field { name: "existing_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "16"} },
309-
Field { name: "deleted_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "17"} },
310-
Field { name: "partition_summaries", data_type: List(Field { name: "item", data_type: Struct([Field { name: "contains_null", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }, Field { name: "contains_nan", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "11"} }, Field { name: "lower_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "12"} }, Field { name: "upper_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "13"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }"#]],
299+
Field { "content": Int32, metadata: {"PARQUET:field_id": "14"} },
300+
Field { "path": Utf8, metadata: {"PARQUET:field_id": "1"} },
301+
Field { "length": Int64, metadata: {"PARQUET:field_id": "2"} },
302+
Field { "partition_spec_id": Int32, metadata: {"PARQUET:field_id": "3"} },
303+
Field { "added_snapshot_id": Int64, metadata: {"PARQUET:field_id": "4"} },
304+
Field { "added_data_files_count": Int32, metadata: {"PARQUET:field_id": "5"} },
305+
Field { "existing_data_files_count": Int32, metadata: {"PARQUET:field_id": "6"} },
306+
Field { "deleted_data_files_count": Int32, metadata: {"PARQUET:field_id": "7"} },
307+
Field { "added_delete_files_count": Int32, metadata: {"PARQUET:field_id": "15"} },
308+
Field { "existing_delete_files_count": Int32, metadata: {"PARQUET:field_id": "16"} },
309+
Field { "deleted_delete_files_count": Int32, metadata: {"PARQUET:field_id": "17"} },
310+
Field { "partition_summaries": List(Struct("contains_null": Boolean, metadata: {"PARQUET:field_id": "10"}, "contains_nan": nullable Boolean, metadata: {"PARQUET:field_id": "11"}, "lower_bound": nullable Utf8, metadata: {"PARQUET:field_id": "12"}, "upper_bound": nullable Utf8, metadata: {"PARQUET:field_id": "13"}), metadata: {"PARQUET:field_id": "9"}), metadata: {"PARQUET:field_id": "8"} }"#]],
311311
expect![[r#"
312312
content: PrimitiveArray<Int32>
313313
[

crates/iceberg/src/inspect/snapshots.rs

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -151,14 +151,14 @@ mod tests {
151151
check_record_batches(
152152
batch_stream.try_collect::<Vec<_>>().await.unwrap(),
153153
expect![[r#"
154-
Field { name: "committed_at", data_type: Timestamp(Microsecond, Some("+00:00")), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} },
155-
Field { name: "snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} },
156-
Field { name: "parent_id", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} },
157-
Field { name: "operation", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} },
158-
Field { name: "manifest_list", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} },
159-
Field { name: "summary", data_type: Map(Field { name: "key_value", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "value", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }"#]],
154+
Field { "committed_at": Timestamp(µs, "+00:00"), metadata: {"PARQUET:field_id": "1"} },
155+
Field { "snapshot_id": Int64, metadata: {"PARQUET:field_id": "2"} },
156+
Field { "parent_id": nullable Int64, metadata: {"PARQUET:field_id": "3"} },
157+
Field { "operation": nullable Utf8, metadata: {"PARQUET:field_id": "4"} },
158+
Field { "manifest_list": nullable Utf8, metadata: {"PARQUET:field_id": "5"} },
159+
Field { "summary": nullable Map("key_value": Struct("key": Utf8, metadata: {"PARQUET:field_id": "7"}, "value": nullable Utf8, metadata: {"PARQUET:field_id": "8"}), unsorted), metadata: {"PARQUET:field_id": "6"} }"#]],
160160
expect![[r#"
161-
committed_at: PrimitiveArray<Timestamp(Microsecond, Some("+00:00"))>
161+
committed_at: PrimitiveArray<Timestamp(µs, "+00:00")>
162162
[
163163
2018-01-04T21:22:35.770+00:00,
164164
2019-04-12T20:29:15.770+00:00,

crates/integrations/datafusion/src/table/table_provider_factory.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -244,6 +244,7 @@ mod tests {
244244
constraints: Constraints::default(),
245245
column_defaults: Default::default(),
246246
if_not_exists: Default::default(),
247+
or_replace: false,
247248
temporary: false,
248249
definition: Default::default(),
249250
unbounded: Default::default(),

0 commit comments

Comments
 (0)