Skip to content

Commit a15fdcc

Browse files
authored
update all dependencies (#67)
1 parent 71b9770 commit a15fdcc

File tree

8 files changed

+656
-518
lines changed

8 files changed

+656
-518
lines changed

Cargo.lock

Lines changed: 485 additions & 444 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,19 +25,19 @@ version = "0.1.0"
2525
edition = "2021"
2626
readme = "README.md"
2727
license = "Apache-2.0"
28-
rust-version = "1.76"
28+
rust-version = "1.85"
2929
build = "build.rs"
3030

3131
[dependencies]
3232
anyhow = "1"
33-
arrow = { version = "53.3", features = ["pyarrow", "ipc"] }
34-
arrow-flight = "53.3"
33+
arrow = { version = "54", features = ["pyarrow", "ipc"] }
34+
arrow-flight = "54"
3535
async-stream = "0.3"
3636
async-channel = "2.3"
3737
bytesize = "1.3"
38-
datafusion = { version = "43.0", features = ["pyarrow", "avro"] }
39-
datafusion-python = { version = "43.1" }
40-
datafusion-proto = "43.0"
38+
datafusion = { version = "45", features = ["pyarrow", "avro"] }
39+
datafusion-python = { version = "45" }
40+
datafusion-proto = "45"
4141
env_logger = "0.11"
4242
futures = "0.3"
4343
glob = "0.3.1"
@@ -52,15 +52,15 @@ object_store = { version = "0.11.0", features = [
5252
] }
5353
parking_lot = { version = "0.12", features = ["deadlock_detection"] }
5454
prost = "0.13"
55-
pyo3 = { version = "0.22.6", features = [
55+
pyo3 = { version = "0.23", features = [
5656
"extension-module",
5757
"abi3",
5858
"abi3-py38",
5959
] }
60-
pyo3-async-runtimes = { version = "0.22", features = ["tokio-runtime"] }
60+
pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"] }
6161
pyo3-pylogger = "0.3.0"
6262
rust_decimal = "1.36"
63-
tokio = { version = "1.40", features = [
63+
tokio = { version = "1.43", features = [
6464
"macros",
6565
"rt",
6666
"rt-multi-thread",

src/dataframe.rs

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,19 @@ use datafusion::physical_plan::repartition::RepartitionExec;
3131
use datafusion::physical_plan::sorts::sort::SortExec;
3232
use datafusion::physical_plan::{ExecutionPlan, ExecutionPlanProperties};
3333
use datafusion::prelude::DataFrame;
34+
use datafusion_python::errors::PyDataFusionError;
3435
use datafusion_python::physical_plan::PyExecutionPlan;
3536
use datafusion_python::sql::logical::PyLogicalPlan;
3637
use datafusion_python::utils::wait_for_future;
3738
use futures::stream::StreamExt;
3839
use itertools::Itertools;
3940
use log::trace;
41+
use pyo3::exceptions::PyStopAsyncIteration;
42+
use pyo3::exceptions::PyStopIteration;
4043
use pyo3::prelude::*;
4144
use std::borrow::Cow;
4245
use std::sync::Arc;
46+
use tokio::sync::Mutex;
4347

4448
use crate::isolator::PartitionIsolatorExec;
4549
use crate::max_rows::MaxRowsExec;
@@ -428,9 +432,12 @@ impl PyDataFrameStage {
428432
}
429433
}
430434

435+
// PyRecordBatch and PyRecordBatchStream are borrowed, and slightly modified from datafusion-python
436+
// they are not publicly exposed in that repo
437+
431438
#[pyclass]
432439
pub struct PyRecordBatch {
433-
batch: RecordBatch,
440+
pub batch: RecordBatch,
434441
}
435442

436443
#[pymethods]
@@ -448,31 +455,58 @@ impl From<RecordBatch> for PyRecordBatch {
448455

449456
#[pyclass]
450457
pub struct PyRecordBatchStream {
451-
stream: SendableRecordBatchStream,
458+
stream: Arc<Mutex<SendableRecordBatchStream>>,
452459
}
453460

454461
impl PyRecordBatchStream {
455462
pub fn new(stream: SendableRecordBatchStream) -> Self {
456-
Self { stream }
463+
Self {
464+
stream: Arc::new(Mutex::new(stream)),
465+
}
457466
}
458467
}
459468

460469
#[pymethods]
461470
impl PyRecordBatchStream {
462-
fn next(&mut self, py: Python) -> PyResult<Option<PyObject>> {
463-
let result = self.stream.next();
464-
match wait_for_future(py, result) {
465-
None => Ok(None),
466-
Some(Ok(b)) => Ok(Some(b.to_pyarrow(py)?)),
467-
Some(Err(e)) => Err(e.into()),
468-
}
471+
fn next(&mut self, py: Python) -> PyResult<PyObject> {
472+
let stream = self.stream.clone();
473+
wait_for_future(py, next_stream(stream, true)).and_then(|b| b.to_pyarrow(py))
469474
}
470475

471-
fn __next__(&mut self, py: Python) -> PyResult<Option<PyObject>> {
476+
fn __next__(&mut self, py: Python) -> PyResult<PyObject> {
472477
self.next(py)
473478
}
474479

480+
fn __anext__<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
481+
let stream = self.stream.clone();
482+
pyo3_async_runtimes::tokio::future_into_py(py, next_stream(stream, false))
483+
}
484+
475485
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
476486
slf
477487
}
488+
489+
fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
490+
slf
491+
}
492+
}
493+
494+
async fn next_stream(
495+
stream: Arc<Mutex<SendableRecordBatchStream>>,
496+
sync: bool,
497+
) -> PyResult<PyRecordBatch> {
498+
let mut stream = stream.lock().await;
499+
match stream.next().await {
500+
Some(Ok(batch)) => Ok(batch.into()),
501+
Some(Err(e)) => Err(PyDataFusionError::from(e))?,
502+
None => {
503+
// Depending on whether the iteration is sync or not, we raise either a
504+
// StopIteration or a StopAsyncIteration
505+
if sync {
506+
Err(PyStopIteration::new_err("stream exhausted"))
507+
} else {
508+
Err(PyStopAsyncIteration::new_err("stream exhausted"))
509+
}
510+
}
511+
}
478512
}

0 commit comments

Comments (0)