Skip to content

Commit 07d4f4b

Browse files
committed
Merge branch 'main' into fill-null
2 parents 73b692f + 3584bec commit 07d4f4b

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

44 files changed

+195
-180
lines changed

.github/workflows/test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171

7272
- name: Run Clippy
7373
if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }}
74-
run: cargo clippy --all-targets --all-features -- -D clippy::all -A clippy::redundant_closure
74+
run: cargo clippy --all-targets --all-features -- -D clippy::all -D warnings -A clippy::redundant_closure
7575

7676
- name: Install dependencies and build
7777
uses: astral-sh/setup-uv@v5

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ repos:
4040
- id: rust-clippy
4141
name: Rust clippy
4242
description: Run cargo clippy on files included in the commit. clippy should be installed before-hand.
43-
entry: cargo clippy --all-targets --all-features -- -Dclippy::all -Aclippy::redundant_closure
43+
entry: cargo clippy --all-targets --all-features -- -Dclippy::all -D warnings -Aclippy::redundant_closure
4444
pass_filenames: false
4545
types: [file, rust]
4646
language: system

python/datafusion/functions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@
252252
"to_hex",
253253
"to_timestamp",
254254
"to_timestamp_micros",
255+
"to_timestamp_nanos",
255256
"to_timestamp_millis",
256257
"to_timestamp_seconds",
257258
"to_unixtime",

python/tests/test_functions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,7 @@ def test_temporal_functions(df):
871871
f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")),
872872
f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")),
873873
f.extract(literal("day"), column("d")),
874+
f.to_timestamp_nanos(literal("2023-09-07 05:06:14.523952")),
874875
)
875876
result = df.collect()
876877
assert len(result) == 1
@@ -909,6 +910,9 @@ def test_temporal_functions(df):
909910
[datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us")
910911
)
911912
assert result.column(10) == pa.array([31, 26, 2], type=pa.int32())
913+
assert result.column(11) == pa.array(
914+
[datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns")
915+
)
912916

913917

914918
def test_arrow_cast(df):

src/config.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,14 @@ impl PyConfig {
4747
}
4848

4949
/// Get a configuration option
50-
pub fn get(&mut self, key: &str, py: Python) -> PyResult<PyObject> {
50+
pub fn get<'py>(&mut self, key: &str, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
5151
let options = self.config.to_owned();
5252
for entry in options.entries() {
5353
if entry.key == key {
54-
return Ok(entry.value.into_py(py));
54+
return Ok(entry.value.into_pyobject(py)?);
5555
}
5656
}
57-
Ok(None::<String>.into_py(py))
57+
Ok(None::<String>.into_pyobject(py)?)
5858
}
5959

6060
/// Set a configuration option
@@ -66,10 +66,10 @@ impl PyConfig {
6666

6767
/// Get all configuration options
6868
pub fn get_all(&mut self, py: Python) -> PyResult<PyObject> {
69-
let dict = PyDict::new_bound(py);
69+
let dict = PyDict::new(py);
7070
let options = self.config.to_owned();
7171
for entry in options.entries() {
72-
dict.set_item(entry.key, entry.value.clone().into_py(py))?;
72+
dict.set_item(entry.key, entry.value.clone().into_pyobject(py)?)?;
7373
}
7474
Ok(dict.into())
7575
}

src/context.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -458,8 +458,8 @@ impl PySessionContext {
458458
let py = data.py();
459459

460460
// Instantiate pyarrow Table object & convert to Arrow Table
461-
let table_class = py.import_bound("pyarrow")?.getattr("Table")?;
462-
let args = PyTuple::new_bound(py, &[data]);
461+
let table_class = py.import("pyarrow")?.getattr("Table")?;
462+
let args = PyTuple::new(py, &[data])?;
463463
let table = table_class.call_method1("from_pylist", args)?;
464464

465465
// Convert Arrow Table to datafusion DataFrame
@@ -478,8 +478,8 @@ impl PySessionContext {
478478
let py = data.py();
479479

480480
// Instantiate pyarrow Table object & convert to Arrow Table
481-
let table_class = py.import_bound("pyarrow")?.getattr("Table")?;
482-
let args = PyTuple::new_bound(py, &[data]);
481+
let table_class = py.import("pyarrow")?.getattr("Table")?;
482+
let args = PyTuple::new(py, &[data])?;
483483
let table = table_class.call_method1("from_pydict", args)?;
484484

485485
// Convert Arrow Table to datafusion DataFrame
@@ -533,8 +533,8 @@ impl PySessionContext {
533533
let py = data.py();
534534

535535
// Instantiate pyarrow Table object & convert to Arrow Table
536-
let table_class = py.import_bound("pyarrow")?.getattr("Table")?;
537-
let args = PyTuple::new_bound(py, &[data]);
536+
let table_class = py.import("pyarrow")?.getattr("Table")?;
537+
let args = PyTuple::new(py, &[data])?;
538538
let table = table_class.call_method1("from_pandas", args)?;
539539

540540
// Convert Arrow Table to datafusion DataFrame

src/dataframe.rs

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -545,12 +545,12 @@ impl PyDataFrame {
545545
/// Convert to Arrow Table
546546
/// Collect the batches and pass to Arrow Table
547547
fn to_arrow_table(&self, py: Python<'_>) -> PyResult<PyObject> {
548-
let batches = self.collect(py)?.to_object(py);
549-
let schema: PyObject = self.schema().into_pyobject(py)?.to_object(py);
548+
let batches = self.collect(py)?.into_pyobject(py)?;
549+
let schema = self.schema().into_pyobject(py)?;
550550

551551
// Instantiate pyarrow Table object and use its from_batches method
552-
let table_class = py.import_bound("pyarrow")?.getattr("Table")?;
553-
let args = PyTuple::new_bound(py, &[batches, schema]);
552+
let table_class = py.import("pyarrow")?.getattr("Table")?;
553+
let args = PyTuple::new(py, &[batches, schema])?;
554554
let table: PyObject = table_class.call_method1("from_batches", args)?.into();
555555
Ok(table)
556556
}
@@ -585,8 +585,7 @@ impl PyDataFrame {
585585

586586
let ffi_stream = FFI_ArrowArrayStream::new(reader);
587587
let stream_capsule_name = CString::new("arrow_array_stream").unwrap();
588-
PyCapsule::new_bound(py, ffi_stream, Some(stream_capsule_name))
589-
.map_err(PyDataFusionError::from)
588+
PyCapsule::new(py, ffi_stream, Some(stream_capsule_name)).map_err(PyDataFusionError::from)
590589
}
591590

592591
fn execute_stream(&self, py: Python) -> PyDataFusionResult<PyRecordBatchStream> {
@@ -649,8 +648,8 @@ impl PyDataFrame {
649648
/// Collect the batches, pass to Arrow Table & then convert to polars DataFrame
650649
fn to_polars(&self, py: Python<'_>) -> PyResult<PyObject> {
651650
let table = self.to_arrow_table(py)?;
652-
let dataframe = py.import_bound("polars")?.getattr("DataFrame")?;
653-
let args = PyTuple::new_bound(py, &[table]);
651+
let dataframe = py.import("polars")?.getattr("DataFrame")?;
652+
let args = PyTuple::new(py, &[table])?;
654653
let result: PyObject = dataframe.call1(args)?.into();
655654
Ok(result)
656655
}
@@ -673,7 +672,7 @@ fn print_dataframe(py: Python, df: DataFrame) -> PyDataFusionResult<()> {
673672

674673
// Import the Python 'builtins' module to access the print function
675674
// Note that println! does not print to the Python debug console and is not visible in notebooks for instance
676-
let print = py.import_bound("builtins")?.getattr("print")?;
675+
let print = py.import("builtins")?.getattr("print")?;
677676
print.call1((result,))?;
678677
Ok(())
679678
}

src/dataset.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ impl Dataset {
4848
// Creates a Python PyArrow.Dataset
4949
pub fn new(dataset: &Bound<'_, PyAny>, py: Python) -> PyResult<Self> {
5050
// Ensure that we were passed an instance of pyarrow.dataset.Dataset
51-
let ds = PyModule::import_bound(py, "pyarrow.dataset")?;
51+
let ds = PyModule::import(py, "pyarrow.dataset")?;
5252
let ds_attr = ds.getattr("Dataset")?;
5353
let ds_type = ds_attr.downcast::<PyType>()?;
5454
if dataset.is_instance(ds_type)? {

src/dataset_exec.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ impl DatasetExec {
104104
})
105105
.transpose()?;
106106

107-
let kwargs = PyDict::new_bound(py);
107+
let kwargs = PyDict::new(py);
108108

109109
kwargs.set_item("columns", columns.clone())?;
110110
kwargs.set_item(
@@ -121,7 +121,7 @@ impl DatasetExec {
121121
.0,
122122
);
123123

124-
let builtins = Python::import_bound(py, "builtins")?;
124+
let builtins = Python::import(py, "builtins")?;
125125
let pylist = builtins.getattr("list")?;
126126

127127
// Get the fragments or partitions of the dataset
@@ -198,7 +198,7 @@ impl ExecutionPlan for DatasetExec {
198198
let dataset_schema = dataset
199199
.getattr("schema")
200200
.map_err(|err| InnerDataFusionError::External(Box::new(err)))?;
201-
let kwargs = PyDict::new_bound(py);
201+
let kwargs = PyDict::new(py);
202202
kwargs
203203
.set_item("columns", self.columns.clone())
204204
.map_err(|err| InnerDataFusionError::External(Box::new(err)))?;
@@ -223,7 +223,7 @@ impl ExecutionPlan for DatasetExec {
223223
let record_batches: Bound<'_, PyIterator> = scanner
224224
.call_method0("to_batches")
225225
.map_err(|err| InnerDataFusionError::External(Box::new(err)))?
226-
.iter()
226+
.try_iter()
227227
.map_err(|err| InnerDataFusionError::External(Box::new(err)))?;
228228

229229
let record_batches = PyArrowBatchesAdapter {

src/errors.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,7 @@ pub fn py_datafusion_err(e: impl Debug) -> PyErr {
9191
pub fn py_unsupported_variant_err(e: impl Debug) -> PyErr {
9292
PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{e:?}"))
9393
}
94+
95+
pub fn to_datafusion_err(e: impl Debug) -> InnerDataFusionError {
96+
InnerDataFusionError::Execution(format!("{e:?}"))
97+
}

0 commit comments

Comments
 (0)