Skip to content

Commit 6e570e2

Browse files
Bind SQLOptions and relative ctx method apache#567 (apache#588)
1 parent 7d08ec9 commit 6e570e2

File tree

4 files changed

+100
-6
lines changed

4 files changed

+100
-6
lines changed

datafusion/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
SessionConfig,
3434
RuntimeConfig,
3535
ScalarUDF,
36+
SQLOptions,
3637
)
3738

3839
from .common import (
@@ -96,6 +97,7 @@
9697
"DataFrame",
9798
"SessionContext",
9899
"SessionConfig",
100+
"SQLOptions",
99101
"RuntimeConfig",
100102
"Expr",
101103
"AggregateUDF",

datafusion/tests/test_context.py

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,17 @@
1919

2020
import pyarrow as pa
2121
import pyarrow.dataset as ds
22+
import pytest
2223

2324
from datafusion import (
25+
DataFrame,
26+
RuntimeConfig,
27+
SessionConfig,
28+
SessionContext,
29+
SQLOptions,
2430
column,
2531
literal,
26-
SessionContext,
27-
SessionConfig,
28-
RuntimeConfig,
29-
DataFrame,
3032
)
31-
import pytest
3233

3334

3435
def test_create_context_no_args():
@@ -389,3 +390,38 @@ def test_read_parquet(ctx):
389390
def test_read_avro(ctx):
390391
csv_df = ctx.read_avro(path="testing/data/avro/alltypes_plain.avro")
391392
csv_df.show()
393+
394+
395+
def test_create_sql_options():
396+
SQLOptions()
397+
398+
399+
def test_sql_with_options_no_ddl(ctx):
400+
sql = "CREATE TABLE IF NOT EXISTS valuetable AS VALUES(1,'HELLO'),(12,'DATAFUSION')"
401+
ctx.sql(sql)
402+
options = SQLOptions().with_allow_ddl(False)
403+
with pytest.raises(Exception, match="DDL"):
404+
ctx.sql_with_options(sql, options=options)
405+
406+
407+
def test_sql_with_options_no_dml(ctx):
408+
table_name = "t"
409+
batch = pa.RecordBatch.from_arrays(
410+
[pa.array([1, 2, 3]), pa.array([4, 5, 6])],
411+
names=["a", "b"],
412+
)
413+
dataset = ds.dataset([batch])
414+
ctx.register_dataset(table_name, dataset)
415+
sql = f'INSERT INTO "{table_name}" VALUES (1, 2), (2, 3);'
416+
ctx.sql(sql)
417+
options = SQLOptions().with_allow_dml(False)
418+
with pytest.raises(Exception, match="DML"):
419+
ctx.sql_with_options(sql, options=options)
420+
421+
422+
def test_sql_with_options_no_statements(ctx):
423+
sql = "SET time zone = 1;"
424+
ctx.sql(sql)
425+
options = SQLOptions().with_allow_statements(False)
426+
with pytest.raises(Exception, match="SetVariable"):
427+
ctx.sql_with_options(sql, options=options)

src/context.rs

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ use datafusion::arrow::record_batch::RecordBatch;
4545
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
4646
use datafusion::datasource::MemTable;
4747
use datafusion::datasource::TableProvider;
48-
use datafusion::execution::context::{SessionConfig, SessionContext, SessionState, TaskContext};
48+
use datafusion::execution::context::{
49+
SQLOptions, SessionConfig, SessionContext, SessionState, TaskContext,
50+
};
4951
use datafusion::execution::disk_manager::DiskManagerConfig;
5052
use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, UnboundedMemoryPool};
5153
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
@@ -210,6 +212,43 @@ impl PyRuntimeConfig {
210212
}
211213
}
212214

215+
/// `PySQLOptions` allows you to specify options to the sql execution.
216+
#[pyclass(name = "SQLOptions", module = "datafusion", subclass)]
217+
#[derive(Clone)]
218+
pub struct PySQLOptions {
219+
pub options: SQLOptions,
220+
}
221+
222+
impl From<SQLOptions> for PySQLOptions {
223+
fn from(options: SQLOptions) -> Self {
224+
Self { options }
225+
}
226+
}
227+
228+
#[pymethods]
229+
impl PySQLOptions {
230+
#[new]
231+
fn new() -> Self {
232+
let options = SQLOptions::new();
233+
Self { options }
234+
}
235+
236+
/// Should DDL data modification commands (e.g. `CREATE TABLE`) be run? Defaults to `true`.
237+
fn with_allow_ddl(&self, allow: bool) -> Self {
238+
Self::from(self.options.with_allow_ddl(allow))
239+
}
240+
241+
/// Should DML data modification commands (e.g. `INSERT and COPY`) be run? Defaults to `true`
242+
pub fn with_allow_dml(&self, allow: bool) -> Self {
243+
Self::from(self.options.with_allow_dml(allow))
244+
}
245+
246+
/// Should Statements such as (e.g. `SET VARIABLE and `BEGIN TRANSACTION` ...`) be run?. Defaults to `true`
247+
pub fn with_allow_statements(&self, allow: bool) -> Self {
248+
Self::from(self.options.with_allow_statements(allow))
249+
}
250+
}
251+
213252
/// `PySessionContext` is able to plan and execute DataFusion plans.
214253
/// It has a powerful optimizer, a physical planner for local execution, and a
215254
/// multi-threaded execution engine to perform the execution.
@@ -285,6 +324,22 @@ impl PySessionContext {
285324
Ok(PyDataFrame::new(df))
286325
}
287326

327+
pub fn sql_with_options(
328+
&mut self,
329+
query: &str,
330+
options: Option<PySQLOptions>,
331+
py: Python,
332+
) -> PyResult<PyDataFrame> {
333+
let options = if let Some(options) = options {
334+
options.options
335+
} else {
336+
SQLOptions::new()
337+
};
338+
let result = self.ctx.sql_with_options(query, options);
339+
let df = wait_for_future(py, result).map_err(DataFusionError::from)?;
340+
Ok(PyDataFrame::new(df))
341+
}
342+
288343
pub fn create_dataframe(
289344
&mut self,
290345
partitions: PyArrowType<Vec<Vec<RecordBatch>>>,

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ fn _internal(py: Python, m: &PyModule) -> PyResult<()> {
8585
m.add_class::<context::PyRuntimeConfig>()?;
8686
m.add_class::<context::PySessionConfig>()?;
8787
m.add_class::<context::PySessionContext>()?;
88+
m.add_class::<context::PySQLOptions>()?;
8889
m.add_class::<dataframe::PyDataFrame>()?;
8990
m.add_class::<udf::PyScalarUDF>()?;
9091
m.add_class::<udaf::PyAggregateUDF>()?;

0 commit comments

Comments
 (0)