Skip to content

Commit d2a1dc9

Browse files
committed
feat: Add method to configure DataFrame display options in SessionConfig (python)
- Introduced `with_dataframe_display_config` method in `SessionConfig` to allow customization of DataFrame display settings.
- Parameters include `max_table_bytes`, `min_table_rows`, `max_cell_length`, and `max_table_rows_in_repr` for flexible display configurations.
- Utilizes `DataframeDisplayConfig` for internal management of display settings.
1 parent b401e1a commit d2a1dc9

File tree

3 files changed

+87
-51
lines changed

3 files changed

+87
-51
lines changed

python/datafusion/context.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from ._internal import SessionConfig as SessionConfigInternal
3737
from ._internal import SessionContext as SessionContextInternal
3838
from ._internal import SQLOptions as SQLOptionsInternal
39+
from ._internal import DataframeDisplayConfig as DataframeDisplayConfigInternal
3940

4041
if TYPE_CHECKING:
4142
import pathlib
@@ -89,6 +90,37 @@ def __init__(self, config_options: dict[str, str] | None = None) -> None:
8990
"""
9091
self.config_internal = SessionConfigInternal(config_options)
9192

93+
def with_dataframe_display_config(
94+
self,
95+
max_table_bytes: int | None = None,
96+
min_table_rows: int | None = None,
97+
max_cell_length: int | None = None,
98+
max_table_rows_in_repr: int | None = None,
99+
) -> SessionConfig:
100+
"""Configure the display options for DataFrames.
101+
102+
Args:
103+
max_table_bytes: Maximum bytes to display for table presentation; ``None`` selects the default (2MB)
104+
min_table_rows: Minimum number of table rows to display; ``None`` selects the default (20)
105+
max_cell_length: Maximum length of a cell before it gets minimized; ``None`` selects the default (25)
106+
max_table_rows_in_repr: Maximum number of rows to display in repr string output; ``None`` selects the default (10)
107+
108+
Returns:
109+
This :py:class:`SessionConfig` object, updated in place with the new display settings.
110+
"""
111+
112+
# Arguments left as None are forwarded unchanged; the internal constructor substitutes its defaults.
display_config = DataframeDisplayConfigInternal(
113+
max_table_bytes=max_table_bytes,
114+
min_table_rows=min_table_rows,
115+
max_cell_length=max_cell_length,
116+
max_table_rows_in_repr=max_table_rows_in_repr,
117+
)
118+
119+
# NOTE(review): relies on the internal SessionConfig exposing
# with_dataframe_display_config; that binding is not visible in this diff - confirm it exists.
self.config_internal = self.config_internal.with_dataframe_display_config(
120+
display_config
121+
)
122+
return self
123+
92124
def with_create_default_catalog_and_schema(
93125
self, enabled: bool = True
94126
) -> SessionConfig:
@@ -806,9 +838,11 @@ def register_parquet(
806838
file_extension,
807839
skip_metadata,
808840
schema,
809-
[sort_list_to_raw_sort_list(exprs) for exprs in file_sort_order]
810-
if file_sort_order is not None
811-
else None,
841+
(
842+
[sort_list_to_raw_sort_list(exprs) for exprs in file_sort_order]
843+
if file_sort_order is not None
844+
else None
845+
),
812846
)
813847

814848
def register_csv(

src/context.rs

Lines changed: 49 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -73,54 +73,6 @@ use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple, PyType};
7373
use tokio::task::JoinHandle;
7474

7575
/// Configuration for displaying DataFrames
76-
#[pyclass(name = "DataframeDisplayConfig", module = "datafusion", subclass)]
77-
#[derive(Clone)]
78-
pub struct DataframeDisplayConfig {
79-
/// Maximum bytes to display for table presentation (default: 2MB)
80-
#[pyo3(get, set)]
81-
pub max_table_bytes: usize,
82-
/// Minimum number of table rows to display (default: 20)
83-
#[pyo3(get, set)]
84-
pub min_table_rows: usize,
85-
/// Maximum length of a cell before it gets minimized (default: 25)
86-
#[pyo3(get, set)]
87-
pub max_cell_length: usize,
88-
/// Maximum number of rows to display in repr string output (default: 10)
89-
#[pyo3(get, set)]
90-
pub max_table_rows_in_repr: usize,
91-
}
92-
93-
#[pymethods]
94-
impl DataframeDisplayConfig {
95-
#[new]
96-
#[pyo3(signature = (max_table_bytes=None, min_table_rows=None, max_cell_length=None, max_table_rows_in_repr=None))]
97-
fn new(
98-
max_table_bytes: Option<usize>,
99-
min_table_rows: Option<usize>,
100-
max_cell_length: Option<usize>,
101-
max_table_rows_in_repr: Option<usize>,
102-
) -> Self {
103-
let default = Self::default();
104-
Self {
105-
max_table_bytes: max_table_bytes.unwrap_or(default.max_table_bytes),
106-
min_table_rows: min_table_rows.unwrap_or(default.min_table_rows),
107-
max_cell_length: max_cell_length.unwrap_or(default.max_cell_length),
108-
max_table_rows_in_repr: max_table_rows_in_repr
109-
.unwrap_or(default.max_table_rows_in_repr),
110-
}
111-
}
112-
}
113-
114-
impl Default for DataframeDisplayConfig {
115-
fn default() -> Self {
116-
Self {
117-
max_table_bytes: 2 * 1024 * 1024, // 2 MB
118-
min_table_rows: 20,
119-
max_cell_length: 25,
120-
max_table_rows_in_repr: 10,
121-
}
122-
}
123-
}
12476
12577
/// Configuration options for a SessionContext
12678
#[pyclass(name = "SessionConfig", module = "datafusion", subclass)]
@@ -229,6 +181,55 @@ impl PySessionConfig {
229181
}
230182
}
231183

184+
/// Configuration for displaying DataFrames.
///
/// Registered with Python under the name `DataframeDisplayConfig` in the
/// `datafusion` module; every field carries a `#[pyo3(get, set)]` accessor.
#[pyclass(name = "DataframeDisplayConfig", module = "datafusion", subclass)]
185+
#[derive(Clone)]
186+
pub struct DataframeDisplayConfig {
187+
/// Maximum bytes to display for table presentation (default: 2MB)
188+
#[pyo3(get, set)]
189+
pub max_table_bytes: usize,
190+
/// Minimum number of table rows to display (default: 20)
191+
#[pyo3(get, set)]
192+
pub min_table_rows: usize,
193+
/// Maximum length of a cell before it gets minimized (default: 25)
194+
#[pyo3(get, set)]
195+
pub max_cell_length: usize,
196+
/// Maximum number of rows to display in repr string output (default: 10)
197+
#[pyo3(get, set)]
198+
pub max_table_rows_in_repr: usize,
199+
}
200+
201+
#[pymethods]
202+
impl DataframeDisplayConfig {
203+
/// Builds a display configuration from optional overrides.
///
/// Every argument may be omitted or passed as `None`, in which case the
/// matching field takes its value from the `Default` implementation.
#[new]
204+
#[pyo3(signature = (max_table_bytes=None, min_table_rows=None, max_cell_length=None, max_table_rows_in_repr=None))]
205+
fn new(
206+
max_table_bytes: Option<usize>,
207+
min_table_rows: Option<usize>,
208+
max_cell_length: Option<usize>,
209+
max_table_rows_in_repr: Option<usize>,
210+
) -> Self {
211+
// One default instance supplies the fallback for every field left unset.
let default = Self::default();
212+
Self {
213+
max_table_bytes: max_table_bytes.unwrap_or(default.max_table_bytes),
214+
min_table_rows: min_table_rows.unwrap_or(default.min_table_rows),
215+
max_cell_length: max_cell_length.unwrap_or(default.max_cell_length),
216+
max_table_rows_in_repr: max_table_rows_in_repr
217+
.unwrap_or(default.max_table_rows_in_repr),
218+
}
219+
}
220+
}
221+
222+
impl Default for DataframeDisplayConfig {
223+
/// Returns the baseline settings: 2 MB of table bytes, at least 20 table
/// rows, cells of up to 25 characters, and 10 rows in repr output.
fn default() -> Self {
224+
Self {
225+
max_table_bytes: 2 * 1024 * 1024, // 2 MB
226+
min_table_rows: 20,
227+
max_cell_length: 25,
228+
max_table_rows_in_repr: 10,
229+
}
230+
}
231+
}
232+
232233
/// Runtime options for a SessionContext
233234
#[pyclass(name = "RuntimeEnvBuilder", module = "datafusion", subclass)]
234235
#[derive(Clone)]

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> {
8282
m.add_class::<context::PyRuntimeEnvBuilder>()?;
8383
m.add_class::<context::PySessionConfig>()?;
8484
m.add_class::<context::PySessionContext>()?;
85+
m.add_class::<context::DataframeDisplayConfig>()?;
8586
m.add_class::<context::PySQLOptions>()?;
8687
m.add_class::<dataframe::PyDataFrame>()?;
8788
m.add_class::<udf::PyScalarUDF>()?;

0 commit comments

Comments
 (0)