Skip to content

Commit 1ba8807

Browse files
authored
Improve show() output for empty DataFrames (#1208)
* Add test for showing empty DataFrame and improve print output for empty DataFrames
* Add tests for handling empty DataFrames and zero-row queries
* Add tests for showing DataFrames with no rows and improve output messages
* Fix assertion in test_show_from_empty_batch to ensure proper output for empty DataFrames
* feat(tests): add a blank line before test_show_select_where_no_rows function for improved readability
1 parent e28256c commit 1ba8807

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

python/tests/test_dataframe.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,13 @@ def test_filter(df):
252252
assert result.column(2) == pa.array([5])
253253

254254

255+
def test_show_empty(df, capsys):
256+
df_empty = df.filter(column("a") > literal(3))
257+
df_empty.show()
258+
captured = capsys.readouterr()
259+
assert "DataFrame has no rows" in captured.out
260+
261+
255262
def test_sort(df):
256263
df = df.sort(column("b").sort(ascending=False))
257264

@@ -2657,3 +2664,19 @@ def trigger_interrupt():
26572664

26582665
# Make sure the interrupt thread has finished
26592666
interrupt_thread.join(timeout=1.0)
2667+
2668+
2669+
def test_show_select_where_no_rows(capsys) -> None:
2670+
ctx = SessionContext()
2671+
df = ctx.sql("SELECT 1 WHERE 1=0")
2672+
df.show()
2673+
out = capsys.readouterr().out
2674+
assert "DataFrame has no rows" in out
2675+
2676+
2677+
def test_show_from_empty_batch(capsys) -> None:
2678+
ctx = SessionContext()
2679+
batch = pa.record_batch([pa.array([], type=pa.int32())], names=["a"])
2680+
ctx.create_dataframe([[batch]]).show()
2681+
out = capsys.readouterr().out
2682+
assert "| a |" in out

src/dataframe.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -998,10 +998,13 @@ impl PyDataFrame {
998998
fn print_dataframe(py: Python, df: DataFrame) -> PyDataFusionResult<()> {
999999
// Get string representation of record batches
10001000
let batches = wait_for_future(py, df.collect())??;
1001-
let batches_as_string = pretty::pretty_format_batches(&batches);
1002-
let result = match batches_as_string {
1003-
Ok(batch) => format!("DataFrame()\n{batch}"),
1004-
Err(err) => format!("Error: {:?}", err.to_string()),
1001+
let result = if batches.is_empty() {
1002+
"DataFrame has no rows".to_string()
1003+
} else {
1004+
match pretty::pretty_format_batches(&batches) {
1005+
Ok(batch) => format!("DataFrame()\n{batch}"),
1006+
Err(err) => format!("Error: {:?}", err.to_string()),
1007+
}
10051008
};
10061009

10071010
// Import the Python 'builtins' module to access the print function

0 commit comments

Comments
 (0)