Skip to content

Commit 30c9d99

Browse files
committed
revert to before DisplayConfig in PyDataFrame
1 parent a5d224f commit 30c9d99

File tree

3 files changed

+11
-455
lines changed

3 files changed

+11
-455
lines changed

python/datafusion/dataframe.py

Lines changed: 0 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,6 @@
4949
import pyarrow as pa
5050

5151
from datafusion._internal import DataFrame as DataFrameInternal
52-
from datafusion._internal import DisplayConfig
5352
from datafusion._internal import expr as expr_internal
5453

5554
from enum import Enum
@@ -814,60 +813,6 @@ def count(self) -> int:
814813
"""
815814
return self.df.count()
816815

817-
def configure_display(
818-
self,
819-
max_table_bytes: Optional[int] = None,
820-
min_table_rows: Optional[int] = None,
821-
max_cell_length: Optional[int] = None,
822-
max_table_rows_in_repr: Optional[int] = None,
823-
) -> None:
824-
"""Configure display options for DataFrame representation.
825-
826-
Args:
827-
max_table_bytes: Maximum bytes to display for table presentation
828-
(default: 2MB).
829-
Set to lower value for large tables to limit memory usage.
830-
min_table_rows: Minimum number of table rows to display (default: 20).
831-
This is used for initial display and in notebooks.
832-
max_cell_length: Maximum length of a cell before it gets minimized
833-
(default: 25).
834-
Longer cells will be truncated with an expand button.
835-
max_table_rows_in_repr: Maximum number of rows to display in string
836-
representation
837-
(default: 10).
838-
839-
Raises:
840-
ValueError: If any of the provided values are less than or equal to 0.
841-
"""
842-
if any(
843-
value is not None and value <= 0
844-
for value in (
845-
max_table_bytes,
846-
min_table_rows,
847-
max_cell_length,
848-
max_table_rows_in_repr,
849-
)
850-
):
851-
error_msg = "All values must be greater than 0."
852-
raise ValueError(error_msg)
853-
854-
self.df.configure_display(
855-
max_table_bytes, min_table_rows, max_cell_length, max_table_rows_in_repr
856-
)
857-
858-
def reset_display_config(self) -> None:
859-
"""Reset display configuration to default values."""
860-
self.df.reset_display_config()
861-
862-
@property
863-
def display_config(self) -> DisplayConfig:
864-
"""Get the current display configuration.
865-
866-
Returns:
867-
DisplayConfig: The current display configuration settings
868-
"""
869-
return self.df.display_config
870-
871816
@deprecated("Use :py:func:`unnest_columns` instead.")
872817
def unnest_column(self, column: str, preserve_nulls: bool = True) -> DataFrame:
873818
"""See :py:func:`unnest_columns`."""

python/tests/test_dataframe.py

Lines changed: 0 additions & 281 deletions
Original file line number | Diff line number | Diff line change
@@ -1261,284 +1261,3 @@ def test_dataframe_repr_html(df) -> None:
12611261
body_lines = [f"<td(.*?)>{v}</td>" for inner in body_data for v in inner]
12621262
body_pattern = "(.*?)".join(body_lines)
12631263
assert len(re.findall(body_pattern, output, re.DOTALL)) == 1
1264-
1265-
1266-
def test_display_config(df):
1267-
"""Test the display configuration properties are accessible."""
1268-
config = df.display_config
1269-
1270-
# Verify default values
1271-
assert config.max_table_bytes == 2 * 1024 * 1024 # 2 MB
1272-
assert config.min_table_rows == 20
1273-
assert config.max_cell_length == 25
1274-
assert config.max_table_rows_in_repr == 10
1275-
1276-
1277-
def test_configure_display(df):
1278-
"""Test setting display configuration properties."""
1279-
# Modify the display configuration
1280-
df.configure_display(
1281-
max_table_bytes=1024 * 1024,
1282-
min_table_rows=10,
1283-
max_cell_length=50,
1284-
max_table_rows_in_repr=15,
1285-
)
1286-
1287-
# Verify the changes took effect
1288-
config = df.display_config
1289-
assert config.max_table_bytes == 1024 * 1024 # 1 MB
1290-
assert config.min_table_rows == 10
1291-
assert config.max_cell_length == 50
1292-
assert config.max_table_rows_in_repr == 15
1293-
1294-
# Test partial update (only changing one property)
1295-
df.configure_display(max_table_rows_in_repr=5)
1296-
config = df.display_config
1297-
assert config.max_table_bytes == 1024 * 1024 # previous value retained
1298-
assert config.min_table_rows == 10 # previous value retained
1299-
assert config.max_cell_length == 50 # previous value retained
1300-
assert config.max_table_rows_in_repr == 5 # only this value changed
1301-
1302-
# Test with extreme values
1303-
# Zero values
1304-
with pytest.raises(ValueError, match=r".*must be greater than 0.*"):
1305-
df.configure_display(max_table_bytes=0, min_table_rows=0, max_cell_length=0)
1306-
1307-
# Test with negative values
1308-
# This tests for expected behavior when users accidentally pass negative values
1309-
# Since these are usize in Rust, we expect a Python ValueError when trying to pass
1310-
# negative values.
1311-
with pytest.raises(ValueError, match=r".*must be greater than 0.*"):
1312-
df.configure_display(max_table_bytes=-1)
1313-
1314-
with pytest.raises(ValueError, match=r".*must be greater than 0.*"):
1315-
df.configure_display(min_table_rows=-5)
1316-
1317-
with pytest.raises(ValueError, match=r".*must be greater than 0.*"):
1318-
df.configure_display(max_cell_length=-10)
1319-
1320-
# Reset for next tests
1321-
df.reset_display_config()
1322-
1323-
1324-
def test_reset_display_config(df):
1325-
"""Test resetting display configuration to defaults."""
1326-
# First modify the configuration
1327-
df.configure_display(
1328-
max_table_bytes=1024 * 1024,
1329-
min_table_rows=10,
1330-
max_cell_length=50,
1331-
max_table_rows_in_repr=15,
1332-
)
1333-
1334-
# Verify changes took effect
1335-
config = df.display_config
1336-
assert config.max_table_bytes == 1024 * 1024
1337-
assert config.min_table_rows == 10
1338-
assert config.max_cell_length == 50
1339-
assert config.max_table_rows_in_repr == 15
1340-
1341-
# Now reset to defaults
1342-
df.reset_display_config()
1343-
1344-
# Verify defaults are restored
1345-
config = df.display_config
1346-
assert config.max_table_bytes == 2 * 1024 * 1024 # 2 MB
1347-
assert config.min_table_rows == 20
1348-
assert config.max_cell_length == 25
1349-
assert config.max_table_rows_in_repr == 10
1350-
1351-
1352-
def test_min_table_rows_display(ctx):
1353-
"""Test that at least min_table_rows rows are displayed."""
1354-
# Create a dataframe with more rows than the default min_table_rows
1355-
rows = 100
1356-
df = _create_numeric_test_df(ctx, rows)
1357-
1358-
# Set min_table_rows to a specific value
1359-
custom_min_rows = 30
1360-
df.configure_display(min_table_rows=custom_min_rows)
1361-
1362-
# Get HTML representation
1363-
html_output = df._repr_html_()
1364-
1365-
# Count table rows in the HTML (excluding header row)
1366-
# Each row has a <tr> tag
1367-
row_count = html_output.count("<tr>") - 1 # subtract 1 for the header row
1368-
1369-
# Verify at least min_table_rows rows are displayed
1370-
assert row_count >= custom_min_rows, (
1371-
f"Expected at least {custom_min_rows} rows, got {row_count}"
1372-
)
1373-
1374-
# If data was truncated, "Data truncated" message should be present
1375-
if row_count < rows:
1376-
assert "Data truncated" in html_output
1377-
1378-
1379-
def test_max_table_bytes_display(ctx):
1380-
"""Test that reducing max_table_bytes limits the amount of data displayed."""
1381-
# Create a dataframe with large string values to consume memory
1382-
# Each string is approximately 1000 bytes
1383-
large_strings = ["x" * 1000 for _ in range(50)]
1384-
batch = pa.RecordBatch.from_arrays([pa.array(large_strings)], names=["large_data"])
1385-
df = ctx.create_dataframe([[batch]])
1386-
1387-
# First test with default settings
1388-
default_html = df._repr_html_()
1389-
default_row_count = default_html.count("<tr>") - 1 # subtract header row
1390-
1391-
# Now set a very small max_table_bytes
1392-
df.configure_display(max_table_bytes=5000) # 5KB should only fit a few rows
1393-
limited_html = df._repr_html_()
1394-
limited_row_count = limited_html.count("<tr>") - 1
1395-
1396-
# Verify fewer rows are displayed with the byte limit
1397-
assert limited_row_count < default_row_count, (
1398-
f"Expected fewer rows with byte limit. "
1399-
f"Default: {default_row_count}, Limited: {limited_row_count}"
1400-
)
1401-
1402-
# "Data truncated" should be present when limited
1403-
assert "Data truncated" in limited_html
1404-
1405-
1406-
def test_max_cell_length_display(ctx):
1407-
"""Test that cells longer than max_cell_length are truncated in display."""
1408-
# Create a dataframe with long string values
1409-
long_strings = [
1410-
"short",
1411-
"medium text",
1412-
"this is a very long string that should be truncated",
1413-
]
1414-
batch = pa.RecordBatch.from_arrays([pa.array(long_strings)], names=["text"])
1415-
df = ctx.create_dataframe([[batch]])
1416-
1417-
# Set a small max_cell_length
1418-
max_length = 10
1419-
df.configure_display(max_cell_length=max_length)
1420-
1421-
# Get HTML representation
1422-
html_output = df._repr_html_()
1423-
1424-
# Check for expand button for long text
1425-
assert "expandable-container" in html_output
1426-
1427-
# Check that expandable class is used for long text
1428-
assert 'class="expandable"' in html_output
1429-
1430-
# Look for the truncated text and expand button
1431-
long_text = long_strings[2]
1432-
assert long_text[:max_length] in html_output # Truncated text should be present
1433-
assert "expand-btn" in html_output # Expand button should be present
1434-
assert long_text in html_output # Full text should also be in the HTML (hidden)
1435-
1436-
1437-
def test_display_config_repr_string(ctx):
1438-
"""Test that __repr__ respects display configuration."""
1439-
# Create a dataframe with more rows than we want to show
1440-
# df.__repr__ returns max 10 rows by default, so we start test with 7 rows
1441-
rows = 7
1442-
df = _create_numeric_test_df(ctx, rows)
1443-
1444-
# Configure to show at least 5 rows in string representation
1445-
min_table_rows_in_display = 5
1446-
df.configure_display(min_table_rows=min_table_rows_in_display)
1447-
1448-
# Get the string representation
1449-
repr_str = df.__repr__()
1450-
1451-
# Count the number of rows using helper function
1452-
lines_count = _count_lines_in_str(repr_str)
1453-
1454-
assert lines_count >= min_table_rows_in_display
1455-
1456-
# Now set min_rows higher and see if more rows appear
1457-
min_table_rows_in_display = 7
1458-
rows = 11
1459-
df = _create_numeric_test_df(ctx, rows) # Recreate to reset the state
1460-
df.configure_display(min_table_rows=min_table_rows_in_display)
1461-
1462-
repr_str_more = df.__repr__()
1463-
# The string should contain "Data truncated"
1464-
assert "Data truncated" in repr_str_more
1465-
1466-
# Count lines again
1467-
lines_count2 = _count_lines_in_str(repr_str_more)
1468-
1469-
# Should show more rows now
1470-
assert lines_count2 > lines_count
1471-
assert lines_count2 >= min_table_rows_in_display
1472-
1473-
1474-
def _count_lines_in_str(repr_str: str) -> int:
1475-
"""Count the number of rows displayed in a string representation.
1476-
1477-
Args:
1478-
repr_str: String representation of the DataFrame.
1479-
1480-
Returns:
1481-
Number of rows that appear in the string representation.
1482-
"""
1483-
# DataFrame tables are formatted with | value | patterns
1484-
# Count lines that match actual data rows (not headers or separators)
1485-
value_lines = 0
1486-
for line in repr_str.split("\n"):
1487-
# Look for lines like "| 0 |", "| 1 |", etc.
1488-
if re.search(r"\|\s*\d+\s*\|", line):
1489-
value_lines += 1
1490-
return value_lines
1491-
1492-
1493-
def _create_numeric_test_df(ctx, rows) -> DataFrame:
1494-
"""Create a test dataframe with numeric values from 0 to rows-1.
1495-
1496-
Args:
1497-
ctx: SessionContext to use for creating the dataframe.
1498-
rows: Number of rows to create.
1499-
1500-
Returns:
1501-
DataFrame with a single column "values" containing numbers 0 to rows-1.
1502-
"""
1503-
data = list(range(rows))
1504-
batch = pa.RecordBatch.from_arrays([pa.array(data)], names=["values"])
1505-
return ctx.create_dataframe([[batch]])
1506-
1507-
1508-
def test_max_table_rows_in_repr(ctx):
1509-
"""Test that max_table_rows_in_repr controls the number of rows in string
1510-
representation.
1511-
"""
1512-
# Create a dataframe with more rows than the default max_table_rows_in_repr (10)
1513-
rows = 20
1514-
df = _create_numeric_test_df(ctx, rows)
1515-
1516-
# First test with default setting (should limit to 10 rows)
1517-
repr_str = df.__repr__()
1518-
lines_default = _count_lines_in_str(repr_str)
1519-
1520-
# Default should be 10 rows max
1521-
assert lines_default <= 10
1522-
assert "Data truncated" in repr_str
1523-
1524-
# Now set a custom max_table_rows_in_repr value
1525-
custom_max_rows = 15
1526-
df.configure_display(max_table_rows_in_repr=custom_max_rows)
1527-
1528-
# Get the string representation with new configuration
1529-
repr_str_more = df.__repr__()
1530-
lines_custom = _count_lines_in_str(repr_str_more)
1531-
1532-
# Should show more rows than default but not more than configured max
1533-
assert lines_custom > lines_default
1534-
assert lines_custom <= custom_max_rows
1535-
assert "Data truncated" in repr_str_more
1536-
1537-
# Now set max_rows higher than total rows - should show all rows
1538-
df.configure_display(max_table_rows_in_repr=25)
1539-
repr_str_all = df.__repr__()
1540-
lines_all = _count_lines_in_str(repr_str_all)
1541-
1542-
# Should show all rows (20)
1543-
assert lines_all == rows
1544-
assert "Data truncated" not in repr_str_all

0 commit comments

Comments
 (0)