Skip to content

Commit b467100

Browse files
committed
test: Add fixture for test data and refactor tests to use it
1 parent d30c641 commit b467100

File tree

2 files changed

+35
-49
lines changed

2 files changed

+35
-49
lines changed

python/tests/test_dataframe.py

Lines changed: 35 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ def df():
5252
return ctx.from_arrow(batch)
5353

5454

55+
@pytest.fixture
56+
def data():
57+
return [{"a": 1, "b": "x" * 50, "c": 3}] * 100
58+
59+
5560
def test_display_config():
5661
# Test display_config initialization
5762
config = DataframeDisplayConfig(
@@ -93,12 +98,12 @@ def test_display_config():
9398
config.max_table_rows_in_repr = -5
9499

95100

96-
def test_session_with_display_config():
101+
def test_session_with_display_config(data):
97102
# Test with_display_config returns a new context with updated config
98103
ctx = SessionContext()
99104

100105
# Verify the default values are used initially
101-
df = ctx.from_pylist([{"a": 1, "b": "x" * 50, "c": 3}] * 100)
106+
df = ctx.from_pylist(data)
102107
html_repr = df._repr_html_()
103108

104109
# Create a new context with custom display config
@@ -110,7 +115,7 @@ def test_session_with_display_config():
110115
)
111116

112117
# Create a dataframe with the same data but using the new context
113-
df2 = ctx2.from_pylist([{"a": 1, "b": "x" * 50, "c": 3}] * 100)
118+
df2 = ctx2.from_pylist(data)
114119
html_repr2 = df2._repr_html_()
115120

116121
# The HTML representation should be different with different display configs
@@ -121,7 +126,7 @@ def test_session_with_display_config():
121126
assert f'>{("x" * 10)}</span>' in html_repr2
122127

123128

124-
def test_display_config_in_init():
129+
def test_display_config_in_init(data):
125130
# Test providing display config directly in SessionContext constructor
126131
display_config = DataframeDisplayConfig(
127132
max_table_bytes=1024,
@@ -131,7 +136,7 @@ def test_display_config_in_init():
131136
)
132137

133138
ctx = SessionContext()
134-
df1 = ctx.from_pylist([{"a": 1, "b": "x" * 50, "c": 3}] * 100)
139+
df1 = ctx.from_pylist(data)
135140
html_repr1 = df1._repr_html_()
136141

137142
# Create a context with custom display config through the with_display_config method
@@ -141,7 +146,7 @@ def test_display_config_in_init():
141146
max_cell_length=10,
142147
max_table_rows_in_repr=3,
143148
)
144-
df2 = ctx2.from_pylist([{"a": 1, "b": "x" * 50, "c": 3}] * 100)
149+
df2 = ctx2.from_pylist(data)
145150
html_repr2 = df2._repr_html_()
146151

147152
# Both methods should result in equivalent display configuration
@@ -1360,87 +1365,77 @@ def test_dataframe_repr_html(df) -> None:
13601365
assert len(re.findall(body_pattern, output, re.DOTALL)) == 1
13611366

13621367

1363-
def test_display_config_affects_repr():
1368+
def test_display_config_affects_repr(data):
13641369
max_table_rows_in_repr = 3
13651370
# Create a context with custom display config
13661371
ctx = SessionContext().with_display_config(
13671372
max_table_rows_in_repr=max_table_rows_in_repr
13681373
)
13691374

13701375
# Create a DataFrame with more rows than the display limit
1371-
data = [{"a": i, "b": f"value_{i}", "c": i * 10} for i in range(10)]
13721376
df = ctx.from_pylist(data)
13731377

1374-
# Get the string representation
1375-
# +---+---------+----+
1376-
# | a | b | c |
1377-
# +---+---------+----+
1378-
# | 0 | value_0 | 0 |
1379-
# | 1 | value_1 | 10 |
1380-
# | 2 | value_2 | 20 |
1381-
# +---+---------+----+
1382-
# Data truncated.
13831378
repr_str = repr(df)
13841379

13851380
# The representation should show truncated data (3 rows as specified)
13861381
assert (
1387-
repr_str.count("\n") <= max_table_rows_in_repr + 5
1388-
) # header row + separator lines + data rows + possibly truncation message
1382+
# 5 = 1 header row + 3 separator lines + 1 truncation message
1383+
repr_str.count("\n")
1384+
<= max_table_rows_in_repr + 5
1385+
)
13891386
assert "Data truncated" in repr_str
13901387

13911388
# Create a context with larger display limit
1392-
ctx2 = SessionContext().with_display_config(max_table_rows_in_repr=15)
1389+
max_table_rows_in_repr = 100
1390+
ctx2 = SessionContext().with_display_config(
1391+
max_table_rows_in_repr=max_table_rows_in_repr
1392+
)
13931393

13941394
df2 = ctx2.from_pylist(data)
13951395
repr_str2 = repr(df2)
13961396

13971397
# Should show all data without truncation message
1398-
assert repr_str2.count("\n") >= 10 # All rows should be shown
1398+
assert (
1399+
# 4 = 1 header row + 3 separator lines
1400+
repr_str2.count("\n")
1401+
== max_table_rows_in_repr + 4
1402+
) # All rows should be shown
13991403
assert "Data truncated" not in repr_str2
14001404

14011405

1402-
def test_display_config_affects_html_repr():
1406+
def test_display_config_affects_html_repr(data):
14031407
# Create a context with custom display config to show only a small cell length
14041408
ctx = SessionContext().with_display_config(max_cell_length=5)
14051409

14061410
# Create a DataFrame with a column containing long strings
1407-
data = [
1408-
{"a": 1, "b": "This is a very long string that should be truncated", "c": 100}
1409-
]
14101411
df = ctx.from_pylist(data)
14111412

14121413
# Get the HTML representation
14131414
html_str = df._repr_html_()
14141415

14151416
# The cell should be truncated to 5 characters and have expansion button
1416-
assert ">This " in html_str # 5 character limit
1417-
assert "expandable" in html_str
1418-
assert "expand-btn" in html_str
1417+
assert ">xxxxx" in html_str # 5 character limit
1418+
expandable_class = 'class="expandable-container"'
1419+
assert expandable_class in html_str
14191420

14201421
# Create a context with larger cell length limit
1421-
ctx2 = SessionContext().with_display_config(max_cell_length=50)
1422+
ctx2 = SessionContext().with_display_config(max_cell_length=60)
14221423

14231424
df2 = ctx2.from_pylist(data)
14241425
html_str2 = df2._repr_html_()
14251426

14261427
# The 50-character string fits within the 60-character limit, so it shouldn't be truncated
1427-
if "expandable" in html_str2:
1428-
# If it still has an expandable div, it should contain more characters
1429-
assert ">This is a very long string that" in html_str2
1430-
else:
1431-
# Or it might not need expansion at all
1432-
assert "This is a very long string that should be truncated" in html_str2
1428+
assert expandable_class not in html_str2
14331429

14341430

1435-
def test_display_config_rows_limit_in_html():
1431+
def test_display_config_rows_limit_in_html(data):
14361432
max_table_rows = 5
14371433
# Create a context with custom display config to limit rows
14381434
ctx = SessionContext().with_display_config(
14391435
max_table_rows_in_repr=max_table_rows,
14401436
)
14411437

14421438
# Create a DataFrame with more rows (100) than the display limit
1443-
data = [{"a": i, "b": f"value_{i}", "c": i * 10} for i in range(10)]
14441439
df = ctx.from_pylist(data)
14451440

14461441
# Get the HTML representation
@@ -1452,7 +1447,7 @@ def test_display_config_rows_limit_in_html():
14521447
assert "Data truncated" in html_str
14531448

14541449
# Create a context with larger row limit
1455-
max_table_rows = 20
1450+
max_table_rows = 100
14561451
ctx2 = SessionContext().with_display_config(
14571452
max_table_rows_in_repr=max_table_rows
14581453
) # Show more rows
@@ -1462,11 +1457,11 @@ def test_display_config_rows_limit_in_html():
14621457

14631458
# Should show all rows
14641459
row_count2 = html_str2.count("<tr>") - 1 # Subtract 1 for header row
1465-
assert row_count2 == 10 # Should show all 10 rows
1460+
assert row_count2 == max_table_rows
14661461
assert "Data truncated" not in html_str2
14671462

14681463

1469-
def test_display_config_max_bytes_limit():
1464+
def test_display_config_max_bytes_limit(data):
14701465
min_table_rows = 10
14711466
max_table_rows = 20
14721467
# Create a context with custom display config with very small byte limit
@@ -1477,12 +1472,6 @@ def test_display_config_max_bytes_limit():
14771472
) # Very small limit
14781473

14791474
# Create a DataFrame with large content
1480-
# Generate some data with long strings to hit the byte limit quickly
1481-
large_string = "x" * 50
1482-
data = [
1483-
{"a": i, "b": large_string, "c": large_string}
1484-
for i in range(20) # 20 rows with long strings
1485-
]
14861475
df = ctx.from_pylist(data)
14871476

14881477
# Get the HTML representation

src/dataframe.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -944,9 +944,6 @@ async fn collect_record_batches_to_display(
944944
record_batches.push(rb);
945945
}
946946
}
947-
println!(
948-
"==> after while, size_estimate_so_far: {size_estimate_so_far}, rows_so_far: {rows_so_far}"
949-
);
950947

951948
if record_batches.is_empty() {
952949
return Ok((Vec::default(), false));

0 commit comments

Comments
 (0)