@@ -1261,284 +1261,3 @@ def test_dataframe_repr_html(df) -> None:
12611261 body_lines = [f"<td(.*?)>{ v } </td>" for inner in body_data for v in inner ]
12621262 body_pattern = "(.*?)" .join (body_lines )
12631263 assert len (re .findall (body_pattern , output , re .DOTALL )) == 1
1264-
1265-
def test_display_config(df):
    """Check that ``df.display_config`` exposes the expected default values."""
    settings = df.display_config

    # Read all four properties first, then compare them with the defaults in
    # one assertion: 2 MB byte budget, 20 minimum rows, 25-character cells
    # and 10 rows in the string repr.
    observed = (
        settings.max_table_bytes,
        settings.min_table_rows,
        settings.max_cell_length,
        settings.max_table_rows_in_repr,
    )
    assert observed == (2 * 1024 * 1024, 20, 25, 10)
1275-
1276-
def test_configure_display(df):
    """Test setting display configuration properties.

    Covers three scenarios: a full update of every setting, a partial update
    that must leave the other settings untouched, and rejection of zero and
    negative values with a ``ValueError``.

    The display configuration is reset in a ``finally`` clause so that the
    cleanup also runs when one of the assertions fails.
    """
    non_positive_error = r".*must be greater than 0.*"

    try:
        # Modify every display setting at once.
        df.configure_display(
            max_table_bytes=1024 * 1024,
            min_table_rows=10,
            max_cell_length=50,
            max_table_rows_in_repr=15,
        )

        # Verify the changes took effect.
        config = df.display_config
        assert config.max_table_bytes == 1024 * 1024  # 1 MB
        assert config.min_table_rows == 10
        assert config.max_cell_length == 50
        assert config.max_table_rows_in_repr == 15

        # Partial update: only one property changes, the rest are retained.
        df.configure_display(max_table_rows_in_repr=5)
        config = df.display_config
        assert config.max_table_bytes == 1024 * 1024  # previous value retained
        assert config.min_table_rows == 10  # previous value retained
        assert config.max_cell_length == 50  # previous value retained
        assert config.max_table_rows_in_repr == 5  # only this value changed

        # Zero values are rejected.
        with pytest.raises(ValueError, match=non_positive_error):
            df.configure_display(
                max_table_bytes=0, min_table_rows=0, max_cell_length=0
            )

        # Negative values, as a user might pass by accident. Since these are
        # usize in Rust, a Python ValueError is expected for each of them.
        negative_cases = (
            {"max_table_bytes": -1},
            {"min_table_rows": -5},
            {"max_cell_length": -10},
        )
        for bad_kwargs in negative_cases:
            with pytest.raises(ValueError, match=non_positive_error):
                df.configure_display(**bad_kwargs)
    finally:
        # Reset for next tests, even if an assertion above failed.
        df.reset_display_config()
1322-
1323-
def test_reset_display_config(df):
    """Check that ``reset_display_config`` restores every default value."""
    # Start from a configuration in which every setting is overridden.
    overrides = {
        "max_table_bytes": 1024 * 1024,
        "min_table_rows": 10,
        "max_cell_length": 50,
        "max_table_rows_in_repr": 15,
    }
    df.configure_display(**overrides)

    # The overrides must be visible before the reset is exercised.
    modified = df.display_config
    assert modified.max_table_bytes == 1024 * 1024
    assert modified.min_table_rows == 10
    assert modified.max_cell_length == 50
    assert modified.max_table_rows_in_repr == 15

    df.reset_display_config()

    # After the reset every property is back at its default.
    restored = df.display_config
    assert restored.max_table_bytes == 2 * 1024 * 1024  # 2 MB
    assert restored.min_table_rows == 20
    assert restored.max_cell_length == 25
    assert restored.max_table_rows_in_repr == 10
1350-
1351-
def test_min_table_rows_display(ctx):
    """Check that the HTML repr shows at least ``min_table_rows`` rows."""
    total_rows = 100  # more rows than the configured minimum
    min_rows = 30

    df = _create_numeric_test_df(ctx, total_rows)
    df.configure_display(min_table_rows=min_rows)

    html = df._repr_html_()

    # Each row has a <tr> tag; one of them belongs to the header row.
    shown_rows = html.count("<tr>") - 1

    assert shown_rows >= min_rows, (
        f"Expected at least {min_rows} rows, got {shown_rows}"
    )

    # Either every row is shown, or the output must say it was truncated.
    assert shown_rows >= total_rows or "Data truncated" in html
1377-
1378-
def test_max_table_bytes_display(ctx):
    """Check that lowering ``max_table_bytes`` reduces the rows rendered as HTML."""
    # 50 rows, each holding a 1000-character string, so the table is large
    # relative to the small byte limit applied below.
    payload = pa.array(["x" * 1000] * 50)
    batch = pa.RecordBatch.from_arrays([payload], names=["large_data"])
    df = ctx.create_dataframe([[batch]])

    # Baseline: rows rendered with the default settings (header excluded).
    rows_by_default = df._repr_html_().count("<tr>") - 1

    # Shrink the byte budget to 5000 bytes and render again.
    df.configure_display(max_table_bytes=5000)
    limited_html = df._repr_html_()
    rows_when_limited = limited_html.count("<tr>") - 1

    assert rows_when_limited < rows_by_default, (
        "Expected fewer rows with byte limit. "
        f"Default: {rows_by_default}, Limited: {rows_when_limited}"
    )

    # The limited output must announce that rows were dropped.
    assert "Data truncated" in limited_html
1404-
1405-
def test_max_cell_length_display(ctx):
    """Check that cells longer than ``max_cell_length`` are rendered as expandable."""
    texts = [
        "short",
        "medium text",
        "this is a very long string that should be truncated",
    ]
    longest = texts[-1]
    limit = 10  # shorter than the longest value above

    column = pa.array(texts)
    df = ctx.create_dataframe(
        [[pa.RecordBatch.from_arrays([column], names=["text"])]]
    )
    df.configure_display(max_cell_length=limit)

    html = df._repr_html_()

    # Markup used for cells that are too long to show in full.
    assert "expandable-container" in html
    assert 'class="expandable"' in html

    # The shortened prefix, the expand button and the full text (hidden)
    # are all expected to be present in the HTML.
    assert longest[:limit] in html
    assert "expand-btn" in html
    assert longest in html
1435-
1436-
def test_display_config_repr_string(ctx):
    """Check that the string repr honours the ``min_table_rows`` setting."""
    # First pass: 7 rows with a minimum of 5 (the repr shows at most 10 rows
    # by default, so 7 rows stay below that cap).
    first_min = 5
    small_df = _create_numeric_test_df(ctx, 7)
    small_df.configure_display(min_table_rows=first_min)

    first_count = _count_lines_in_str(repr(small_df))
    assert first_count >= first_min

    # Second pass: a fresh 11-row frame (recreated to reset the state) with a
    # higher minimum of 7.
    second_min = 7
    larger_df = _create_numeric_test_df(ctx, 11)
    larger_df.configure_display(min_table_rows=second_min)

    second_repr = repr(larger_df)
    # The output must report that it was truncated.
    assert "Data truncated" in second_repr

    second_count = _count_lines_in_str(second_repr)

    # More rows than in the first pass, and at least the configured minimum.
    assert second_count > first_count
    assert second_count >= second_min
1472-
1473-
1474- def _count_lines_in_str (repr_str : str ) -> int :
1475- """Count the number of rows displayed in a string representation.
1476-
1477- Args:
1478- repr_str: String representation of the DataFrame.
1479-
1480- Returns:
1481- Number of rows that appear in the string representation.
1482- """
1483- # DataFrame tables are formatted with | value | patterns
1484- # Count lines that match actual data rows (not headers or separators)
1485- value_lines = 0
1486- for line in repr_str .split ("\n " ):
1487- # Look for lines like "| 0 |", "| 1 |", etc.
1488- if re .search (r"\|\s*\d+\s*\|" , line ):
1489- value_lines += 1
1490- return value_lines
1491-
1492-
def _create_numeric_test_df(ctx, rows) -> DataFrame:
    """Build a one-column test DataFrame holding the integers 0 to rows-1.

    Args:
        ctx: SessionContext to use for creating the dataframe.
        rows: Number of rows to create.

    Returns:
        DataFrame with a single column "values" containing numbers 0 to rows-1.
    """
    values = pa.array(list(range(rows)))
    # Wrap the single record batch in the nested list passed to
    # ``create_dataframe``.
    return ctx.create_dataframe(
        [[pa.RecordBatch.from_arrays([values], names=["values"])]]
    )
1506-
1507-
def test_max_table_rows_in_repr(ctx):
    """Check that ``max_table_rows_in_repr`` caps the rows in the string repr.

    The same 20-row frame is rendered three times: with the default cap,
    with a cap of 15, and with a cap of 25 that exceeds the row count.
    """
    total_rows = 20  # more than the default cap of 10
    df = _create_numeric_test_df(ctx, total_rows)

    # Default settings: no more than 10 rows, and truncation is reported.
    default_repr = repr(df)
    default_shown = _count_lines_in_str(default_repr)
    assert default_shown <= 10
    assert "Data truncated" in default_repr

    # Raised cap: more rows than before, but still bounded by the cap.
    raised_cap = 15
    df.configure_display(max_table_rows_in_repr=raised_cap)
    raised_repr = repr(df)
    raised_shown = _count_lines_in_str(raised_repr)
    assert default_shown < raised_shown <= raised_cap
    assert "Data truncated" in raised_repr

    # Cap above the total row count: all 20 rows appear, nothing truncated.
    df.configure_display(max_table_rows_in_repr=25)
    full_repr = repr(df)
    assert _count_lines_in_str(full_repr) == total_rows
    assert "Data truncated" not in full_repr
0 commit comments