4141)
4242from pyarrow .csv import write_csv
4343
44-
44+ MB = 1024 * 1024
4545@pytest .fixture
4646def ctx ():
4747 return SessionContext ()
@@ -116,6 +116,30 @@ def clean_formatter_state():
116116 """Reset the HTML formatter after each test."""
117117 reset_formatter ()
118118
119+ # custom style for testing with html formatter
120+ class CustomStyleProvider :
121+ def get_cell_style (self ) -> str :
122+ return (
123+ "background-color: #f5f5f5; color: #333; padding: 8px; border: "
124+ "1px solid #ddd;"
125+ )
126+
127+ def get_header_style (self ) -> str :
128+ return (
129+ "background-color: #4285f4; color: white; font-weight: bold; "
130+ "padding: 10px; border: 1px solid #3367d6;"
131+ )
132+
133+ def count_table_rows (html_content : str ) -> int :
134+ """Count the number of table rows in HTML content.
135+
136+ Args:
137+ html_content: HTML string to analyze
138+
139+ Returns:
140+ Number of table rows found (number of <tr> tags)
141+ """
142+ return len (re .findall (r"<tr" , html_content ))
119143
120144def test_select (df ):
121145 df_1 = df .select (
@@ -671,11 +695,10 @@ def test_window_frame_defaults_match_postgres(partitioned_df):
671695 assert df_2 .sort (col_a ).to_pydict () == expected
672696
673697
674- def test_html_formatter_configuration (df , clean_formatter_state ):
698+ def test_html_formatter_cell_dimension (df , clean_formatter_state ):
675699 """Test configuring the HTML formatter with different options."""
676700 # Configure with custom settings
677701 configure_formatter (
678- max_cell_length = 5 ,
679702 max_width = 500 ,
680703 max_height = 200 ,
681704 enable_cell_expansion = False ,
@@ -693,19 +716,6 @@ def test_html_formatter_configuration(df, clean_formatter_state):
693716def test_html_formatter_custom_style_provider (df , clean_formatter_state ):
694717 """Test using custom style providers with the HTML formatter."""
695718
696- class CustomStyleProvider :
697- def get_cell_style (self ) -> str :
698- return (
699- "background-color: #f5f5f5; color: #333; padding: 8px; border: "
700- "1px solid #ddd;"
701- )
702-
703- def get_header_style (self ) -> str :
704- return (
705- "background-color: #4285f4; color: white; font-weight: bold; "
706- "padding: 10px; border: 1px solid #3367d6;"
707- )
708-
709719 # Configure with custom style provider
710720 configure_formatter (style_provider = CustomStyleProvider ())
711721
@@ -917,37 +927,67 @@ def get_header_style(self) -> str:
917927 assert "color: #5af" in html_output # Even numbers
918928
919929
920- def test_html_formatter_memory_and_rows ( ):
930+ def test_html_formatter_memory ( df , clean_formatter_state ):
921931 """Test the memory and row control parameters in DataFrameHtmlFormatter."""
922-
923- # Test default values
924- formatter = DataFrameHtmlFormatter ()
925- assert formatter .max_memory_bytes == 2 * 1024 * 1024 # 2 MB
926- assert formatter .min_rows_display == 20
927- assert formatter .repr_rows == 10
928-
929- # Test custom values
930- formatter = DataFrameHtmlFormatter (
931- max_memory_bytes = 1024 * 1024 , # 1 MB
932- min_rows_display = 10 ,
933- repr_rows = 5
932+ configure_formatter (
933+ max_memory_bytes = 10 ,
934+ min_rows_display = 1
934935 )
935- assert formatter .max_memory_bytes == 1024 * 1024
936- assert formatter .min_rows_display == 10
937- assert formatter .repr_rows == 5
938-
939- # Test extremely large values and tiny values (edge cases)
940- # These should not raise exceptions
941- extreme_formatter = DataFrameHtmlFormatter (
942- max_memory_bytes = 10 * 1024 * 1024 * 1024 , # 10 GB
943- min_rows_display = 1 ,
944- repr_rows = 1
936+ html_output = df ._repr_html_ ()
937+
938+ # Count the number of table rows in the output
939+ tr_count = count_table_rows (html_output )
940+ # With a tiny memory limit of 10 bytes, the formatter should display
941+ # the minimum number of rows (1) plus a message about truncation
942+ assert tr_count == 2 # 1 for header row, 1 for data row
943+ assert "data truncated" in html_output .lower ()
944+
945+ configure_formatter (
946+ max_memory_bytes = 10 * MB ,
947+ min_rows_display = 2
945948 )
946- assert extreme_formatter .max_memory_bytes == 10 * 1024 * 1024 * 1024
947- assert extreme_formatter .min_rows_display == 1
948- assert extreme_formatter .repr_rows == 1
949-
949+ html_output = df ._repr_html_ ()
950+ # With larger memory limit and min_rows=2, should display all rows
951+ tr_count = count_table_rows (html_output )
952+ # Table should have header row (1) + 3 data rows = 4 rows
953+ assert tr_count == 4
954+ # No truncation message should appear
955+ assert "data truncated" not in html_output .lower ()
956+
957+ def test_html_formatter_repr_rows (df , clean_formatter_state ):
958+ configure_formatter (
959+ min_rows_display = 2 ,
960+ repr_rows = 2
961+ )
962+ html_output = df ._repr_html_ ()
963+
964+ tr_count = count_table_rows (html_output )
965+ # Table should have header row (1) + 2 data rows = 3 rows
966+ assert tr_count == 3
967+
968+ configure_formatter (
969+ min_rows_display = 2 ,
970+ repr_rows = 3
971+ )
972+ html_output = df ._repr_html_ ()
973+
974+ tr_count = count_table_rows (html_output )
975+ # Table should have header row (1) + 3 data rows = 4 rows
976+ assert tr_count == 4
977+
978+
979+ def test_html_formatter_validation ():
950980 # Test validation for invalid parameters
981+
982+ with pytest .raises (ValueError , match = "max_cell_length must be a positive integer" ):
983+ DataFrameHtmlFormatter (max_cell_length = 0 )
984+
985+ with pytest .raises (ValueError , match = "max_width must be a positive integer" ):
986+ DataFrameHtmlFormatter (max_width = 0 )
987+
988+ with pytest .raises (ValueError , match = "max_height must be a positive integer" ):
989+ DataFrameHtmlFormatter (max_height = 0 )
990+
951991 with pytest .raises (ValueError , match = "max_memory_bytes must be a positive integer" ):
952992 DataFrameHtmlFormatter (max_memory_bytes = 0 )
953993
@@ -967,55 +1007,56 @@ def test_html_formatter_memory_and_rows():
9671007 DataFrameHtmlFormatter (repr_rows = - 10 )
9681008
9691009
970- def test_custom_style_provider_html_formatter (df , clean_formatter_state ):
1010+ def test_configure_formatter (df , clean_formatter_state ):
9711011 """Test using custom style providers with the HTML formatter and configured
9721012 parameters."""
9731013
974- class CustomStyleProvider :
975- def get_cell_style (self ) -> str :
976- return (
977- "background-color: #f5f5f5; color: #333; padding: 8px; border: "
978- "1px solid #ddd;"
979- )
980-
981- def get_header_style (self ) -> str :
982- return (
983- "background-color: #4285f4; color: white; font-weight: bold; "
984- "padding: 10px; border: 1px solid #3367d6;"
985- )
986-
987- # Configure with custom style provider
988- configure_formatter (style_provider = CustomStyleProvider ())
989-
990- html_output = df ._repr_html_ ()
991-
992- # Verify our custom styles were applied
993- assert "background-color: #4285f4" in html_output
994- assert "color: white" in html_output
995- assert "background-color: #f5f5f5" in html_output
996-
997- # Reset for the next part of the test
1014+ # these are non-default values
1015+ MAX_CELL_LENGTH = 10
1016+ MAX_WIDTH = 500
1017+ MAX_HEIGHT = 30
1018+ MAX_MEMORY_BYTES = 3 * MB
1019+ MIN_ROWS_DISPLAY = 2
1020+ REPR_ROWS = 2
1021+ ENABLE_CELL_EXPANSION = False
1022+ SHOW_TRUNCATION_MESSAGE = False
1023+ USE_SHARED_STYLES = False
1024+
9981025 reset_formatter ()
1026+ formatter_default = get_formatter ()
1027+
1028+ assert formatter_default .max_cell_length != MAX_CELL_LENGTH
1029+ assert formatter_default .max_width != MAX_WIDTH
1030+ assert formatter_default .max_height != MAX_HEIGHT
1031+ assert formatter_default .max_memory_bytes != MAX_MEMORY_BYTES
1032+ assert formatter_default .min_rows_display != MIN_ROWS_DISPLAY
1033+ assert formatter_default .repr_rows != REPR_ROWS
1034+ assert formatter_default .enable_cell_expansion != ENABLE_CELL_EXPANSION
1035+ assert formatter_default .show_truncation_message != SHOW_TRUNCATION_MESSAGE
1036+ assert formatter_default .use_shared_styles != USE_SHARED_STYLES
1037+
9991038 # Configure with custom style provider and additional parameters
10001039 configure_formatter (
1001- style_provider = CustomStyleProvider (),
1002- max_memory_bytes = 3 * 1024 * 1024 , # 3 MB
1003- min_rows_display = 15 ,
1004- repr_rows = 7
1040+ max_cell_length = MAX_CELL_LENGTH ,
1041+ max_width = MAX_WIDTH ,
1042+ max_height = MAX_HEIGHT ,
1043+ max_memory_bytes = MAX_MEMORY_BYTES ,
1044+ min_rows_display = MIN_ROWS_DISPLAY ,
1045+ repr_rows = REPR_ROWS ,
1046+ enable_cell_expansion = ENABLE_CELL_EXPANSION ,
1047+ show_truncation_message = SHOW_TRUNCATION_MESSAGE ,
1048+ use_shared_styles = USE_SHARED_STYLES
10051049 )
1006-
1007- html_output = df ._repr_html_ ()
1008-
1009- # Verify our custom styles were applied
1010- assert "background-color: #4285f4" in html_output
1011- assert "color: white" in html_output
1012- assert "background-color: #f5f5f5" in html_output
1013-
1014- # Test memory and row parameters were properly set
1015- formatter = get_formatter ()
1016- assert formatter .max_memory_bytes == 3 * 1024 * 1024 # 3 MB
1017- assert formatter .min_rows_display == 15
1018- assert formatter .repr_rows == 7
1050+ formatter_custom = get_formatter ()
1051+ assert formatter_custom .max_cell_length == MAX_CELL_LENGTH
1052+ assert formatter_custom .max_width == MAX_WIDTH
1053+ assert formatter_custom .max_height == MAX_HEIGHT
1054+ assert formatter_custom .max_memory_bytes == MAX_MEMORY_BYTES
1055+ assert formatter_custom .min_rows_display == MIN_ROWS_DISPLAY
1056+ assert formatter_custom .repr_rows == REPR_ROWS
1057+ assert formatter_custom .enable_cell_expansion == ENABLE_CELL_EXPANSION
1058+ assert formatter_custom .show_truncation_message == SHOW_TRUNCATION_MESSAGE
1059+ assert formatter_custom .use_shared_styles == USE_SHARED_STYLES
10191060
10201061
10211062def test_get_dataframe (tmp_path ):
@@ -1606,9 +1647,8 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame:
16061647 assert result ["new_col" ] == [3 for _i in range (3 )]
16071648
16081649
1609- def test_dataframe_repr_html_structure (df ) -> None :
1650+ def test_dataframe_repr_html_structure (df , clean_formatter_state ) -> None :
16101651 """Test that DataFrame._repr_html_ produces expected HTML output structure."""
1611- import re
16121652
16131653 output = df ._repr_html_ ()
16141654
@@ -1638,13 +1678,13 @@ def test_dataframe_repr_html_structure(df) -> None:
16381678 assert len (body_matches ) == 1 , "Expected pattern of values not found in HTML output"
16391679
16401680
1641- def test_dataframe_repr_html_values (df ):
1681+ def test_dataframe_repr_html_values (df , clean_formatter_state ):
16421682 """Test that DataFrame._repr_html_ contains the expected data values."""
16431683 html = df ._repr_html_ ()
16441684 assert html is not None
16451685
16461686 # Create a more flexible pattern that handles values being wrapped in spans
1647- # This pattern will match the sequence of values 1,4,8,2,5,5,3,6,8 regardless
1687+ # This pattern will match the sequence of values 1,4,8,2,5,5 regardless
16481688 # of formatting
16491689 pattern = re .compile (
16501690 r"<td[^>]*?>(?:<span[^>]*?>)?1(?:</span>)?</td>.*?"
@@ -1748,4 +1788,4 @@ def test_html_formatter_manual_format_html(clean_formatter_state):
17481788
17491789 assert "<style>" in local_html_1
17501790 assert "<style>" in local_html_2
1751-
1791+
0 commit comments