11"""HTML formatting utilities for DataFusion DataFrames."""
22
3+ from __future__ import annotations
4+
35from typing import (
46 Any ,
57 Callable ,
6- Dict ,
7- List ,
88 Optional ,
99 Protocol ,
10- Type ,
1110 runtime_checkable ,
1211)
1312
@@ -43,7 +42,10 @@ def get_cell_style(self) -> str:
4342 Returns:
4443 CSS style string
4544 """
46- return "border: 1px solid black; padding: 8px; text-align: left; white-space: nowrap;"
45+ return (
46+ "border: 1px solid black; padding: 8px; text-align: left; "
47+ "white-space: nowrap;"
48+ )
4749
4850 def get_header_style (self ) -> str :
4951 """Get the CSS style for header cells.
@@ -73,11 +75,13 @@ class DataFrameHtmlFormatter:
7375 max_cell_length: Maximum characters to display in a cell before truncation
7476 max_width: Maximum width of the HTML table in pixels
7577 max_height: Maximum height of the HTML table in pixels
76- enable_cell_expansion: Whether to add expand/collapse buttons for long cell values
78+ enable_cell_expansion: Whether to add expand/collapse buttons for long cell
79+ values
7780 custom_css: Additional CSS to include in the HTML output
7881 show_truncation_message: Whether to display a message when data is truncated
7982 style_provider: Custom provider for cell and header styles
80- use_shared_styles: Whether to load styles and scripts only once per notebook session
83+ use_shared_styles: Whether to load styles and scripts only once per notebook
84+ session
8185 """
8286
8387 # Class variable to track if styles have been loaded in the notebook
@@ -93,30 +97,72 @@ def __init__(
9397 show_truncation_message : bool = True ,
9498 style_provider : Optional [StyleProvider ] = None ,
9599 use_shared_styles : bool = True ,
96- ):
100+ ) -> None :
101+ """Initialize the HTML formatter.
102+
103+ Parameters
104+ ----------
105+ max_cell_length : int, default 25
106+ Maximum length of cell content before truncation.
107+ max_width : int, default 1000
108+ Maximum width of the displayed table in pixels.
109+ max_height : int, default 300
110+ Maximum height of the displayed table in pixels.
111+ enable_cell_expansion : bool, default True
112+ Whether to allow cells to expand when clicked.
113+ custom_css : str, optional
114+ Custom CSS to apply to the HTML table.
115+ show_truncation_message : bool, default True
116+ Whether to show a message indicating that content has been truncated.
117+ style_provider : StyleProvider, optional
118+ Provider of CSS styles for the HTML table. If None, DefaultStyleProvider
119+ is used.
120+ use_shared_styles : bool, default True
121+ Whether to use shared styles across multiple tables.
122+
123+ Raises:
124+ ------
125+ ValueError
126+ If max_cell_length, max_width, or max_height is not a positive integer.
127+ TypeError
128+ If enable_cell_expansion, show_truncation_message, or use_shared_styles is
129+ not a boolean,
130+ or if custom_css is provided but is not a string,
131+ or if style_provider is provided but does not implement the StyleProvider
132+ protocol.
133+ """
97134 # Validate numeric parameters
135+
98136 if not isinstance (max_cell_length , int ) or max_cell_length <= 0 :
99- raise ValueError ("max_cell_length must be a positive integer" )
137+ msg = "max_cell_length must be a positive integer"
138+ raise ValueError (msg )
100139 if not isinstance (max_width , int ) or max_width <= 0 :
101- raise ValueError ("max_width must be a positive integer" )
140+ msg = "max_width must be a positive integer"
141+ raise ValueError (msg )
102142 if not isinstance (max_height , int ) or max_height <= 0 :
103- raise ValueError ("max_height must be a positive integer" )
143+ msg = "max_height must be a positive integer"
144+ raise ValueError (msg )
104145
105146 # Validate boolean parameters
106147 if not isinstance (enable_cell_expansion , bool ):
107- raise TypeError ("enable_cell_expansion must be a boolean" )
148+ msg = "enable_cell_expansion must be a boolean"
149+ raise TypeError (msg )
108150 if not isinstance (show_truncation_message , bool ):
109- raise TypeError ("show_truncation_message must be a boolean" )
151+ msg = "show_truncation_message must be a boolean"
152+ raise TypeError (msg )
110153 if not isinstance (use_shared_styles , bool ):
111- raise TypeError ("use_shared_styles must be a boolean" )
154+ msg = "use_shared_styles must be a boolean"
155+ raise TypeError (msg )
112156
113157 # Validate custom_css
114158 if custom_css is not None and not isinstance (custom_css , str ):
115- raise TypeError ("custom_css must be None or a string" )
159+ msg = "custom_css must be None or a string"
160+ raise TypeError (msg )
116161
117162 # Validate style_provider
118163 if style_provider is not None and not isinstance (style_provider , StyleProvider ):
119- raise TypeError ("style_provider must implement the StyleProvider protocol" )
164+ msg = "style_provider must implement the StyleProvider protocol"
165+ raise TypeError (msg )
120166
121167 self .max_cell_length = max_cell_length
122168 self .max_width = max_width
@@ -127,12 +173,12 @@ def __init__(
127173 self .style_provider = style_provider or DefaultStyleProvider ()
128174 self .use_shared_styles = use_shared_styles
129175 # Registry for custom type formatters
130- self ._type_formatters : Dict [ Type , CellFormatter ] = {}
176+ self ._type_formatters : dict [ type , CellFormatter ] = {}
131177 # Custom cell builders
132178 self ._custom_cell_builder : Optional [Callable [[Any , int , int , str ], str ]] = None
133179 self ._custom_header_builder : Optional [Callable [[Any ], str ]] = None
134180
135- def register_formatter (self , type_class : Type , formatter : CellFormatter ) -> None :
181+ def register_formatter (self , type_class : type , formatter : CellFormatter ) -> None :
136182 """Register a custom formatter for a specific data type.
137183
138184 Args:
@@ -182,7 +228,7 @@ def format_html(
182228 batches : list ,
183229 schema : Any ,
184230 has_more : bool = False ,
185- table_uuid : Optional [ str ] = None ,
231+ table_uuid : str | None = None ,
186232 ) -> str :
187233 """Format record batches as HTML.
188234
@@ -206,15 +252,8 @@ def format_html(
206252
207253 # Validate schema
208254 if schema is None or not hasattr (schema , "__iter__" ):
209- if batches :
210- import warnings
211-
212- warnings .warn (
213- "Schema not provided or invalid. Using schema from first batch."
214- )
215- schema = batches [0 ].schema
216- else :
217- raise TypeError ("Schema must be provided when batches list is empty" )
255+ msg = "Schema must be provided"
256+ raise TypeError (msg )
218257
219258 # Generate a unique ID if none provided
220259 table_uuid = table_uuid or f"df-{ id (batches )} "
@@ -254,7 +293,7 @@ def format_html(
254293
255294 return "\n " .join (html )
256295
257- def _build_html_header (self ) -> List [str ]:
296+ def _build_html_header (self ) -> list [str ]:
258297 """Build the HTML header with CSS styles."""
259298 html = []
260299 html .append ("<style>" )
@@ -266,17 +305,18 @@ def _build_html_header(self) -> List[str]:
266305 html .append ("</style>" )
267306 return html
268307
269- def _build_table_container_start (self ) -> List [str ]:
308+ def _build_table_container_start (self ) -> list [str ]:
270309 """Build the opening tags for the table container."""
271310 html = []
272311 html .append (
273312 f'<div style="width: 100%; max-width: { self .max_width } px; '
274- f'max-height: { self .max_height } px; overflow: auto; border: 1px solid #ccc;">'
313+ f"max-height: { self .max_height } px; overflow: auto; border: "
314+ '1px solid #ccc;">'
275315 )
276316 html .append ('<table style="border-collapse: collapse; min-width: 100%">' )
277317 return html
278318
279- def _build_table_header (self , schema : Any ) -> List [str ]:
319+ def _build_table_header (self , schema : Any ) -> list [str ]:
280320 """Build the HTML table header with column names."""
281321 html = []
282322 html .append ("<thead>" )
@@ -286,13 +326,14 @@ def _build_table_header(self, schema: Any) -> List[str]:
286326 html .append (self ._custom_header_builder (field ))
287327 else :
288328 html .append (
289- f"<th style='{ self .style_provider .get_header_style ()} '>{ field .name } </th>"
329+ f"<th style='{ self .style_provider .get_header_style ()} '>"
330+ f"{ field .name } </th>"
290331 )
291332 html .append ("</tr>" )
292333 html .append ("</thead>" )
293334 return html
294335
295- def _build_table_body (self , batches : list , table_uuid : str ) -> List [str ]:
336+ def _build_table_body (self , batches : list , table_uuid : str ) -> list [str ]:
296337 """Build the HTML table body with data rows."""
297338 html = []
298339 html .append ("<tbody>" )
@@ -312,7 +353,8 @@ def _build_table_body(self, batches: list, table_uuid: str) -> List[str]:
312353
313354 # Then apply either custom cell builder or standard cell formatting
314355 if self ._custom_cell_builder :
315- # Pass both the raw value and formatted value to let the builder decide
356+ # Pass both the raw value and formatted value to let the
357+ # builder decide
316358 cell_html = self ._custom_cell_builder (
317359 raw_value , row_count , col_idx , table_uuid
318360 )
@@ -346,20 +388,14 @@ def _get_cell_value(self, column: Any, row_idx: int) -> Any:
346388 The raw cell value
347389 """
348390 try :
349- # Get the value from the column
350391 value = column [row_idx ]
351392
352- # Try to convert scalar types to Python native types
353- try :
354- # Arrow scalars typically have a .as_py() method
355- if hasattr (value , "as_py" ):
356- return value .as_py ()
357- except (AttributeError , TypeError ):
358- pass
359-
393+ if hasattr (value , "as_py" ):
394+ return value .as_py ()
395+ except (AttributeError , TypeError ):
396+ pass
397+ else :
360398 return value
361- except (IndexError , TypeError ):
362- return ""
363399
364400 def _format_cell_value (self , value : Any ) -> str :
365401 """Format a cell value for display.
@@ -375,8 +411,7 @@ def _format_cell_value(self, value: Any) -> str:
375411 # Check for custom type formatters
376412 for type_cls , formatter in self ._type_formatters .items ():
377413 if isinstance (value , type_cls ):
378- result = formatter (value )
379- return result
414+ return formatter (value )
380415
381416 # If no formatter matched, return string representation
382417 return str (value )
@@ -389,9 +424,11 @@ def _build_expandable_cell(
389424 return (
390425 f"<td style='{ self .style_provider .get_cell_style ()} '>"
391426 f"<div class='expandable-container'>"
392- f"<span class='expandable' id='{ table_uuid } -min-text-{ row_count } -{ col_idx } '>"
427+ "<span class='expandable' "
428+ f"id='{ table_uuid } -min-text-{ row_count } -{ col_idx } '>"
393429 f"{ short_value } </span>"
394- f"<span class='full-text' id='{ table_uuid } -full-text-{ row_count } -{ col_idx } '>"
430+ "<span class='full-text' "
431+ f"id='{ table_uuid } -full-text-{ row_count } -{ col_idx } '>"
395432 f"{ formatted_value } </span>"
396433 f"<button class='expand-btn' "
397434 f"onclick=\" toggleDataFrameCellText('{ table_uuid } ',{ row_count } ,{ col_idx } )\" >"
@@ -406,7 +443,7 @@ def _build_regular_cell(self, formatted_value: str) -> str:
406443 f"<td style='{ self .style_provider .get_cell_style ()} '>{ formatted_value } </td>"
407444 )
408445
409- def _build_html_footer (self , has_more : bool ) -> List [str ]:
446+ def _build_html_footer (self , has_more : bool ) -> list [str ]:
410447 """Build the HTML footer with JavaScript and messages."""
411448 html = []
412449
@@ -455,8 +492,12 @@ def _get_javascript(self) -> str:
455492 return """
456493 <script>
457494 function toggleDataFrameCellText(table_uuid, row, col) {
458- var shortText = document.getElementById(table_uuid + "-min-text-" + row + "-" + col);
459- var fullText = document.getElementById(table_uuid + "-full-text-" + row + "-" + col);
495+ var shortText = document.getElementById(
496+ table_uuid + "-min-text-" + row + "-" + col
497+ );
498+ var fullText = document.getElementById(
499+ table_uuid + "-full-text-" + row + "-" + col
500+ );
460501 var button = event.target;
461502
462503 if (fullText.style.display === "none") {
@@ -473,8 +514,29 @@ def _get_javascript(self) -> str:
473514 """
474515
475516
476- # Global formatter instance to be used by default
477- _default_formatter = DataFrameHtmlFormatter ()
class FormatterManager:
    """Holder for the global default ``DataFrameHtmlFormatter`` instance.

    The active formatter is stored in a class attribute and is read and
    replaced through the classmethods below.
    """

    # Built once, when this class body is executed.
    _default_formatter: DataFrameHtmlFormatter = DataFrameHtmlFormatter()

    @classmethod
    def get_formatter(cls) -> DataFrameHtmlFormatter:
        """Return the formatter currently installed as the global default.

        Returns:
            The global HTML formatter instance
        """
        return cls._default_formatter

    @classmethod
    def set_formatter(cls, formatter: DataFrameHtmlFormatter) -> None:
        """Install ``formatter`` as the global default.

        Args:
            formatter: The formatter instance to use globally
        """
        cls._default_formatter = formatter
        # Call the module-level refresh hook after every replacement.
        _refresh_formatter_reference()
478540
479541
480542def get_formatter () -> DataFrameHtmlFormatter :
@@ -492,7 +554,21 @@ def get_formatter() -> DataFrameHtmlFormatter:
492554 >>> formatter = get_formatter()
493555 >>> formatter.max_cell_length = 50 # Increase cell length
494556 """
495- return _default_formatter
557+ return FormatterManager .get_formatter ()
558+
559+
def set_formatter(formatter: DataFrameHtmlFormatter) -> None:
    """Set the global DataFrame HTML formatter.

    Module-level convenience wrapper that forwards to
    ``FormatterManager.set_formatter``.

    Args:
        formatter: The formatter instance to use globally

    Example:
        >>> from datafusion.html_formatter import (
        ...     DataFrameHtmlFormatter,
        ...     set_formatter,
        ... )
        >>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100)
        >>> set_formatter(custom_formatter)
    """
    FormatterManager.set_formatter(formatter)
496572
497573
498574def configure_formatter (** kwargs : Any ) -> None :
@@ -514,11 +590,7 @@ def configure_formatter(**kwargs: Any) -> None:
514590 ... use_shared_styles=True
515591 ... )
516592 """
517- global _default_formatter
518- _default_formatter = DataFrameHtmlFormatter (** kwargs )
519-
520- # Ensure the changes are reflected in existing DataFrames
521- _refresh_formatter_reference ()
593+ set_formatter (DataFrameHtmlFormatter (** kwargs ))
522594
523595
524596def reset_formatter () -> None :
@@ -531,14 +603,10 @@ def reset_formatter() -> None:
531603 >>> from datafusion.html_formatter import reset_formatter
532604 >>> reset_formatter() # Reset formatter to default settings
533605 """
534- global _default_formatter
535- _default_formatter = DataFrameHtmlFormatter ()
536-
606+ formatter = DataFrameHtmlFormatter ()
537607 # Reset the styles_loaded flag to ensure styles will be reloaded
538608 DataFrameHtmlFormatter ._styles_loaded = False
539-
540- # Ensure the changes are reflected in existing DataFrames
541- _refresh_formatter_reference ()
609+ set_formatter (formatter )
542610
543611
544612def reset_styles_loaded_state () -> None :
@@ -560,8 +628,4 @@ def _refresh_formatter_reference() -> None:
560628 This helps ensure that changes to the formatter are reflected in existing
561629 DataFrames that might be caching the formatter reference.
562630 """
563- try :
564- # This is a no-op but signals modules to refresh their reference
565- pass
566- except Exception :
567- pass
631+ # This is a no-op but signals modules to refresh their reference
0 commit comments