Skip to content

Commit 86017a2

Browse files
committed
fix ruff errors
1 parent 43158e7 commit 86017a2

File tree

2 files changed

+174
-105
lines changed

2 files changed

+174
-105
lines changed

python/datafusion/html_formatter.py

Lines changed: 135 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
"""HTML formatting utilities for DataFusion DataFrames."""
22

3+
from __future__ import annotations
4+
35
from typing import (
46
Any,
57
Callable,
6-
Dict,
7-
List,
88
Optional,
99
Protocol,
10-
Type,
1110
runtime_checkable,
1211
)
1312

@@ -43,7 +42,10 @@ def get_cell_style(self) -> str:
4342
Returns:
4443
CSS style string
4544
"""
46-
return "border: 1px solid black; padding: 8px; text-align: left; white-space: nowrap;"
45+
return (
46+
"border: 1px solid black; padding: 8px; text-align: left; "
47+
"white-space: nowrap;"
48+
)
4749

4850
def get_header_style(self) -> str:
4951
"""Get the CSS style for header cells.
@@ -73,11 +75,13 @@ class DataFrameHtmlFormatter:
7375
max_cell_length: Maximum characters to display in a cell before truncation
7476
max_width: Maximum width of the HTML table in pixels
7577
max_height: Maximum height of the HTML table in pixels
76-
enable_cell_expansion: Whether to add expand/collapse buttons for long cell values
78+
enable_cell_expansion: Whether to add expand/collapse buttons for long cell
79+
values
7780
custom_css: Additional CSS to include in the HTML output
7881
show_truncation_message: Whether to display a message when data is truncated
7982
style_provider: Custom provider for cell and header styles
80-
use_shared_styles: Whether to load styles and scripts only once per notebook session
83+
use_shared_styles: Whether to load styles and scripts only once per notebook
84+
session
8185
"""
8286

8387
# Class variable to track if styles have been loaded in the notebook
@@ -93,30 +97,72 @@ def __init__(
9397
show_truncation_message: bool = True,
9498
style_provider: Optional[StyleProvider] = None,
9599
use_shared_styles: bool = True,
96-
):
100+
) -> None:
101+
"""Initialize the HTML formatter.
102+
103+
Parameters
104+
----------
105+
max_cell_length : int, default 25
106+
Maximum length of cell content before truncation.
107+
max_width : int, default 1000
108+
Maximum width of the displayed table in pixels.
109+
max_height : int, default 300
110+
Maximum height of the displayed table in pixels.
111+
enable_cell_expansion : bool, default True
112+
Whether to allow cells to expand when clicked.
113+
custom_css : str, optional
114+
Custom CSS to apply to the HTML table.
115+
show_truncation_message : bool, default True
116+
Whether to show a message indicating that content has been truncated.
117+
style_provider : StyleProvider, optional
118+
Provider of CSS styles for the HTML table. If None, DefaultStyleProvider
119+
is used.
120+
use_shared_styles : bool, default True
121+
Whether to use shared styles across multiple tables.
122+
123+
Raises
124+
------
125+
ValueError
126+
If max_cell_length, max_width, or max_height is not a positive integer.
127+
TypeError
128+
If enable_cell_expansion, show_truncation_message, or use_shared_styles is
129+
not a boolean,
130+
or if custom_css is provided but is not a string,
131+
or if style_provider is provided but does not implement the StyleProvider
132+
protocol.
133+
"""
97134
# Validate numeric parameters
135+
98136
if not isinstance(max_cell_length, int) or max_cell_length <= 0:
99-
raise ValueError("max_cell_length must be a positive integer")
137+
msg = "max_cell_length must be a positive integer"
138+
raise ValueError(msg)
100139
if not isinstance(max_width, int) or max_width <= 0:
101-
raise ValueError("max_width must be a positive integer")
140+
msg = "max_width must be a positive integer"
141+
raise ValueError(msg)
102142
if not isinstance(max_height, int) or max_height <= 0:
103-
raise ValueError("max_height must be a positive integer")
143+
msg = "max_height must be a positive integer"
144+
raise ValueError(msg)
104145

105146
# Validate boolean parameters
106147
if not isinstance(enable_cell_expansion, bool):
107-
raise TypeError("enable_cell_expansion must be a boolean")
148+
msg = "enable_cell_expansion must be a boolean"
149+
raise TypeError(msg)
108150
if not isinstance(show_truncation_message, bool):
109-
raise TypeError("show_truncation_message must be a boolean")
151+
msg = "show_truncation_message must be a boolean"
152+
raise TypeError(msg)
110153
if not isinstance(use_shared_styles, bool):
111-
raise TypeError("use_shared_styles must be a boolean")
154+
msg = "use_shared_styles must be a boolean"
155+
raise TypeError(msg)
112156

113157
# Validate custom_css
114158
if custom_css is not None and not isinstance(custom_css, str):
115-
raise TypeError("custom_css must be None or a string")
159+
msg = "custom_css must be None or a string"
160+
raise TypeError(msg)
116161

117162
# Validate style_provider
118163
if style_provider is not None and not isinstance(style_provider, StyleProvider):
119-
raise TypeError("style_provider must implement the StyleProvider protocol")
164+
msg = "style_provider must implement the StyleProvider protocol"
165+
raise TypeError(msg)
120166

121167
self.max_cell_length = max_cell_length
122168
self.max_width = max_width
@@ -127,12 +173,12 @@ def __init__(
127173
self.style_provider = style_provider or DefaultStyleProvider()
128174
self.use_shared_styles = use_shared_styles
129175
# Registry for custom type formatters
130-
self._type_formatters: Dict[Type, CellFormatter] = {}
176+
self._type_formatters: dict[type, CellFormatter] = {}
131177
# Custom cell builders
132178
self._custom_cell_builder: Optional[Callable[[Any, int, int, str], str]] = None
133179
self._custom_header_builder: Optional[Callable[[Any], str]] = None
134180

135-
def register_formatter(self, type_class: Type, formatter: CellFormatter) -> None:
181+
def register_formatter(self, type_class: type, formatter: CellFormatter) -> None:
136182
"""Register a custom formatter for a specific data type.
137183
138184
Args:
@@ -182,7 +228,7 @@ def format_html(
182228
batches: list,
183229
schema: Any,
184230
has_more: bool = False,
185-
table_uuid: Optional[str] = None,
231+
table_uuid: str | None = None,
186232
) -> str:
187233
"""Format record batches as HTML.
188234
@@ -206,15 +252,8 @@ def format_html(
206252

207253
# Validate schema
208254
if schema is None or not hasattr(schema, "__iter__"):
209-
if batches:
210-
import warnings
211-
212-
warnings.warn(
213-
"Schema not provided or invalid. Using schema from first batch."
214-
)
215-
schema = batches[0].schema
216-
else:
217-
raise TypeError("Schema must be provided when batches list is empty")
255+
msg = "Schema must be provided"
256+
raise TypeError(msg)
218257

219258
# Generate a unique ID if none provided
220259
table_uuid = table_uuid or f"df-{id(batches)}"
@@ -254,7 +293,7 @@ def format_html(
254293

255294
return "\n".join(html)
256295

257-
def _build_html_header(self) -> List[str]:
296+
def _build_html_header(self) -> list[str]:
258297
"""Build the HTML header with CSS styles."""
259298
html = []
260299
html.append("<style>")
@@ -266,17 +305,18 @@ def _build_html_header(self) -> List[str]:
266305
html.append("</style>")
267306
return html
268307

269-
def _build_table_container_start(self) -> List[str]:
308+
def _build_table_container_start(self) -> list[str]:
270309
"""Build the opening tags for the table container."""
271310
html = []
272311
html.append(
273312
f'<div style="width: 100%; max-width: {self.max_width}px; '
274-
f'max-height: {self.max_height}px; overflow: auto; border: 1px solid #ccc;">'
313+
f"max-height: {self.max_height}px; overflow: auto; border: "
314+
'1px solid #ccc;">'
275315
)
276316
html.append('<table style="border-collapse: collapse; min-width: 100%">')
277317
return html
278318

279-
def _build_table_header(self, schema: Any) -> List[str]:
319+
def _build_table_header(self, schema: Any) -> list[str]:
280320
"""Build the HTML table header with column names."""
281321
html = []
282322
html.append("<thead>")
@@ -286,13 +326,14 @@ def _build_table_header(self, schema: Any) -> List[str]:
286326
html.append(self._custom_header_builder(field))
287327
else:
288328
html.append(
289-
f"<th style='{self.style_provider.get_header_style()}'>{field.name}</th>"
329+
f"<th style='{self.style_provider.get_header_style()}'>"
330+
f"{field.name}</th>"
290331
)
291332
html.append("</tr>")
292333
html.append("</thead>")
293334
return html
294335

295-
def _build_table_body(self, batches: list, table_uuid: str) -> List[str]:
336+
def _build_table_body(self, batches: list, table_uuid: str) -> list[str]:
296337
"""Build the HTML table body with data rows."""
297338
html = []
298339
html.append("<tbody>")
@@ -312,7 +353,8 @@ def _build_table_body(self, batches: list, table_uuid: str) -> List[str]:
312353

313354
# Then apply either custom cell builder or standard cell formatting
314355
if self._custom_cell_builder:
315-
# Pass both the raw value and formatted value to let the builder decide
356+
# Pass the raw value with its row/column position and table id; the
357+
# builder decides how to render it
316358
cell_html = self._custom_cell_builder(
317359
raw_value, row_count, col_idx, table_uuid
318360
)
@@ -346,20 +388,14 @@ def _get_cell_value(self, column: Any, row_idx: int) -> Any:
346388
The raw cell value
347389
"""
348390
try:
349-
# Get the value from the column
350391
value = column[row_idx]
351392

352-
# Try to convert scalar types to Python native types
353-
try:
354-
# Arrow scalars typically have a .as_py() method
355-
if hasattr(value, "as_py"):
356-
return value.as_py()
357-
except (AttributeError, TypeError):
358-
pass
359-
393+
if hasattr(value, "as_py"):
394+
return value.as_py()
395+
except (AttributeError, TypeError):
396+
pass
397+
else:
360398
return value
361-
except (IndexError, TypeError):
362-
return ""
363399

364400
def _format_cell_value(self, value: Any) -> str:
365401
"""Format a cell value for display.
@@ -375,8 +411,7 @@ def _format_cell_value(self, value: Any) -> str:
375411
# Check for custom type formatters
376412
for type_cls, formatter in self._type_formatters.items():
377413
if isinstance(value, type_cls):
378-
result = formatter(value)
379-
return result
414+
return formatter(value)
380415

381416
# If no formatter matched, return string representation
382417
return str(value)
@@ -389,9 +424,11 @@ def _build_expandable_cell(
389424
return (
390425
f"<td style='{self.style_provider.get_cell_style()}'>"
391426
f"<div class='expandable-container'>"
392-
f"<span class='expandable' id='{table_uuid}-min-text-{row_count}-{col_idx}'>"
427+
"<span class='expandable' "
428+
f"id='{table_uuid}-min-text-{row_count}-{col_idx}'>"
393429
f"{short_value}</span>"
394-
f"<span class='full-text' id='{table_uuid}-full-text-{row_count}-{col_idx}'>"
430+
"<span class='full-text' "
431+
f"id='{table_uuid}-full-text-{row_count}-{col_idx}'>"
395432
f"{formatted_value}</span>"
396433
f"<button class='expand-btn' "
397434
f"onclick=\"toggleDataFrameCellText('{table_uuid}',{row_count},{col_idx})\">"
@@ -406,7 +443,7 @@ def _build_regular_cell(self, formatted_value: str) -> str:
406443
f"<td style='{self.style_provider.get_cell_style()}'>{formatted_value}</td>"
407444
)
408445

409-
def _build_html_footer(self, has_more: bool) -> List[str]:
446+
def _build_html_footer(self, has_more: bool) -> list[str]:
410447
"""Build the HTML footer with JavaScript and messages."""
411448
html = []
412449

@@ -455,8 +492,12 @@ def _get_javascript(self) -> str:
455492
return """
456493
<script>
457494
function toggleDataFrameCellText(table_uuid, row, col) {
458-
var shortText = document.getElementById(table_uuid + "-min-text-" + row + "-" + col);
459-
var fullText = document.getElementById(table_uuid + "-full-text-" + row + "-" + col);
495+
var shortText = document.getElementById(
496+
table_uuid + "-min-text-" + row + "-" + col
497+
);
498+
var fullText = document.getElementById(
499+
table_uuid + "-full-text-" + row + "-" + col
500+
);
460501
var button = event.target;
461502
462503
if (fullText.style.display === "none") {
@@ -473,8 +514,29 @@ def _get_javascript(self) -> str:
473514
"""
474515

475516

476-
# Global formatter instance to be used by default
477-
_default_formatter = DataFrameHtmlFormatter()
517+
class FormatterManager:
518+
"""Manager class for the global DataFrame HTML formatter instance."""
519+
520+
_default_formatter: DataFrameHtmlFormatter = DataFrameHtmlFormatter()
521+
522+
@classmethod
523+
def set_formatter(cls, formatter: DataFrameHtmlFormatter) -> None:
524+
"""Set the global DataFrame HTML formatter.
525+
526+
Args:
527+
formatter: The formatter instance to use globally
528+
"""
529+
cls._default_formatter = formatter
530+
_refresh_formatter_reference()
531+
532+
@classmethod
533+
def get_formatter(cls) -> DataFrameHtmlFormatter:
534+
"""Get the current global DataFrame HTML formatter.
535+
536+
Returns:
537+
The global HTML formatter instance
538+
"""
539+
return cls._default_formatter
478540

479541

480542
def get_formatter() -> DataFrameHtmlFormatter:
@@ -492,7 +554,21 @@ def get_formatter() -> DataFrameHtmlFormatter:
492554
>>> formatter = get_formatter()
493555
>>> formatter.max_cell_length = 50 # Increase cell length
494556
"""
495-
return _default_formatter
557+
return FormatterManager.get_formatter()
558+
559+
560+
def set_formatter(formatter: DataFrameHtmlFormatter) -> None:
561+
"""Set the global DataFrame HTML formatter.
562+
563+
Args:
564+
formatter: The formatter instance to use globally
565+
566+
Example:
567+
>>> from datafusion.html_formatter import DataFrameHtmlFormatter, set_formatter
568+
>>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100)
569+
>>> set_formatter(custom_formatter)
570+
"""
571+
FormatterManager.set_formatter(formatter)
496572

497573

498574
def configure_formatter(**kwargs: Any) -> None:
@@ -514,11 +590,7 @@ def configure_formatter(**kwargs: Any) -> None:
514590
... use_shared_styles=True
515591
... )
516592
"""
517-
global _default_formatter
518-
_default_formatter = DataFrameHtmlFormatter(**kwargs)
519-
520-
# Ensure the changes are reflected in existing DataFrames
521-
_refresh_formatter_reference()
593+
set_formatter(DataFrameHtmlFormatter(**kwargs))
522594

523595

524596
def reset_formatter() -> None:
@@ -531,14 +603,10 @@ def reset_formatter() -> None:
531603
>>> from datafusion.html_formatter import reset_formatter
532604
>>> reset_formatter() # Reset formatter to default settings
533605
"""
534-
global _default_formatter
535-
_default_formatter = DataFrameHtmlFormatter()
536-
606+
formatter = DataFrameHtmlFormatter()
537607
# Reset the styles_loaded flag to ensure styles will be reloaded
538608
DataFrameHtmlFormatter._styles_loaded = False
539-
540-
# Ensure the changes are reflected in existing DataFrames
541-
_refresh_formatter_reference()
609+
set_formatter(formatter)
542610

543611

544612
def reset_styles_loaded_state() -> None:
@@ -560,8 +628,4 @@ def _refresh_formatter_reference() -> None:
560628
This helps ensure that changes to the formatter are reflected in existing
561629
DataFrames that might be caching the formatter reference.
562630
"""
563-
try:
564-
# This is a no-op but signals modules to refresh their reference
565-
pass
566-
except Exception:
567-
pass
631+
# Currently a no-op: no code runs here, so cached references are not refreshed

0 commit comments

Comments
 (0)