Skip to content

Commit 7d2a4a5

Browse files
authored
Merge pull request #220 from posit-dev/fix-vals-fmt-number-use-series
fix: have internal numeric formatters work with any single DF library on user system
2 parents 2e06b03 + 78e4b3a commit 7d2a4a5

File tree

2 files changed

+1222
-47
lines changed

2 files changed

+1222
-47
lines changed

pointblank/validate.py

Lines changed: 217 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import commonmark
1818
import narwhals as nw
1919
from great_tables import GT, from_column, google_font, html, loc, md, style, vals
20+
from great_tables.gt import _get_column_of_values
2021
from great_tables.vals import fmt_integer, fmt_number
2122
from importlib_resources import files
2223
from narwhals.typing import FrameT
@@ -11483,7 +11484,9 @@ def get_tabular_report(
1148311484
# Create the label, table type, and thresholds HTML fragments
1148411485
label_html = _create_label_html(label=self.label, start_time="")
1148511486
table_type_html = _create_table_type_html(tbl_type=tbl_info, tbl_name=self.tbl_name)
11486-
thresholds_html = _create_thresholds_html(thresholds=thresholds, locale=locale)
11487+
thresholds_html = _create_thresholds_html(
11488+
thresholds=thresholds, locale=locale, df_lib=df_lib
11489+
)
1148711490

1148811491
# Compose the subtitle HTML fragment
1148911492
combined_subtitle = (
@@ -11796,6 +11799,7 @@ def get_tabular_report(
1179611799
interrogation_performed=interrogation_performed,
1179711800
active=active,
1179811801
locale=locale,
11802+
df_lib=df_lib,
1179911803
)
1180011804

1180111805
# ------------------------------------------------
@@ -11812,6 +11816,7 @@ def get_tabular_report(
1181211816
interrogation_performed=interrogation_performed,
1181311817
active=active,
1181411818
locale=locale,
11819+
df_lib=df_lib,
1181511820
)
1181611821

1181711822
validation_info_dict["fail"] = _transform_passed_failed(
@@ -11820,6 +11825,7 @@ def get_tabular_report(
1182011825
interrogation_performed=interrogation_performed,
1182111826
active=active,
1182211827
locale=locale,
11828+
df_lib=df_lib,
1182311829
)
1182411830

1182511831
# ------------------------------------------------
@@ -11999,7 +12005,9 @@ def get_tabular_report(
1199912005
# Create the label, table type, and thresholds HTML fragments
1200012006
label_html = _create_label_html(label=self.label, start_time=self.time_start)
1200112007
table_type_html = _create_table_type_html(tbl_type=tbl_info, tbl_name=self.tbl_name)
12002-
thresholds_html = _create_thresholds_html(thresholds=thresholds, locale=locale)
12008+
thresholds_html = _create_thresholds_html(
12009+
thresholds=thresholds, locale=locale, df_lib=df_lib
12010+
)
1200312011

1200412012
# Compose the subtitle HTML fragment
1200512013
combined_subtitle = (
@@ -13802,29 +13810,122 @@ def _transform_eval(
1380213810
return symbol_list
1380313811

1380413812

13813+
def _format_numbers_with_gt(
    values: list[int],
    n_sigfig: int = 3,
    compact: bool = True,
    locale: str = "en",
    df_lib=None,
) -> list[str]:
    """Format a list of numbers through a Great Tables `GT` object.

    The values are placed in a one-column DataFrame, formatted with `GT.fmt_number()`, and the
    rendered HTML cell strings are read back with `_get_column_of_values()`.

    Parameters
    ----------
    values
        The numbers to format.
    n_sigfig
        Number of significant figures, passed through to `GT.fmt_number()`.
    compact
        Whether to use compact notation, passed through to `GT.fmt_number()`.
    locale
        Locale identifier, passed through to `GT.fmt_number()`.
    df_lib
        The DataFrame library module used to build the temporary table (it must expose a
        `DataFrame` constructor). When `None`, Polars is used if present, otherwise Pandas.

    Returns
    -------
    list[str]
        One formatted string per input value, in input order.

    Raises
    ------
    ImportError
        If `df_lib` is `None` and neither Polars nor Pandas is available.
    """
    # Nothing to format: skip building a table (and the library lookup) entirely
    if len(values) == 0:
        return []

    if df_lib is None:
        # Same detection order as the single-value formatters in this module, so that this
        # helper no longer hard-requires Polars on Pandas-only installations
        if _is_lib_present("polars"):
            import polars as pl

            df_lib = pl
        elif _is_lib_present("pandas"):
            import pandas as pd

            df_lib = pd
        else:
            raise ImportError("Neither Polars nor Pandas is available for formatting")

    # Create a single-column DataFrame with all values
    df = df_lib.DataFrame({"values": values})

    # Create GT object and format the column
    gt_obj = GT(df).fmt_number(columns="values", n_sigfig=n_sigfig, compact=compact, locale=locale)

    # Extract the formatted values as rendered for the HTML context
    return _get_column_of_values(gt_obj, column_name="values", context="html")
13829+
13830+
13831+
def _format_single_number_with_gt(
    value: int, n_sigfig: int = 3, compact: bool = True, locale: str = "en", df_lib=None
) -> str:
    """Format one number via `GT.fmt_number()` using significant figures.

    A one-row, one-column table is built with `df_lib` (or, when `df_lib` is `None`, with Polars
    if present and otherwise Pandas), formatted, and the single rendered HTML cell is returned.
    Raises `ImportError` when no library is given and neither Polars nor Pandas is present.
    """
    lib = df_lib

    if lib is None:
        # No library supplied by the caller: pick whichever one is installed, Polars first
        if _is_lib_present("polars"):
            import polars as lib
        elif _is_lib_present("pandas"):
            import pandas as lib
        else:
            raise ImportError("Neither Polars nor Pandas is available for formatting")

    # Build the throwaway table and apply the number formatter to its only column
    table = GT(lib.DataFrame({"value": [value]})).fmt_number(
        columns="value", n_sigfig=n_sigfig, compact=compact, locale=locale
    )

    # The table has exactly one row, so the first rendered cell is the result
    rendered = _get_column_of_values(table, column_name="value", context="html")
    return rendered[0]
13858+
13859+
1380513860
def _transform_test_units(
13806-
test_units: list[int], interrogation_performed: bool, active: list[bool], locale: str
13861+
test_units: list[int],
13862+
interrogation_performed: bool,
13863+
active: list[bool],
13864+
locale: str,
13865+
df_lib=None,
1380713866
) -> list[str]:
1380813867
# If no interrogation was performed, return a list of empty strings
1380913868
if not interrogation_performed:
1381013869
return ["" for _ in range(len(test_units))]
1381113870

13871+
# Define the helper function that'll format numbers safely with Great Tables
13872+
def _format_number_safe(value: int) -> str:
13873+
if df_lib is not None:
13874+
# Use GT-based formatting to avoid Pandas dependency completely
13875+
return _format_single_number_with_gt(
13876+
value, n_sigfig=3, compact=True, locale=locale, df_lib=df_lib
13877+
)
13878+
else:
13879+
# Fallback to the original behavior
13880+
return str(vals.fmt_number(value, n_sigfig=3, compact=True, locale=locale)[0])
13881+
1381213882
return [
1381313883
(
13814-
(
13815-
str(test_units[i])
13816-
if test_units[i] < 10000
13817-
else str(vals.fmt_number(test_units[i], n_sigfig=3, compact=True, locale=locale)[0])
13818-
)
13884+
(str(test_units[i]) if test_units[i] < 10000 else _format_number_safe(test_units[i]))
1381913885
if active[i]
1382013886
else "&mdash;"
1382113887
)
1382213888
for i in range(len(test_units))
1382313889
]
1382413890

1382513891

13826-
def _fmt_lg(value: int, locale: str) -> str:
13827-
return vals.fmt_number(value, n_sigfig=3, compact=True, locale=locale)[0]
13892+
def _fmt_lg(value: int, locale: str, df_lib=None) -> str:
    """Format a large number compactly with three significant figures.

    With a `df_lib` supplied, formatting goes through `_format_single_number_with_gt()`;
    without one, `vals.fmt_number()` is called directly and its first element is returned.
    """
    if df_lib is None:
        # No DataFrame library given: use the value formatter directly
        formatted = vals.fmt_number(value, n_sigfig=3, compact=True, locale=locale)
        return formatted[0]

    # A DataFrame library was provided: format through a GT table built with it
    return _format_single_number_with_gt(
        value, n_sigfig=3, compact=True, locale=locale, df_lib=df_lib
    )
13901+
13902+
13903+
def _format_single_float_with_gt(
    value: float, decimals: int = 2, locale: str = "en", df_lib=None
) -> str:
    """Format one float via `GT.fmt_number()` with a fixed number of decimals.

    A one-row, one-column table is built with `df_lib` (or, when `df_lib` is `None`, with Polars
    if present and otherwise Pandas), formatted, and the single rendered HTML cell is returned.
    Raises `ImportError` when no library is given and neither Polars nor Pandas is present.
    """
    frame_lib = df_lib

    if frame_lib is None:
        # Fall back to whichever DataFrame library is installed, checking Polars first
        if _is_lib_present("polars"):
            import polars as frame_lib
        elif _is_lib_present("pandas"):
            import pandas as frame_lib
        else:
            raise ImportError("Neither Polars nor Pandas is available for formatting")

    # One row, one column: just enough of a table for GT to format the value
    single_cell = frame_lib.DataFrame({"value": [value]})
    formatted_table = GT(single_cell).fmt_number(columns="value", decimals=decimals, locale=locale)

    # Pull the rendered HTML cell back out; there is exactly one
    cells = _get_column_of_values(formatted_table, column_name="value", context="html")
    return cells[0]
1382813929

1382913930

1383013931
def _transform_passed_failed(
@@ -13833,14 +13934,24 @@ def _transform_passed_failed(
1383313934
interrogation_performed: bool,
1383413935
active: list[bool],
1383513936
locale: str,
13937+
df_lib=None,
1383613938
) -> list[str]:
1383713939
if not interrogation_performed:
1383813940
return ["" for _ in range(len(n_passed_failed))]
1383913941

13942+
# Helper function to format numbers safely
13943+
def _format_float_safe(value: float) -> str:
13944+
if df_lib is not None:
13945+
# Use GT-based formatting to avoid Pandas dependency completely
13946+
return _format_single_float_with_gt(value, decimals=2, locale=locale, df_lib=df_lib)
13947+
else:
13948+
# Fallback to the original behavior
13949+
return vals.fmt_number(value, decimals=2, locale=locale)[0]
13950+
1384013951
passed_failed = [
1384113952
(
13842-
f"{n_passed_failed[i] if n_passed_failed[i] < 10000 else _fmt_lg(n_passed_failed[i], locale=locale)}"
13843-
f"<br />{vals.fmt_number(f_passed_failed[i], decimals=2, locale=locale)[0]}"
13953+
f"{n_passed_failed[i] if n_passed_failed[i] < 10000 else _fmt_lg(n_passed_failed[i], locale=locale, df_lib=df_lib)}"
13954+
f"<br />{_format_float_safe(f_passed_failed[i])}"
1384413955
if active[i]
1384513956
else "&mdash;"
1384613957
)
@@ -14051,41 +14162,122 @@ def _create_label_html(label: str | None, start_time: str) -> str:
1405114162
)
1405214163

1405314164

14054-
def _create_thresholds_html(thresholds: Thresholds, locale: str) -> str:
14165+
def _format_single_integer_with_gt(value: int, locale: str = "en", df_lib=None) -> str:
    """Format one integer via `GT.fmt_integer()` for the given locale.

    A one-row, one-column table is built with `df_lib` (or, when `df_lib` is `None`, with Polars
    if present and otherwise Pandas), formatted, and the single rendered HTML cell is returned.
    Raises `ImportError` when no library is given and neither Polars nor Pandas is present.
    """
    backend = df_lib

    if backend is None:
        # Caller did not choose a library, so detect one (Polars takes precedence)
        if _is_lib_present("polars"):
            import polars as backend
        elif _is_lib_present("pandas"):
            import pandas as backend
        else:
            raise ImportError("Neither Polars nor Pandas is available for formatting")

    # Wrap the value in a minimal table and run the integer formatter on it
    gt_table = GT(backend.DataFrame({"value": [value]})).fmt_integer(
        columns="value", locale=locale
    )

    # Only one cell exists, so return the first rendered value
    return _get_column_of_values(gt_table, column_name="value", context="html")[0]
14190+
14191+
14192+
def _format_single_float_with_gt_custom(
    value: float,
    decimals: int = 2,
    drop_trailing_zeros: bool = False,
    locale: str = "en",
    df_lib=None,
) -> str:
    """Format one float via `GT.fmt_number()` with control over trailing zeros.

    A one-row, one-column table is built with `df_lib` (or, when `df_lib` is `None`, with Polars
    if present and otherwise Pandas), formatted with the given `decimals` and
    `drop_trailing_zeros` settings, and the single rendered HTML cell is returned.
    Raises `ImportError` when no library is given and neither Polars nor Pandas is present.
    """
    table_lib = df_lib

    if table_lib is None:
        # Detect an installed DataFrame library, preferring Polars over Pandas
        if _is_lib_present("polars"):
            import polars as table_lib
        elif _is_lib_present("pandas"):
            import pandas as table_lib
        else:
            raise ImportError("Neither Polars nor Pandas is available for formatting")

    # Collect the formatter options once so the GT call below stays on one line
    number_options = {
        "columns": "value",
        "decimals": decimals,
        "drop_trailing_zeros": drop_trailing_zeros,
        "locale": locale,
    }

    # Minimal one-cell table, formatted and then read back in its HTML rendering
    one_cell_table = GT(table_lib.DataFrame({"value": [value]})).fmt_number(**number_options)
    html_cells = _get_column_of_values(one_cell_table, column_name="value", context="html")

    return html_cells[0]
14225+
14226+
14227+
def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) -> str:
1405514228
if thresholds == Thresholds():
1405614229
return ""
1405714230

14231+
# Helper functions to format numbers safely
14232+
def _format_number_safe(value: float, decimals: int, drop_trailing_zeros: bool = False) -> str:
14233+
if df_lib is not None and value is not None:
14234+
# Use GT-based formatting to avoid Pandas dependency completely
14235+
return _format_single_float_with_gt_custom(
14236+
value,
14237+
decimals=decimals,
14238+
drop_trailing_zeros=drop_trailing_zeros,
14239+
locale=locale,
14240+
df_lib=df_lib,
14241+
)
14242+
else:
14243+
# Fallback to the original behavior
14244+
return fmt_number(
14245+
value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
14246+
)[0]
14247+
14248+
def _format_integer_safe(value: int) -> str:
14249+
if df_lib is not None and value is not None:
14250+
# Use GT-based formatting to avoid Pandas dependency completely
14251+
return _format_single_integer_with_gt(value, locale=locale, df_lib=df_lib)
14252+
else:
14253+
# Fallback to the original behavior
14254+
return fmt_integer(value, locale=locale)[0]
14255+
1405814256
warning = (
14059-
fmt_number(
14060-
thresholds.warning_fraction, decimals=3, drop_trailing_zeros=True, locale=locale
14061-
)[0]
14257+
_format_number_safe(thresholds.warning_fraction, decimals=3, drop_trailing_zeros=True)
1406214258
if thresholds.warning_fraction is not None
1406314259
else (
14064-
fmt_integer(thresholds.warning_count, locale=locale)[0]
14260+
_format_integer_safe(thresholds.warning_count)
1406514261
if thresholds.warning_count is not None
1406614262
else "&mdash;"
1406714263
)
1406814264
)
1406914265

1407014266
error = (
14071-
fmt_number(thresholds.error_fraction, decimals=3, drop_trailing_zeros=True, locale=locale)[
14072-
0
14073-
]
14267+
_format_number_safe(thresholds.error_fraction, decimals=3, drop_trailing_zeros=True)
1407414268
if thresholds.error_fraction is not None
1407514269
else (
14076-
fmt_integer(thresholds.error_count, locale=locale)[0]
14270+
_format_integer_safe(thresholds.error_count)
1407714271
if thresholds.error_count is not None
1407814272
else "&mdash;"
1407914273
)
1408014274
)
1408114275

1408214276
critical = (
14083-
fmt_number(
14084-
thresholds.critical_fraction, decimals=3, drop_trailing_zeros=True, locale=locale
14085-
)[0]
14277+
_format_number_safe(thresholds.critical_fraction, decimals=3, drop_trailing_zeros=True)
1408614278
if thresholds.critical_fraction is not None
1408714279
else (
14088-
fmt_integer(thresholds.critical_count, locale=locale)[0]
14280+
_format_integer_safe(thresholds.critical_count)
1408914281
if thresholds.critical_count is not None
1409014282
else "&mdash;"
1409114283
)

0 commit comments

Comments
 (0)