2828BAR_CHARS : List [str ] = ["█" , "▒" , "▐" , "░" , "▄" , "▀" ]
2929
3030
31+ def _auto_n_bins (n_samples : int , n_unique : int ) -> int :
32+ """Pick a number of bins using Sturges' rule, capped at unique values."""
33+ import math
34+
35+ if n_samples <= 1 :
36+ return 1
37+ sturges = math .ceil (math .log2 (n_samples ) + 1 )
38+ # Don't exceed the number of unique values, and clamp to [2, 50]
39+ return max (2 , min (sturges , n_unique , 50 ))
40+
41+
42+ def _auto_bar_width (label_width : int , n_datasets : int ) -> int :
43+ """Pick bar_width to fit within terminal width."""
44+ import shutil
45+
46+ term_width = shutil .get_terminal_size ((80 , 24 )).columns
47+ # Each line: label_width + " | " (3) + bar + " (XX.X%)" (9)
48+ available = term_width - label_width - 3 - 9
49+ return max (10 , available )
50+
51+
3152def _weighted_histogram (
3253 values : pd .Series ,
3354 weights : Optional [pd .Series ],
@@ -65,7 +86,9 @@ def _render_horizontal_bars(
6586 """Renders a group of horizontal bars for one category or bin.
6687
6788 Each dataset gets its own line with a distinct character and a percentage
68- label at the end.
89+ label at the end. When a proportion is non-zero but too small to render
90+ even one bar character, a single dot (``.``) is shown so that the reader
91+ can distinguish "present but tiny" from "truly zero".
6992
7093 Args:
7194 label: The category label or bin range string.
@@ -86,7 +109,13 @@ def _render_horizontal_bars(
86109 bar_len = int (round ((prop / max_value ) * bar_width ))
87110 else :
88111 bar_len = 0
89- bar = char * bar_len
112+ if bar_len > 0 :
113+ bar = char * bar_len
114+ elif prop > 0 :
115+ # Non-zero proportion too small to render — show a dot
116+ bar = "."
117+ else :
118+ bar = ""
90119 if i == 0 :
91120 prefix = label .ljust (label_width )
92121 else :
@@ -121,8 +150,9 @@ def ascii_plot_bar(
121150 names : List [str ],
122151 column : str ,
123152 weighted : bool = True ,
124- bar_width : int = 40 ,
153+ bar_width : Optional [ int ] = None ,
125154 dist_type : Optional [str ] = None ,
155+ separate_categories : bool = True ,
126156) -> str :
127157 """Produces an ASCII grouped barplot for a single categorical variable.
128158
@@ -147,6 +177,8 @@ def ascii_plot_bar(
147177 bar_width: Maximum character width for bars. Defaults to 40.
148178 dist_type: Accepted for compatibility but only "hist_ascii" is supported.
149179 A warning is logged if any other value is passed.
180+ separate_categories: If True, insert a blank line between categories
181+ for readability. Defaults to True.
150182
151183 Returns:
152184 ASCII barplot text for this variable.
@@ -168,8 +200,10 @@ def ascii_plot_bar(
168200 |
169201 blue | ████████████████████ (50.0%)
170202 | ▒▒▒▒▒▒▒▒▒▒ (25.0%)
203+ <BLANKLINE>
171204 green | ██████████ (25.0%)
172205 | ▒▒▒▒▒▒▒▒▒▒ (25.0%)
206+ <BLANKLINE>
173207 red | ██████████ (25.0%)
174208 | ▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ (50.0%)
175209 <BLANKLINE>
@@ -208,6 +242,9 @@ def ascii_plot_bar(
208242 label_width = max (len (str (c )) for c in categories ) if categories else 8
209243 label_width = max (label_width , 8 ) # minimum width for "Category"
210244
245+ if bar_width is None :
246+ bar_width = _auto_bar_width (label_width , len (legend_names ))
247+
211248 # Build output
212249 lines : List [str ] = []
213250 lines .append (f"=== { column } (categorical) ===" )
@@ -218,7 +255,9 @@ def ascii_plot_bar(
218255 lines .append (f"{ header_label } | { ' ' .join (legend_names )} " )
219256 lines .append (f"{ ' ' * label_width } |" )
220257
221- for cat in categories :
258+ for ci , cat in enumerate (categories ):
259+ if separate_categories and ci > 0 :
260+ lines .append ("" )
222261 cat_data = combined [combined [column ] == cat ]
223262 proportions : Dict [str , float ] = {}
224263 for _ , row in cat_data .iterrows ():
@@ -241,8 +280,8 @@ def ascii_plot_hist(
241280 names : List [str ],
242281 column : str ,
243282 weighted : bool = True ,
244- n_bins : int = 10 ,
245- bar_width : int = 40 ,
283+ n_bins : Optional [ int ] = None ,
284+ bar_width : Optional [ int ] = None ,
246285 dist_type : Optional [str ] = None ,
247286) -> str :
248287 """Produces an ASCII histogram for a single numeric variable.
@@ -322,6 +361,9 @@ def ascii_plot_hist(
322361 if len (combined_values ) == 0 :
323362 return f"=== { column } (numeric) ===\n \n No data available.\n "
324363
364+ if n_bins is None :
365+ n_bins = _auto_n_bins (len (combined_values ), combined_values .nunique ())
366+
325367 global_min = float (combined_values .min ())
326368 global_max = float (combined_values .max ())
327369
@@ -352,6 +394,9 @@ def ascii_plot_hist(
352394 label_width = max (len (lbl ) for lbl in bin_labels ) if bin_labels else 8
353395 label_width = max (label_width , 3 ) # minimum width for "Bin"
354396
397+ if bar_width is None :
398+ bar_width = _auto_bar_width (label_width , len (legend_names ))
399+
355400 # Build output
356401 lines : List [str ] = []
357402 lines .append (f"=== { column } (numeric) ===" )
@@ -384,8 +429,8 @@ def ascii_comparative_hist(
384429 names : List [str ],
385430 column : str ,
386431 weighted : bool = True ,
387- n_bins : int = 10 ,
388- bar_width : int = 20 ,
432+ n_bins : Optional [ int ] = None ,
433+ bar_width : Optional [ int ] = None ,
389434) -> str :
390435 """Produces a columnar, baseline-relative ASCII histogram.
391436
@@ -456,6 +501,9 @@ def ascii_comparative_hist(
456501 if len (combined_values ) == 0 :
457502 return "No data available."
458503
504+ if n_bins is None :
505+ n_bins = _auto_n_bins (len (combined_values ), combined_values .nunique ())
506+
459507 global_min = float (combined_values .min ())
460508 global_max = float (combined_values .max ())
461509
@@ -485,6 +533,20 @@ def ascii_comparative_hist(
485533 bracket_right = "]" if i == n_bins - 1 else ")"
486534 bin_labels .append (f"[{ left :,.2f} , { right :,.2f} { bracket_right } " )
487535
536+ # Range column width (computed early so bar_width auto-detection can use it)
537+ range_header = "Range"
538+ range_width = max (len (range_header ), max (len (lbl ) for lbl in bin_labels ))
539+
540+ if bar_width is None :
541+ import shutil
542+
543+ term_width = shutil .get_terminal_size ((80 , 24 )).columns
544+ n_cols = len (legend_names )
545+ # Each column needs: bar_width + pct string (~6) + spacing (3)
546+ available = term_width - range_width - 4 # " | " separator
547+ per_col = max (10 , (available - (n_cols - 1 ) * 3 ) // n_cols - 6 )
548+ bar_width = per_col
549+
488550 # Baseline percentages (first dataset)
489551 baseline_pcts = hist_pcts [0 ]
490552
@@ -533,10 +595,6 @@ def ascii_comparative_hist(
533595 max_cell_w = max (len (cell_strings [di ][bi ]) for bi in range (n_bins ))
534596 col_widths .append (max (header_w , max_cell_w ))
535597
536- # Range column width
537- range_header = "Range"
538- range_width = max (len (range_header ), max (len (lbl ) for lbl in bin_labels ))
539-
540598 # Build output
541599 lines : List [str ] = []
542600
@@ -583,9 +641,10 @@ def ascii_plot_dist(
583641 variables : Optional [List [str ]] = None ,
584642 numeric_n_values_threshold : int = 15 ,
585643 weighted : bool = True ,
586- n_bins : int = 10 ,
587- bar_width : int = 40 ,
644+ n_bins : Optional [ int ] = None ,
645+ bar_width : Optional [ int ] = None ,
588646 dist_type : Optional [str ] = None ,
647+ separate_categories : bool = True ,
589648) -> str :
590649 """Produces ASCII text comparing weighted distributions across datasets.
591650
@@ -607,6 +666,8 @@ def ascii_plot_dist(
607666 bar_width: Maximum character width for the longest bar. Defaults to 40.
608667 dist_type: Accepted for compatibility but only "hist_ascii" is supported.
609668 A warning is logged if any other value is passed.
669+ separate_categories: If True, insert a blank line between categories
670+ in barplots for readability. Defaults to True.
610671
611672 Returns:
612673 The full ASCII output text.
@@ -636,8 +697,10 @@ def ascii_plot_dist(
636697 |
637698 blue | ████████████████████ (50.0%)
638699 | ▒▒▒▒▒▒▒▒▒▒ (25.0%)
700+ <BLANKLINE>
639701 green | ██████████ (25.0%)
640702 | ▒▒▒▒▒▒▒▒▒▒ (25.0%)
703+ <BLANKLINE>
641704 red | ██████████ (25.0%)
642705 | ▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ (50.0%)
643706 <BLANKLINE>
@@ -684,7 +747,14 @@ def ascii_plot_dist(
684747
685748 if categorical :
686749 output_parts .append (
687- ascii_plot_bar (dfs , names , o , weighted = weighted , bar_width = bar_width )
750+ ascii_plot_bar (
751+ dfs ,
752+ names ,
753+ o ,
754+ weighted = weighted ,
755+ bar_width = bar_width ,
756+ separate_categories = separate_categories ,
757+ )
688758 )
689759 else :
690760 output_parts .append (
0 commit comments