ucs-detect/make_results_rst.py at master · jquast/ucs-detect · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
import re
import os
import sys
import math
import yaml
import contextlib
import unicodedata
import colorsys

# Try to use faster C-based YAML loader
try:
    from yaml import CSafeLoader as SafeLoader
except ImportError:
    from yaml import SafeLoader

# 3rd party
import blessed
import wcwidth
import tabulate

_DPM = blessed.Terminal.DecPrivateMode


def _fmt_mode(mode_num):
    """Format a DEC private mode as ``'description (number)'``."""
    return f"{_DPM(mode_num).long_description} ({mode_num})"


# Plotting support
import matplotlib
matplotlib.use('Agg')  # Non-interactive backend for ReadTheDocs
import matplotlib.pyplot as plt
import numpy as np

GITHUB_DATA_LINK = 'https://github.com/jquast/ucs-detect/blob/master/data/{fname}'
DATA_PATH = os.path.join(os.path.dirname(__file__), "data")
TERMINAL_DETAIL_MIXINS_PATH = os.path.join(DATA_PATH, "terminal_detail_mixins.yaml")
PLOTS_PATH = os.path.join(os.path.dirname(__file__), "docs", "_static", "plots")
RST_DEPTH = [None, "=", "-", "+", "^"]
LINK_REGEX = re.compile(r'[^a-zA-Z0-9]')


def score_to_color(score):
    # Map score to hue: 0 degrees (red) to 120 degrees (green)
    # In HSV, hue is 0-1, so 120 degrees = 120/360 = 0.333
    hue = score * 0.333
    saturation = 0.2
    value = 0.95

    # Convert HSV to RGB (returns 0-1 range)
    r, g, b = colorsys.hsv_to_rgb(hue, saturation, value)

    # Convert to 0-255 range
    return (int(r * 255), int(g * 255), int(b * 255))


def make_score_css_class(score):
    if math.isnan(score):
        return 'score-na'
    return f'score-{round(score * 100)}'


def generate_score_css():
    """
    Generate CSS rules for all score classes (0-100).
    Returns a string containing CSS rules.
    """
    css_lines = [
        '/* Auto-generated score color classes */',
        '/* Common properties for all score classes */',
        '[class^="score-"], [class*=" score-"] {',
        '  display: block;',
        '  padding: 0.3em 0.5em;',
        '}',
        ''
    ]
    for score_pct in range(101):
        score = score_pct / 100.0
        r, g, b = score_to_color(score)
        class_name = make_score_css_class(score)
        css_lines.append(f'.{class_name} {{ background-color: rgb({r}, {g}, {b}); }}')
    return '\n'.join(css_lines)


def generate_score_roles():
    """
    Generate reStructuredText role definitions for all score classes.
    Returns a string containing role definitions that can be used inline.
    """
    lines = ['.. Generate custom roles for score coloring', '']
    for score_pct in range(101):
        score = score_pct / 100.0
        class_name = make_score_css_class(score)
        lines.append(f'.. role:: {class_name}')
        lines.append(f'   :class: {class_name}')
        lines.append('')
    # Add role for N/A scores
    lines.append('.. role:: score-na')
    lines.append('   :class: score-na')
    lines.append('')
    return '\n'.join(lines)


def wrap_with_score_role(text, score):
    """
    Wrap text with a reStructuredText role based on the score.

    Args:
        text: The text content to wrap (e.g., "75.0%")
        score: The score value (0.0 to 1.0) used to determine the role class

    Returns:
        Text wrapped with inline role syntax: :score-75:`75.0%`
    """
    role_name = make_score_css_class(score)
    return f':{role_name}:`{text}`'


def wrap_score_with_hyperlink(text, score, terminal_name, section_suffix):
    """
    Wrap score text with both a hyperlink and score styling using the :sref: role.

    Args:
        text: The text to display (e.g., "75.0%", "32s")
        score: The score value (0.0 to 1.0) for styling
        terminal_name: The terminal name for creating the link target
        section_suffix: The section suffix (e.g., "_wide", "_lang", "_time")

    Returns:
        Text wrapped with hyperlink and role: :sref:`75.0% <terminal_wide> 75`
    """
    score_value = round(score * 100) if not math.isnan(score) else 'na'
    link_target = make_link(terminal_name + section_suffix)
    return f':sref:`{text} <{link_target}> {score_value}`'


def wrap_time_with_hyperlink(text, score, elapsed_seconds, terminal_name, section_suffix):
    """
    Wrap elapsed time text with hyperlink and score styling, using actual seconds for sorting.
    """
    score_value_for_color = round(score * 100) if not math.isnan(score) else 'na'
    sort_value = int(elapsed_seconds) if not math.isnan(elapsed_seconds) else 'na'
    link_target = make_link(terminal_name + section_suffix)
    # Use score for color (inverted - faster is better), but elapsed_seconds for sorting
    return f':sref:`{text} <{link_target}> {score_value_for_color}:{sort_value}`'


def load_terminal_detail_mixins():
    """
    Load terminal detail mixins from YAML file.
    Returns a dictionary keyed by lowercase software_name.
    """
    if not os.path.exists(TERMINAL_DETAIL_MIXINS_PATH):
        return {}

    with open(TERMINAL_DETAIL_MIXINS_PATH, 'r') as f:
        data = yaml.load(f, Loader=SafeLoader)

    # Normalize keys to lowercase for case-insensitive matching
    terminals = data.get('terminals', {})
    return {key.lower(): value for key, value in terminals.items()}


def print_datatable(table_str, caption=None):
    """
    Print a table with sphinx-datatable class for sortable/searchable functionality.

    Args:
        table_str: The table string (RST format from tabulate)
        caption: Optional caption for the table
    """
    if caption:
        print(f".. table:: {caption}")
    else:
        print(".. table::")
    print("   :class: sphinx-datatable")
    print()
    # Indent the table content
    for line in table_str.split('\n'):
        if line.strip():  # Only indent non-empty lines
            print(f"   {line}")
        else:
            print()
    print()


def create_score_plots(sw_name, entry, score_table):
    """
    Create matplotlib plot comparing terminal scores against all terminals.

    Parameters
    ----------
    sw_name : str
        Terminal software name
    entry : dict
        Score entry for this terminal
    score_table : list
        List of all score entries for comparison
    """
    # Collect all scores for comparison
    metrics = ['WIDE', 'ZWJ', 'LANG', 'VS16', 'VS15', 'CAP', 'GFX', 'TIME']
    terminal_scores_scaled = {}
    all_scores_scaled = {}

    # Map metric names to entry keys
    score_keys = {
        'WIDE': 'score_wide',
        'ZWJ': 'score_zwj',
        'LANG': 'score_language',
        'VS16': 'score_emoji_vs16',
        'VS15': 'score_emoji_vs15',
        'CAP': 'score_capabilities',
        'GFX': 'score_graphics',
        'TIME': 'score_elapsed',
    }

    for metric in metrics:
        key = score_keys[metric]
        terminal_scores_scaled[metric] = entry[key + '_scaled']
        all_scores_scaled[metric] = [e[key + '_scaled'] for e in score_table]

    # Create output directory
    os.makedirs(PLOTS_PATH, exist_ok=True)

    # Create plot for scaled scores
    plot_filename_scaled = f"{make_link(sw_name)}_scores_scaled.png"
    plot_path_scaled = os.path.join(PLOTS_PATH, plot_filename_scaled)
    _create_multi_metric_plot(sw_name, terminal_scores_scaled, all_scores_scaled,
                              plot_path_scaled, use_scaled=True)

    return plot_filename_scaled


def _percentile_to_color(pct):
    """Interpolate HSV shortest path from red (0%) to green (100%)."""
    # hue 0.0 = red, hue 0.333 = green, interpolate by percentile
    h = (pct / 100.0) * (1.0 / 3.0)
    r, g, b = colorsys.hsv_to_rgb(h, 0.7, 0.9)
    return '#{:02x}{:02x}{:02x}'.format(int(r * 255), int(g * 255), int(b * 255))


def _create_multi_metric_plot(terminal_name, scores_dict, all_scores_dict,
                               output_path, use_scaled=False):
    """
    Create a bar chart showing multiple metrics at once.

    Parameters
    ----------
    terminal_name : str
        Name of the terminal
    scores_dict : dict
        Dictionary of {metric_name: score_value}
    all_scores_dict : dict
        Dictionary of {metric_name: [list of all scores]}
    output_path : str
        Path to save the plot
    use_scaled : bool
        If True, use scaled scores, otherwise raw scores
    """
    metrics = list(scores_dict.keys())
    values = []
    percentiles = []

    for metric in metrics:
        score = scores_dict[metric]
        all_scores = all_scores_dict[metric]
        valid_scores = [s for s in all_scores if not math.isnan(s)]

        if math.isnan(score):
            values.append(0)
            percentiles.append(0)
        else:
            values.append(score * 100)
            pct = sum(1 for s in valid_scores if s <= score) / len(valid_scores) * 100
            percentiles.append(pct)

    # Create bar chart (8 inches at 100dpi = 800px wide to accommodate 8 metrics)
    fig, ax = plt.subplots(figsize=(8, 4))

    x_pos = np.arange(len(metrics))
    colors = [_percentile_to_color(p) for p in percentiles]

    bars = ax.bar(x_pos, values, color=colors, alpha=0.7, edgecolor='black', linewidth=1.5)

    # Add mean lines for each metric
    for i, metric in enumerate(metrics):
        all_scores = all_scores_dict[metric]
        valid = [s * 100 for s in all_scores if not math.isnan(s)]
        if valid:
            mean_val = np.mean(valid)
            ax.hlines(mean_val, i - 0.4, i + 0.4, colors='red',
                     linestyles='dashed', linewidth=2, label='Mean' if i == 0 else '')

    # Add value labels above all bars, drawn on top of mean lines
    for i, val in enumerate(values):
        y_pos = max(val, 2)
        ax.text(i, y_pos + 1, f'{val:.0f}%', ha='center', va='bottom',
                fontsize=9, fontweight='bold', color='black')

    ylabel = 'Final Scaled Score' if use_scaled else 'RAW Score'
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_title(f'{terminal_name} - {"Scaled" if use_scaled else "Raw"} Scores vs All Terminals',
                 fontsize=14, fontweight='bold')
    ax.set_xticks(x_pos)
    ax.set_xticklabels(metrics, rotation=0, ha='center')
    ax.set_ylim(0, 110)
    ax.grid(True, alpha=0.3, axis='y')
    ax.legend()

    plt.tight_layout()
    plt.savefig(output_path, dpi=100, bbox_inches='tight',
                # 'None' CreationDate is used so the git hash's don't unnecessarily update
                metadata={'CreationDate': None})
    plt.close()


def main():
    print(f'Generating score table... ', file=sys.stderr, end='', flush=True)
    score_table, all_successful_languages = make_score_table()
    print('ok', file=sys.stderr)

    print(f'Loading terminal detail mixins... ', file=sys.stderr, end='', flush=True)
    terminal_mixins = load_terminal_detail_mixins()
    print('ok', file=sys.stderr)

    print(f'Writing docs/_static/score-colors.css ... ', file=sys.stderr, end='', flush=True)
    os.makedirs('docs/_static', exist_ok=True)
    with open('docs/_static/score-colors.css', 'w') as fout:
        fout.write(generate_score_css())
    print('ok', file=sys.stderr)

    print(f'Writing docs/results.rst ... ', file=sys.stderr, end='', flush=True)
    with open('docs/results.rst', 'w') as fout, contextlib.redirect_stdout(fout):
        display_tabulated_scores(score_table)
        # Definitions removed - not shown in individual terminal pages
        display_common_languages(all_successful_languages)
        display_capabilities_table(score_table)
        display_results_toc(score_table)
        display_common_hyperlinks()
    print('ok', file=sys.stderr)
    for entry in score_table:
        sw_name = entry["terminal_software_name"]

        # Generate score comparison plot
        print(f'Generating plots for {sw_name} ... ', file=sys.stderr, end='', flush=True)
        plot_scaled = create_score_plots(sw_name, entry, score_table)
        print('ok', file=sys.stderr)

        # Write terminal documentation page
        fname = f'docs/sw_results/{make_link(sw_name)}.rst'
        print(f'Writing {fname} ... ', file=sys.stderr, end='', flush=True)
        with open(fname, 'w') as fout, contextlib.redirect_stdout(fout):
            show_software_header(entry, sw_name, terminal_mixins)
            show_score_breakdown(sw_name, entry, plot_scaled)
            show_wide_character_support(sw_name, entry)
            show_emoji_zwj_results(sw_name, entry)
            show_vs_results(sw_name, entry, '16')
            show_vs_results(sw_name, entry, '15')
            show_graphics_results(sw_name, entry)
            show_language_results(sw_name, entry)
            show_dec_modes_results(sw_name, entry)
            show_kitty_keyboard_results(sw_name, entry)
            show_xtgettcap_results(sw_name, entry)
            show_reproduce_command(sw_name, entry)
            show_time_elapsed_results(sw_name, entry)
            display_common_hyperlinks()
        print('ok', file=sys.stderr)


def make_unicode_codepoint(wchar):
    if ord(wchar) > 0xFFFF:
        u_str = f"U+{ord(wchar):08X}"
    else:
        u_str = f"U+{ord(wchar):04X}"
    return f"`{u_str} <https://codepoints.net/{u_str}>`_"


def display_results_toc(score_table):
    display_title("Full Report by Terminal", 2)
    print(".. toctree::")
    print("   :maxdepth: 1")
    print()
    for entry in score_table:
        sw_name = make_link(entry["terminal_software_name"])
        print(f"   sw_results/{sw_name}")
    print()


def display_common_hyperlinks():
    print(".. _`printf(1)`: https://www.man7.org/linux/man-pages/man1/printf.1.html")
    print(".. _`wcwidth.wcswidth()`: https://wcwidth.readthedocs.io/en/latest/intro.html")
    print(".. _`ucs-detect`: https://github.com/jquast/ucs-detect")
    print(".. _`DEC Private Modes`: https://blessed.readthedocs.io/en/latest/dec_modes.html")

def make_link(text):
    return re.sub(LINK_REGEX, '', text).lower()

def make_outbound_hyperlink(text, link_text=None):
    if link_text is None:
        link_text = text
    return f":ref:`{text} <{make_link(link_text)}>`"

def display_inbound_hyperlink(link_text):
    print(f".. _{make_link(link_text)}:")
    print()


def find_best_failure(records):
    sorted_records = sorted(records, key=lambda record: record["measured_by_wcwidth"])
    return sorted_records[len(sorted_records) // 2]


def make_printf_hex(wchar):
    # python's b'\x12..' representation is compatible enough with printf(1)
    return repr(bytes(wchar, "utf8").decode("unicode-escape").encode("utf8"))[2:-1]


def make_score_table():
    score_table = []
    #
    # Suggest generating YAML files with something like:
    #     python ucs_detect/__init__.py --save-yaml data/output.yaml --limit-codepoints=1000 --limit-words=1000 --limit-errors=100
    #
    try:
        for yaml_path in [
            os.path.join(DATA_PATH, fname)
            for fname in os.listdir(DATA_PATH)
            if fname.endswith(".yaml") and not fname.startswith("_")
            and fname != "terminal_detail_mixins.yaml"
            and os.path.isfile(os.path.join(DATA_PATH, fname))
        ]:
            data = yaml.load(open(yaml_path, "r"), Loader=SafeLoader)

            # determine score for 'WIDE',
            _score_wide = score_wide(data)

            # 'EMOJI ZWJ',
            _score_zwj = score_zwj(data)

            # 'EMOJI VS-16',
            _vs16_base = data["test_results"].get("emoji_vs16_results", {})
            if _vs16_base and "9.0.0" in _vs16_base:
                score_emoji_vs16 = _vs16_base["9.0.0"]["pct_success"] / 100
            else:
                score_emoji_vs16 = 0.0

            # 'EMOJI VS-15',
            # Support both new (emoji_vs15_results) and old (emoji_vs15_type_a_results) formats
            _vs15_base = data["test_results"].get("emoji_vs15_results",
                                                   data["test_results"].get("emoji_vs15_type_a_results"))
            if _vs15_base and "9.0.0" in _vs15_base:
                score_emoji_vs15 = _vs15_base["9.0.0"]["pct_success"] / 100
            else:
                score_emoji_vs15 = 0.0

            # Language Support,
            score_language = score_lang(data)

            # DEC Modes Support,
            _score_dec_modes = score_dec_modes(data)

            # Elapsed time (inverse score - lower is better)
            _score_elapsed = score_elapsed_time(data)
            _elapsed_seconds = data.get("seconds_elapsed", float('NaN'))

            # Sixel support - binary score based on DA1 device attributes response
            _sixel_support = data.get("terminal_results", {}).get("sixel", False)
            _score_sixel = 1.0 if _sixel_support else 0.0

            # Capabilities score - fraction of notable capabilities supported
            _score_capabilities = score_capabilities(data)

            # Graphics protocol score - 1.0 modern, 0.5 legacy, 0.0 none
            _score_graphics = score_graphics(data)

            score_table.append(
                dict(
                    terminal_software_name=data.get("software_name", data.get('software')),
                    terminal_software_version=data.get("software_version", data.get('version')),
                    os_system=data["system"],
                    score_emoji_vs16=score_emoji_vs16,
                    score_emoji_vs15=score_emoji_vs15,
                    score_dec_modes=_score_dec_modes,
                    score_elapsed=_score_elapsed,
                    elapsed_seconds=_elapsed_seconds,
                    score_language=score_language,
                    score_wide=_score_wide,
                    score_zwj=_score_zwj,
                    score_sixel=_score_sixel,
                    sixel_support=_sixel_support,
                    score_capabilities=_score_capabilities,
                    score_graphics=_score_graphics,
                    data=data,
                    fname=os.path.basename(yaml_path),
                )
            )
    except Exception:
        print(f"Error in yaml_path={yaml_path}", file=sys.stderr)
        raise

    # Normalize elapsed time scores to 0-1 range
    # Get valid elapsed scores
    valid_elapsed = [e["score_elapsed"] for e in score_table if not math.isnan(e["score_elapsed"])]
    max_elapsed = max(valid_elapsed) if valid_elapsed else 1.0
    min_elapsed = min(valid_elapsed) if valid_elapsed else 0.0

    # Normalize DEC modes for display (not used in final score)
    valid_dec_modes = [e["score_dec_modes"] for e in score_table
                       if not math.isnan(e["score_dec_modes"])]
    max_dec_modes = max(valid_dec_modes) if valid_dec_modes else 1.0
    min_dec_modes = min(valid_dec_modes) if valid_dec_modes else 0.0

    # Normalize and calculate final scores
    for entry in score_table:
        # Normalize DEC modes to 0-1 (for display only)
        if not math.isnan(entry["score_dec_modes"]):
            if max_dec_modes == min_dec_modes:
                entry["score_dec_modes_norm"] = 1.0
            else:
                entry["score_dec_modes_norm"] = (
                    (entry["score_dec_modes"] - min_dec_modes)
                    / (max_dec_modes - min_dec_modes)
                )
        else:
            entry["score_dec_modes_norm"] = float('NaN')

        # Normalize elapsed time to 0-1 (inverse - lower is better)
        if not math.isnan(entry["score_elapsed"]):
            if max_elapsed == min_elapsed:
                entry["score_elapsed_norm"] = 1.0
            else:
                # Use log scale for time (inverse)
                log_elapsed = math.log10(entry["score_elapsed"])
                log_min = math.log10(min_elapsed)
                log_max = math.log10(max_elapsed)
                entry["score_elapsed_norm"] = 1.0 - (
                    (log_elapsed - log_min) / (log_max - log_min))
        else:
            entry["score_elapsed_norm"] = float('NaN')

        # Calculate final score using weighted average
        # Time is weighted at 0.5 (half as powerful as other metrics)
        # Graphics (GFX) scores: 1.0 modern (iTerm2/Kitty), 0.5 legacy (Sixel/ReGIS), 0.0 none
        TIME_WEIGHT = 0.5
        scores_with_weights = [
            (entry["score_language"], 1.0),
            (entry["score_emoji_vs16"], 1.0),
            (entry["score_emoji_vs15"], 1.0),
            (entry["score_zwj"], 1.0),
            (entry["score_wide"], 1.0),
            (entry["score_capabilities"], 1.0),
            (entry["score_graphics"], 1.0),
            (entry["score_elapsed_norm"], TIME_WEIGHT)
        ]
        valid_scores_with_weights = [(s, w) for s, w in scores_with_weights if not math.isnan(s)]
        if valid_scores_with_weights:
            weighted_sum = sum(s * w for s, w in valid_scores_with_weights)
            total_weight = sum(w for s, w in valid_scores_with_weights)
            entry["score_final"] = weighted_sum / total_weight
        else:
            entry["score_final"] = float('NaN')

    # after accumulating all entries, create graded scale
    result = []
    _score_keys = [key for key in score_table[0].keys() if key.startswith("score_")]
    for entry in score_table:
        for key in _score_keys:
            entry[key + "_scaled"] = scale_scores(score_table, entry, key)
        result.append(entry)
    # Sort with NaN values at the end (treat NaN as negative infinity for sorting)
    result.sort(key=lambda x: (math.isnan(x["score_final"]), -x["score_final"] if not math.isnan(x["score_final"]) else 0))

    # create unique set of all languages tested, then find languages that are
    # successful for all terminals (english, etc.) and remove them from the
    # result.
    all_languages = set()
    for entry in result:
        lang_results = entry["data"]["test_results"].get("language_results") or {}
        all_languages.update(
            lang for lang in lang_results
            if lang_results[lang]["n_errors"] == 0
        )

    all_successful_languages = set()
    for lang in all_languages:
        if all(
            lang in (entry["data"]["test_results"].get("language_results") or {}) and
            (entry["data"]["test_results"].get("language_results") or {})[lang]["n_errors"] == 0
            for entry in result
        ):
            all_successful_languages.add(lang)
            for entry in result:
                lang_results = entry["data"]["test_results"].get("language_results") or {}
                if lang in lang_results:
                    del lang_results[lang]
    return result, all_successful_languages


def format_score_pct(score):
    """Format a score as a percentage, handling NaN values."""
    if math.isnan(score):
        return "N/A"
    return f'{score*100:0.1f}%'


def format_score_int(score):
    """Format a score as an integer 0-100, handling NaN values."""
    if math.isnan(score):
        return "N/A"
    return f'{round(score*100)}'


def _truncate_version(version):
    """Truncate version string at first '-', appending ellipsis if truncated."""
    version = str(version) if version is not None else ""
    if '-' in version:
        return version.split('-', 1)[0] + '\u2026'
    return version


def _count_capabilities(entry):
    """Count supported and total notable capabilities for a terminal."""
    tr = entry["data"].get("terminal_results") or {}
    if not tr:
        return 0, 0

    modes = tr.get("modes") or {}
    n_found = 0
    n_total = 0
    for mode_num in (_DPM.BRACKETED_PASTE, _DPM.SYNCHRONIZED_OUTPUT,
                     _DPM.FOCUS_IN_OUT_EVENTS, _DPM.MOUSE_EXTENDED_SGR,
                     _DPM.GRAPHEME_CLUSTERING, _DPM.BRACKETED_PASTE_MIME):
        n_total += 1
        if _get_dec_mode_supported(modes, mode_num):
            n_found += 1
    if tr.get("kitty_keyboard") is not None:
        n_total += 1
        n_found += 1
    elif tr.get("modes"):
        n_total += 1
    xtgettcap = tr.get("xtgettcap", {})
    if xtgettcap.get("supported", False) and bool(xtgettcap.get("capabilities")):
        n_total += 1
        n_found += 1
    elif "xtgettcap" in tr:
        n_total += 1
    return n_found, n_total


def _format_capabilities_summary(entry, max_caps):
    """Format detected capabilities as a count with scored hyperlink."""
    sw_name = entry["terminal_software_name"]
    n_found, _n_total = _count_capabilities(entry)
    score = n_found / max_caps if max_caps else 0.0
    return wrap_score_with_hyperlink(
        str(n_found), score, sw_name, "_dec_modes"
    )


def _format_graphics_protocols(entry, sw_name):
    """
    Format detected graphics protocols as a comma-joined list with color scoring.

    Green (1.0) for modern protocols (iTerm2, Kitty), yellow (0.5) for legacy
    only (Sixel, ReGIS), red (0.0) for none.
    """
    tr = entry["data"].get("terminal_results") or {}
    if not tr:
        return wrap_with_score_role("N/A", float('nan'))

    protocols = []
    if tr.get("sixel", False):
        protocols.append("Sixel")
    da_ext = tr.get("device_attributes", {}).get("extensions", [])
    if 3 in da_ext:
        protocols.append("ReGIS")
    has_modern = False
    iterm2 = tr.get("iterm2_features") or {}
    if iterm2.get("supported", False):
        protocols.append("iTerm2")
        has_modern = True
    if tr.get("kitty_graphics", False):
        protocols.append("Kitty")
        has_modern = True

    if not protocols:
        return wrap_score_with_hyperlink("none", 0.0, sw_name, "_graphics")
    score = 1.0 if has_modern else 0.5
    return wrap_score_with_hyperlink(", ".join(protocols), score, sw_name, "_graphics")


def display_tabulated_scores(score_table):
    display_title("Results", 1)

    # Introduction and disclaimer
    print("This is a volunteer-maintained analysis created by and for terminal emulator and ")
    print("TUI/CLI library developers. ")
    print()
    print("We welcome productive contributions and corrections to improve the accuracy and")
    print("completeness of these measurements.")
    print()
    print(".. note::")
    print()
    print("   These test results are provided as-is and we do not guarantee their correctness.")
    print("   The scores and ratings presented here are objective measurements of Unicode and")
    print("   terminal feature support by analysis of automatic response, and should not be")
    print("   interpreted as an overall assessment of terminal emulator quality or a")
    print("   recommendation. Many factors beyond Unicode support contribute to terminal quality.")
    print("   Some terminals may optionally support features and modes not represented here.")
    print("   This data represents only automatic responses received when launched in their")
    print("   default configurations and packaged build options. Some languages and emoji")
    print("   tests may also pass 'accidentally'!")
    print()


    display_title("General Tabulated Summary", 2)

    tabulated_scores = []

    # determine max capabilities across all terminals for scaling
    max_caps = max((_count_capabilities(r)[0] for r in score_table), default=1)

    for rank, result in enumerate(score_table, start=1):
        # Build capabilities summary count
        capabilities_list = _format_capabilities_summary(result, max_caps)

        tabulated_scores.append(
            {
                "Rank": rank,
                "Terminal Software": make_outbound_hyperlink(result["terminal_software_name"]),
                "Software Version": _truncate_version(result["terminal_software_version"]),
                "OS System": result["os_system"],

                "Score": wrap_score_with_hyperlink(
                    format_score_int(result["score_final_scaled"]),
                    result["score_final_scaled"],
                    result["terminal_software_name"],
                    "_scores"
                ),
                "WIDE": wrap_score_with_hyperlink(
                    format_score_int(result["score_wide_scaled"]),
                    result["score_wide_scaled"],
                    result["terminal_software_name"],
                    "_wide"
                ),
                "LANG": wrap_score_with_hyperlink(
                    format_score_int(result["score_language_scaled"]),
                    result["score_language_scaled"],
                    result["terminal_software_name"],
                    "_lang"
                ),
                "ZWJ": wrap_score_with_hyperlink(
                    format_score_int(result["score_zwj_scaled"]),
                    result["score_zwj_scaled"],
                    result["terminal_software_name"],
                    "_zwj"
                ),
                "VS16": wrap_score_with_hyperlink(
                    format_score_int(result["score_emoji_vs16_scaled"]),
                    result["score_emoji_vs16_scaled"],
                    result["terminal_software_name"],
                    "_vs16"
                ),
                "VS15": wrap_score_with_hyperlink(
                    format_score_int(result["score_emoji_vs15_scaled"]),
                    result["score_emoji_vs15_scaled"],
                    result["terminal_software_name"],
                    "_vs15"
                ),
                "Capabilities": capabilities_list,
                "Graphics": _format_graphics_protocols(result, result["terminal_software_name"]),
            }
        )

    # Output role definitions for inline score coloring
    print(generate_score_roles())

    # Generate and print table with inline role-colored scores
    table_str = tabulate.tabulate(tabulated_scores, headers="keys", tablefmt="rst")
    print_datatable(table_str)


def display_table_definitions():
    print("Definitions:\n")
    print(
        "- *FINAL score*: The overall terminal emulator quality score, calculated as\n"
        "  the weighted average of all feature scores (WIDE, LANG, ZWJ, VS16, VS15,\n"
        "  DEC Modes, and TIME), then scaled (normalized 0-100%) relative to all terminals tested.\n"
        "  Higher scores indicate better overall Unicode and terminal feature support. DEC Modes and\n"
        "  TIME are normalized to 0-1 range before averaging. TIME is weighted at 0.5 (half as\n"
        "  powerful as other metrics) to reduce its impact on the final score."
    )
    print(
        "- *WIDE score*: Percentage of wide character codepoints correctly\n"
        "  displayed for the latest Unicode version. Calculated as the total\n"
        "  number of successful codepoints divided by total codepoints tested, scaled."
    )
    print(
        "- *LANG score*: Calculated using the geometric mean of success percentages\n"
        "  across all international languages tested. This fairly accounts for partial\n"
        "  support (e.g., 99%, 98%) without letting one low score dominate, scaled."
    )
    print(
        "- *ZWJ score*: Percentage of emoji ZWJ (Zero-Width Joiner) sequences\n"
        "  correctly displayed for the latest Unicode Emoji version. Calculated as the\n"
        "  total number of successful sequences divided by total sequences tested, scaled."
    )
    print(
        "- *VS16 score*: Determined by the number of Emoji using Variation\n"
        "  Selector-16 supported as wide characters."
    )
    print(
        "- *VS15 score*: Determined by the number of Emoji using Variation\n"
        "  Selector-15 supported as narrow characters."
    )
    print(
        "- *Mode 2027*: DEC Mode 2027 (GRAPHEME_CLUSTERING) support. Shows 'enabled'\n"
        "  if the mode is currently enabled, 'may enable' if the mode is supported but\n"
        "  not enabled and can be changed to enabled, or 'no' if not supported.\n"
        "  This mode enables grapheme clustering behavior in the terminal."
    )
    print(
        "- *DEC Modes*: Determined by the number of DEC private modes\n"
        "  that are changeable by the terminal, scaled."
    )
    print(
        "- *Elapsed Time*: Test execution time in seconds, scaled inversely\n"
        "  (lower time is better)."
    )
    print()


def scale_scores(score_table, entry, key):
    my_score = entry[key]
    if math.isnan(my_score):
        return float('NaN')

    # VS16, VS15, Sixel, and Graphics are not scaled - return raw score
    if key in ('score_emoji_vs16', 'score_emoji_vs15', 'score_sixel',
               'score_graphics'):
        return my_score

    valid_scores = [_entry[key] for _entry in score_table if not math.isnan(_entry[key])]
    if not valid_scores:
        return float('NaN')
    max_score = max(valid_scores)
    min_score = min(valid_scores)
    if max_score == min_score:
        return 1.0  # All scores are the same

    # Inverse log10 scaling for elapsed time (lower is better, log scale for color distribution)
    if key == 'score_elapsed':
        log_my_score = math.log10(my_score)
        log_min_score = math.log10(min_score)
        log_max_score = math.log10(max_score)
        return 1.0 - ((log_my_score - log_min_score) / (log_max_score - log_min_score))

    return (my_score - min_score) / (max_score - min_score)


def score_zwj(data):
    """Calculate ZWJ score as percentage of successful sequences tested."""
    zwj_results = data["test_results"].get("emoji_zwj_results") or {}
    if not zwj_results:
        return 0.0
    result = next(iter(zwj_results.values()))
    n_total = result["n_total"]
    if n_total == 0:
        return 0.0
    return (n_total - result["n_errors"]) / n_total


def score_wide(data):
    """Calculate WIDE score as percentage of successful codepoints tested."""
    wide_results = data["test_results"].get("unicode_wide_results") or {}
    if not wide_results:
        return 0.0
    result = next(iter(wide_results.values()))
    n_total = result["n_total"]
    if n_total == 0:
        return 0.0
    return (n_total - result["n_errors"]) / n_total


def score_lang(data):
    """
    Calculate language support score using geometric mean of all language success percentages.

    This gives a fairer score than simple counting of 100% languages, as it considers
    partial support (e.g., 99%, 98%) and doesn't let one low score dominate the result.
    """
    language_results = data["test_results"]["language_results"]
    if not language_results:
        return 0.0

    # Get success percentages for all languages (as fractions 0.0-1.0)
    percentages = [
        lang_data["pct_success"] / 100
        for lang_data in language_results.values()
    ]

    # Calculate geometric mean using log space to avoid overflow
    # geometric_mean = exp(mean(log(percentages)))
    if any(p == 0 for p in percentages):
        # If any language has 0% support, treat those as very small values
        percentages = [max(p, 0.0001) for p in percentages]

    log_percentages = [math.log(p) for p in percentages]
    geometric_mean = math.exp(sum(log_percentages) / len(log_percentages))

    return geometric_mean


def score_dec_modes(data):
    """
    Calculate score based on changeable DEC private modes.

    Returns the count of changeable modes.
    """
    if "terminal_results" not in data or "modes" not in data["terminal_results"]:
        return float('NaN')

    modes = data["terminal_results"]["modes"]
    changeable_modes = sum(
        1 for mode_data in modes.values()
        if mode_data.get("changeable", False)
    )

    return changeable_modes


def score_capabilities(data):
    """
    Calculate score as fraction of notable terminal capabilities supported.

    Checks 12 capabilities: Bracketed Paste (mode 2004), Synced Output (mode 2026),
    Focus Events (mode 1004), Mouse SGR (mode 1006), Graphemes (mode 2027),
    Bracketed Paste MIME (mode 5522), Kitty Keyboard, XTGETTCAP, Text Sizing,
    Kitty Clipboard, Kitty Pointer Shapes, and Kitty Notifications.

    :rtype: float
    :returns: fraction 0.0-1.0 of capabilities supported
    """
    tr = data.get("terminal_results") or {}
    if not tr:
        return float('NaN')

    modes = tr.get("modes") or {}
    count = 0
    total = 12

    for mode_num in (_DPM.BRACKETED_PASTE, _DPM.SYNCHRONIZED_OUTPUT,
                     _DPM.FOCUS_IN_OUT_EVENTS, _DPM.MOUSE_EXTENDED_SGR,
                     _DPM.GRAPHEME_CLUSTERING, _DPM.BRACKETED_PASTE_MIME):
        mode_key = str(mode_num) if str(mode_num) in modes else mode_num
        if mode_key in modes and modes[mode_key].get("supported", False):
            count += 1

    if tr.get("kitty_keyboard") is not None:
        count += 1

    xtgettcap = tr.get("xtgettcap", {})
    if xtgettcap.get("supported", False) and bool(xtgettcap.get("capabilities")):
        count += 1

    text_sizing = tr.get("text_sizing", {})
    if text_sizing.get("width") or text_sizing.get("scale"):
        count += 1

    if tr.get("kitty_clipboard_protocol", False):
        count += 1

    kitty_ptr = tr.get("kitty_pointer_shapes")
    if isinstance(kitty_ptr, dict) and kitty_ptr.get("supported", False):
        count += 1

    kitty_notif = tr.get("kitty_notifications")
    if isinstance(kitty_notif, dict) and kitty_notif.get("supported", False):
        count += 1

    return count / total


def score_graphics(data):
    """
    Calculate graphics protocol support score.

    :rtype: float
    :returns: 1.0 for modern (iTerm2/Kitty), 0.5 for legacy only (Sixel/ReGIS), 0.0 for none
    """
    tr = data.get("terminal_results") or {}
    if not tr:
        return 0.0

    has_any = False
    if tr.get("sixel", False):
        has_any = True
    da_ext = tr.get("device_attributes", {}).get("extensions", [])
    if 3 in da_ext: