From d18b58679a759ad569ccb29634453bcf2b8b78c2 Mon Sep 17 00:00:00 2001
From: Jake Wagoner <jakewagoneredu@gmail.com>
Date: Wed, 10 Jan 2024 12:07:13 -0700
Subject: [PATCH 1/4] Add grammar generation (needs data)

---
 upsetplot/alt_text.py | 121 ++++++++++++++++++++++++++++++++++++++++++
 upsetplot/plotting.py |  24 +++++++++
 2 files changed, 145 insertions(+)
 create mode 100644 upsetplot/alt_text.py

diff --git a/upsetplot/alt_text.py b/upsetplot/alt_text.py
new file mode 100644
index 0000000..01f6715
--- /dev/null
+++ b/upsetplot/alt_text.py
@@ -0,0 +1,121 @@
+def generate_grammar(
+    df,
+    intersections,
+    totals,
+    *,
+    sort_by,
+    sort_categories_by,
+    min_degree=None,
+    max_degree=None,
+    include_empty_subsets=False,
+):
+    """
+    Generate the grammar used by UpSet 2 and Multinet to generate alt text.
+
+    Some values present in UpSet 2 will not be present in all implementations (e.g. Aggregation), so default values will be provided here
+
+    .. versionadded:: 0.10
+
+    Parameters
+    ----------
+    df : :class:`pandas.core.frame.DataFrame`
+        The UpSet data DataFrame.
+    intersections : :class:`pandas.core.series.Series`
+        The list of intersections.
+    totals : :class:`pandas.core.series.Series`
+        The list of totals.
+    sort_by : str
+        The attribute to sort the sets by.
+    sort_categories_by : str
+        The attribute to sort the categories by.
+    min_degree : number, optional
+        The minimum degree (optional).
+    max_degree : number, optional
+        The maximum degree (optional).
+    include_empty_subsets : bool, default=False
+        Whether to include empty subsets (default: False).
+
+    Returns
+    -------
+    The generated grammar compatible with UpSet 2 and Multinet's alt-text generator.
+    """
+
+    # default grammar state values required by UpSet 2/Multinet
+    grammar = {
+       "plotInformation": {
+            "description": "",
+            "sets": "",
+            "items": ""
+        },
+        "firstAggregateBy": "None",
+        "firstOverlapDegree": 2,
+        "secondAggregateBy": "None",
+        "secondOverlapDegree": 2,
+        "sortVisibleBy": "Alphabetical",
+        "sortBy": "Size",
+        "filters": {
+            "maxVisible": 6,
+            "minVisible": 0,
+            "hideEmpty": True,
+            "hideNoSet": False
+        },
+        "visibleSets": [],
+        "visibleAttributes": [],
+        "bookmarkedIntersections": [],
+        "collapsed": [],
+        "plots": {
+            "scatterplots": [],
+            "histograms": [],
+            "wordClouds": []
+        },
+        "allSets": [],
+        # this value will likely be redundant with the latest alt-text generator release
+        "altText": {
+            "verbosity": "low",
+            "explain": "full"
+        },
+        "rawData": {}, # this value may not be necessary
+        "processedData": {},
+        "accessibleProcessedData": {},
+    }
+
+    # TODO: update this when UpSet adds reverese sorting functionality
+    if sort_by == "cardinality" or sort_by == "-cardinality":
+        grammar["sortBy"] = "Size"
+    if sort_by == "degree" or sort_by == "-degree":
+        grammar["sortBy"] = "Degree"
+    # this sort type is not supported by UpSet 2
+    if sort_by == "input" or sort_by == "-input":
+        grammar["sortBy"] = "Size"
+
+    if sort_categories_by == "cardinality":
+        grammar["sortVisibleBy"] = "Descending"
+    if sort_categories_by == "-cardinality":
+        grammar["sortVisibleBy"] = "Ascending"
+    # this sort type is not supported by UpSet 2
+    if sort_categories_by == "input" or sort_categories_by == "-input":
+        grammar["sortVisibleBy"] = "Alphabetical"
+
+    grammar["filters"]["hideEmpty"] = not include_empty_subsets
+    grammar["filters"]["minVisible"] = (
+        min_degree if min_degree is not None else 0
+    )
+    grammar["filters"]["maxVisible"] = (
+        max_degree if max_degree is not None else 6
+    )
+
+    # these two values are the same as there is no way to "hide" sets
+    grammar["visibleSets"] = totals.index.to_list()
+    grammar["allSets"] = totals.index.to_list()
+
+    return grammar
+
+
+def get_alt_text():
+    """
+    Get the alt text for an UpSet plot.
+
+    Returns:
+        The alt text for the plot.
+    """
+    pass
diff --git a/upsetplot/plotting.py b/upsetplot/plotting.py
index c69636b..48b4e54 100644
--- a/upsetplot/plotting.py
+++ b/upsetplot/plotting.py
@@ -10,6 +10,8 @@
 from . import util
 from .reformat import _get_subset_mask, query
 
+from .alt_text import generate_grammar
+
 # prevents ImportError on matplotlib versions >3.5.2
 try:
     from matplotlib.tight_layout import get_renderer
@@ -360,6 +362,25 @@ def __init__(
         ]
         self.subset_legend = []  # pairs of (style, label)
 
+        self.grammar = generate_grammar(
+            self._df,
+            self.intersections,
+            self.totals,
+            sort_by=sort_by,
+            sort_categories_by=sort_categories_by,
+            # these attributes are not present in UpSet 2
+            # subset_size=subset_size,
+            # sum_over=sum_over,
+            # min_subset_size=min_subset_size,
+            # max_subset_size=max_subset_size,
+            # max_subset_rank=max_subset_rank,
+            min_degree=min_degree,
+            max_degree=max_degree,
+            # this attribute is not present in UpSet 2
+            # reverse=not self._horizontal,
+            include_empty_subsets=include_empty_subsets,
+        )
+
     def _swapaxes(self, x, y):
         if self._horizontal:
             return x, y
@@ -633,6 +654,9 @@ def add_catplot(self, kind, value=None, elements=3, **kw):
             }
         )
 
+        # add the category to the list of visible categories
+        self.grammar['visibleAttributes'].append(value)
+
     def _check_value(self, value):
         if value is None and "_value" in self._df.columns:
             value = "_value"

From d82c4fa5bbfd17c460d5ebc767c9cb37511b81fe Mon Sep 17 00:00:00 2001
From: Jake Wagoner <jakewagoneredu@gmail.com>
Date: Thu, 22 Aug 2024 14:34:13 -0600
Subject: [PATCH 2/4] Add alt-text generation from upset-alttxt

---
 README.rst                |   1 +
 doc/requirements.txt      |   1 +
 examples/plot_alt_text.py |  73 +++++++++
 upsetplot/alt_text.py     | 302 ++++++++++++++++++++++++++++++++++----
 upsetplot/plotting.py     | 128 +++++++++++-----
 5 files changed, 445 insertions(+), 60 deletions(-)
 create mode 100644 examples/plot_alt_text.py

diff --git a/README.rst b/README.rst
index e56155e..49633b3 100644
--- a/README.rst
+++ b/README.rst
@@ -130,6 +130,7 @@ Installation requires:
 * pandas
 * matplotlib >= 2.0
 * seaborn to use `UpSet.add_catplot`
+* upset-alttxt v0.4.3 to use `UpSet.get_alt_text`
 
 It should then be possible to::
 
diff --git a/doc/requirements.txt b/doc/requirements.txt
index aa52f7d..b866391 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -10,3 +10,4 @@ scikit-learn
 nbsphinx
 sphinx<2
 sphinx-rtd-theme
+upset-alttxt==0.2.7
diff --git a/examples/plot_alt_text.py b/examples/plot_alt_text.py
new file mode 100644
index 0000000..15590c9
--- /dev/null
+++ b/examples/plot_alt_text.py
@@ -0,0 +1,73 @@
+"""
+==========================================
+Data Vis: Alt text generation in UpSetPlot
+==========================================
+
+Explore text description generation via upset-alttxt (2024).
+
+When text description generation is enabled, there are no changes to the actual plot.
+The generated text description can be accessed after creating the UpSet plot object.
+
+"""
+
+from matplotlib import pyplot as plt
+from upsetplot import generate_counts
+from upsetplot import UpSet
+
+# Load the dataset into a DataFrame
+example = generate_counts()
+
+##########################################################################
+
+print("Generating a plot AND grammar for textual description")
+upset = UpSet(
+    example,
+    subset_size="count",
+    sort_by="-cardinality",
+    sort_categories_by="-cardinality",
+    orientation="vertical",
+    gen_grammar=True,
+    meta_data={"items": "RANDOM ITEMS"},
+)
+upset.plot()
+plt.suptitle("UpSet plot with text description generated'")
+text_description = upset.get_alt_text()
+
+print('==================================')
+print('Long Description (markdown formatted)')
+print('==================================')
+print(text_description['longDescription'])
+
+print('==================================')
+print('Short Description')
+print('==================================')
+print(text_description['shortDescription'])
+
+print('\n==================================')
+print('Technique Description')
+print('==================================')
+print(text_description['techniqueDescription'])
+
+plt.show()
+
+
+print('\nNow to generate the same plot with no alt text generation')
+
+# To disable grammar generation, simply ignore the gen_grammar parameter or set it to False.
+upset = UpSet(
+    example,
+    subset_size="count",
+    sort_by="-cardinality",
+    sort_categories_by="-cardinality",
+    orientation="vertical",
+)
+
+upset.plot()
+plt.suptitle("UpSet plot with no alt text generation")
+
+try:
+    text_description = upset.get_alt_text()
+except ValueError:
+    print('gen_grammar must be set to True for any alt text generation.')
+
+plt.show()
diff --git a/upsetplot/alt_text.py b/upsetplot/alt_text.py
index 01f6715..d1657ec 100644
--- a/upsetplot/alt_text.py
+++ b/upsetplot/alt_text.py
@@ -1,13 +1,31 @@
+from alttxt.enums import Level
+from alttxt.generator import AltTxtGen
+from alttxt.tokenmap import TokenMap
+from alttxt.parser import Parser
+
+
+"""
+    alt_text.py
+    -----------
+    This file contains various conversions from the datatypes, structures,
+    and values from UpSetPlot to Upset2 at upset.multinet.app.
+    This is necessary to generate alt text with the Multinet API.
+"""
+
+
 def generate_grammar(
     df,
     intersections,
     totals,
     *,
+    horizontal,
     sort_by,
     sort_categories_by,
     min_degree=None,
     max_degree=None,
     include_empty_subsets=False,
+    include_data=False,
+    meta_data=None,
 ):
     """
     Generate the grammar used by UpSet 2 and Multinet to generate alt text.
@@ -24,6 +42,8 @@ def generate_grammar(
         The list of intersections.
     totals : :class:`pandas.core.series.Series`
         The list of totals.
+    horizontal: bool
+        Plot orientation.
     sort_by : str
         The attribute to sort the sets by.
     sort_categories_by : str
@@ -34,19 +54,24 @@ def generate_grammar(
         The maximum degree (optional).
     include_empty_subsets : bool, default=False
         Whether to include empty subsets (default: False).
+    include_data : bool, default=False
+        Whether to include subset data (default: False).
 
     Returns
     -------
-    The generated grammar compatible with UpSet 2 and Multinet's alt-text generator.
+    The generated grammar as a dictionary.
     """
 
     # default grammar state values required by UpSet 2/Multinet
     grammar = {
-       "plotInformation": {
+        "plotInformation": {
+            "title": "",
+            "caption": "",
             "description": "",
             "sets": "",
             "items": ""
         },
+        "horizontal": False,
         "firstAggregateBy": "None",
         "firstOverlapDegree": 2,
         "secondAggregateBy": "None",
@@ -57,46 +82,56 @@ def generate_grammar(
             "maxVisible": 6,
             "minVisible": 0,
             "hideEmpty": True,
-            "hideNoSet": False
+            "hideNoSet": False,
         },
         "visibleSets": [],
         "visibleAttributes": [],
         "bookmarkedIntersections": [],
         "collapsed": [],
-        "plots": {
-            "scatterplots": [],
-            "histograms": [],
-            "wordClouds": []
-        },
+        "plots": {"scatterplots": [], "histograms": [], "wordClouds": []},
         "allSets": [],
-        # this value will likely be redundant with the latest alt-text generator release
-        "altText": {
-            "verbosity": "low",
-            "explain": "full"
-        },
-        "rawData": {}, # this value may not be necessary
-        "processedData": {},
-        "accessibleProcessedData": {},
     }
 
-    # TODO: update this when UpSet adds reverese sorting functionality
-    if sort_by == "cardinality" or sort_by == "-cardinality":
-        grammar["sortBy"] = "Size"
-    if sort_by == "degree" or sort_by == "-degree":
+    grammar["horizontal"] = horizontal
+
+    if meta_data is not None:
+        grammar["plotInformation"]["title"] = meta_data.get("title", "")
+        grammar["plotInformation"]["caption"] = meta_data.get("caption", "")
+        grammar["plotInformation"]["description"] = meta_data.get("description", "")
+        grammar["plotInformation"]["sets"] = meta_data.get("sets", "")
+        grammar["plotInformation"]["items"] = meta_data.get("items", "")
+
+    if sort_by == "degree":
         grammar["sortBy"] = "Degree"
+        grammar["sortByOrder"] = "Descending"
+    if sort_by == "-degree":
+        grammar["sortBy"] = "Degree"
+        grammar["sortByOrder"] = "Ascending"
+    if sort_by == "cardinality":
+        grammar["sortBy"] = "Size"
+        grammar["sortByOrder"] = "Ascending"
+    if sort_by == "-cardinality":
+        grammar["sortBy"] = "Size"
+        grammar["sortByOrder"] = "Descending"
     # this sort type is not supported by UpSet 2
     if sort_by == "input" or sort_by == "-input":
         grammar["sortBy"] = "Size"
+        grammar["sortByOrder"] = "Descending"
 
     if sort_categories_by == "cardinality":
         grammar["sortVisibleBy"] = "Descending"
     if sort_categories_by == "-cardinality":
         grammar["sortVisibleBy"] = "Ascending"
-    # this sort type is not supported by UpSet 2
+    # this category sort type is not supported by UpSet 2
     if sort_categories_by == "input" or sort_categories_by == "-input":
         grammar["sortVisibleBy"] = "Alphabetical"
 
     grammar["filters"]["hideEmpty"] = not include_empty_subsets
+    # if the min degree is above 0, the no set intersection should be hidden
+    grammar["filters"]["hideNoSet"] = (
+        min_degree > 0 if min_degree is not None else False
+    )
+
     grammar["filters"]["minVisible"] = (
         min_degree if min_degree is not None else 0
     )
@@ -104,18 +139,233 @@ def generate_grammar(
         max_degree if max_degree is not None else 6
     )
 
-    # these two values are the same as there is no way to "hide" sets
     grammar["visibleSets"] = totals.index.to_list()
-    grammar["allSets"] = totals.index.to_list()
+
+    grammar["allSets"] = get_all_sets_info(totals)
+
+    grammar["bookmarkedIntersections"] = [
+        # generate intersection ids, or simply append index?
+    ]
+
+    if (include_data):
+        grammar["processedData"] = generate_processed_data(
+            df, intersections, totals
+        )
+        grammar["rawData"] = {}
+        grammar["accessibleProcessedData"] = generate_processed_data(
+            df, intersections, totals, accessible=True
+        )
 
     return grammar
 
 
-def get_alt_text():
+def get_all_sets_info(totals):
+    """
+    Returns a list of objects, each containing the name and size of a set.
+
+    Parameters:
+    totals : dict
+      A dictionary containing the set names as keys and their sizes as values.
+
+    Returns:
+    list: A list of dictionaries, where each dictionary represents a set and contains the keys "name" and "size".
+          The "name" key holds the name of the set (str), and the "size" key holds the size of the set (int).
+    """
+    all_sets = []
+
+    for set_name, set_size in totals.items():
+        all_sets.append({"name": set_name, "size": set_size})
+
+    return all_sets
+
+
+def calculate_deviation(
+    contained_sets,
+    v_sets,
+    sets,
+    intersection_size,
+    total_items
+):
+    """
+    Calculate the deviation of a given intersection.
+    Based on deviation calculation in 2014 paper by Lex et al.
+
+    Parameters:
+    contained_sets : list
+        The list of sets contained in the subset row (intersection)
+    v_sets : list
+        The list of all visible sets loaded into the UpSet plot
+    sets : dict
+        A dictionary containing the set names as keys and their sizes as values.
+    intersection_size : int
+        The size of the subset row (intersection)
+    total_items : int
+        The total number of items in the dataset
+
+    Returns:
+    float: The deviation of the intersection.
+    """
+    contained_product = 1
+    for s in contained_sets:
+        set_size = sets[s]
+        contained_product *= set_size / total_items
+
+    non_contained_product = 1
+    for v in v_sets:
+        if v not in contained_sets:
+            set_size = sets[v]
+            non_contained_product *= 1 - set_size / total_items
+
+    dev = intersection_size / total_items - contained_product * non_contained_product
+
+    return dev * 100
+
+
+def get_set_membership_from_index(intersections, idx):
+    """
+    Returns a dictionary indicating the set membership of a given index.
+
+    Parameters:
+    intersections : :class:`pandas.core.series.Series`
+        The list of intersections.
+    idx : int
+        The index to retrieve set membership for.
+
+    Returns:
+    dict: A dictionary where the keys are the set names and the values are either "Yes" or "No" indicating set membership.
     """
-    Get the alt text for an UpSet plot.
+    names = intersections.index.names
+    set_membership = {}
+    for i, name in enumerate(names):
+        set_membership[name] = "Yes" if intersections.index[idx][i] else "No"
+
+    return set_membership
+
+
+def get_degree_from_set_membership(set_membership):
+    """
+    Returns the degree of a given set membership.
+
+    Parameters:
+    set_membership : dict
+        A dictionary indicating the set membership of a given index.
+
+    Returns:
+    int: The degree of the set membership.
+    """
+    return list(set_membership.values()).count("Yes")
+
+
+def get_element_name_from_id(id):
+    """
+    Returns the element name (for use in alt-txt) from the given ID.
+    (e.g) "Just cat1" or "cat1, cat2, and cat3"
+
+    Parameters:
+    id : str
+        The ID to retrieve the element name for.
+    """
+    # strip the "Subset~_~" marker from the ID
+    # split what remains on "~_~" (the delimiter placed between set names)
+    # join with commas, but the last element should also have "and " prepended
+    # if elements is only one element, return "Just {element}"
+    stripped_id = id.replace("Subset~_~", "")
+    elements = stripped_id.split("~_~")
+
+    if len(elements) == 1:
+        # the empty subset is named "Unincluded"
+        #   and does not need "Just" prepended
+        if (elements[0] == "Unincluded"):
+            return "Unincluded"
+        return f"Just {elements[0]}"
+
+    element_name = ""
+    for i, element in enumerate(elements):
+        if i == len(elements) - 1:
+            element_name += f"and {element}"
+        else:
+            element_name += f"{element}, "
+
+    return element_name
+
+
+def generate_intersection_id(intersections, idx):
+    """
+    Generates an intersection ID based on the given intersections and index.
+
+    Parameters:
+    intersections : :class:`pandas.core.series.Series`
+        The list of intersections.
+    idx : int
+        The index to retrieve set membership for.
+
+    Returns:
+    str: The generated intersection ID.
+    """
+    names = intersections.index.names
+    intersection_id = "Subset"
+    set_membership = get_set_membership_from_index(intersections, idx)
+    for name in names:
+        # the delimiter "~_~" is used here as it is unlikely that it will be used in a set name
+        intersection_id += f"~_~{name}" if set_membership[name] == "Yes" else ""
+
+    # the empty subset is named "Subset_Unincluded" in UpSet2
+    if (intersection_id == "Subset"):
+        intersection_id += "~_~Unincluded"
+
+    return intersection_id
+
+
+def generate_processed_data(df, intersections, totals, accessible=False):
+    processedData = {"values": {}, "order": []}
+    # for every row in intersections:
+    # generate the setMembership object
+    for i in range(len(intersections)):
+        id = generate_intersection_id(intersections, i)
+        set_membership = get_set_membership_from_index(intersections, i)
+        contained_sets = [name for name, membership in set_membership.items() if membership == "Yes"]
+
+        intersection_size = int(intersections.iat[i])
+
+        deviation = calculate_deviation(
+                contained_sets=contained_sets,
+                v_sets=list(totals.index),
+                sets=totals,
+                intersection_size=intersection_size,
+                total_items=totals.sum(),
+            )
+
+        processedData["values"][id] = {
+            "id": id,
+            "elementName": get_element_name_from_id(id),
+            "setMembership": set_membership,
+            "size": intersection_size,
+            "type": "Subset",
+            "degree": get_degree_from_set_membership(set_membership),
+            "attributes": {},
+            "deviation": deviation,
+        }
+        if accessible:
+            processedData["values"][id]["deviation"] = deviation
+        else:
+            processedData["values"][id]["items"] = []
+            processedData["order"].append(id)
+
+    return processedData
+
+
+def fetch_alt_text(grammar):
+    """
+    Get the alt text for an UpSet plot. Calls Multinet API
 
     Returns:
         The alt text for the plot.
     """
-    pass
+    parser = Parser(grammar)
+    parsed_data = parser.get_data()
+    parsed_grammar = parser.get_grammar()
+
+    tokenmap: TokenMap = TokenMap(parsed_data, parsed_grammar, 'title')
+    gen = AltTxtGen(Level.DEFAULT, True, tokenmap, parsed_grammar)
+
+    return gen.text
diff --git a/upsetplot/plotting.py b/upsetplot/plotting.py
index 48b4e54..965213a 100644
--- a/upsetplot/plotting.py
+++ b/upsetplot/plotting.py
@@ -10,7 +10,7 @@
 from . import util
 from .reformat import _get_subset_mask, query
 
-from .alt_text import generate_grammar
+from .alt_text import fetch_alt_text, generate_grammar
 
 # prevents ImportError on matplotlib versions >3.5.2
 try:
@@ -280,6 +280,8 @@ class UpSet:
     include_empty_subsets : bool (default=False)
         If True, all possible category combinations will be shown as subsets,
         even when some are not present in data.
+    gen_grammar : bool (default=False)
+        If True, a grammar will be generated for the plot.
     """
 
     _default_figsize = (10, 6)
@@ -308,6 +310,8 @@ def __init__(
         show_counts="",
         show_percentages=False,
         include_empty_subsets=False,
+        gen_grammar=False,
+        meta_data=None,
     ):
         self._horizontal = orientation == "horizontal"
         self._reorient = _identity if self._horizontal else _transpose
@@ -342,44 +346,64 @@ def __init__(
         self._show_counts = show_counts
         self._show_percentages = show_percentages
 
-        (self.total, self._df, self.intersections, self.totals) = _process_data(
-            data,
-            sort_by=sort_by,
-            sort_categories_by=sort_categories_by,
-            subset_size=subset_size,
-            sum_over=sum_over,
-            min_subset_size=min_subset_size,
-            max_subset_size=max_subset_size,
-            max_subset_rank=max_subset_rank,
-            min_degree=min_degree,
-            max_degree=max_degree,
-            reverse=not self._horizontal,
-            include_empty_subsets=include_empty_subsets,
+        (self.total, self._df, self.intersections, self.totals) = (
+            _process_data(
+                data,
+                sort_by=sort_by,
+                sort_categories_by=sort_categories_by,
+                subset_size=subset_size,
+                sum_over=sum_over,
+                min_subset_size=min_subset_size,
+                max_subset_size=max_subset_size,
+                max_subset_rank=max_subset_rank,
+                min_degree=min_degree,
+                max_degree=max_degree,
+                reverse=not self._horizontal,
+                include_empty_subsets=include_empty_subsets,
+            )
         )
         self.category_styles = {}
         self.subset_styles = [
             {"facecolor": facecolor} for i in range(len(self.intersections))
         ]
         self.subset_legend = []  # pairs of (style, label)
+        self._grammar = None
+
+        if (gen_grammar):
+            self._grammar = generate_grammar(
+                    self._df,
+                    self.intersections,
+                    self.totals,
+                    horizontal=self._horizontal,
+                    sort_by=sort_by,
+                    sort_categories_by=sort_categories_by,
+                    min_degree=min_degree,
+                    max_degree=max_degree,
+                    include_empty_subsets=include_empty_subsets,
+                    include_data=True,
+                    meta_data=meta_data,
+            )
 
-        self.grammar = generate_grammar(
-            self._df,
-            self.intersections,
-            self.totals,
-            sort_by=sort_by,
-            sort_categories_by=sort_categories_by,
-            # these attributes are not present in UpSet 2
-            # subset_size=subset_size,
-            # sum_over=sum_over,
-            # min_subset_size=min_subset_size,
-            # max_subset_size=max_subset_size,
-            # max_subset_rank=max_subset_rank,
-            min_degree=min_degree,
-            max_degree=max_degree,
-            # this attribute is not present in UpSet 2
-            # reverse=not self._horizontal,
-            include_empty_subsets=include_empty_subsets,
-        )
+    def get_alt_text(self):
+        """Return a textual description of the plot from upset-alttxt
+
+        Returns
+        -------
+        dict
+            A json object with textual descriptions of the plot.
+            Contains entries:
+                'techniqueDescription' (str),
+                'shortDescription' (str)
+                'longDescription' (str): a markdown formatted string
+        """
+        if (self._grammar is None):
+            raise ValueError("Grammar not generated.")
+
+        try:
+            return fetch_alt_text(self._grammar)
+        except Exception as e:
+            warnings.warn("Failed to fetch alt text: %s" % e)
+            return {}
 
     def _swapaxes(self, x, y):
         if self._horizontal:
@@ -654,8 +678,11 @@ def add_catplot(self, kind, value=None, elements=3, **kw):
             }
         )
 
-        # add the category to the list of visible categories
-        self.grammar['visibleAttributes'].append(value)
+        # add the category to the grammar (list of visible categories)
+        if self._grammar is not None:
+            self._grammar['visibleAttributes'].append(value)
+
+        # attribute stats data needs to be added to every subset
 
     def _check_value(self, value):
         if value is None and "_value" in self._df.columns:
@@ -960,6 +987,39 @@ def make_args(val):
         else:
             raise NotImplementedError("unhandled where: %r" % where)
 
+    def get_grammar(self):
+        """Return the grammar dictionary for the plot.
+
+        Returns:
+            dict: The grammar dictionary for the plot.
+        """
+        return self._grammar
+
+    def _update_grammar(self, key, value):
+        """
+        Update an existing entry of the grammar dictionary.
+
+        List-valued entries get ``value`` appended; any other entry is replaced
+        by ``value``. Nothing happens (None is returned) when no grammar exists.
+
+        Args:
+            key (str): The key to update in the grammar dictionary.
+            value (Any): The value to append to the existing list or assign to the key.
+
+        Raises:
+            ValueError: If the key is not found in the grammar dictionary.
+        """
+        if self._grammar is None:
+            return
+        if key in self._grammar:
+            if isinstance(self._grammar[key], list):
+                self._grammar[key].append(value)
+            else:
+                self._grammar[key] = value
+            return True
+        else:
+            raise ValueError(f"Key {key} not found in grammar")
+
     def plot_totals(self, ax):
         """Plot bars indicating total set size"""
         orig_ax = ax

From fc60bceabf77d91b7aa7103bba681aab7d1b5dbd Mon Sep 17 00:00:00 2001
From: Jake Wagoner <jakewagoneredu@gmail.com>
Date: Thu, 19 Dec 2024 12:30:45 -0700
Subject: [PATCH 3/4] Improve debug statements for alt-text gen and add version
 number to alt-text grammar

---
 README.rst                |  2 +-
 examples/plot_alt_text.py |  1 -
 upsetplot/alt_text.py     | 26 +++++++++++++++++---------
 upsetplot/plotting.py     |  2 +-
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/README.rst b/README.rst
index 49633b3..492e435 100644
--- a/README.rst
+++ b/README.rst
@@ -130,7 +130,7 @@ Installation requires:
 * pandas
 * matplotlib >= 2.0
 * seaborn to use `UpSet.add_catplot`
-* upset-alttxt v0.4.3 to use `UpSet.get_alt_text`
+* upset-alttxt v0.4.9 to use `UpSet.get_alt_text`
 
 It should then be possible to::
 
diff --git a/examples/plot_alt_text.py b/examples/plot_alt_text.py
index 15590c9..b9d6140 100644
--- a/examples/plot_alt_text.py
+++ b/examples/plot_alt_text.py
@@ -27,7 +27,6 @@
     sort_categories_by="-cardinality",
     orientation="vertical",
     gen_grammar=True,
-    meta_data={"items": "RANDOM ITEMS"},
 )
 upset.plot()
 plt.suptitle("UpSet plot with text description generated'")
diff --git a/upsetplot/alt_text.py b/upsetplot/alt_text.py
index d1657ec..9657555 100644
--- a/upsetplot/alt_text.py
+++ b/upsetplot/alt_text.py
@@ -64,6 +64,7 @@ def generate_grammar(
 
     # default grammar state values required by UpSet 2/Multinet
     grammar = {
+        "version": "0.1.0",  # alt-text grammar version
         "plotInformation": {
             "title": "",
             "caption": "",
@@ -86,7 +87,7 @@ def generate_grammar(
         },
         "visibleSets": [],
         "visibleAttributes": [],
-        "bookmarkedIntersections": [],
+        "bookmarks": [],
         "collapsed": [],
         "plots": {"scatterplots": [], "histograms": [], "wordClouds": []},
         "allSets": [],
@@ -216,7 +217,7 @@ def calculate_deviation(
             set_size = sets[v]
             non_contained_product *= 1 - set_size / total_items
 
-    dev = intersection_size / total_items - contained_product * non_contained_product
+    dev = (intersection_size / total_items) - (contained_product * non_contained_product)
 
     return dev * 100
 
@@ -306,7 +307,7 @@ def generate_intersection_id(intersections, idx):
     intersection_id = "Subset"
     set_membership = get_set_membership_from_index(intersections, idx)
     for name in names:
-        # the delimiter "~_~" is used here as it is unlikely that it will be used in a set name
+        # the delimiter "~_~" is used in UpSet2 in the internal ID
         intersection_id += f"~_~{name}" if set_membership[name] == "Yes" else ""
 
     # the empty subset is named "Subset_Unincluded" in UpSet2
@@ -361,11 +362,18 @@ def fetch_alt_text(grammar):
     Returns:
         The alt text for the plot.
     """
-    parser = Parser(grammar)
-    parsed_data = parser.get_data()
-    parsed_grammar = parser.get_grammar()
+    try:
+        parser = Parser(grammar)
+        parsed_data = parser.get_data()
+        parsed_grammar = parser.get_grammar()
 
-    tokenmap: TokenMap = TokenMap(parsed_data, parsed_grammar, 'title')
-    gen = AltTxtGen(Level.DEFAULT, True, tokenmap, parsed_grammar)
+        tokenmap: TokenMap = TokenMap(parsed_data, parsed_grammar, 'title')
 
-    return gen.text
+        gen = AltTxtGen(Level.DEFAULT, True, tokenmap, parsed_grammar)
+    except Exception as e:  # chained so the original traceback stays the cause
+        raise Exception(f"Failed to create alt text generator: {e}") from e
+
+    try:
+        return gen.text
+    except Exception as e:  # chained so the original traceback stays the cause
+        raise Exception(f"Failed to generate alt text: {e}") from e
diff --git a/upsetplot/plotting.py b/upsetplot/plotting.py
index 965213a..b70e93c 100644
--- a/upsetplot/plotting.py
+++ b/upsetplot/plotting.py
@@ -385,7 +385,7 @@ def __init__(
             )
 
     def get_alt_text(self):
-        """Return a textual description of the plot from upset-alttxt
+        """Return a textual description of the plot from the upset-alttxt package
 
         Returns
         -------

From 7bcf41d2ffbcf741d56a47d42c7b2299b70e3d4d Mon Sep 17 00:00:00 2001
From: Jake Wagoner <jakewagoneredu@gmail.com>
Date: Thu, 19 Dec 2024 13:48:03 -0700
Subject: [PATCH 4/4] Add testing coverage for alt-text

---
 .github/workflows/test.yml       |  1 +
 README.rst                       |  2 +-
 doc/requirements.txt             |  2 +-
 examples/plot_alt_text.py        | 34 +++++++--------
 upsetplot/__init__.py            |  6 +++
 upsetplot/alt_text.py            | 58 ++++++++++---------------
 upsetplot/plotting.py            | 61 +++++++++++++-------------
 upsetplot/tests/test_alttext.py  | 73 ++++++++++++++++++++++++++++++++
 upsetplot/tests/test_examples.py |  1 +
 9 files changed, 153 insertions(+), 85 deletions(-)
 create mode 100644 upsetplot/tests/test_alttext.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f146d20..5e757c6 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -20,6 +20,7 @@ jobs:
         shell: bash -el {0}
         run: |
           conda install pytest pytest-cov coveralls ${{ matrix.conda-deps }}
+          pip install upset-alttxt==0.5.2
           python setup.py install
           cp ci/matplotlibrc matplotlibrc
       - name: test
diff --git a/README.rst b/README.rst
index 492e435..2902314 100644
--- a/README.rst
+++ b/README.rst
@@ -130,7 +130,7 @@ Installation requires:
 * pandas
 * matplotlib >= 2.0
 * seaborn to use `UpSet.add_catplot`
-* upset-alttxt v0.4.9 to use `UpSet.get_alt_text`
+* upset-alttxt v0.5.2 to use `UpSet.get_alt_text`
 
 It should then be possible to::
 
diff --git a/doc/requirements.txt b/doc/requirements.txt
index b866391..ae049f7 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -10,4 +10,4 @@ scikit-learn
 nbsphinx
 sphinx<2
 sphinx-rtd-theme
-upset-alttxt==0.2.7
+upset-alttxt
diff --git a/examples/plot_alt_text.py b/examples/plot_alt_text.py
index b9d6140..e2dd6fa 100644
--- a/examples/plot_alt_text.py
+++ b/examples/plot_alt_text.py
@@ -3,7 +3,7 @@
 Data Vis: Alt text generation in UpSetPlot
 ==========================================
 
-Explore text description generation via upset-alttxt (2024).
+Explore text description generation via upset-alttxt (2025).
 
 When text description generation is enabled, there are no changes to the actual plot.
 The generated text description can be accessed after creating the UpSet plot object.
@@ -11,8 +11,8 @@
 """
 
 from matplotlib import pyplot as plt
-from upsetplot import generate_counts
-from upsetplot import UpSet
+
+from upsetplot import UpSet, generate_counts
 
 # Load the dataset into a DataFrame
 example = generate_counts()
@@ -32,25 +32,25 @@
 plt.suptitle("UpSet plot with text description generated'")
 text_description = upset.get_alt_text()
 
-print('==================================')
-print('Long Description (markdown formatted)')
-print('==================================')
-print(text_description['longDescription'])
+print("==================================")
+print("Long Description (markdown formatted)")
+print("==================================")
+print(text_description["longDescription"])
 
-print('==================================')
-print('Short Description')
-print('==================================')
-print(text_description['shortDescription'])
+print("==================================")
+print("Short Description")
+print("==================================")
+print(text_description["shortDescription"])
 
-print('\n==================================')
-print('Technique Description')
-print('==================================')
-print(text_description['techniqueDescription'])
+print("\n==================================")
+print("Technique Description")
+print("==================================")
+print(text_description["techniqueDescription"])
 
 plt.show()
 
 
-print('\nNow to generate the same plot with no alt text generation')
+print("\nNow to generate the same plot with no alt text generation")
 
 # To disable grammar generation, simply ignore the gen_grammar parameter or set it to False.
 upset = UpSet(
@@ -67,6 +67,6 @@
 try:
     text_description = upset.get_alt_text()
 except ValueError:
-    print('gen_grammar must be set to True for any alt text generation.')
+    print("gen_grammar must be set to True for any alt text generation.")
 
 plt.show()
diff --git a/upsetplot/__init__.py b/upsetplot/__init__.py
index 4cb4364..07d9955 100644
--- a/upsetplot/__init__.py
+++ b/upsetplot/__init__.py
@@ -3,6 +3,10 @@
 import os
 
 if os.environ.get("__IN-SETUP", None) != "1":
+    from .alt_text import (
+        fetch_alt_text,
+        generate_grammar,
+    )
     from .data import (
         from_contents,
         from_indicators,
@@ -24,4 +28,6 @@
         "from_contents",
         "from_indicators",
         "query",
+        "generate_grammar",
+        "fetch_alt_text",
     ]
diff --git a/upsetplot/alt_text.py b/upsetplot/alt_text.py
index 9657555..ab2ca23 100644
--- a/upsetplot/alt_text.py
+++ b/upsetplot/alt_text.py
@@ -1,8 +1,7 @@
 from alttxt.enums import Level
 from alttxt.generator import AltTxtGen
-from alttxt.tokenmap import TokenMap
 from alttxt.parser import Parser
-
+from alttxt.tokenmap import TokenMap
 
 """
     alt_text.py
@@ -70,7 +69,7 @@ def generate_grammar(
             "caption": "",
             "description": "",
             "sets": "",
-            "items": ""
+            "items": "",
         },
         "horizontal": False,
         "firstAggregateBy": "None",
@@ -133,12 +132,8 @@ def generate_grammar(
         min_degree > 0 if min_degree is not None else False
     )
 
-    grammar["filters"]["minVisible"] = (
-        min_degree if min_degree is not None else 0
-    )
-    grammar["filters"]["maxVisible"] = (
-        max_degree if max_degree is not None else 6
-    )
+    grammar["filters"]["minVisible"] = min_degree if min_degree is not None else 0
+    grammar["filters"]["maxVisible"] = max_degree if max_degree is not None else 6
 
     grammar["visibleSets"] = totals.index.to_list()
 
@@ -148,10 +143,8 @@ def generate_grammar(
         # generate intersection ids, or simply append index?
     ]
 
-    if (include_data):
-        grammar["processedData"] = generate_processed_data(
-            df, intersections, totals
-        )
+    if include_data:
+        grammar["processedData"] = generate_processed_data(df, intersections, totals)
         grammar["rawData"] = {}
         grammar["accessibleProcessedData"] = generate_processed_data(
             df, intersections, totals, accessible=True
@@ -180,13 +173,7 @@ def get_all_sets_info(totals):
     return all_sets
 
 
-def calculate_deviation(
-    contained_sets,
-    v_sets,
-    sets,
-    intersection_size,
-    total_items
-):
+def calculate_deviation(contained_sets, v_sets, sets, intersection_size, total_items):
     """
     Calculate the deviation of a given intersection.
     Based on deviation calculation in 2014 paper by Lex et al.
@@ -217,7 +204,9 @@ def calculate_deviation(
             set_size = sets[v]
             non_contained_product *= 1 - set_size / total_items
 
-    dev = (intersection_size / total_items) - (contained_product * non_contained_product)
+    dev = (intersection_size / total_items) - (
+        contained_product * non_contained_product
+    )
 
     return dev * 100
 
@@ -276,7 +265,7 @@ def get_element_name_from_id(id):
     if len(elements) == 1:
         # the empty subset is named "Unincluded"
         #   and does not need "Just" prepended
-        if (elements[0] == "Unincluded"):
+        if elements[0] == "Unincluded":
             return "Unincluded"
         return f"Just {elements[0]}"
 
@@ -311,7 +300,7 @@ def generate_intersection_id(intersections, idx):
         intersection_id += f"~_~{name}" if set_membership[name] == "Yes" else ""
 
     # the empty subset is named "Subset_Unincluded" in UpSet2
-    if (intersection_id == "Subset"):
+    if intersection_id == "Subset":
         intersection_id += "~_~Unincluded"
 
     return intersection_id
@@ -324,17 +313,19 @@ def generate_processed_data(df, intersections, totals, accessible=False):
     for i in range(len(intersections)):
         id = generate_intersection_id(intersections, i)
         set_membership = get_set_membership_from_index(intersections, i)
-        contained_sets = [name for name, membership in set_membership.items() if membership == "Yes"]
+        contained_sets = [
+            name for name, membership in set_membership.items() if membership == "Yes"
+        ]
 
         intersection_size = int(intersections.iat[i])
 
         deviation = calculate_deviation(
-                contained_sets=contained_sets,
-                v_sets=list(totals.index),
-                sets=totals,
-                intersection_size=intersection_size,
-                total_items=totals.sum(),
-            )
+            contained_sets=contained_sets,
+            v_sets=list(totals.index),
+            sets=totals,
+            intersection_size=intersection_size,
+            total_items=totals.sum(),
+        )
 
         processedData["values"][id] = {
             "id": id,
@@ -357,17 +348,14 @@ def generate_processed_data(df, intersections, totals, accessible=False):
 
 def fetch_alt_text(grammar):
     """
-    Get the alt text for an UpSet plot. Calls Multinet API
-
-    Returns:
-        The alt text for the plot.
+    Get the alt text for an UpSet plot (from upset-alttxt)
     """
     try:
         parser = Parser(grammar)
         parsed_data = parser.get_data()
         parsed_grammar = parser.get_grammar()
 
-        tokenmap: TokenMap = TokenMap(parsed_data, parsed_grammar, 'title')
+        tokenmap: TokenMap = TokenMap(parsed_data, parsed_grammar, "title")
 
         gen = AltTxtGen(Level.DEFAULT, True, tokenmap, parsed_grammar)
     except Exception as e:
diff --git a/upsetplot/plotting.py b/upsetplot/plotting.py
index b70e93c..95e3391 100644
--- a/upsetplot/plotting.py
+++ b/upsetplot/plotting.py
@@ -8,9 +8,8 @@
 from matplotlib import pyplot as plt
 
 from . import util
-from .reformat import _get_subset_mask, query
-
 from .alt_text import fetch_alt_text, generate_grammar
+from .reformat import _get_subset_mask, query
 
 # prevents ImportError on matplotlib versions >3.5.2
 try:
@@ -346,21 +345,19 @@ def __init__(
         self._show_counts = show_counts
         self._show_percentages = show_percentages
 
-        (self.total, self._df, self.intersections, self.totals) = (
-            _process_data(
-                data,
-                sort_by=sort_by,
-                sort_categories_by=sort_categories_by,
-                subset_size=subset_size,
-                sum_over=sum_over,
-                min_subset_size=min_subset_size,
-                max_subset_size=max_subset_size,
-                max_subset_rank=max_subset_rank,
-                min_degree=min_degree,
-                max_degree=max_degree,
-                reverse=not self._horizontal,
-                include_empty_subsets=include_empty_subsets,
-            )
+        (self.total, self._df, self.intersections, self.totals) = _process_data(
+            data,
+            sort_by=sort_by,
+            sort_categories_by=sort_categories_by,
+            subset_size=subset_size,
+            sum_over=sum_over,
+            min_subset_size=min_subset_size,
+            max_subset_size=max_subset_size,
+            max_subset_rank=max_subset_rank,
+            min_degree=min_degree,
+            max_degree=max_degree,
+            reverse=not self._horizontal,
+            include_empty_subsets=include_empty_subsets,
         )
         self.category_styles = {}
         self.subset_styles = [
@@ -369,19 +366,21 @@ def __init__(
         self.subset_legend = []  # pairs of (style, label)
         self._grammar = None
 
-        if (gen_grammar):
+        # Pre-generate the grammar here, inside __init__: some inputs (sort_by,
+        # sort_categories_by, etc.) are not available once initialization ends.
+        if gen_grammar:
             self._grammar = generate_grammar(
-                    self._df,
-                    self.intersections,
-                    self.totals,
-                    horizontal=self._horizontal,
-                    sort_by=sort_by,
-                    sort_categories_by=sort_categories_by,
-                    min_degree=min_degree,
-                    max_degree=max_degree,
-                    include_empty_subsets=include_empty_subsets,
-                    include_data=True,
-                    meta_data=meta_data,
+                self._df,
+                self.intersections,
+                self.totals,
+                horizontal=self._horizontal,
+                sort_by=sort_by,
+                sort_categories_by=sort_categories_by,
+                min_degree=min_degree,
+                max_degree=max_degree,
+                include_empty_subsets=include_empty_subsets,
+                include_data=True,
+                meta_data=meta_data,
             )
 
     def get_alt_text(self):
@@ -396,7 +395,7 @@ def get_alt_text(self):
                 'shortDescription' (str)
                 'longDescription' (str): a markdown formatted string
         """
-        if (self._grammar is None):
+        if self._grammar is None:
             raise ValueError("Grammar not generated.")
 
         try:
@@ -680,7 +679,7 @@ def add_catplot(self, kind, value=None, elements=3, **kw):
 
         # add the category to the grammar (list of visible categories)
         if self._grammar is not None:
-            self._grammar['visibleAttributes'].append(value)
+            self._grammar["visibleAttributes"].append(value)
 
         # attribute stats data needs to be added to every subset
 
diff --git a/upsetplot/tests/test_alttext.py b/upsetplot/tests/test_alttext.py
new file mode 100644
index 0000000..06163d2
--- /dev/null
+++ b/upsetplot/tests/test_alttext.py
@@ -0,0 +1,73 @@
+import pytest
+
+from upsetplot import UpSet, fetch_alt_text, generate_counts, generate_grammar
+
+
+@pytest.fixture
+def sample_data():
+    return generate_counts()
+
+
+@pytest.fixture
+def grammar(sample_data):  # fixture (not a test), hence no test_ prefix
+    upset = UpSet(
+        sample_data,
+        subset_size="count",
+        sort_by="-cardinality",
+        sort_categories_by="-cardinality",
+        orientation="vertical",
+        gen_grammar=True,
+    )
+    return upset.get_grammar()
+
+
+def test_generate_grammar(grammar):  # collected by pytest, unlike a fixture
+    assert isinstance(grammar, dict)
+    assert "version" in grammar
+
+
+def test_generate_grammar_invalid_data():
+    with pytest.raises(AttributeError):
+        generate_grammar(
+            df=None,
+            intersections=None,
+            totals=None,
+            horizontal=False,
+            sort_by="degree",
+            sort_categories_by="cardinality",
+            min_degree=None,
+            max_degree=None,
+            include_empty_subsets=False,
+            include_data=False,
+            meta_data=None,
+        )
+
+
+def test_generate_grammar_with_empty_subsets(sample_data):
+    result = generate_grammar(
+        df=sample_data,
+        intersections=sample_data,
+        totals=sample_data,
+        horizontal=False,
+        sort_by="degree",
+        sort_categories_by="cardinality",
+        min_degree=None,
+        max_degree=None,
+        include_empty_subsets=True,
+        include_data=False,
+        meta_data={"title": "Sample Plot", "caption": "This is a sample plot"},
+    )
+    assert not result["filters"]["hideEmpty"]
+
+
+def test_fetch_alt_text(grammar):
+    alt_text = fetch_alt_text(grammar)
+    assert isinstance(alt_text, dict)
+    assert "techniqueDescription" in alt_text
+    assert "shortDescription" in alt_text
+    assert "longDescription" in alt_text
+
+
+def test_fetch_alt_text_invalid_grammar():
+    with pytest.raises(Exception, match="Failed to create alt text generator"):
+        fetch_alt_text({})
diff --git a/upsetplot/tests/test_examples.py b/upsetplot/tests/test_examples.py
index 61bde51..115b10c 100644
--- a/upsetplot/tests/test_examples.py
+++ b/upsetplot/tests/test_examples.py
@@ -14,6 +14,7 @@
 def test_example(path):
     pytest.importorskip("sklearn")
     pytest.importorskip("seaborn")
+    pytest.importorskip("alttxt")  # import name of the upset-alttxt distribution
     env = os.environ.copy()
     env["PYTHONPATH"] = os.getcwd() + ":" + env.get("PYTHONPATH", "")
     subprocess.check_output([sys.executable, path], env=env)