diff --git a/arkouda_viz/README.md b/arkouda_viz/README.md index 1097281..bcffeac 100644 --- a/arkouda_viz/README.md +++ b/arkouda_viz/README.md @@ -1,14 +1,18 @@

- +

This is a client only implementation of vizualizations using Arkouda. Thus, all code is python and uses only server elements currently included in the main arkouda repository. ## Functionality Implemented -- 'datashade()' - Takes an Arkouda DataFrame along with optional parameters and creates an interactive plot using datashader. The method then updates the plot based on the user's selections of a variety of widgets. +- 'hist()' - Plots a histogram for numeric data. -- 'crossfilter()' - Takes an Arkouda Dataframe and creates a scatterplot with the widgets of size and color that manipulates the points. +- 'area()' - Plots a histogram for numeric data as an area plot. + +- 'boxWhisker()' - Plots a box plot for numeric data. + +- 'explore()' - Creates an interactive 2-D histogram for numeric data. ## Usage diff --git a/arkouda_viz/client/arkouda_viz/viz.py b/arkouda_viz/client/arkouda_viz/viz.py index ce6a7a1..1f747f5 100644 --- a/arkouda_viz/client/arkouda_viz/viz.py +++ b/arkouda_viz/client/arkouda_viz/viz.py @@ -2,14 +2,25 @@ import holoviews as hv import panel as pn import param -from typing import Tuple, Union +from typing import Tuple, Union, Optional + +_numeric_types = ["float64", "int64", "uint64"] + +""" +The plotting engine to be used by default +unless specified explicitly. +""" + + +default_engine = "matplotlib" + """ Helper method for setting up the plot rendering environment. Parameters ---------- -engine : str - The plotting engine. +engine : string + The plotting engine; 'default_engine' by default. width : int Width of the plot. height : int @@ -21,31 +32,38 @@ """ -def render_env(engine: str, width: int, height: int): - from bokeh.io import output_notebook +def render_env(engine: Optional[str], width: int, height: int): + global default_engine + if engine == None: engine = default_engine + + def ensure(name): + if not hv.extension._loaded or hv.Store.current_backend != name: + hv.extension(name) - output_notebook() if engine in ("bokeh", "b"): - hv.extension("bokeh") + ensure("bokeh") return dict(width=width, height=height) elif engine in ("plotly", "p"): - hv.extension("plotly") + ensure("plotly") return dict(width=width, height=height) elif engine in ("matplotlib", "m"): - hv.extension("matplotlib") - return dict(fig_inches=(5, 5)) + ensure("matplotlib") + return dict(fig_inches=(int(width/100), int(height/100))) else: raise ValueError("Please provide a supported plotting engine.") """ -Plots an area plot for numeric data. +Plots a histogram for numeric data as an area plot. +The X axis is always range(0, bins). Parameters ---------- data : ak.DataFrame or ak.pdarray The data to be plotted. +bins : int + Number of bins to divide the data into. engine : string - The plotting engine. + The plotting engine; 'default_engine' by default. width : int Width of the plot. height : int @@ -59,8 +77,8 @@ def render_env(engine: str, width: int, height: int): def area( data: Union[ak.DataFrame, ak.pdarray] = None, - bins=10, - engine: str = "matplotlib", + bins: int = 10, + engine: Optional[str] = None, width: int = 500, height: int = 500, ): @@ -70,7 +88,7 @@ def area( numeric_columns = [ col for col, dtype in data.dtypes.items() - if dtype in ["float64", "int64"] + if dtype in _numeric_types ] if len(numeric_columns) == 0: raise ValueError( @@ -79,14 +97,13 @@ def area( data = data[numeric_columns] h, b = ak.histogram(data[data.columns[0]], bins=bins) var = pn.widgets.Select( - name="variable", value=data.columns[0], options=data.columns + name="variable", value=data.columns[0], options=data.columns.to_list() ) all = pn.widgets.Checkbox(name="all") @pn.depends(var.param.value, all.param.value) def create_figure(var, all): if all: - var.disabled = True overlay = hv.Overlay() for column in data.columns: h, b = ak.histogram(data[column], bins=bins) @@ -98,7 +115,7 @@ def create_figure(var, all): widgets = pn.WidgetBox(var, all, width=200) return pn.Row(widgets, create_figure).servable("Area") - if isinstance(data, ak.pdarray) and data.dtype in ["int64", "float64"]: + if isinstance(data, ak.pdarray) and data.dtype in _numeric_types: h, b = ak.histogram(data, bins=bins) return hv.Area(h.to_ndarray()).opts(**opts) else: @@ -118,7 +135,7 @@ def create_figure(var, all): bins : int Number of bins to divide the data into. engine : string - The plotting engine. + The plotting engine; 'default_engine' by default. width : int Width of the plot. height : int @@ -133,7 +150,7 @@ def create_figure(var, all): def hist( data: Union[ak.DataFrame, ak.pdarray] = None, bins=10, - engine: str = "matplotlib", + engine: Optional[str] = None, width: int = 500, height: int = 500, ): @@ -143,7 +160,7 @@ def hist( numeric_columns = [ col for col, dtype in data.dtypes.items() - if dtype in ["float64", "int64"] + if dtype in _numeric_types ] if len(numeric_columns) == 0: raise ValueError( @@ -152,7 +169,7 @@ def hist( data = data[numeric_columns] h, b = ak.histogram(data[data.columns[0]], bins=bins) var = pn.widgets.Select( - name="variable", value=data.columns[0], options=data.columns + name="variable", value=data.columns[0], options=data.columns.to_list() ) @pn.depends(var.param.value) @@ -161,7 +178,7 @@ def create_figure(var): widgets = pn.WidgetBox(var, width=200) return pn.Row(widgets, create_figure).servable("Histogram") - if isinstance(data, ak.pdarray) and data.dtype in ["int64", "float64"]: + if isinstance(data, ak.pdarray) and data.dtype in _numeric_types: h, b = ak.histogram(data, bins=bins) return hv.Histogram((h.to_ndarray(), b.to_ndarray())).opts(**opts) else: @@ -179,7 +196,7 @@ def create_figure(var): data : ak.DataFrame or ak.pdarray The data to be plotted. engine : string - The plotting engine. + The plotting engine; 'default_engine' by default. width : int Width of the plot. height : int @@ -193,7 +210,7 @@ def create_figure(var): def boxWhisker( data: Union[ak.DataFrame, ak.pdarray] = None, - engine: str = "matplotlib", + engine: Optional[str] = None, width: int = 5, height: int = 5, ): @@ -213,7 +230,7 @@ def boxWhisker( data = data[numeric_columns] var = pn.widgets.Select( - name="variable", value=data.columns[0], options=data.columns + name="variable", value=data.columns[0], options=data.columns.to_list() ) @pn.depends(var.param.value) @@ -238,14 +255,14 @@ def create_figure(var): return boxwhisker.opts( hv.opts.Bounds(alpha=0.5, color="blue"), - hv.opts.HLine(color="red", linewidth=2, xlim=(0, 1)), + hv.opts.HLine(color="red", line_width=2, xlim=(0, 1)), hv.opts.Segments(color="black"), hv.opts.Points(color="green"), ) widgets = pn.WidgetBox(var, width=200) return pn.Row(widgets, create_figure).servable("Box and Whisker") - if isinstance(data, ak.pdarray) and data.dtype in ["int64", "float64"]: + if isinstance(data, ak.pdarray) and data.dtype in _numeric_types: sorted_data = ak.sort(data) values = { @@ -266,7 +283,7 @@ def create_figure(var): return boxwhisker.opts( hv.opts.Bounds(alpha=0.5, color="blue"), - hv.opts.HLine(color="red", linewidth=2, xlim=(0, 1)), + hv.opts.HLine(color="red", line_width=2, xlim=(0, 1)), hv.opts.Segments(color="black"), hv.opts.Points(color="green"), ) @@ -289,7 +306,7 @@ def create_figure(var): yBin : int Number of bins to divide the y data into. engine : string - The plotting engine. + The plotting engine; 'bokeh' by default. width : int Width of the plot. height : int @@ -300,18 +317,16 @@ def create_figure(var): An image with or without a variable dropdown menu based in single or multiple columns. """ - def explore( data: Union[ak.DataFrame, Tuple[ak.pdarray, ak.pdarray]] = None, - xBin: int = 10, - yBin: int = 10, - engine: str = "bokeh", + xBin: int = 100, + yBin: int = 100, + engine: Optional[str] = "bokeh", width: int = 500, height: int = 500, ): - render_env(engine, width=width, height=height) + opts = render_env(engine, width=width, height=height) pn.extension() - full_data = None if data is not None: if isinstance(data, ak.DataFrame): numeric_columns = [ @@ -323,16 +338,16 @@ def explore( raise ValueError( "The provided DataFrame does not have at least two numeric columns." ) - full_data = data[numeric_columns] elif ( isinstance(data, tuple) and len(data) == 2 and all(isinstance(item, ak.pdarray) for item in data) ): - full_data = ak.DataFrame(data[0], data[1]) + data = ak.DataFrame({"0":data[0], "1":data[1]}) + numeric_columns = ["0", "1"] else: raise ValueError( - "Invalid data. Please provide an ak.Dataframe or [ak.pdarray, ak.pdarray]." + "Invalid data. Please provide an ak.Dataframe or (ak.pdarray, ak.pdarray)." ) else: raise ValueError("Please provide data.") @@ -342,10 +357,10 @@ class Explore(param.Parameterized): label="color map", default="turbo", objects=hv.plotting.list_cmaps() ) x_var = param.Selector( - label="x-variable", default=data.columns[0], objects=data.columns + label="x-variable", default=numeric_columns[0], objects=numeric_columns ) y_var = param.Selector( - label="y-variable", default=data.columns[1], objects=data.columns + label="y-variable", default=numeric_columns[1], objects=numeric_columns ) enable_slider_checkbox = pn.widgets.Checkbox( name="remove outliers", value=False @@ -355,40 +370,38 @@ class Explore(param.Parameterized): ) params = Explore() - cols = full_data.columns - initial_xrange = (ak.min(full_data[cols[0]]), ak.max(full_data[cols[0]])) - initial_yrange = (ak.min(full_data[cols[1]]), ak.max(full_data[cols[1]])) - - def make_data(x_range, y_range, cmap, x_var, y_var): - if x_range is None or y_range is None or not x_range or not y_range: - binned_data = ak.histogram2d( - full_data[x_var], full_data[y_var], bins=(xBin, yBin) - ) - return hv.Image(binned_data[0].to_ndarray(), bounds=(0, 0, 1, 1)).opts( - cmap=cmap, width=width, height=height, color_bar=True - ) + server_widget = pn.widgets.StaticText(name="", value="", styles = {'color': 'red'}) + + # use caching to preserve the current range when changing the color map and to + # avoid unnecessary re-histogramming, which sometimes happens with hv.streams + class Cache: pass + h_cache = Cache() + h_cache.x_var, h_cache.y_var, h_cache.histo, h_cache.range2 = None, None, None, None + + def make_data(x_range, y_range, cmap, x_var, y_var, h_cache=h_cache): + if x_var == h_cache.x_var and y_var == h_cache.y_var \ + and (x_range == None or (x_range, y_range) == h_cache.range2): + histo, range2 = h_cache.histo, h_cache.range2 else: - subset_data = data[ - (full_data[x_var] >= x_range[0]) - & (full_data[x_var] <= x_range[1]) - & (full_data[y_var] >= y_range[0]) - & (full_data[y_var] <= y_range[1]) - ] - x_span = x_range[1] - x_range[0] - y_span = y_range[1] - y_range[0] - binned_data = ak.histogram2d( - subset_data[x_var], subset_data[y_var], bins=(1000, 1000) - ) + if x_range == None: range = None + else: range = (x_range, y_range) + server_widget.value = "server processing" + histo,xbins,ybins = ak.histogram2d(data[x_var], data[y_var], bins=(xBin, yBin), range=range) + server_widget.value = "" + histo = histo.to_ndarray().T[::-1,:] + range2 = ((xbins[0], xbins[-1]), (ybins[0], ybins[-1])) # ((xmin,xmax),(ymin,ymax)) + h_cache.x_var, h_cache.y_var, h_cache.histo, h_cache.range2 = x_var, y_var, histo, range2 + return hv.Image( - binned_data[0].to_ndarray(), - bounds=(x_range[0], y_range[0], x_range[0] + x_span, y_range[0] + y_span), - ).opts(cmap=cmap, width=width, height=height, colorbar=True) + histo, + bounds=(range2[0][0], range2[1][0], range2[0][1], range2[1][1]) # xmin, ymin, xmax, ymax + ).opts(cmap=cmap, colorbar=True, **opts) @pn.depends( cmap=params.param.cmap, x_var=params.param.x_var, y_var=params.param.y_var ) def update(cmap, x_var, y_var): - stream = hv.streams.RangeXY(x_range=initial_xrange, y_range=initial_yrange) + stream = hv.streams.RangeXY() dmap = hv.DynamicMap( lambda x_range, y_range: make_data(x_range, y_range, cmap, x_var, y_var), streams=[stream], @@ -400,8 +413,10 @@ def update(cmap, x_var, y_var): params.param.cmap, params.param.x_var, params.param.y_var, - params.enable_slider_checkbox, - params.z_score_threshold_slider, + # these are currently unused: + #params.enable_slider_checkbox, + #params.z_score_threshold_slider, + server_widget, width=200, ) - return pn.Row(widget_column, update) \ No newline at end of file + return pn.Row(widget_column, update) diff --git a/arkouda_viz/client/setup.py b/arkouda_viz/client/setup.py index 9581b1e..aa88c64 100644 --- a/arkouda_viz/client/setup.py +++ b/arkouda_viz/client/setup.py @@ -9,7 +9,7 @@ setup( name="arkouda_viz", - version="0.0.0", + version="0.0.1", description="Vizualizations for Arkouda.", long_description=long_description, long_description_content_type="text/markdown",