Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ jobs:
steps:
- uses: holoviz-dev/holoviz_tasks/pixi_lock@v0
with:
cache: ${{ github.event.inputs.cache == 'true' || github.event.inputs.cache == '' }}
cache: false # NOTE: REVERT
# cache: ${{ github.event.inputs.cache == 'true' || github.event.inputs.cache == '' }}

unit_test_suite:
name: unit:${{ matrix.environment }}:${{ matrix.os }}
Expand Down
108 changes: 72 additions & 36 deletions hvplot/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pandas as pd
import numpy as np
import colorcet as cc
import narwhals as nw

from bokeh.models import HoverTool
from holoviews.core.dimension import Dimension
Expand Down Expand Up @@ -55,6 +56,7 @@
_HV_GE_1_21_0,
_Undefined,
filter_opts,
is_narwhals,
is_tabular,
is_series,
is_dask,
Expand All @@ -73,7 +75,6 @@
relabel_redim,
redim_,
support_index,
check_library,
is_geodataframe,
process_derived_datetime_xarray,
process_derived_datetime_pandas,
Expand Down Expand Up @@ -1422,6 +1423,9 @@ def _process_data(
elif isinstance(data, pd.DataFrame):
datatype = 'pandas'
self.data = data
elif isinstance(data, (nw.LazyFrame, nw.DataFrame, nw.Series)):
datatype = 'narwhals'
self.data = data
elif is_dask(data):
datatype = 'dask'
self.data = data.persist() if persist else data
Expand Down Expand Up @@ -2638,19 +2642,29 @@ def _category_plot(self, element, x: str, y: list[str], data):

cur_opts, compat_opts = self._get_compat_opts(element.name, labelled=labelled)

id_vars = [x]
if any(v in self.indexes for v in id_vars):
# Calling reset_index() is required since id_vars from melt
# only accepts column names, not index names.
data = data.reset_index()
data = data[y + [x]]

if check_library(data, 'dask'):
from dask.dataframe import melt
else:
melt = pd.melt
# id_vars = [x]
# if any(v in self.indexes for v in id_vars):
# # Calling reset_index() is required since id_vars from melt
# # only accepts column names, not index names.
# data = data.reset_index()
# if x is "index":

#
# if check_library(data, 'dask'):
# from dask.dataframe import melt
# else:
# melt = pd.melt

# df = melt(data, id_vars=[x], var_name=self.group_label, value_name=self.value_label)

data = nw.from_native(data)
data = data.select([*y, x])
df = data.unpivot(
index=x,
variable_name=self.group_label,
value_name=self.value_label,
).to_native()

df = melt(data, id_vars=[x], var_name=self.group_label, value_name=self.value_label)
kdims = [x, self.group_label]
vdims = [self.value_label] + self.hover_cols
if self.subplots:
Expand Down Expand Up @@ -2707,14 +2721,23 @@ def _stats_plot(self, element, y, data=None):
cur_opts['labelled'] = labelled

kdims = [self.group_label]
data = data[list(y)]
if check_library(data, 'dask'):
from dask.dataframe import melt
else:
melt = pd.melt
df = melt(data, var_name=self.group_label, value_name=self.value_label)
if list(y) and df[self.value_label].dtype is not data[y[0]].dtype:
df[self.value_label] = df[self.value_label].astype(data[y[0]].dtype)
# if check_library(data, 'narwhals'):
# data = data.lazy().collect().to_pandas() # HACK: Should likely be handled more elegantly
# data = data[list(y)]
# if check_library(data, 'dask'):
# from dask.dataframe import melt
# else:
# melt = pd.melt
# df = melt(data, var_name=self.group_label, value_name=self.value_label)
data = nw.from_native(data)
data = data.select(list(y))
df = data.unpivot(
index=x,
variable_name=self.group_label,
value_name=self.value_label,
).to_native()
# if list(y) and df[self.value_label].dtype is not data[y[0]].dtype:
# df[self.value_label] = df[self.value_label].astype(data[y[0]].dtype)
redim = self._merge_redim({self.value_label: ylim})
return relabel_redim(
element(df, kdims, self.value_label),
Expand Down Expand Up @@ -2794,13 +2817,11 @@ def hist(self, x=None, y=None, data=None):

if hist_opts['bin_range'] is None and not self._norm_opts.get('axiswise'):
ranges = []
nw_data = nw.from_native(data)

for col in y:
ys = data[col]
ymin, ymax = (ys.min(), ys.max())
if is_dask(ys):
ymin, ymax = ymin.compute(), ymax.compute()
elif is_ibis(ys):
ymin, ymax = ymin.execute(), ymax.execute()
ymin = nw_data.select(nw.col(col).min()).lazy().collect().item()
ymax = nw_data.select(nw.col(col).max()).lazy().collect().item()
ranges.append((ymin, ymax))
if ranges:
hist_opts['bin_range'] = max_range(ranges)
Expand Down Expand Up @@ -2855,10 +2876,17 @@ def kde(self, x=None, y=None, data=None):
dists = Distribution(data, y, [])
else:
ranges = {self.value_label: xlim}
data = data[y]
df = data.melt(var_name=self.group_label, value_name=self.value_label)
data = nw.from_native(data)
data = data.select([x, *y])
df = data.unpivot(
index=x,
variable_name=self.group_label,
value_name=self.value_label,
).to_native()
# data = data[y]
# df = data.melt(var_name=self.group_label, value_name=self.value_label)
ds = Dataset(df)
if len(df):
if len(ds):
dists = ds.to(Distribution, self.value_label)
dists = dists.layout() if self.subplots else dists.overlay(sort=False)
else:
Expand Down Expand Up @@ -2946,15 +2974,20 @@ def bivariate(self, x=None, y=None, data=None):
)

def ohlc(self, x=None, y=None, data=None):
from holoviews.core.util import dtype_kind

self._error_if_unavailable('ohlc', Rectangles)
self._error_if_unavailable('ohlc', Segments)
data = self.data if data is None else data
if x is None:
variables = [var for var in self.variables if var not in self.indexes]
if data[variables[0]].dtype.kind == 'M':
dtype = nw.from_native(data).schema[variables[0]]
if dtype_kind(dtype) == 'M':
x = variables[0]
else:
elif self.indexes:
x = self.indexes[0]
else:
x = 'index'
width = self.kwds.get('bar_width', 0.5)
if y is None:
o, h, l, c = [col for col in data.columns if col != x][:4] # noqa: E741
Expand All @@ -2967,7 +3000,7 @@ def ohlc(self, x=None, y=None, data=None):
self.hover_cols.remove(x)
vdims = list(dict.fromkeys(ohlc_cols + self.hover_cols))
ds = Dataset(data, [x], vdims)
if ds.dimension_values(x).dtype.kind in 'SUO':
if dtype_kind(ds.dimension_values(x)) in 'SUO':
rects = Rectangles(ds, [x, o, x, c])
else:
if len(ds):
Expand All @@ -2983,8 +3016,9 @@ def ohlc(self, x=None, y=None, data=None):
seg_cur_opts, seg_compat_opts = self._get_compat_opts('Segments')
tools = seg_cur_opts.pop('tools', [])
if 'hover' in tools:
x_data = data[x] if x in data.columns else data.index
if pd.api.types.is_datetime64_any_dtype(x_data):
# x_data = data[x] if x in data.columns else data.index
dtype = nw.from_native(data).schema[x]
if dtype_kind(dtype) == 'M':
# %F %T: strftime code for %Y-%m-%d %H:%M:%S.
# See https://man7.org/linux/man-pages/man3/strftime.3.html
x_tooltip = f'@{x}{{%F %T}}'
Expand Down Expand Up @@ -3020,7 +3054,7 @@ def ohlc(self, x=None, y=None, data=None):
def table(self, x=None, y=None, data=None):
self._error_if_unavailable('table')
data = self.data if data is None else data
if isinstance(data.index, (DatetimeIndex, MultiIndex)):
if isinstance(getattr(data, 'index', None), (DatetimeIndex, MultiIndex)):
# To get the index displayed in the table as Bokeh doesn't show it.
data = data.reset_index()

Expand Down Expand Up @@ -3260,6 +3294,8 @@ def _geom_plot(self, x=None, y=None, data=None, kind='polygons'):
data, x, y, _ = self._process_gridded_args(data, x, y, z=None)
params = dict(self._relabel)

if is_narwhals(self.data) and x == 'index':
x = None
if not (x and y):
if is_geodataframe(data):
x, y = ('Longitude', 'Latitude') if self.geo else ('x', 'y')
Expand Down
16 changes: 11 additions & 5 deletions hvplot/plotting/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1600,15 +1600,21 @@ def _get_converter(self, x=None, y=None, kind=None, **kwds):
# Reorder the columns as in the data.
columns = sorted(columns, key=lambda c: column_names.index(c))

if isinstance(self._data, pl.DataFrame):
data = self._data.select(columns).to_pandas()
if isinstance(self._data, (pl.LazyFrame, pl.DataFrame)):
data = self._data.select(columns) # .to_pandas()
elif isinstance(self._data, pl.Series):
data = self._data.to_pandas()
elif isinstance(self._data, pl.LazyFrame):
data = self._data.select(columns).collect().to_pandas()
data = self._data.to_frame() # .to_pandas()
# elif isinstance(self._data, pl.LazyFrame):
# data = self._data.select(columns).collect()# .to_pandas()
else:
raise ValueError('Only Polars DataFrame, Series, and LazyFrame are supported')

x = x or 'index'
if x == 'index':
data = data.with_row_index()
import narwhals as nw

data = nw.from_native(data)
return HoloViewsConverter(data, x, y, kind=kind, **params)


Expand Down
22 changes: 19 additions & 3 deletions hvplot/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ def is_list_like(obj):


def is_tabular(data):
if check_library(data, ['dask', 'streamz', 'pandas', 'geopandas', 'cudf']):
if check_library(data, ['dask', 'streamz', 'pandas', 'geopandas', 'cudf', 'narwhals']):
return True
elif check_library(data, 'intake'):
from intake.source.base import DataSource
Expand All @@ -438,7 +438,7 @@ def is_tabular(data):


def is_series(data):
if not check_library(data, ['dask', 'streamz', 'pandas', 'cudf']):
if not check_library(data, ['dask', 'streamz', 'pandas', 'cudf', 'narwhals']):
return False
elif isinstance(data, pd.Series):
return True
Expand All @@ -454,6 +454,10 @@ def is_series(data):
import cudf

return isinstance(data, cudf.Series)
elif check_library(data, 'narwhals'):
import narwhals as nw

return isinstance(data, nw.Series)
else:
return False

Expand Down Expand Up @@ -487,6 +491,14 @@ def is_duckdb(data):
return isinstance(data, (duckdb.DuckDBPyRelation, duckdb.DuckDBPyConnection))


def is_narwhals(data):
    """Tell whether ``data`` is a Narwhals DataFrame, Series or LazyFrame.

    The ``check_library`` gate runs first, so ``narwhals`` is only imported
    once that check has passed (presumably to keep it an optional
    dependency -- confirm against ``check_library``).
    """
    if check_library(data, 'narwhals'):
        import narwhals as nw

        narwhals_types = (nw.DataFrame, nw.Series, nw.LazyFrame)
        return isinstance(data, narwhals_types)
    return False


def is_polars(data):
if not check_library(data, 'polars'):
return False
Expand Down Expand Up @@ -530,7 +542,7 @@ def is_xarray(data):
def is_lazy_data(data):
"""Check if data is lazy

This checks if the datatype is Dask, Ibis, or Polars' LazyFrame.
This checks if the datatype is Dask, Ibis, Polars' LazyFrame, or Narwhals' LazyFrame.
It is useful to avoid eager evaluation of the data.
"""
if is_dask(data) or is_ibis(data):
Expand All @@ -539,6 +551,10 @@ def is_lazy_data(data):
import polars as pl

return isinstance(data, pl.LazyFrame)
elif is_narwhals(data):
import narwhals as nw

return isinstance(data, nw.LazyFrame)
return False


Expand Down
12 changes: 8 additions & 4 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,17 @@ pip = "*"
# Required
bokeh = ">=3.1"
colorcet = ">=2"
holoviews = ">=1.19.0"
# holoviews = ">=1.19.0" # NOTE: REVERT
numpy = ">=1.21"
packaging = "*"
pandas = ">=1.3"
panel = ">=1.0"
# panel = ">=1.0"
param = ">=1.12.0,<3.0"

[feature.required.pypi-dependencies]
holoviews = { git = "https://github.com/holoviz/holoviews" , branch = "feat_narwhals"} # NOTE: REVERT
panel = ">=1.8.0rc0" # Until a release

[feature.required.tasks]
download-data = 'python scripts/download_data.py'
install = 'python -m pip install --no-deps --disable-pip-version-check -e .'
Expand Down Expand Up @@ -205,11 +209,11 @@ pygraphviz = "*"

[feature.test.tasks]
test-unit-geo = 'pytest -v hvplot --geo'
test-unit-cov = 'pytest -v hvplot --cov=hvplot --cov-branch --cov-append'
test-unit-cov = 'pytest hvplot --cov=hvplot --cov-branch --cov-append -k polars' # NOTE: REVERT
test-unit-geo-cov = 'pytest -v hvplot --geo --cov=hvplot --cov-branch --cov-append'

[feature.test-core.tasks]
test-unit = 'pytest -v hvplot'
test-unit = 'pytest hvplot -k polars' # NOTE: REVERT

[feature.test-example.tasks]
test-example = 'pytest -n logical --dist loadscope --nbval-lax -p no:python'
Expand Down
Loading