Skip to content

bug: agg(nw.all().len()) raises #2973

@MarcoGorelli

Description

@MarcoGorelli
import narwhals as nw
import pandas as pd
nw.from_native(pd.DataFrame({'gender': ['m', 'f'], 'weight': [4,5], 'age': [7,8]})).select('gender', 'weight', 'age').group_by(['gender']).agg(nw.all().len())
In [17]: nw.from_native(pd.DataFrame({'gender': ['m', 'f'], 'weight': [4,5], 'age': [7,8]})).select('gender', 'weight',
       ⋮ 'age').group_by(['gender']).agg(nw.all().len())
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/scratch/.venv/lib/python3.12/site-packages/narwhals/_pandas_like/utils.py:652, in select_columns_by_name(df, column_names, implementation)
    651 try:
--> 652     return df[column_names]
    653 except KeyError as e:

File ~/scratch/.venv/lib/python3.12/site-packages/pandas/core/frame.py:4113, in DataFrame.__getitem__(self, key)
   4112         key = list(key)
-> 4113     indexer = self.columns._get_indexer_strict(key, "columns")[1]
   4115 # take() does not accept boolean indexers

File ~/scratch/.venv/lib/python3.12/site-packages/pandas/core/indexes/base.py:6212, in Index._get_indexer_strict(self, key, axis_name)
   6210     keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> 6212 self._raise_if_missing(keyarr, indexer, axis_name)
   6214 keyarr = self.take(indexer)

File ~/scratch/.venv/lib/python3.12/site-packages/pandas/core/indexes/base.py:6264, in Index._raise_if_missing(self, key, indexer, axis_name)
   6263 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
-> 6264 raise KeyError(f"{not_found} not in index")

KeyError: "['age'] not in index"

The above exception was the direct cause of the following exception:

ColumnNotFoundError                       Traceback (most recent call last)
Cell In[17], line 1
----> 1 nw.from_native(pd.DataFrame({'gender': ['m', 'f'], 'weight': [4,5], 'age': [7,8]})).select('gender', 'weight', 'age').group_by(['gender']).agg(nw.all().len())

File ~/scratch/.venv/lib/python3.12/site-packages/narwhals/group_by.py:95, in GroupBy.agg(self, *aggs, **named_aggs)
     87 plx = self._df.__narwhals_namespace__()
     88 compliant_aggs = (
     89     *(x._to_compliant_expr(plx) for x in flat_aggs),
     90     *(
   (...)     93     ),
     94 )
---> 95 return self._df._with_compliant(self._grouped.agg(*compliant_aggs))

File ~/scratch/.venv/lib/python3.12/site-packages/narwhals/_pandas_like/group_by.py:227, in PandasLikeGroupBy.agg(self, *exprs)
    224 # NOTE: Keep `inplace=True` to avoid making a redundant copy.
    225 # This may need updating, depending on https://github.com/pandas-dev/pandas/pull/51466/files
    226 result.reset_index(inplace=True)  # noqa: PD002
--> 227 return self._select_results(result, agg_exprs)

File ~/scratch/.venv/lib/python3.12/site-packages/narwhals/_pandas_like/group_by.py:238, in PandasLikeGroupBy._select_results(self, df, agg_exprs)
    232 """Responsible for remapping temp column names back to original.
    233
    234 See `ParseKeysGroupBy`.
    235 """
    236 new_names = chain.from_iterable(e.aliases for e in agg_exprs)
    237 return (
--> 238     self.compliant._with_native(df, validate_column_names=False)
    239     .simple_select(*self._keys, *new_names)
    240     .rename(dict(zip(self._keys, self._output_key_names)))
    241 )

File ~/scratch/.venv/lib/python3.12/site-packages/narwhals/_pandas_like/dataframe.py:388, in PandasLikeDataFrame.simple_select(self, *column_names)
    386 def simple_select(self, *column_names: str) -> Self:
    387     return self._with_native(
--> 388         select_columns_by_name(self.native, list(column_names), self._implementation),
    389         validate_column_names=False,
    390     )

File ~/scratch/.venv/lib/python3.12/site-packages/narwhals/_pandas_like/utils.py:655, in select_columns_by_name(df, column_names, implementation)
    653 except KeyError as e:
    654     if error := check_columns_exist(column_names, available=df.columns.tolist()):
--> 655         raise error from e
    656     raise

ColumnNotFoundError: The following columns were not found: ['age']

Hint: Did you mean one of these columns: ['gender', 'weight']?

@dangotbanned this came up in holoviz/holoviews#6567. I haven't checked, but they mentioned the refactor you did (#2680) as a possible cause.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions