11import re
22from typing import Any , Callable , Dict , Optional , Sequence , Tuple
3+ from .helper import string_sig
34import pandas
45
56
class CubeViewDef:
    """
    Describes a view computed by :meth:`CubeLogs.view`.

    The instance only stores the settings given to the constructor,
    nothing is validated or computed here.

    :param key_index: keys to put in the row index
    :param values: values to show
    :param ignore_unique: ignore keys with a unique value
    :param order: to reorder key in columns index
    :param key_agg: aggregate according to these columns before
        creating the view
    :param agg_args: see :meth:`pandas.core.groupby.DataFrameGroupBy.agg`
    :param agg_kwargs: see :meth:`pandas.core.groupby.DataFrameGroupBy.agg`
    """

    def __init__(
        self,
        key_index: Sequence[str],
        values: Sequence[str],
        ignore_unique: bool = True,
        order: Optional[Sequence[str]] = None,
        key_agg: Optional[Sequence[str]] = None,
        agg_args: Sequence[Any] = ("sum",),
        agg_kwargs: Optional[Dict[str, Any]] = None,
    ):
        # Layout of the pivot: what goes in the rows, what is displayed.
        self.key_index, self.values = key_index, values
        # Options shaping the row/column indices.
        self.ignore_unique, self.order = ignore_unique, order
        # Optional aggregation applied before the view is built.
        self.key_agg = key_agg
        self.agg_args, self.agg_kwargs = agg_args, agg_kwargs

    def __repr__(self) -> str:
        "Returns the string produced by ``string_sig`` for this instance."
        return string_sig(self)
1543
1644class CubeLogs :
1745 """
@@ -37,7 +65,7 @@ def __init__(
3765 self ._formulas = formulas
3866
3967 def load (self , verbose : int = 0 ):
40- """Loads and preprocesses the data."""
68+ """Loads and preprocesses the data. Returns self. """
4169 if isinstance (self ._data , pandas .DataFrame ):
4270 if verbose :
4371 print (f"[CubeLogs.load] load from dataframe, shape={ self ._data .shape } " )
@@ -103,10 +131,16 @@ def load(self, verbose: int = 0):
103131 print (f"[CubeLogs.load] apply formula { k !r} " )
104132 self .data [k ] = f (self .data )
105133 self .values_for_key = {k : set (self .data [k ]) for k in self .keys }
106- nans = [c for c in self .keys if self .data [c ].isna ().astype (int ).sum () > 0 ]
134+ nans = [
135+ c for c in [self .time , * self .keys ] if self .data [c ].isna ().astype (int ).sum () > 0
136+ ]
107137 assert not nans , f"The following keys { nans } have nan values. This is not allowed."
138+ if verbose :
139+ print (f"[CubeLogs.load] convert column { self .time !r} into date" )
140+ self .data [self .time ] = pandas .to_datetime (self .data [self .time ])
108141 if verbose :
109142 print (f"[CubeLogs.load] done, shape={ self .shape } " )
143+ return self
110144
111145 @property
112146 def shape (self ) -> Tuple [int , int ]:
@@ -171,43 +205,63 @@ def __str__(self) -> str:
171205 "usual"
172206 return str (self .data ) if hasattr (self , "data" ) else str (self ._data )
173207
174- def view (
175- self ,
176- key_index : Sequence [str ],
177- values : Sequence [str ],
178- ignore_unique : bool = True ,
179- order : Optional [Sequence [str ]] = None ,
180- ) -> pandas .DataFrame :
208+ def view (self , view_def : CubeViewDef ) -> pandas .DataFrame :
181209 """
182210 Returns a dataframe, a pivot view.
183211 `key_index` determines the index, the other key columns determines
184212 the columns. If `ignore_unique` is True, every columns with a unique value
185213 is removed.
186214
187- :param key_index: keys to put in the row index
188- :param values: values to show
189- :param ignore_unique: ignore keys with a unique value
190- :param order: to reorder key in columns index
215+ :param view_def: view definition
191216 :return: dataframe
192217 """
193- key_index = self ._filter_column (key_index , self .keys )
194- values = self . _filter_column ( values , self . values )
195- assert set ( key_index ) <= set (
218+ key_agg = self ._filter_column (view_def . key_agg , self .keys ) if view_def . key_agg else []
219+ set_key_agg = set ( key_agg )
220+ assert set_key_agg <= set (
196221 self .keys
197- ), f"Non existing columns in key_index { set (key_index ) - set (self .keys )} "
222+ ), f"Non existing keys in key_agg { set_key_agg - set (self .keys )} "
223+
224+ values = self ._filter_column (view_def .values , self .values )
198225 assert set (values ) <= set (
199226 self .values
200227 ), f"Non existing columns in values { set (values ) - set (self .values )} "
201- set_key_columns = {c for c in self .keys if c not in key_index }
202- if ignore_unique :
228+
229+ if key_agg :
230+ key_index = [
231+ c
232+ for c in self ._filter_column (view_def .key_index , self .keys )
233+ if c not in set_key_agg
234+ ]
235+ keys_no_agg = [c for c in self .keys if c not in set_key_agg ]
236+ data = (
237+ self .data [[* keys_no_agg , * values ]]
238+ .groupby (key_index , as_index = False )
239+ .agg (* view_def .agg_args , ** (view_def .agg_kwargs or {}))
240+ )
241+ else :
242+ key_index = self ._filter_column (view_def .key_index , self .keys )
243+ data = self .data [[* self .keys , * values ]]
244+
245+ assert set (key_index ) <= set (
246+ self .keys
247+ ), f"Non existing keys in key_index { set (key_index ) - set (self .keys )} "
248+
249+ set_key_columns = {
250+ c for c in self .keys if c not in key_index and c not in set (key_agg )
251+ }
252+ if view_def .ignore_unique :
203253 key_index = [k for k in key_index if len (self .values_for_key [k ]) > 1 ]
204254 key_columns = [k for k in set_key_columns if len (self .values_for_key [k ]) > 1 ]
205255 else :
206256 key_columns = sorted (set_key_columns )
207- if order :
208- assert set (order ) <= set_key_columns , (
257+
258+ if view_def .order :
259+ assert set (view_def .order ) <= set_key_columns , (
209260 f"Non existing columns from order in key_columns "
210- f"{ set (order ) - set_key_columns } "
261+ f"{ set (view_def . order ) - set_key_columns } "
211262 )
212- key_columns = [* order , * [c for c in key_columns if c not in order ]]
213- return self .data .pivot (index = key_index [::- 1 ], columns = key_columns , values = values )
263+ key_columns = [
264+ * view_def .order ,
265+ * [c for c in key_columns if c not in view_def .order ],
266+ ]
267+ return data .pivot (index = key_index [::- 1 ], columns = key_columns , values = values )
0 commit comments