@@ -68,13 +68,36 @@ def breaking_last_point(signal: Sequence[float], threshold: float = 1.2):
6868 :return: significant change (-1, 0, +1), test value
6969 """
7070 signal = np .asarray (signal )
71- m = np .mean (signal [:- 1 ])
71+ if not np .issubdtype (signal .dtype , np .number ):
72+ return 0 , np .nan
73+ assert len (signal .shape ) == 1 , f"Unexpected signal shape={ signal .shape } , signal={ signal } "
74+ if signal .shape [0 ] <= 2 :
75+ return 0 , 0
76+
77+ has_value = ~ (np .isnan (signal ).all ()) and ~ (np .isinf (signal ).all ())
78+ if np .isnan (signal [- 1 ]) or np .isinf (signal [- 1 ]):
79+ return (- 1 , np .inf ) if has_value else (0 , 0 )
80+
81+ try :
82+ m = np .mean (signal [:- 1 ])
83+ except (TypeError , ValueError ):
84+ # Not a numerical type
85+ return 0 , np .nan
86+
87+ if np .isnan (m ) or np .isinf (m ):
88+ return (1 , np .inf ) if np .isinf (signal [- 2 ]) or np .isnan (signal [- 2 ]) else (0 , 0 )
7289 v = np .std (signal [:- 1 ])
7390 if v == 0 :
7491 test = signal [- 1 ] - m
92+ assert not np .isnan (
93+ test
94+ ), f"Unexpected test value, test={ test } , signal={ signal } , m={ m } , v={ v } "
7595 trend = np .sign (test )
7696 return trend , trend
7797 test = (signal [- 1 ] - m ) / v
98+ assert not np .isnan (
99+ test
100+ ), f"Unexpected test value, test={ test } , signal={ signal } , m={ m } , v={ v } "
78101 trend = np .sign (test ) if np .abs (test ) > threshold else 0
79102 return trend , test
80103
@@ -261,6 +284,42 @@ def open_dataframe(
261284 raise ValueError (f"Unexpected value for data: { data !r} " )
262285
263286
def align_dataframe_with(
    df: pandas.DataFrame, baseline: pandas.DataFrame, fill_value: float = 0
) -> Optional[pandas.DataFrame]:
    """
    Builds a dataframe holding the values of *df* laid out on the exact
    index and columns of *baseline*.

    Only the numerical and boolean columns of *df* are considered, boolean
    columns are converted into integers. Both dataframes must share the same
    level names on both axes. Rows and columns of *df* missing from
    *baseline* are dropped, cells of *baseline* with no counterpart in *df*
    receive *fill_value*.

    :param df: dataframe to align, it is left untouched
    :param baseline: dataframe defining the expected index and columns
    :param fill_value: value stored in the cells *df* does not cover
    :return: a dataframe of floats with the index and columns of *baseline*
        or None if *df* has no numerical or boolean column
    """
    # "number" alone excludes booleans: they have to be requested explicitly,
    # otherwise the conversion below never sees any column.
    df = df.select_dtypes(include=["number", "bool"])
    if df.shape[1] == 0:
        return None
    bool_cols = list(df.select_dtypes(include="bool").columns)
    if bool_cols:
        # Explicit copy so the conversion never leaks into the caller's data.
        df = df.copy()
        df[bool_cols] = df[bool_cols].astype(int)
    # Both axes must agree, a single matching axis is not enough to align.
    assert (
        df.columns.names == baseline.columns.names
        and df.index.names == baseline.index.names
    ), (
        f"Levels mismatch, expected index.names={baseline.index.names}, "
        f"expected columns.names={baseline.columns.names}, "
        f"got index.names={df.index.names}, "
        f"got columns.names={df.columns.names}"
    )
    # df.dtypes also works when column labels are duplicated.
    dtypes = set(df.dtypes)
    assert all(np.issubdtype(dt, np.number) for dt in dtypes), (
        f"All columns in the first dataframe are expected to share "
        f"the same type or be at least numerical but got {dtypes}\n{df}"
    )
    common_index = df.index.intersection(baseline.index)
    cp = pandas.DataFrame(float(fill_value), index=baseline.index, columns=baseline.columns)
    for c in df.columns:
        if c not in cp.columns:
            # Column unknown to the baseline: nothing to align it with.
            continue
        cp.loc[common_index, c] = df.loc[common_index, c].astype(cp[c].dtype)
    return cp
321+
322+
264323class CubeViewDef :
265324 """
266325 Defines how to compute a view.
@@ -397,8 +456,8 @@ def apply_excel_style(
397456 workbook = filename_or_writer .book
398457 save = False
399458
400- mask_low = PatternFill (fgColor = "8888DD " , fill_type = "solid" )
401- mask_high = PatternFill (fgColor = "DD8888 " , fill_type = "solid" )
459+ mask_low = PatternFill (fgColor = "AAAAF0 " , fill_type = "solid" )
460+ mask_high = PatternFill (fgColor = "F0AAAA " , fill_type = "solid" )
402461
403462 left = Alignment (horizontal = "left" )
404463 left_shrink = Alignment (horizontal = "left" , shrink_to_fit = True )
@@ -927,12 +986,17 @@ def load(self, verbose: int = 0):
927986 shape = self .data .shape
928987 if verbose :
929988 print (f"[CubeLogs.load] removed columns, shape={ self .data .shape } " )
989+ assert self .data .shape [0 ] > 0 or self ._data .shape [0 ] == 0 , (
990+ f"The preprocessing reduced shape { shape } to { self .data .shape } , "
991+ f"initial shape={ self ._data .shape } ."
992+ )
930993 self ._preprocess ()
931994 if verbose :
932995 print (f"[CubeLogs.load] preprocess, shape={ self .data .shape } " )
933- assert (
934- self .data .shape [0 ] > 0
935- ), f"The preprocessing reduced shape { shape } to { self .data .shape } ."
996+ assert self .data .shape [0 ] > 0 or self ._data .shape [0 ] == 0 , (
997+ f"The preprocessing reduced shape { shape } to { self .data .shape } , "
998+ f"initial shape={ self ._data .shape } ."
999+ )
9361000 if self .recent and verbose :
9371001 print (f"[CubeLogs.load] keep most recent data.shape={ self .data .shape } " )
9381002
@@ -1462,29 +1526,19 @@ def to_excel(
14621526 continue
14631527 df , tview = self .view (view , return_view_def = True , verbose = max (verbose - 1 , 0 ))
14641528 if cube_time is not None :
1465- time_mask_view [name ] = cube_time .view (view )
1466- print ("----" )
1467- print (df )
1468- print ("-" )
1469- print (time_mask_view [name ])
1470- assert time_mask_view [name ].shape == df .shape , (
1471- f"Shape mismatch between the view { df .shape } and the mask "
1472- f"{ time_mask_view [name ].shape } "
1473- )
1474- assert (
1475- time_mask_view [name ].columns .names == df .columns .names
1476- or time_mask_view [name ].index .names == df .index .names
1477- ), (
1478- f"Levels mismatch, index.names={ df .index .names } , "
1479- f"columns.names={ df .columns .names } , "
1480- f"mask.index.names={ time_mask_view [name ].index .names } , "
1481- f"mask.columns.names={ time_mask_view [name ].columns .names } "
1482- )
1483- if verbose :
1484- print (
1485- f"[CubeLogs.to_excel] compute mask for view { name !r} with shape "
1529+ cube_mask = cube_time .view (view )
1530+ aligned = align_dataframe_with (cube_mask , df )
1531+ if aligned is not None :
1532+ assert aligned .shape == df .shape , (
1533+ f"Shape mismatch between the view { df .shape } and the mask "
14861534 f"{ time_mask_view [name ].shape } "
14871535 )
1536+ time_mask_view [name ] = aligned
1537+ if verbose :
1538+ print (
1539+ f"[CubeLogs.to_excel] compute mask for view { name !r} "
1540+ f"with shape { aligned .shape } "
1541+ )
14881542 if tview is None :
14891543 continue
14901544 memory = df .memory_usage (deep = True ).sum ()
@@ -1609,11 +1663,17 @@ def cube_time(self, fill_other_dates: bool = False, threshold: float = 1.2) -> "
16091663 """
16101664 unique_time = self .data [self .time ].unique ()
16111665 assert len (unique_time ) > 2 , f"Not enough dates to proceed: unique_time={ unique_time } "
1612- gr = self .data [[* self .keys_no_time , * self .values ]].groupby (self .keys_no_time )
1666+ gr = self .data [[* self .keys_no_time , * self .values ]].groupby (
1667+ self .keys_no_time , dropna = False
1668+ )
16131669 dgr = gr .agg (
16141670 lambda series , th = threshold : int (breaking_last_point (series , threshold = th )[0 ])
16151671 )
16161672 tm = unique_time .max ()
1673+ assert dgr .shape [0 ] > 0 , (
1674+ f"Unexpected output shape={ dgr .shape } , unique_time={ unique_time } , "
1675+ f"data.shape={ self .data .shape } "
1676+ )
16171677 dgr [self .time ] = tm
16181678 if fill_other_dates :
16191679 other_df = []
@@ -1626,6 +1686,11 @@ def cube_time(self, fill_other_dates: bool = False, threshold: float = 1.2) -> "
16261686 df [c ] = 0
16271687 other_df .append (df )
16281688 dgr = pandas .concat ([dgr , * other_df ], axis = 0 )
1689+ assert dgr .shape [0 ] > 0 , (
1690+ f"Unexpected output shape={ dgr .shape } , unique_time={ unique_time } , "
1691+ f"data.shape={ self .data .shape } , "
1692+ f"other_df shapes={ [df .shape for df in other_df ]} "
1693+ )
16291694 return self .clone (data = dgr .reset_index (drop = False ))
16301695
16311696
@@ -1724,6 +1789,7 @@ def clone(self, data: Optional[pandas.DataFrame] = None) -> "CubeLogs":
17241789 time = self .time ,
17251790 keys = self .keys_no_time ,
17261791 values = self .values ,
1792+ recent = False ,
17271793 )
17281794 cube .load ()
17291795 return cube
0 commit comments