@@ -59,7 +59,7 @@ def mann_kendall(series: Sequence[float], threshold: float = 0.5):
5959 return trend , test
6060
6161
62- def breaking_last_point (signal : Sequence [float ], threshold : float = 1.1 ):
62+ def breaking_last_point (signal : Sequence [float ], threshold : float = 1.2 ):
6363 """
6464 Assuming a timeseries is constant, we check the last value
6565 is not an outlier.
@@ -374,17 +374,21 @@ def __repr__(self) -> str:
374374def apply_excel_style (
375375 filename_or_writer : Any ,
376376 f_highlights : Optional [Dict [str , Callable [[Any ], CubeViewDef .HighLightKind ]]] = None ,
377+ time_mask_view : Optional [Dict [str , pandas .DataFrame ]] = None ,
377378):
378379 """
379380 Applies styles on all sheets in a file unless the sheet is too big.
380381
381382 :param filename_or_writer: filename, modified inplace
382383 :param f_highlight: color function to apply, one per sheet
384+ :param time_mask_view: if specified, it maps a sheet name to a dataframe with the same
385+ shape as the view and values in {-1, 0, +1} indicating whether a value is unexpectedly
386+ lower (-1) or higher (+1); the background color of those cells is changed accordingly.
383387 """
384388 from openpyxl import load_workbook
385389 from openpyxl .styles import Alignment
386390 from openpyxl .utils import get_column_letter
387- from openpyxl .styles import Font # , PatternFill, numbers
391+ from openpyxl .styles import Font , PatternFill
388392
389393 if isinstance (filename_or_writer , str ):
390394 workbook = load_workbook (filename_or_writer )
@@ -393,6 +397,9 @@ def apply_excel_style(
393397 workbook = filename_or_writer .book
394398 save = False
395399
400+ mask_low = PatternFill (fgColor = "8888DD" , fill_type = "solid" )
401+ mask_high = PatternFill (fgColor = "DD8888" , fill_type = "solid" )
402+
396403 left = Alignment (horizontal = "left" )
397404 left_shrink = Alignment (horizontal = "left" , shrink_to_fit = True )
398405 right = Alignment (horizontal = "right" )
@@ -402,6 +409,14 @@ def apply_excel_style(
402409 }
403410
404411 for name in workbook .sheetnames :
412+ if time_mask_view and name in time_mask_view :
413+ mask = time_mask_view [name ]
414+ with pandas .ExcelWriter (io .BytesIO (), engine = "openpyxl" ) as mask_writer :
415+ mask .to_excel (mask_writer , sheet_name = name )
416+ sheet_mask = mask_writer .sheets [name ]
417+ else :
418+ sheet_mask = None
419+
405420 f_highlight = f_highlights .get (name , None ) if f_highlights else None
406421 sheet = workbook [name ]
407422 n_rows = sheet .max_row
@@ -479,6 +494,16 @@ def apply_excel_style(
479494 h = f_highlight (cell .value )
480495 if h in font_colors :
481496 cell .font = font_colors [h ]
497+
498+ if sheet_mask is not None :
499+ for i in range (1 , n_rows + 1 ):
500+ for j , (cell , cell_mask ) in enumerate (zip (sheet [i ], sheet_mask [i ])):
501+ if j > n_cols :
502+ break
503+ if cell_mask .value not in (1 , - 1 ):
504+ continue
505+ cell .fill = mask_low if cell_mask .value < 0 else mask_high
506+
482507 if save :
483508 workbook .save (filename_or_writer )
484509
@@ -1402,7 +1427,7 @@ def to_excel(
14021427 raw : Optional [str ] = "raw" ,
14031428 verbose : int = 0 ,
14041429 csv : Optional [Sequence [str ]] = None ,
1405- time_mask : bool = True ,
1430+ time_mask : bool = False ,
14061431 ):
14071432 """
14081433 Creates an excel file with a list of views.
@@ -1438,6 +1463,23 @@ def to_excel(
14381463 df , tview = self .view (view , return_view_def = True , verbose = max (verbose - 1 , 0 ))
14391464 if cube_time is not None :
14401465 time_mask_view [name ] = cube_time .view (view )
1466+ print ("----" )
1467+ print (df )
1468+ print ("-" )
1469+ print (time_mask_view [name ])
1470+ assert time_mask_view [name ].shape == df .shape , (
1471+ f"Shape mismatch between the view { df .shape } and the mask "
1472+ f"{ time_mask_view [name ].shape } "
1473+ )
1474+ assert (
1475+ time_mask_view [name ].columns .names == df .columns .names
1476+ or time_mask_view [name ].index .names == df .index .names
1477+ ), (
1478+ f"Levels mismatch, index.names={ df .index .names } , "
1479+ f"columns.names={ df .columns .names } , "
1480+ f"mask.index.names={ time_mask_view [name ].index .names } , "
1481+ f"mask.columns.names={ time_mask_view [name ].columns .names } "
1482+ )
14411483 if verbose :
14421484 print (
14431485 f"[CubeLogs.to_excel] compute mask for view { name !r} with shape "
@@ -1553,20 +1595,24 @@ def to_excel(
15531595
15541596 if verbose :
15551597 print (f"[CubeLogs.to_excel] applies style to { output !r} " )
1556- apply_excel_style (writer , f_highlights ) # type: ignore[arg-type]
1598+ apply_excel_style (writer , f_highlights , time_mask_view = time_mask_view ) # type: ignore[arg-type]
15571599 if verbose :
15581600 print (f"[CubeLogs.to_excel] done with { len (views )} views" )
15591601
1560- def cube_time (self , fill_other_dates : bool = False ) -> "CubeLogs" :
1602+ def cube_time (self , fill_other_dates : bool = False , threshold : float = 1.2 ) -> "CubeLogs" :
15611603 """
15621604 Aggregates the data over time to detect changes on the last value.
15631605 If *fill_other_dates* is True, all dates are kept, but values
15641606 are filled with 0.
1607+ *threshold* determines the bandwidth within which the values are expected,
1608+ it should be a factor of the standard deviation.
15651609 """
15661610 unique_time = self .data [self .time ].unique ()
15671611 assert len (unique_time ) > 2 , f"Not enough dates to proceed: unique_time={ unique_time } "
15681612 gr = self .data [[* self .keys_no_time , * self .values ]].groupby (self .keys_no_time )
1569- dgr = gr .agg (lambda series : int (breaking_last_point (series )[0 ]))
1613+ dgr = gr .agg (
1614+ lambda series , th = threshold : int (breaking_last_point (series , threshold = th )[0 ])
1615+ )
15701616 tm = unique_time .max ()
15711617 dgr [self .time ] = tm
15721618 if fill_other_dates :
0 commit comments