Skip to content

Commit 82d705a

Browse files
committed
add style
1 parent 9631b58 commit 82d705a

File tree

2 files changed

+53
-7
lines changed

2 files changed

+53
-7
lines changed

_unittests/ut_helpers/test_log_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ def test_historical_cube_time(self):
445445
]
446446
)
447447
cube = CubeLogs(df, keys=["^m_*", "exporter"], time="date").load()
448-
cube_time = cube.cube_time()
448+
cube_time = cube.cube_time(threshold=1.1)
449449
v = cube_time.data["time_p"].tolist()
450450
self.assertEqual([0, -1], v)
451451

onnx_diagnostic/helpers/log_helper.py

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def mann_kendall(series: Sequence[float], threshold: float = 0.5):
5959
return trend, test
6060

6161

62-
def breaking_last_point(signal: Sequence[float], threshold: float = 1.1):
62+
def breaking_last_point(signal: Sequence[float], threshold: float = 1.2):
6363
"""
6464
Assuming a timeseries is constant, we check the last value
6565
is not an outlier.
@@ -374,17 +374,21 @@ def __repr__(self) -> str:
374374
def apply_excel_style(
375375
filename_or_writer: Any,
376376
f_highlights: Optional[Dict[str, Callable[[Any], CubeViewDef.HighLightKind]]] = None,
377+
time_mask_view: Optional[Dict[str, pandas.DataFrame]] = None,
377378
):
378379
"""
379380
Applies styles on all sheets in a file unless the sheet is too big.
380381
381382
:param filename_or_writer: filename, modified inplace
382383
:param f_highlights: color functions to apply, one per sheet
384+
:param time_mask_view: if specified, it maps a sheet name to a dataframe with the same shape
385+
and values in {-1, 0, +1} which indicates if a value is unexpectedly lower (-1)
386+
or higher (+1), it changes the color of the background then.
383387
"""
384388
from openpyxl import load_workbook
385389
from openpyxl.styles import Alignment
386390
from openpyxl.utils import get_column_letter
387-
from openpyxl.styles import Font # , PatternFill, numbers
391+
from openpyxl.styles import Font, PatternFill
388392

389393
if isinstance(filename_or_writer, str):
390394
workbook = load_workbook(filename_or_writer)
@@ -393,6 +397,9 @@ def apply_excel_style(
393397
workbook = filename_or_writer.book
394398
save = False
395399

400+
mask_low = PatternFill(fgColor="8888DD", fill_type="solid")
401+
mask_high = PatternFill(fgColor="DD8888", fill_type="solid")
402+
396403
left = Alignment(horizontal="left")
397404
left_shrink = Alignment(horizontal="left", shrink_to_fit=True)
398405
right = Alignment(horizontal="right")
@@ -402,6 +409,14 @@ def apply_excel_style(
402409
}
403410

404411
for name in workbook.sheetnames:
412+
if time_mask_view and name in time_mask_view:
413+
mask = time_mask_view[name]
414+
with pandas.ExcelWriter(io.BytesIO(), engine="openpyxl") as mask_writer:
415+
mask.to_excel(mask_writer, sheet_name=name)
416+
sheet_mask = mask_writer.sheets[name]
417+
else:
418+
sheet_mask = None
419+
405420
f_highlight = f_highlights.get(name, None) if f_highlights else None
406421
sheet = workbook[name]
407422
n_rows = sheet.max_row
@@ -479,6 +494,16 @@ def apply_excel_style(
479494
h = f_highlight(cell.value)
480495
if h in font_colors:
481496
cell.font = font_colors[h]
497+
498+
if sheet_mask is not None:
499+
for i in range(1, n_rows + 1):
500+
for j, (cell, cell_mask) in enumerate(zip(sheet[i], sheet_mask[i])):
501+
if j > n_cols:
502+
break
503+
if cell_mask.value not in (1, -1):
504+
continue
505+
cell.fill = mask_low if cell_mask.value < 0 else mask_high
506+
482507
if save:
483508
workbook.save(filename_or_writer)
484509

@@ -1402,7 +1427,7 @@ def to_excel(
14021427
raw: Optional[str] = "raw",
14031428
verbose: int = 0,
14041429
csv: Optional[Sequence[str]] = None,
1405-
time_mask: bool = True,
1430+
time_mask: bool = False,
14061431
):
14071432
"""
14081433
Creates an excel file with a list of views.
@@ -1438,6 +1463,23 @@ def to_excel(
14381463
df, tview = self.view(view, return_view_def=True, verbose=max(verbose - 1, 0))
14391464
if cube_time is not None:
14401465
time_mask_view[name] = cube_time.view(view)
1466+
print("----")
1467+
print(df)
1468+
print("-")
1469+
print(time_mask_view[name])
1470+
assert time_mask_view[name].shape == df.shape, (
1471+
f"Shape mismatch between the view {df.shape} and the mask "
1472+
f"{time_mask_view[name].shape}"
1473+
)
1474+
assert (
1475+
time_mask_view[name].columns.names == df.columns.names
1476+
or time_mask_view[name].index.names == df.index.names
1477+
), (
1478+
f"Levels mismatch, index.names={df.index.names}, "
1479+
f"columns.names={df.columns.names}, "
1480+
f"mask.index.names={time_mask_view[name].index.names}, "
1481+
f"mask.columns.names={time_mask_view[name].columns.names}"
1482+
)
14411483
if verbose:
14421484
print(
14431485
f"[CubeLogs.to_excel] compute mask for view {name!r} with shape "
@@ -1553,20 +1595,24 @@ def to_excel(
15531595

15541596
if verbose:
15551597
print(f"[CubeLogs.to_excel] applies style to {output!r}")
1556-
apply_excel_style(writer, f_highlights) # type: ignore[arg-type]
1598+
apply_excel_style(writer, f_highlights, time_mask_view=time_mask_view) # type: ignore[arg-type]
15571599
if verbose:
15581600
print(f"[CubeLogs.to_excel] done with {len(views)} views")
15591601

1560-
def cube_time(self, fill_other_dates: bool = False) -> "CubeLogs":
1602+
def cube_time(self, fill_other_dates: bool = False, threshold: float = 1.2) -> "CubeLogs":
15611603
"""
15621604
Aggregates the data over time to detect changes on the last value.
15631605
If *fill_other_dates* is True, all dates are kept, but values
15641606
are filled with 0.
1607+
*threshold* determines the bandwidth within which the values are expected,
1608+
should be a factor of the standard deviation.
15651609
"""
15661610
unique_time = self.data[self.time].unique()
15671611
assert len(unique_time) > 2, f"Not enough dates to proceed: unique_time={unique_time}"
15681612
gr = self.data[[*self.keys_no_time, *self.values]].groupby(self.keys_no_time)
1569-
dgr = gr.agg(lambda series: int(breaking_last_point(series)[0]))
1613+
dgr = gr.agg(
1614+
lambda series, th=threshold: int(breaking_last_point(series, threshold=th)[0])
1615+
)
15701616
tm = unique_time.max()
15711617
dgr[self.time] = tm
15721618
if fill_other_dates:

0 commit comments

Comments
 (0)