Skip to content

Commit a3fd059

Browse files
committed
fix dtypes
1 parent 82d705a commit a3fd059

File tree

2 files changed

+95
-28
lines changed

2 files changed

+95
-28
lines changed

onnx_diagnostic/_command_lines_parser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,7 @@ def _cmd_agg(argv: List[Any]):
819819
verbose=args.verbose,
820820
csv=args.csv.split(","),
821821
raw=args.raw,
822+
time_mask=True,
822823
)
823824
if args.verbose:
824825
print(f"Wrote {args.output!r}")

onnx_diagnostic/helpers/log_helper.py

Lines changed: 94 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,36 @@ def breaking_last_point(signal: Sequence[float], threshold: float = 1.2):
6868
:return: significant change (-1, 0, +1), test value
6969
"""
7070
signal = np.asarray(signal)
71-
m = np.mean(signal[:-1])
71+
if not np.issubdtype(signal.dtype, np.number):
72+
return 0, np.nan
73+
assert len(signal.shape) == 1, f"Unexpected signal shape={signal.shape}, signal={signal}"
74+
if signal.shape[0] <= 2:
75+
return 0, 0
76+
77+
has_value = ~(np.isnan(signal).all()) and ~(np.isinf(signal).all())
78+
if np.isnan(signal[-1]) or np.isinf(signal[-1]):
79+
return (-1, np.inf) if has_value else (0, 0)
80+
81+
try:
82+
m = np.mean(signal[:-1])
83+
except (TypeError, ValueError):
84+
# Not a numerical type
85+
return 0, np.nan
86+
87+
if np.isnan(m) or np.isinf(m):
88+
return (1, np.inf) if np.isinf(signal[-2]) or np.isnan(signal[-2]) else (0, 0)
7289
v = np.std(signal[:-1])
7390
if v == 0:
7491
test = signal[-1] - m
92+
assert not np.isnan(
93+
test
94+
), f"Unexpected test value, test={test}, signal={signal}, m={m}, v={v}"
7595
trend = np.sign(test)
7696
return trend, trend
7797
test = (signal[-1] - m) / v
98+
assert not np.isnan(
99+
test
100+
), f"Unexpected test value, test={test}, signal={signal}, m={m}, v={v}"
78101
trend = np.sign(test) if np.abs(test) > threshold else 0
79102
return trend, test
80103

@@ -261,6 +284,42 @@ def open_dataframe(
261284
raise ValueError(f"Unexpected value for data: {data!r}")
262285

263286

287+
def align_dataframe_with(
    df: pandas.DataFrame, baseline: pandas.DataFrame, fill_value: float = 0
) -> Optional[pandas.DataFrame]:
    """
    Builds a copy of *df* reshaped to the exact index and columns of *baseline*.

    Only numerical and boolean columns of *df* are kept; boolean columns
    are converted to integers. Cells of *baseline* which have no counterpart
    in *df* are filled with *fill_value*. The input dataframe is left untouched.

    :param df: dataframe to align
    :param baseline: dataframe giving the expected index and columns
    :param fill_value: value stored in the cells missing from *df*,
        it is converted to float
    :return: a float dataframe with the index and columns of *baseline*,
        or None if *df* has no numerical or boolean column
    """
    # ``include="number"`` alone drops boolean columns, so they must be
    # requested explicitly, otherwise the conversion below never runs.
    # The copy guarantees the assignment below does not write into the
    # caller's dataframe.
    df = df.select_dtypes(include=["number", "bool"]).copy()
    if df.shape[1] == 0:
        return None
    bool_cols = list(df.select_dtypes(include="bool").columns)
    if bool_cols:
        df[bool_cols] = df[bool_cols].astype(int)
    # NOTE(review): the check passes when the level names match on at least
    # one axis; confirm whether both axes were meant to be required.
    assert (
        df.columns.names == baseline.columns.names or df.index.names == baseline.index.names
    ), (
        f"Levels mismatch, expected index.names={baseline.index.names}, "
        f"expected columns.names={baseline.columns.names}, "
        f"got index.names={df.index.names}, "
        f"got columns.names={df.columns.names}"
    )
    dtypes = set(df.dtypes)
    assert all(np.issubdtype(dt, np.number) for dt in dtypes), (
        f"All columns in the first dataframe are expected to share "
        f"the same type or be at least numerical but got {dtypes}\n{df}"
    )
    # Only the rows present in both dataframes can be copied over.
    common_index = df.index.intersection(baseline.index)
    cp = pandas.DataFrame(float(fill_value), index=baseline.index, columns=baseline.columns)
    for c in df.columns:
        # Columns unknown to the baseline are ignored; the dtype test stays as
        # a guard in case assertions are disabled.
        if c not in cp.columns or not np.issubdtype(df[c].dtype, np.number):
            continue
        cp.loc[common_index, c] = df.loc[common_index, c].astype(cp[c].dtype)
    return cp
321+
322+
264323
class CubeViewDef:
265324
"""
266325
Defines how to compute a view.
@@ -397,8 +456,8 @@ def apply_excel_style(
397456
workbook = filename_or_writer.book
398457
save = False
399458

400-
mask_low = PatternFill(fgColor="8888DD", fill_type="solid")
401-
mask_high = PatternFill(fgColor="DD8888", fill_type="solid")
459+
mask_low = PatternFill(fgColor="AAAAF0", fill_type="solid")
460+
mask_high = PatternFill(fgColor="F0AAAA", fill_type="solid")
402461

403462
left = Alignment(horizontal="left")
404463
left_shrink = Alignment(horizontal="left", shrink_to_fit=True)
@@ -927,12 +986,17 @@ def load(self, verbose: int = 0):
927986
shape = self.data.shape
928987
if verbose:
929988
print(f"[CubeLogs.load] removed columns, shape={self.data.shape}")
989+
assert self.data.shape[0] > 0 or self._data.shape[0] == 0, (
990+
f"The preprocessing reduced shape {shape} to {self.data.shape}, "
991+
f"initial shape={self._data.shape}."
992+
)
930993
self._preprocess()
931994
if verbose:
932995
print(f"[CubeLogs.load] preprocess, shape={self.data.shape}")
933-
assert (
934-
self.data.shape[0] > 0
935-
), f"The preprocessing reduced shape {shape} to {self.data.shape}."
996+
assert self.data.shape[0] > 0 or self._data.shape[0] == 0, (
997+
f"The preprocessing reduced shape {shape} to {self.data.shape}, "
998+
f"initial shape={self._data.shape}."
999+
)
9361000
if self.recent and verbose:
9371001
print(f"[CubeLogs.load] keep most recent data.shape={self.data.shape}")
9381002

@@ -1462,29 +1526,19 @@ def to_excel(
14621526
continue
14631527
df, tview = self.view(view, return_view_def=True, verbose=max(verbose - 1, 0))
14641528
if cube_time is not None:
1465-
time_mask_view[name] = cube_time.view(view)
1466-
print("----")
1467-
print(df)
1468-
print("-")
1469-
print(time_mask_view[name])
1470-
assert time_mask_view[name].shape == df.shape, (
1471-
f"Shape mismatch between the view {df.shape} and the mask "
1472-
f"{time_mask_view[name].shape}"
1473-
)
1474-
assert (
1475-
time_mask_view[name].columns.names == df.columns.names
1476-
or time_mask_view[name].index.names == df.index.names
1477-
), (
1478-
f"Levels mismatch, index.names={df.index.names}, "
1479-
f"columns.names={df.columns.names}, "
1480-
f"mask.index.names={time_mask_view[name].index.names}, "
1481-
f"mask.columns.names={time_mask_view[name].columns.names}"
1482-
)
1483-
if verbose:
1484-
print(
1485-
f"[CubeLogs.to_excel] compute mask for view {name!r} with shape "
1529+
cube_mask = cube_time.view(view)
1530+
aligned = align_dataframe_with(cube_mask, df)
1531+
if aligned is not None:
1532+
assert aligned.shape == df.shape, (
1533+
f"Shape mismatch between the view {df.shape} and the mask "
14861534
f"{time_mask_view[name].shape}"
14871535
)
1536+
time_mask_view[name] = aligned
1537+
if verbose:
1538+
print(
1539+
f"[CubeLogs.to_excel] compute mask for view {name!r} "
1540+
f"with shape {aligned.shape}"
1541+
)
14881542
if tview is None:
14891543
continue
14901544
memory = df.memory_usage(deep=True).sum()
@@ -1609,11 +1663,17 @@ def cube_time(self, fill_other_dates: bool = False, threshold: float = 1.2) -> "
16091663
"""
16101664
unique_time = self.data[self.time].unique()
16111665
assert len(unique_time) > 2, f"Not enough dates to proceed: unique_time={unique_time}"
1612-
gr = self.data[[*self.keys_no_time, *self.values]].groupby(self.keys_no_time)
1666+
gr = self.data[[*self.keys_no_time, *self.values]].groupby(
1667+
self.keys_no_time, dropna=False
1668+
)
16131669
dgr = gr.agg(
16141670
lambda series, th=threshold: int(breaking_last_point(series, threshold=th)[0])
16151671
)
16161672
tm = unique_time.max()
1673+
assert dgr.shape[0] > 0, (
1674+
f"Unexpected output shape={dgr.shape}, unique_time={unique_time}, "
1675+
f"data.shape={self.data.shape}"
1676+
)
16171677
dgr[self.time] = tm
16181678
if fill_other_dates:
16191679
other_df = []
@@ -1626,6 +1686,11 @@ def cube_time(self, fill_other_dates: bool = False, threshold: float = 1.2) -> "
16261686
df[c] = 0
16271687
other_df.append(df)
16281688
dgr = pandas.concat([dgr, *other_df], axis=0)
1689+
assert dgr.shape[0] > 0, (
1690+
f"Unexpected output shape={dgr.shape}, unique_time={unique_time}, "
1691+
f"data.shape={self.data.shape}, "
1692+
f"other_df shapes={[df.shape for df in other_df]}"
1693+
)
16291694
return self.clone(data=dgr.reset_index(drop=False))
16301695

16311696

@@ -1724,6 +1789,7 @@ def clone(self, data: Optional[pandas.DataFrame] = None) -> "CubeLogs":
17241789
time=self.time,
17251790
keys=self.keys_no_time,
17261791
values=self.values,
1792+
recent=False,
17271793
)
17281794
cube.load()
17291795
return cube

0 commit comments

Comments
 (0)