Skip to content

Commit 59c5ea5

Browse files
authored
Better documentation and new metrics for timeseries (#181)
* better doc * use 4.53.1 * add a statistic * mypy * cube_time * cubetime * add style * fix dtypes * fix issues * fix ut
1 parent 86103d8 commit 59c5ea5

File tree

4 files changed

+420
-14
lines changed

4 files changed

+420
-14
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
matrix:
1717
os: [ubuntu-latest]
1818
python: ['3.10', '3.11', '3.12', '3.13']
19-
transformers: ['4.48.3', '4.51.3', '4.52.4', '4.53.0', 'main']
19+
transformers: ['4.48.3', '4.51.3', '4.52.4', '4.53.1', 'main']
2020
torch: ['2.7', 'main']
2121
exclude:
2222
- python: '3.10'
@@ -28,7 +28,7 @@ jobs:
2828
- python: '3.10'
2929
transformers: 'main'
3030
- python: '3.11'
31-
transformers: '4.53.0'
31+
transformers: '4.53.1'
3232
- python: '3.11'
3333
transformers: 'main'
3434
- python: '3.13'

_unittests/ut_helpers/test_log_helper.py

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
enumerate_csv_files,
1515
open_dataframe,
1616
filter_data,
17+
mann_kendall,
18+
breaking_last_point,
1719
)
1820

1921

@@ -207,7 +209,7 @@ def test_enumerate_csv_files(self):
207209
self.assertIn("RAWFILENAME", cube.data.columns)
208210

209211
def test_cube_logs_performance1(self):
210-
output = self.get_dump_file("test_cube_logs_performance.xlsx")
212+
output = self.get_dump_file("test_cube_logs_performance1.xlsx")
211213
filename = os.path.join(os.path.dirname(__file__), "data", "data-agg.zip")
212214
assert list(enumerate_csv_files(filename))
213215
dfs = [open_dataframe(df) for df in enumerate_csv_files(filename)]
@@ -232,7 +234,7 @@ def test_cube_logs_performance1(self):
232234
self.assertExists(output)
233235

234236
def test_cube_logs_performance2(self):
235-
output = self.get_dump_file("test_cube_logs_performance.xlsx")
237+
output = self.get_dump_file("test_cube_logs_performance2.xlsx")
236238
filename = os.path.join(os.path.dirname(__file__), "data", "data-agg.zip")
237239
assert list(enumerate_csv_files(filename))
238240
dfs = [open_dataframe(df) for df in enumerate_csv_files(filename)]
@@ -256,6 +258,16 @@ def test_cube_logs_performance2(self):
256258
)
257259
self.assertExists(output)
258260

261+
def test_cube_logs_performance_cube_time(self):
262+
filename = os.path.join(os.path.dirname(__file__), "data", "data-agg.zip")
263+
assert list(enumerate_csv_files(filename))
264+
dfs = [open_dataframe(df) for df in enumerate_csv_files(filename)]
265+
assert dfs, f"{filename!r} empty"
266+
cube = CubeLogsPerformance(dfs, keep_last_date=True)
267+
cube.load()
268+
ct = cube.clone()
269+
self.assertEqual((52, 106), ct.shape)
270+
259271
def test_duplicate(self):
260272
df = pandas.DataFrame(
261273
[
@@ -402,6 +414,60 @@ def test_filter_data(self):
402414
df2 = filter_data(df, "", "model_exporter:onnx-dynamo;T", verbose=1)
403415
self.assertEqualDataFrame(df[df.model_exporter != "onnx-dynamo"], df2)
404416

417+
def test_mann_kendall(self):
418+
test = mann_kendall(list(range(5)))
419+
self.assertEqual((np.float64(1.0), np.float64(0.5196152422706631)), test)
420+
test = mann_kendall(list(range(3)))
421+
self.assertEqual((0, np.float64(0.24618298195866545)), test)
422+
test = mann_kendall(list(range(5, 0, -1)))
423+
self.assertEqual((np.float64(-1.0), np.float64(-0.5196152422706631)), test)
424+
425+
def test_breaking_last_point(self):
426+
test = breaking_last_point([1, 1, 1, 2])
427+
self.assertEqual((1, np.float64(1.0)), test)
428+
test = breaking_last_point([1, 1, 1.1, 2])
429+
self.assertEqual((np.float64(1.0), np.float64(20.50609665440986)), test)
430+
test = breaking_last_point([-1, -1, -1.1, -2])
431+
self.assertEqual((np.float64(-1.0), np.float64(-20.50609665440986)), test)
432+
test = breaking_last_point([1, 1, 1.1, 1])
433+
self.assertEqual((np.float64(0.0), np.float64(-0.7071067811865491)), test)
434+
435+
def test_historical_cube_time(self):
436+
# case 1
437+
df = pandas.DataFrame(
438+
[
439+
dict(date="2025/01/01", time_p=0.51, exporter="E1", m_name="A", m_cls="CA"),
440+
dict(date="2025/01/02", time_p=0.62, exporter="E1", m_name="A", m_cls="CA"),
441+
dict(date="2025/01/03", time_p=0.62, exporter="E1", m_name="A", m_cls="CA"),
442+
dict(date="2025/01/01", time_p=0.51, exporter="E2", m_name="A", m_cls="CA"),
443+
dict(date="2025/01/02", time_p=0.62, exporter="E2", m_name="A", m_cls="CA"),
444+
dict(date="2025/01/03", time_p=0.50, exporter="E2", m_name="A", m_cls="CA"),
445+
]
446+
)
447+
cube = CubeLogs(df, keys=["^m_*", "exporter"], time="date").load()
448+
cube_time = cube.cube_time(threshold=1.1)
449+
v = cube_time.data["time_p"].tolist()
450+
self.assertEqual([0, -1], v)
451+
452+
@hide_stdout()
453+
def test_historical_cube_time_mask(self):
454+
output = self.get_dump_file("test_historical_cube_time_mask.xlsx")
455+
df = pandas.DataFrame(
456+
[
457+
dict(date="2025/01/01", time_p=0.51, exporter="E1", m_name="A", m_cls="CA"),
458+
dict(date="2025/01/02", time_p=0.62, exporter="E1", m_name="A", m_cls="CA"),
459+
dict(date="2025/01/03", time_p=0.62, exporter="E1", m_name="A", m_cls="CA"),
460+
dict(date="2025/01/01", time_p=0.51, exporter="E2", m_name="A", m_cls="CA"),
461+
dict(date="2025/01/02", time_p=0.62, exporter="E2", m_name="A", m_cls="CA"),
462+
dict(date="2025/01/03", time_p=0.50, exporter="E2", m_name="A", m_cls="CA"),
463+
dict(date="2025/01/01", time_p=0.71, exporter="E2", m_name="B", m_cls="CA"),
464+
dict(date="2025/01/02", time_p=0.72, exporter="E2", m_name="B", m_cls="CA"),
465+
dict(date="2025/01/03", time_p=0.70, exporter="E2", m_name="B", m_cls="CA"),
466+
]
467+
)
468+
cube = CubeLogs(df, keys=["^m_*", "exporter"], time="date").load()
469+
cube.to_excel(output, views=["time_p"], time_mask=True, verbose=1)
470+
405471

406472
if __name__ == "__main__":
407473
unittest.main(verbosity=2)

onnx_diagnostic/_command_lines_parser.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -657,9 +657,16 @@ def get_parser_agg() -> ArgumentParser:
657657
),
658658
epilog=textwrap.dedent(
659659
"""
660-
examples:\n
660+
examples:
661661
662662
python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1
663+
python -m onnx_diagnostic agg agg.xlsx raw/*.zip raw/*.csv -v 1 \\
664+
--no-raw --keep-last-date --filter-out "exporter:test-exporter"
665+
666+
Another to create timeseries:
667+
668+
python -m onnx_diagnostic agg history.xlsx raw/*.csv -v 1 --no-raw \\
669+
--no-recent
663670
"""
664671
),
665672
formatter_class=RawTextHelpFormatter,
@@ -812,6 +819,7 @@ def _cmd_agg(argv: List[Any]):
812819
verbose=args.verbose,
813820
csv=args.csv.split(","),
814821
raw=args.raw,
822+
time_mask=True,
815823
)
816824
if args.verbose:
817825
print(f"Wrote {args.output!r}")

0 commit comments

Comments
 (0)