Skip to content

Commit eb15c53

Browse files
committed
fix issues
1 parent 1b3052e commit eb15c53

File tree

4 files changed

+149
-6
lines changed

4 files changed

+149
-6
lines changed

_unittests/ut_helpers/test_log_helper.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
import io
2+
import os
23
import textwrap
34
import unittest
5+
import zipfile
46
import pandas
57
from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
6-
from onnx_diagnostic.helpers.log_helper import CubeLogs, CubeViewDef
8+
from onnx_diagnostic.helpers.log_helper import (
9+
CubeLogs,
10+
CubeViewDef,
11+
enumerate_csv_files,
12+
open_dataframe,
13+
)
714

815

916
class TestLogHelper(ExtTestCase):
@@ -152,6 +159,21 @@ def test_cube_logs_excel(self):
152159
)
153160
self.assertExists(output)
154161

162+
def test_enumerate_csv_files(self):
    """
    Checks that ``enumerate_csv_files`` walks csv and zip glob patterns
    and that every enumerated item can be loaded by ``open_dataframe``.
    """
    df = self.df1()
    filename = self.get_dump_file("test_enumerate_csv_files.csv")
    df.to_csv(filename, index=False)
    zip_file = self.get_dump_file("test_enumerate_csv_files.zip")
    with zipfile.ZipFile(zip_file, "w", zipfile.ZIP_DEFLATED) as zipf:
        zipf.write(filename)

    dirname = os.path.dirname(filename)
    data = [os.path.join(dirname, "*.csv"), os.path.join(dirname, "*.zip")]
    found = list(enumerate_csv_files(data, verbose=1))
    self.assertNotEmpty(found)
    # A distinct loop name avoids shadowing the fixture dataframe ``df``.
    for item in found:
        # Every enumerated entry must load as a dataframe, not merely not raise.
        self.assertIsInstance(open_dataframe(item), pandas.DataFrame)
176+
155177

156178
if __name__ == "__main__":
157179
unittest.main(verbosity=2)

_unittests/ut_helpers/test_ort_session_tinyllm.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,10 @@ def test_check_allruntimes_on_tiny_llm(self):
8282
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
8383
expected = model(**copy.deepcopy(inputs))
8484

85-
with torch_export_patches(patch_transformers=True):
85+
with torch_export_patches(patch_transformers=True, stop_if_static=1):
8686
if to_onnx:
8787
proto = to_onnx(model, (), kwargs=copy.deepcopy(inputs), dynamic_shapes=ds)
8888
else:
89-
stop
9089
proto = torch.onnx.export(
9190
model, (), kwargs=copy.deepcopy(inputs), dynamic_shapes=ds, dynamo=True
9291
).model_proto

onnx_diagnostic/helpers/log_helper.py

Lines changed: 123 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,132 @@
1+
import datetime
2+
import glob
3+
import os
14
import re
2-
from typing import Any, Callable, Dict, Optional, Sequence, Tuple
5+
import zipfile
6+
from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple, Union
37
from .helper import string_sig
48
import pandas
59
from pandas.api.types import is_numeric_dtype
610

711

12+
def enumerate_csv_files(
    data: Union[
        pandas.DataFrame, List[Union[str, Tuple[str, str]]], str, Tuple[str, str, str, str]
    ],
    verbose: int = 0,
) -> Iterator[Union[pandas.DataFrame, str, Tuple[str, str, str, str]]]:
    """
    Enumerates files considered for the aggregation.
    Only csv files are considered.
    If a zip file is given, the function digs into the zip file and
    loops over csv candidates.

    :param data: dataframe with the raw data or a file or list of files
    :param verbose: if not null, prints what is found while enumerating
    :return: an iterator; dataframes and tuples found in *data* are yielded
        unchanged, every csv file is yielded as a tuple
        ``(basename, date, name, zip file)`` where *date* is formatted as
        ``YYYY-mm-dd HH:MM:SS`` and *zip file* is an empty string when the
        csv file is not stored in a zip file

    data can contain:
    * a dataframe
    * a string for a filename (zip or csv) or a pattern given to :mod:`glob`
    * a list of strings
    * a tuple

    An existing file which is neither a csv nor a zip file raises
    an :class:`AssertionError`.
    """
    if not isinstance(data, list):
        data = [data]
    for itn, filename in enumerate(data):
        if isinstance(filename, pandas.DataFrame):
            if verbose:
                print(f"[enumerate_csv_files] data[{itn}] is a dataframe")
            yield filename
            continue

        if isinstance(filename, tuple):
            # A file in a zipfile
            if verbose:
                print(f"[enumerate_csv_files] data[{itn}] is {filename!r}")
            yield filename
            continue

        if os.path.exists(filename):
            ext = os.path.splitext(filename)[-1]
            if ext == ".csv":
                # We check the first line is ok.
                if verbose:
                    print(f"[enumerate_csv_files] data[{itn}] is a csv file: {filename!r}]")
                with open(filename, "r", encoding="utf-8") as f:
                    line = f.readline()
                # The header must mention a CMD or DATE column and must not
                # contain the marker ~help, otherwise the file is skipped.
                if "~help" in line or (",CMD" not in line and ",DATE" not in line):
                    continue
                dt = datetime.datetime.fromtimestamp(os.stat(filename).st_mtime)
                du = dt.strftime("%Y-%m-%d %H:%M:%S")
                yield (os.path.split(filename)[-1], du, filename, "")
                continue

            if ext == ".zip":
                if verbose:
                    print(f"[enumerate_csv_files] data[{itn}] is a zip file: {filename!r}]")
                # The context manager closes the archive even if the consumer
                # stops iterating early or an exception is raised.
                with zipfile.ZipFile(filename, "r") as zf:
                    for ii, info in enumerate(zf.infolist()):
                        name = info.filename
                        if os.path.splitext(name)[-1] != ".csv":
                            continue
                        if verbose:
                            print(
                                f"[enumerate_csv_files] data[{itn}][{ii}] is a csv file: {name!r}]"
                            )
                        # The header of a csv stored in a zip file is not checked.
                        yield (
                            os.path.split(name)[-1],
                            "%04d-%02d-%02d %02d:%02d:%02d" % info.date_time,
                            name,
                            filename,
                        )
                continue

            raise AssertionError(f"Unexpected format {filename!r}, cannot read it.")

        # filename is a pattern.
        found = glob.glob(filename)
        if verbose and not found:
            print(f"[enumerate_csv_files] unable to find file in {filename!r}")
        for ii, f in enumerate(found):
            if verbose:
                print(f"[enumerate_csv_files] data[{itn}][{ii}] {f!r} from {filename!r}")
            yield from enumerate_csv_files(f, verbose=verbose)
97+
98+
99+
def open_dataframe(
    data: Union[str, Tuple[str, str, str, str], pandas.DataFrame],
) -> pandas.DataFrame:
    """
    Opens a filename.

    :param data: a dataframe, a filename, a tuple indicating the file is coming
        from a zip file, the third element of the tuple is the name of the csv
        file, the last one is the zip file storing it or an empty
        string if the csv file is a regular file
    :return: a dataframe, a dataframe given as input is returned unchanged,
        otherwise a column ``RAWFILENAME`` is added to tell where
        the data comes from
    :raises ValueError: if *data* is not a dataframe, a string or a tuple
    """
    if isinstance(data, pandas.DataFrame):
        return data
    if isinstance(data, str):
        df = pandas.read_csv(data)
        df["RAWFILENAME"] = data
        return df
    if isinstance(data, tuple):
        if not data[-1]:
            # Not in a zip file, data[2] is a regular csv file.
            df = pandas.read_csv(data[2])
            df["RAWFILENAME"] = data[2]
            return df
        # Context managers close the archive and the member
        # even if the csv cannot be parsed.
        with zipfile.ZipFile(data[-1]) as zf, zf.open(data[2]) as f:
            df = pandas.read_csv(f)
        df["RAWFILENAME"] = f"{data[-1]}/{data[2]}"
        return df

    raise ValueError(f"Unexpected value for data: {data!r}")
128+
129+
8130
class CubeViewDef:
9131
"""
10132
Defines how to compute a view.

onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ def get_tiny_llm(
5858
num_key_value_heads = config["num_key_value_heads"]
5959

6060
batch = torch.export.Dim("batch", min=1, max=1024)
61-
seq_length = torch.export.Dim("seq_length", min=1, max=4096)
62-
cache_length = torch.export.Dim("cache_length", min=1, max=4096)
61+
seq_length = torch.export.Dim("seq_length", min=1, max=8192)
62+
cache_length = torch.export.Dim("cache_length", min=1, max=8192)
6363

6464
shapes = {
6565
"input_ids": {0: batch, 1: seq_length},

0 commit comments

Comments
 (0)