Commit fb2578d

ci test
2 parents: 1e7754c + 1f7a57f

84 files changed, +2724 −339 lines changed

.github/scripts/check-ut.py

Lines changed: 19 additions & 0 deletions
@@ -233,6 +233,17 @@ def print_summary():
     print("### Results Summary")
     print_header = True
 
+    totals = {
+        'Category': '**Total**',
+        'UT': '',
+        'Test cases': 0,
+        'Passed': 0,
+        'Skipped': 0,
+        'Failures': 0,
+        'Errors': 0,
+        'Source': ''
+    }
+
     for summary in summaries:
         print_md_row({
             'Category': summary['Category'],
@@ -246,6 +257,14 @@ def print_summary():
         }, print_header)
         print_header = False
 
+        totals['Test cases'] += summary['Test cases']
+        totals['Passed'] += summary['Passed']
+        totals['Skipped'] += summary['Skipped']
+        totals['Failures'] += summary['Failures']
+        totals['Errors'] += summary['Errors']
+
+    print_md_row(totals)
+
 def main():
     for input_file in args.input_files:
         if input_file.endswith('.log'):

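For context, the new totals row reuses the script's existing print_md_row helper, whose definition sits outside this hunk. A minimal sketch of such a helper, inferred only from the call sites above (illustrative; the actual implementation in check-ut.py may differ):

def print_md_row(row, print_header=False):
    # Emit the markdown table header once, on the first call
    if print_header:
        print('| ' + ' | '.join(row.keys()) + ' |')
        print('|' + ' --- |' * len(row))
    # Emit one markdown table row per call
    print('| ' + ' | '.join(str(v) for v in row.values()) + ' |')

Under that reading, print_md_row(totals) appends a final '**Total**' row beneath the per-category rows of the Results Summary table.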
.github/scripts/microbench_summary.py

Lines changed: 351 additions & 0 deletions
@@ -0,0 +1,351 @@ (new file)
"""
Microbenchmark Summary Tool - parses performance logs and generates CSV/Excel reports.

Usage:
  # Summarize forward op times; forward_op_summary.csv is the forward summary file:
  python microbench_summary.py path/to/profiler_logs forward_op_summary.csv

  # Summarize backward op times; backward_op_summary.csv is the backward summary
  # file (--backward selects backward ops; the default is forward):
  python microbench_summary.py path/to/profiler_logs backward_op_summary.csv --backward
"""

import re
import pandas as pd
import glob
import os
import argparse
import bisect
from pathlib import Path
from typing import Dict, List

def main():
    parser = argparse.ArgumentParser(
        description="Parse performance logs and generate summary reports",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("log_dir", help="Directory containing log files")
    parser.add_argument("output_file", help="Output CSV file path")
    parser.add_argument("--backward", action="store_true",
                        help="Process backward operations instead of forward")
    args = parser.parse_args()

    try:
        df = parse_logs(args.log_dir, args.backward)
        if df.empty:
            print("Warning: No valid data found in log files!")
            return

        save_reports(df, args.output_file)
        print(f"Successfully generated reports: {args.output_file} and {args.output_file.replace('.csv', '.xlsx')}")
    except Exception as e:
        print(f"Error: {str(e)}")
        raise

def parse_logs(log_dir: str, get_backward: bool = False) -> pd.DataFrame:
    data = []
    base_columns = [
        "case_name", "datatype", "op_name", "shape", "channels_last", "dim",
        "output_size", "P", "reduce", "kernel_size", "stride", "replacement",
        "num_samples", "scale_factor", "mode", "padding_mode", "align_corners",
        "shifts", "affine", "backward", "time(us)"
    ]

    for log_file in glob.glob(os.path.join(log_dir, "*.log")):
        try:
            with open(log_file) as f:
                content = f.read()

            case_name = Path(log_file).stem
            base_op_name = case_name.split('.')[-1]
            op_name, time_pattern = get_op_pattern(base_op_name, get_backward)

            # First find all shape lines and their positions
            shape_matches = list(re.finditer(r"(shape\s*[:=].*?)(?=\n\S|$)", content))
            shape_lines = [match.group(0) for match in shape_matches]
            shape_positions = [match.start() for match in shape_matches]
            # Parse all E2E forward times in the log
            E2E_forward_times = []
            E2E_total_times = []
            e2e_forward_time_matches = re.finditer(r"E2E forward time:\s*(\d+\.?\d*)", content)
            for match in e2e_forward_time_matches:
                time_val = float(match.group(1)) * 1_000_000
                time_pos = match.start()
                # Find the preceding shape for this E2E time
                preceding_shape_idx = bisect.bisect_right(shape_positions, time_pos) - 1
                if preceding_shape_idx >= 0:
                    E2E_forward_times.append((preceding_shape_idx, time_val))

            e2e_total_time_matches = re.finditer(r"E2E total time:\s*(\d+\.?\d*)", content)
            for match in e2e_total_time_matches:
                time_val = float(match.group(1)) * 1_000_000
                time_pos = match.start()
                # Find the preceding shape for this E2E time
                preceding_shape_idx = bisect.bisect_right(shape_positions, time_pos) - 1
                if preceding_shape_idx >= 0:
                    E2E_total_times.append((preceding_shape_idx, time_val))

            # Determine if we need the E2E time columns
            has_e2e_forward = len(E2E_forward_times) > 0
            has_e2e_total = len(E2E_total_times) > 0
            columns = base_columns.copy()
            if has_e2e_forward:
                columns.append("E2E forward time(us)")
            if has_e2e_total:
                columns.append("E2E total time(us)")

            if get_backward and base_op_name == "l1_loss":
                process_l1_loss(content, case_name, data, columns)
                continue

            # Find all time matches and their positions
            time_matches = []
            for match in re.finditer(fr"{time_pattern}.*?(?:\s+\S+){{8}}\s+(\d+\.?\d*)([a-zA-Z]*)", content):
                time = match.group(1)
                unit = match.group(2)
                time_pos = match.start()
                # Find the preceding shape for this time
                preceding_shape_idx = bisect.bisect_right(shape_positions, time_pos) - 1
                if preceding_shape_idx >= 0:
                    time_matches.append((time, unit, preceding_shape_idx))

            # Create mappings from shape index to E2E times
            shape_to_e2e_forward = {}
            for shape_idx, time in E2E_forward_times:
                shape_to_e2e_forward[shape_idx] = time

            shape_to_e2e_total = {}
            for shape_idx, time in E2E_total_times:
                shape_to_e2e_total[shape_idx] = time

            # time_matches = extract_times(content, time_pattern, get_backward)
            # shape_lines = re.findall(r"(shape\s*[:=].*?)(?=\n\S|$)", content)
            # Process matched times, skipping zeros but maintaining shape relationships
            processed_shapes = set()
            for time, unit, shape_idx in time_matches:
                time_us = convert_to_us(float(time), unit)
                if time_us == 0:
                    continue

                shape_text = shape_lines[shape_idx]
                if shape_idx in processed_shapes:
                    continue
                processed_shapes.add(shape_idx)
                params = extract_params(shape_text)

                if get_backward and params.get("backward", "False") == "False":
                    continue

                record = create_record(params, case_name, op_name, str(get_backward), time_us)

                # Add E2E times if available for this specific shape
                if has_e2e_forward:
                    record["E2E forward time(us)"] = shape_to_e2e_forward.get(shape_idx, "")
                if has_e2e_total:
                    record["E2E total time(us)"] = shape_to_e2e_total.get(shape_idx, "")

                data.append([record.get(col, "") for col in columns])

        except Exception as e:
            print(f"Warning: Error processing {log_file} - {str(e)}")
            continue

    return pd.DataFrame(data, columns=columns) if data else pd.DataFrame()

def get_op_pattern(base_op_name: str, get_backward: bool) -> tuple:
    op_name_map = {
        'forward': {
            'batch_norm': ('aten::batch_norm', 'aten::batch_norm'),
            'unique': ('unique2', 'unique2'),
            'fractional_max_pool2d': ('fractional_max_pool2d', r'\bfractional_max_pool2d\b'),
            'fractional_max_pool3d': ('fractional_max_pool3d', r'\bfractional_max_pool3d\b'),
            'adaptive_max_pool2d': ('adaptive_max_pool2d', r'\badaptive_max_pool2d\b'),
            'max_pool3d': ('max_pool3d_with_indices', 'max_pool3d_with_indices '),
            'max_pool2d': ('max_pool2d_with_indices', 'max_pool2d_with_indices '),
            'exponential': ('exponential_', r'\bexponential_\b'),
            'geometric': ('geometric_', r'\bgeometric_\b'),
            'uniform': ('uniform_', r'\buniform_\b'),
            'random': ('random_', r'\brandom_\b'),
            'log_normal': ('log_normal_', r'\blog_normal_\b'),
            'normal': ('normal_', r'\bnormal_\b'),
            'bernoulli': ('bernoulli_', r'\bbernoulli_\b'),
            'cauchy': ('cauchy_', r'\bcauchy_\b'),
            'embedding_bag': ('_embedding_bag', r'\b_embedding_bag\b'),
            'nonzero': ('nonzero', r'\bnonzero\b'),
            'index_fill': ('index_fill_', r'\bindex_fill_\b'),
            'index_put': ('index_put_', r'\bindex_put_\b'),
            'put': ('put_', r'\bput_\b'),
            'masked_fill': ('masked_fill_', r'\bmasked_fill_\b'),
            'scatter_add': ('scatter_add_', r'\bscatter_add_\b'),
            'scatter': ('scatter_', r'\bscatter_\b'),
            'dropout': ('dropout', r'\bdropout\b'),
            'layer_norm': ('layer_norm', r'\blayer_norm\b'),
            'ctc_loss': ('_ctc_loss', r'\b_ctc_loss\b'),
            'adaptive_avg_pool2d': ('adaptive_avg_pool2d', r'\badaptive_avg_pool2d\b'),
            'softmax': ('aten::softmax', 'aten::softmax'),
            'group_norm': ('aten::group_norm', 'aten::group_norm'),
        },
        'backward': {
            'batch_norm': ('batch_norm_backward', 'batch_norm_backward'),
            'fractional_max_pool2d': ('fractional_max_pool2d_backward', r'\bfractional_max_pool2d_backward\b'),
            'fractional_max_pool3d': ('fractional_max_pool3d_backward', r'\bfractional_max_pool3d_backward\b'),
            'adaptive_max_pool2d': ('adaptive_max_pool2d_backward', r'\badaptive_max_pool2d_backward\b'),
            'max_unpool2d': ('MaxUnpool2DBackward0', 'MaxUnpool2DBackward0 '),
            'max_unpool3d': ('MaxUnpool3DBackward0', 'MaxUnpool3DBackward0 '),
            'max_pool3d': ('max_pool3d_with_indices_backward', 'max_pool3d_with_indices_backward '),
            'max_pool2d': ('max_pool2d_with_indices_backward', 'max_pool2d_with_indices_backward '),
            'col2im': ('Col2ImBackward0', 'Col2ImBackward0 '),
            'im2col': ('Im2ColBackward0', 'Im2ColBackward0 '),
            'flip': ('FlipBackward0', 'FlipBackward0 '),
            'matmul': ('MmBackward0', 'MmBackward0 '),
            'roll': ('RollBackward0', 'RollBackward0 '),
            'softmax': ('softmax_backward_data', 'softmax_backward_data '),
            'remainder': ('RemainderBackward0', 'RemainderBackward0 '),
            'smooth_l1_loss': ('smooth_l1_loss_backward', 'smooth_l1_loss_backward'),
            'l1_loss': ('l1_loss', 'l1_loss'),
        }
    }

    mode = 'backward' if get_backward else 'forward'

    for op_pattern in op_name_map[mode]:
        if op_pattern in base_op_name:
            return op_name_map[mode][op_pattern]

    if get_backward:
        return (f"{base_op_name}_backward", f"{base_op_name}_backward ")
    else:
        return (base_op_name, f"{base_op_name} ")

def process_l1_loss(content: str, case_name: str, data: List, columns: List):
    shape_matches = list(re.finditer(r"(shape\s*[:=].*?)(?=\n\S|$)", content))
    shape_lines = [match.group(0) for match in shape_matches]
    shape_positions = [match.start() for match in shape_matches]

    # Parse E2E times if present in columns
    has_e2e_forward = "E2E forward time(us)" in columns
    has_e2e_total = "E2E total time(us)" in columns

    # Create mappings from shape index to E2E times
    shape_to_e2e_forward = {}
    shape_to_e2e_total = {}

    if has_e2e_forward:
        e2e_forward_time_matches = re.finditer(r"E2E forward time:\s*(\d+\.?\d*)", content)
        for match in e2e_forward_time_matches:
            time_val = float(match.group(1)) * 1_000_000
            time_pos = match.start()
            preceding_shape_idx = bisect.bisect_right(shape_positions, time_pos) - 1
            if preceding_shape_idx >= 0:
                shape_to_e2e_forward[preceding_shape_idx] = time_val

    if has_e2e_total:
        e2e_total_time_matches = re.finditer(r"E2E total time:\s*(\d+\.?\d*)", content)
        for match in e2e_total_time_matches:
            time_val = float(match.group(1)) * 1_000_000
            time_pos = match.start()
            preceding_shape_idx = bisect.bisect_right(shape_positions, time_pos) - 1
            if preceding_shape_idx >= 0:
                shape_to_e2e_total[preceding_shape_idx] = time_val

    filtered_content = [line for line in content.split('\n') if "autograd::engine" not in line]
    filtered_content = '\n'.join(filtered_content)
    abs_times = re.findall(r"AbsBackward0(?:\s+\S+){8}\s+(\d+\.?\d*)([a-zA-Z]*)", filtered_content)
    mean_times = re.findall(r"MeanBackward0(?:\s+\S+){8}\s+(\d+\.?\d*)([a-zA-Z]*)", filtered_content)
    shape_lines = re.findall(r"(shape\s*[:=].*?)(?=\n\S|$)", content)

    for i, (time, unit) in enumerate(abs_times[:6]):
        if i >= len(shape_lines):
            break
        time_us = convert_to_us(float(time), unit)
        params = extract_params(shape_lines[i])
        record = create_record(params, case_name, "AbsBackward0", "True", time_us)

        # Add E2E times if available
        if has_e2e_forward:
            record["E2E forward time(us)"] = shape_to_e2e_forward.get(i, "")
        if has_e2e_total:
            record["E2E total time(us)"] = shape_to_e2e_total.get(i, "")

        data.append([record.get(col, "") for col in columns])

    for i, (time, unit) in enumerate(mean_times):
        if (i + 6) >= len(shape_lines):
            break
        time_us = convert_to_us(float(time), unit)
        params = extract_params(shape_lines[i + 6])
        record = create_record(params, case_name, "MeanBackward0", "True", time_us)

        # Add E2E times if available
        if has_e2e_forward:
            record["E2E forward time(us)"] = shape_to_e2e_forward.get(i + 6, "")
        if has_e2e_total:
            record["E2E total time(us)"] = shape_to_e2e_total.get(i + 6, "")

        data.append([record.get(col, "") for col in columns])

def extract_times(content: str, pattern: str, get_backward: bool) -> List:
    lines = content.split('\n')
    results = []
    for line in lines:
        if get_backward and any(x in pattern for x in ["Col2ImBackward0", "Im2ColBackward0",
                                                       "FlipBackward0", "MmBackward0",
                                                       "RollBackward0", "MaxUnpool2DBackward0", "MaxUnpool3DBackward0"]):
            if "autograd::engine" in line:
                continue

        match = re.search(fr"{pattern}.*?(?:\s+\S+){{8}}\s+(\d+\.?\d*)([a-zA-Z]*)", line)
        if match:
            results.append((match.group(1), match.group(2)))

    return results

def create_record(params: Dict, case_name: str, op_name: str,
                  backward: str, time_us: float) -> Dict:
    return {
        "P": params.get("p", ""),
        **params,
        "case_name": case_name,
        "op_name": op_name,
        "backward": backward,
        "time(us)": time_us
    }

def convert_to_us(value: float, unit: str) -> float:
    unit = unit.lower()
    if unit == "ms":
        return value * 1000
    elif unit == "s":
        return value * 1_000_000
    return value

def extract_params(text: str) -> Dict:
    params = {}
    pairs = re.split(r'[;]', text.strip())

    for pair in pairs:
        if not any(delim in pair for delim in [':', '=']):
            continue

        delim = ':' if ':' in pair else '='
        key, value = pair.split(delim, 1)
        key = key.strip().lower()
        value = value.strip()

        if key in ['p', 'P']:
            key = 'p'
        elif key in ['dims', 'dim']:
            key = 'dim'
        elif key in ['shape']:
            key = 'shape'

        params[key] = value

    return params

def save_reports(df: pd.DataFrame, csv_path: str):
    os.makedirs(os.path.dirname(csv_path) or '.', exist_ok=True)
    df.to_csv(csv_path, index=False, sep=';')
    excel_path = csv_path.replace('.csv', '.xlsx')
    df.to_excel(excel_path, index=False)


if __name__ == "__main__":
    main()
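A note on the parsing strategy in microbench_summary.py: each per-op or E2E timing match is attributed to the nearest preceding "shape" line by binary-searching the sorted list of shape-line start offsets. A standalone sketch of that association step, with made-up offsets:

import bisect

shape_positions = [0, 120, 300]  # start offsets of "shape: ..." lines (made up)
time_pos = 150                   # offset where a timing line matched (made up)

# Index of the last shape line that starts at or before the timing line
idx = bisect.bisect_right(shape_positions, time_pos) - 1
print(idx)  # 1 -> the time is attributed to the second shape block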

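Likewise, extract_params assumes each shape line is a semicolon-separated list of key:value or key=value pairs. A hypothetical input and the resulting dict:

line = "shape: (8, 3, 224, 224); dim = 1; backward: False"  # illustrative input
print(extract_params(line))
# -> {'shape': '(8, 3, 224, 224)', 'dim': '1', 'backward': 'False'}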