Skip to content

Commit 4bf0e36

Browse files
committed
[llvm-advisor] Add performance and timing analysis parsers
- implement time-trace parser
- add runtime trace parser for offloading analysis
- add compilation phases parser for build pipeline analysis
- add ftime-report parser for detailed timing breakdowns
1 parent a40b66f commit 4bf0e36

File tree

4 files changed

+614
-0
lines changed

4 files changed

+614
-0
lines changed
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
import re
10+
from typing import Dict, List, Any
11+
from .base_parser import BaseParser
12+
from ..models import FileType, ParsedFile
13+
14+
15+
class CompilationPhasesParser(BaseParser):
    """Parser for clang compilation-phase dumps.

    The kind of input is decided once per file:

    * ``bindings`` - at least one line has the ``-ccc-print-bindings`` shape
      ``# "target" - "tool", inputs: [...], output: "..."``.
    * ``phases`` - no binding line is present, but a line equal to
      ``COMPILATION PHASES:`` is; the ``clang version``, ``Target:``,
      ``Thread model:`` and ``InstalledDir:`` lines are captured.
    * ``unknown`` - neither of the above.

    Every remaining ``name: value`` line is inspected for a duration.  All
    durations are normalised to milliseconds.
    """

    # Extracts each double-quoted item of a binding's ``inputs: [...]`` list.
    _QUOTED_ITEM = re.compile(r'"([^"]*)"')

    # Header labels captured in "phases" files, with the result key that
    # receives the text following the label.
    _INFO_PREFIXES = (
        ("Target:", "target"),
        ("Thread model:", "thread_model"),
        ("InstalledDir:", "installed_dir"),
    )

    def __init__(self):
        super().__init__(FileType.COMPILATION_PHASES)
        # Pattern for -ccc-print-bindings output: # "target" - "tool", inputs: [...], output: "..."
        self.binding_pattern = re.compile(
            r'^#\s+"([^"]+)"\s+-\s+"([^"]+)",\s+inputs:\s+\[([^\]]*)\],\s+output:\s+"([^"]*)"'
        )
        # Fallback patterns for other compilation phase formats
        self.phase_pattern = re.compile(r"^(\w+):\s*(.+)")
        # NOTE(review): the unit is not anchored on a word boundary, so text
        # such as "3 sources" is read as 3 s - confirm whether that matters.
        self.timing_pattern = re.compile(r"(\d+(?:\.\d+)?)\s*(ms|s|us)")

    def parse(self, file_path: str) -> ParsedFile:
        """Read ``file_path`` and return its parsed representation.

        Args:
            file_path: Path of the dump to parse.

        Returns:
            The object built by ``create_parsed_file``.  On success its data
            is the dictionary produced by ``_parse_compilation_phases``;
            when the file cannot be read, or parsing raises, the data is an
            empty list and the metadata holds an ``"error"`` message.
        """
        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, [], {"error": "File too large or unreadable"}
            )

        try:
            phases_data = self._parse_compilation_phases(content.split("\n"))

            # Phases without a recognised duration carry None and are skipped.
            total_time = sum(
                phase["duration"]
                for phase in phases_data["phases"]
                if phase.get("duration") is not None
            )

            metadata = {
                "file_size": self.get_file_size(file_path),
                "total_phases": len(phases_data["phases"]),
                "total_bindings": len(phases_data["bindings"]),
                "unique_tools": len(phases_data["tool_counts"]),
                "total_time": total_time,
                "time_unit": phases_data["time_unit"],
                "tool_counts": phases_data["tool_counts"],
            }

            return self.create_parsed_file(file_path, phases_data, metadata)

        except Exception as e:
            # Best-effort boundary: report the failure instead of raising.
            return self.create_parsed_file(file_path, [], {"error": str(e)})

    def _parse_compilation_phases(self, lines: List[str]) -> Dict[str, Any]:
        """Turn the raw lines of a dump into a structured dictionary.

        Args:
            lines: Lines of the file; surrounding whitespace and blank lines
                are ignored.

        Returns:
            A dictionary with the keys ``phases``, ``bindings``,
            ``tool_counts``, ``time_unit`` (always ``"ms"``), ``summary``,
            ``clang_version``, ``target``, ``thread_model``,
            ``installed_dir`` and ``file_type``.
        """
        phases_data: Dict[str, Any] = {
            "phases": [],
            "bindings": [],
            "tool_counts": {},
            "time_unit": "ms",
            "summary": {},
            "clang_version": None,
            "target": None,
            "thread_model": None,
            "installed_dir": None,
            "file_type": None,  # Track what type of file this is
        }

        stripped_lines = [text for text in (raw.strip() for raw in lines) if text]

        # The file type must be known before the main pass because it decides
        # how lines that precede the first binding/header are interpreted.
        file_type = self._detect_file_type(stripped_lines)
        phases_data["file_type"] = file_type

        tool_counts = phases_data["tool_counts"]
        for line in stripped_lines:
            if file_type == "bindings":
                binding = self._parse_binding(line)
                if binding is not None:
                    phases_data["bindings"].append(binding)
                    tool = binding["tool"]
                    tool_counts[tool] = tool_counts.get(tool, 0) + 1
                    continue
            elif file_type == "phases":
                if self._record_compiler_info(line, phases_data):
                    continue

            phase = self._parse_phase(line)
            if phase is not None:
                phases_data["phases"].append(phase)

        durations = [
            p["duration"] for p in phases_data["phases"] if p["duration"] is not None
        ]
        phases_data["summary"] = {
            "total_time": sum(durations) if durations else 0,
            "avg_time": sum(durations) / len(durations) if durations else 0,
            "max_time": max(durations) if durations else 0,
            "min_time": min(durations) if durations else 0,
            "total_bindings": len(phases_data["bindings"]),
            "unique_tools": len(tool_counts),
            "tool_counts": tool_counts,
        }

        return phases_data

    def _detect_file_type(self, lines: List[str]) -> str:
        """Classify already-stripped lines as bindings, phases or unknown."""
        if any(self.binding_pattern.match(line) for line in lines):
            return "bindings"
        if "COMPILATION PHASES:" in lines:
            return "phases"
        return "unknown"

    def _parse_binding(self, line: str):
        """Return the binding entry described by ``line``, or None."""
        match = self.binding_pattern.match(line)
        if match is None:
            return None
        target, tool, inputs_str, output = match.groups()
        return {
            "target": target,
            "tool": tool,
            # Quoted items of the inputs list: "file1", "file2", ...
            "inputs": self._QUOTED_ITEM.findall(inputs_str),
            "output": output,
        }

    def _record_compiler_info(self, line: str, phases_data: Dict[str, Any]) -> bool:
        """Store a compiler header line in ``phases_data``.

        Returns:
            True when ``line`` was a recognised header line and was consumed.
        """
        if line.startswith("clang version"):
            # The version is kept as the complete line.
            phases_data["clang_version"] = line
            return True
        for prefix, key in self._INFO_PREFIXES:
            if line.startswith(prefix):
                # Drop only the leading label; the value itself may legally
                # contain the same text again.
                phases_data[key] = line[len(prefix):].strip()
                return True
        return False

    def _parse_phase(self, line: str):
        """Return a phase entry for a ``name: value`` line, or None.

        Lines of the form ``word: text`` are always recorded, with a
        duration of None when no timing is found.  Any other line that
        contains a colon (for example a multi-word name) is recorded only
        when its value carries a timing.
        """
        phase_match = self.phase_pattern.match(line)
        if phase_match:
            name, info = phase_match.groups()
            return self._phase_entry(name, info, self._duration_ms(info))

        # Handle simple timing lines whose name is not a single word.
        name, colon, info = line.partition(":")
        if not colon:
            return None
        info = info.strip()
        duration = self._duration_ms(info)
        if duration is None:
            return None
        return self._phase_entry(name.strip(), info, duration)

    def _duration_ms(self, text: str):
        """Return the first duration found in ``text`` in milliseconds, or None."""
        timing_match = self.timing_pattern.search(text)
        if timing_match is None:
            return None
        value = float(timing_match.group(1))
        unit = timing_match.group(2)
        if unit == "s":
            return value * 1000
        if unit == "us":
            return value / 1000
        return value

    @staticmethod
    def _phase_entry(name: str, info: str, duration) -> Dict[str, Any]:
        """Build a phase record; ``duration`` is in milliseconds or None."""
        return {
            "name": name,
            "info": info,
            "duration": duration,
            # The duration has been normalised, so the unit is always "ms";
            # the text as written in the file remains available in "info".
            "time_unit": "ms",
        }
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
import re
10+
from typing import Dict, List, Any
11+
from .base_parser import BaseParser
12+
from ..models import FileType, ParsedFile
13+
14+
15+
class FTimeReportParser(BaseParser):
    """Parser for the timing tables printed by ``-ftime-report``.

    A table starts at the column header containing ``---User Time---`` and
    ``--System Time--`` and ends at the next line containing ``===``.
    Each row holds four ``seconds (percent%)`` pairs - user, system,
    user+system and wall - followed by the timer name.

    NOTE(review): LLVM appears to omit a column from the header when its
    total is zero; such tables are not recognised here - confirm whether
    they need to be supported.
    """

    # Name of the aggregate row that closes each table.  It is not a phase
    # and would otherwise always rank as the slowest entry.
    _TOTAL_ROW_NAME = "Total"

    def __init__(self):
        super().__init__(FileType.FTIME_REPORT)
        # Patterns to match ftime-report output
        # Pattern for: 0.0112 (100.0%) 0.0020 ( 15.2%) 0.0132 (100.0%) 0.0132 (100.0%) Front end
        # Percentages are printed right-aligned in a fixed width, so values
        # below 100 are preceded by spaces inside the parentheses.
        value_with_share = r"(\d+\.\d+)\s+\(\s*(\d+\.\d+)%\)"
        self.timing_line_pattern = re.compile(
            r"^\s*" + r"\s+".join([value_with_share] * 4) + r"\s+(.+)$"
        )
        self.total_pattern = re.compile(r"Total Execution Time:\s+(\d+\.\d+)\s+seconds")

    def parse(self, file_path: str) -> ParsedFile:
        """Read ``file_path`` and return its parsed representation.

        Args:
            file_path: Path of the report to parse.

        Returns:
            The object built by ``create_parsed_file``.  On success its data
            is the dictionary produced by ``_parse_ftime_report``; when the
            file cannot be read, or parsing raises, the data is an empty
            list and the metadata holds an ``"error"`` message.
        """
        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, [], {"error": "File too large or unreadable"}
            )

        try:
            timing_data = self._parse_ftime_report(content.split("\n"))

            total_time = timing_data.get("total_execution_time", 0)
            # Entries are sorted slowest first, so index 0 is the top consumer.
            timing_entries = timing_data.get("timings", [])
            top_entry = timing_entries[0] if timing_entries else None

            metadata = {
                "file_size": self.get_file_size(file_path),
                "total_execution_time": total_time,
                "timing_entries_count": len(timing_entries),
                "top_time_consumer": top_entry["name"] if top_entry else None,
                "top_time_percentage": top_entry["percentage"] if top_entry else 0,
            }

            return self.create_parsed_file(file_path, timing_data, metadata)

        except Exception as e:
            # Best-effort boundary: report the failure instead of raising.
            return self.create_parsed_file(file_path, [], {"error": str(e)})

    def _parse_ftime_report(self, lines: List[str]) -> Dict[str, Any]:
        """Extract the timing rows and the total execution time.

        Args:
            lines: Lines of the report; surrounding whitespace and blank
                lines are ignored.

        Returns:
            A dictionary with ``timings`` (row dictionaries sorted by wall
            time, slowest first, without the aggregate ``Total`` rows),
            ``total_execution_time`` (seconds; when the report has several
            ``Total Execution Time`` lines the last one is kept) and
            ``summary`` (empty when no row was found).
        """
        timing_data: Dict[str, Any] = {
            "timings": [],
            "total_execution_time": 0,
            "summary": {},
        }

        in_table = False

        for raw_line in lines:
            line = raw_line.strip()
            if not line:
                continue

            total_match = self.total_pattern.search(line)
            if total_match:
                timing_data["total_execution_time"] = float(total_match.group(1))
                continue

            # The column header opens a timing table.
            if "---User Time---" in line and "--System Time--" in line:
                in_table = True
                continue

            if not in_table:
                continue

            # The banner of the next report section closes the table.
            if "===" in line:
                in_table = False
                continue

            row = self.timing_line_pattern.match(line)
            if row is None:
                continue

            *figures, name = row.groups()
            (
                user_time,
                user_percent,
                system_time,
                system_percent,
                total_time,
                total_percent,
                wall_time,
                wall_percent,
            ) = (float(figure) for figure in figures)
            name = name.strip()

            if name == self._TOTAL_ROW_NAME:
                continue

            timing_data["timings"].append(
                {
                    "name": name,
                    "user_time": user_time,
                    "user_percentage": user_percent,
                    "system_time": system_time,
                    "system_percentage": system_percent,
                    "total_time": total_time,
                    "total_percentage": total_percent,
                    "wall_time": wall_time,
                    "wall_percentage": wall_percent,
                    "time_seconds": wall_time,  # Use wall time as primary metric
                    "percentage": wall_percent,  # Use wall percentage as primary metric
                    "time_ms": wall_time * 1000,
                }
            )

        # Sort timings by time (descending)
        timing_data["timings"].sort(key=lambda x: x["time_seconds"], reverse=True)

        if timing_data["timings"]:
            slowest = timing_data["timings"][0]
            fastest = timing_data["timings"][-1]
            timing_data["summary"] = {
                "total_phases": len(timing_data["timings"]),
                "slowest_phase": slowest["name"],
                "slowest_time": slowest["time_seconds"],
                "fastest_phase": fastest["name"],
                "fastest_time": fastest["time_seconds"],
            }

        return timing_data
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
from .time_trace_parser import TimeTraceParser
10+
from ..models import FileType
11+
12+
13+
class RuntimeTraceParser(TimeTraceParser):
    """Time-trace parser that reports its files as runtime traces.

    All behaviour is inherited from ``TimeTraceParser``; only the
    ``file_type`` tag differs.
    """

    def __init__(self):
        # Runtime traces share the Chrome trace format used by time-trace,
        # so the parent is initialised unchanged and only the tag is replaced.
        super().__init__()
        self.file_type = FileType.RUNTIME_TRACE

0 commit comments

Comments
 (0)