Skip to content

Commit 5f047d9

Browse files
committed
[llvm-advisor] Add core data models and artifact collection system
- implement data models for compilation artifacts - add artifact collector for discovering and organizing build outputs - support for multiple file types and compilation units with metadata
1 parent 0ba6b35 commit 5f047d9

File tree

3 files changed

+391
-0
lines changed

3 files changed

+391
-0
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
#
9+
# This is the artifact collector module. It provides logic for discovering and
10+
# parsing build artifacts for LLVM Advisor analysis.
11+
#
12+
# ===----------------------------------------------------------------------===//
13+
14+
import os
15+
from typing import Dict, List, Any, Optional
16+
from pathlib import Path
17+
18+
from .models import FileType, CompilationUnit, ParsedFile
19+
from .parsers import (
20+
RemarksParser,
21+
TimeTraceParser,
22+
DiagnosticsParser,
23+
ASTParser,
24+
PGOProfileParser,
25+
XRayParser,
26+
StaticAnalyzerParser,
27+
IRParser,
28+
ObjdumpParser,
29+
IncludeTreeParser,
30+
AssemblyParser,
31+
PreprocessedParser,
32+
SARIFParser,
33+
MacroExpansionParser,
34+
DependenciesParser,
35+
BinarySizeParser,
36+
DebugParser,
37+
SymbolsParser,
38+
RuntimeTraceParser,
39+
CompilationPhasesParser,
40+
FTimeReportParser,
41+
VersionInfoParser,
42+
PreprocessedParser as SourcesParser, # Reuse for simple text files
43+
)
44+
45+
46+
class ArtifactCollector:
    """Discovers and parses LLVM Advisor build artifacts.

    Layout assumed on disk: an advisor directory whose subdirectories are
    compilation units; each unit directory contains timestamped run
    directories named ``<unit>_<timestamp>``; each run directory contains
    one subdirectory per artifact type (``remarks/``, ``ir/``, ...).
    """

    def __init__(self):
        # One parser instance per artifact file type.
        self.parsers = {
            FileType.REMARKS: RemarksParser(),
            FileType.TIME_TRACE: TimeTraceParser(),
            FileType.DIAGNOSTICS: DiagnosticsParser(),
            FileType.AST_JSON: ASTParser(),
            FileType.PGO_PROFILE: PGOProfileParser(),
            FileType.XRAY: XRayParser(),
            FileType.STATIC_ANALYZER: StaticAnalyzerParser(),
            FileType.IR: IRParser(),
            FileType.OBJDUMP: ObjdumpParser(),
            FileType.INCLUDE_TREE: IncludeTreeParser(),
            FileType.ASSEMBLY: AssemblyParser(),
            FileType.PREPROCESSED: PreprocessedParser(),
            FileType.STATIC_ANALYSIS_SARIF: SARIFParser(),
            FileType.MACRO_EXPANSION: MacroExpansionParser(),
            FileType.DEPENDENCIES: DependenciesParser(),
            FileType.BINARY_SIZE: BinarySizeParser(),
            FileType.DEBUG: DebugParser(),
            FileType.SYMBOLS: SymbolsParser(),
            FileType.RUNTIME_TRACE: RuntimeTraceParser(),
            FileType.COMPILATION_PHASES: CompilationPhasesParser(),
            FileType.FTIME_REPORT: FTimeReportParser(),
            FileType.VERSION_INFO: VersionInfoParser(),
            FileType.SOURCES: SourcesParser(),
        }

        # Map artifact directory names to file types. Every FileType's
        # enum value is exactly its on-disk directory name, so the table
        # is derived rather than written out twice.
        self.dir_to_type = {file_type.value: file_type for file_type in self.parsers}

    def discover_compilation_units(self, advisor_dir: str) -> List[CompilationUnit]:
        """Discover all compilation units in the .llvm-advisor directory."""
        root = Path(advisor_dir)
        if not root.exists():
            return []

        units: List[CompilationUnit] = []
        # Each subdirectory represents one compilation unit.
        for entry in root.iterdir():
            if entry.is_dir():
                units.extend(self._scan_compilation_unit_with_runs(entry))
        return units

    def _scan_compilation_unit_with_runs(self, unit_dir: Path) -> List[CompilationUnit]:
        """Scan a compilation unit directory that contains timestamped runs.

        Only the most recent run (timestamps compare lexicographically via
        the directory name) is parsed; the names of all runs are recorded
        in the returned unit's metadata.
        """
        prefix = unit_dir.name + "_"
        run_dirs = [
            child
            for child in unit_dir.iterdir()
            if child.is_dir() and child.name.startswith(prefix)
        ]
        if not run_dirs:
            # No timestamped runs found; skip this unit entirely.
            return []

        # Newest first: names end in the timestamp, so string order works.
        run_dirs.sort(key=lambda d: d.name, reverse=True)
        latest = run_dirs[0]

        unit = self._scan_single_run(latest, unit_dir.name)
        if unit is None:
            return []

        # Record which run was used and what alternatives exist.
        unit.metadata = getattr(unit, "metadata", {})
        unit.metadata["run_timestamp"] = latest.name.split("_", 1)[-1]
        unit.metadata["run_path"] = str(latest)
        unit.metadata["available_runs"] = [d.name for d in run_dirs]
        return [unit]

    def _scan_single_run(
        self, run_dir: Path, unit_name: str
    ) -> Optional[CompilationUnit]:
        """Collect artifact file paths from one run directory.

        Returns None when no recognized artifact directory contains files.
        """
        artifacts: Dict[FileType, List[str]] = {}

        for sub_dir in run_dir.iterdir():
            if not sub_dir.is_dir():
                continue

            file_type = self.dir_to_type.get(sub_dir.name)
            if file_type is None:
                # Unknown directory name: not an artifact category we track.
                continue

            # Gather every regular file below this artifact directory.
            found = [str(p) for p in sub_dir.rglob("*") if p.is_file()]
            if found:
                artifacts[file_type] = found

        if not artifacts:
            return None
        return CompilationUnit(name=unit_name, path=str(run_dir), artifacts=artifacts)

    def parse_compilation_unit(
        self, unit: CompilationUnit
    ) -> Dict[FileType, List[ParsedFile]]:
        """Parse all artifacts for a compilation unit.

        A parser failure on one file never aborts the unit: the failure is
        captured as a ParsedFile with an "error" entry in its metadata.
        """
        parsed_artifacts: Dict[FileType, List[ParsedFile]] = {}

        for file_type, file_paths in unit.artifacts.items():
            parser = self.parsers.get(file_type)
            if parser is None:
                continue

            parsed_files: List[ParsedFile] = []
            for file_path in file_paths:
                try:
                    if parser.can_parse(file_path):
                        parsed_files.append(parser.parse(file_path))
                except Exception as exc:
                    # Record the failure instead of dropping the file.
                    parsed_files.append(
                        ParsedFile(
                            file_type=file_type,
                            file_path=file_path,
                            data={},
                            metadata={"error": f"Failed to parse: {str(exc)}"},
                        )
                    )

            if parsed_files:
                parsed_artifacts[file_type] = parsed_files

        return parsed_artifacts

    def parse_all_units(
        self, advisor_dir: str
    ) -> Dict[str, Dict[FileType, List[ParsedFile]]]:
        """Parse all compilation units in the advisor directory, keyed by unit name."""
        parsed_units: Dict[str, Dict[FileType, List[ParsedFile]]] = {}
        for unit in self.discover_compilation_units(advisor_dir):
            artifacts = self.parse_compilation_unit(unit)
            if artifacts:
                parsed_units[unit.name] = artifacts
        return parsed_units

    def get_summary_statistics(
        self, parsed_units: Dict[str, Dict[FileType, List[ParsedFile]]]
    ) -> Dict[str, Any]:
        """Generate summary statistics for all parsed data.

        Returns a dict with global totals ("total_units", "total_files",
        "errors"), per-file-type counts ("file_types"), and the same
        breakdown per unit under "units".
        """
        stats: Dict[str, Any] = {
            "total_units": len(parsed_units),
            "total_files": 0,
            "file_types": {},
            "errors": 0,
            "units": {},
        }

        for unit_name, artifacts in parsed_units.items():
            unit_stats: Dict[str, Any] = {
                "file_types": {},
                "total_files": 0,
                "errors": 0,
            }

            for file_type, parsed_files in artifacts.items():
                type_name = file_type.value
                file_count = len(parsed_files)
                # A file counts as an error when its parser recorded one.
                error_count = sum("error" in f.metadata for f in parsed_files)

                unit_stats["file_types"][type_name] = {
                    "count": file_count,
                    "errors": error_count,
                }
                unit_stats["total_files"] += file_count
                unit_stats["errors"] += error_count

                # Fold the same numbers into the global per-type tally.
                entry = stats["file_types"].setdefault(
                    type_name, {"count": 0, "errors": 0}
                )
                entry["count"] += file_count
                entry["errors"] += error_count

            stats["units"][unit_name] = unit_stats
            stats["total_files"] += unit_stats["total_files"]
            stats["errors"] += unit_stats["errors"]

        return stats
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Any, Optional
12+
13+
14+
class FileType(Enum):
    """Categories of build artifacts LLVM Advisor can collect.

    Each member's value doubles as the on-disk subdirectory name that
    holds artifacts of that type inside a run directory (the collector
    maps directory names back to members via these values).
    """

    REMARKS = "remarks"
    TIME_TRACE = "time-trace"
    DIAGNOSTICS = "diagnostics"
    AST_JSON = "ast-json"
    PGO_PROFILE = "pgo-profile"
    XRAY = "xray"
    STATIC_ANALYZER = "static-analyzer"
    IR = "ir"
    OBJDUMP = "objdump"
    INCLUDE_TREE = "include-tree"
    ASSEMBLY = "assembly"
    PREPROCESSED = "preprocessed"
    STATIC_ANALYSIS_SARIF = "static-analysis-sarif"
    MACRO_EXPANSION = "macro-expansion"
    DEPENDENCIES = "dependencies"
    BINARY_SIZE = "binary-size"
    DEBUG = "debug"
    SYMBOLS = "symbols"
    RUNTIME_TRACE = "runtime-trace"
    COMPILATION_PHASES = "compilation-phases"
    FTIME_REPORT = "ftime-report"
    VERSION_INFO = "version-info"
    SOURCES = "sources"
38+
39+
40+
@dataclass
class SourceLocation:
    """A position in a source file; any component may be None when unknown."""

    file: Optional[str] = None
    line: Optional[int] = None
    column: Optional[int] = None
45+
46+
47+
@dataclass
class CompilationUnit:
    """One compilation unit plus the artifact files discovered for it.

    ``artifacts`` maps each FileType to the file paths found under the
    corresponding artifact directory of the unit's selected run.
    ``metadata`` carries run-level details (the collector stores
    "run_timestamp", "run_path", and "available_runs" there).
    """

    name: str
    path: str
    artifacts: Dict[FileType, List[str]]
    # Fix: the original declared `metadata: Dict[str, Any] = None`, which
    # contradicts its own annotation. default_factory gives every instance
    # its own dict and makes the declared type true from construction.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Backward compatibility: callers that explicitly pass
        # metadata=None still end up with an empty dict, as before.
        if self.metadata is None:
            self.metadata = {}
57+
58+
59+
@dataclass
class ParsedFile:
    """The parsed contents of one artifact file."""

    file_type: FileType  # artifact category this file belongs to
    file_path: str  # path of the file that was parsed
    data: Any  # parser-specific payload; {} when parsing failed
    metadata: Dict[str, Any]  # an "error" key here marks a parse failure
65+
66+
67+
@dataclass
class Diagnostic:
    """A single compiler diagnostic."""

    level: str  # severity label; presumably "error"/"warning"/"note" — not constrained here
    message: str
    location: Optional[SourceLocation] = None
    code: Optional[str] = None  # diagnostic identifier, when the producer supplies one
73+
74+
75+
@dataclass
class Remark:
    """An optimization remark emitted by a compiler pass."""

    pass_name: str  # name of the pass that produced the remark
    function: str  # function the remark applies to
    message: str
    location: Optional[SourceLocation] = None
    # Annotation widened to Optional: the default is None and nothing
    # replaces it, so consumers must handle args being None.
    args: Optional[Dict[str, Any]] = None
82+
83+
84+
@dataclass
class TraceEvent:
    """One event from a time-trace profile."""

    name: str
    category: str
    phase: str  # event phase code; units/semantics follow the trace source — not validated here
    timestamp: int  # NOTE(review): time unit not established by this file — confirm against the parser
    duration: Optional[int] = None  # same (unconfirmed) unit as timestamp
    pid: Optional[int] = None
    tid: Optional[int] = None
    # Annotation widened to Optional: the default is None and nothing
    # replaces it, so consumers must handle args being None.
    args: Optional[Dict[str, Any]] = None
94+
95+
96+
@dataclass
class Symbol:
    """A symbol-table entry extracted from a binary."""

    name: str
    address: Optional[str] = None  # kept as text (presumably hex), not parsed to int
    size: Optional[int] = None
    type: Optional[str] = None  # shadows the builtin `type`; renaming would break callers
    section: Optional[str] = None
103+
104+
105+
@dataclass
class BinarySize:
    """Size of one binary section, optionally as a share of the total."""

    section: str
    size: int  # NOTE(review): presumably bytes — confirm against the binary-size parser
    percentage: Optional[float] = None
110+
111+
112+
@dataclass
class Dependency:
    """A directed dependency edge from one entity to another."""

    source: str
    target: str
    type: Optional[str] = None  # shadows the builtin `type`; renaming would break callers

0 commit comments

Comments
 (0)