Skip to content

Commit 0406a8b

Browse files
committed
[llvm-advisor] Add static analysis and profiling tool parsers
- Implement static analyzer parser for code quality analysis
- Add SARIF parser for the standardized analysis result format
- Add objdump parser for object file inspection
- Add PGO profile parser for profile-guided optimization data
- Add XRay parser for runtime tracing and profiling
- Add version info parser for toolchain metadata
1 parent a79cb5a commit 0406a8b

File tree

7 files changed

+490
-1
lines changed

7 files changed

+490
-1
lines changed
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
import re
10+
from typing import Dict, List, Any
11+
from .base_parser import BaseParser
12+
from ..models import FileType, ParsedFile, Symbol
13+
14+
15+
class ObjdumpParser(BaseParser):
    """Parse textual (llvm-)objdump output into structured data.

    Extracts symbol-table entries, disassembly rows, and header markers
    from a captured objdump dump and summarizes the counts.
    """

    def __init__(self):
        super().__init__(FileType.OBJDUMP)
        # Symbol-table rows: <addr> <scope l/g/w/!> <type d/D/f/F/o/O> <section> <size> <name>
        self.symbol_pattern = re.compile(
            r"^([0-9a-fA-F]+)\s+([lgw!])\s+([dDfFoO])\s+(\S+)\s+([0-9a-fA-F]+)\s+(.+)"
        )
        # Header row of the section table ("Idx Name Size VMA ...").
        # NOTE(review): this pattern is never applied in _parse_objdump_content,
        # so the "sections" list always stays empty — confirm whether section
        # parsing was intended.
        self.section_pattern = re.compile(
            r"^Idx\s+Name\s+Size\s+VMA\s+LMA\s+File Offset\s+Algn"
        )
        # Disassembly rows: "<addr>: <hex bytes>  <instruction>"
        self.disasm_pattern = re.compile(
            r"^\s*([0-9a-fA-F]+):\s+([0-9a-fA-F\s]+)\s+(.+)"
        )

    def parse(self, file_path: str) -> ParsedFile:
        """Read *file_path* and return a ParsedFile with the extracted data.

        On unreadable/oversized files or any parse error, returns a
        ParsedFile whose metadata carries an "error" entry instead of raising.
        """
        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, {}, {"error": "File too large or unreadable"}
            )

        try:
            lines = content.split("\n")
            objdump_data = self._parse_objdump_content(lines)

            metadata = {
                "file_size": self.get_file_size(file_path),
                "total_lines": len(lines),
                **objdump_data["summary"],
            }

            return self.create_parsed_file(file_path, objdump_data, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, {}, {"error": str(e)})

    def _parse_objdump_content(self, lines: List[str]) -> Dict[str, Any]:
        """Walk the dump line by line, classifying each line as a symbol,
        a disassembly row, or a header marker, depending on the current mode."""
        objdump_data = {
            "symbols": [],
            "sections": [],
            "disassembly": [],
            "headers": [],
            "summary": {},
        }

        in_symbol_table = False
        in_disassembly = False

        for line in lines:
            line = line.rstrip()

            if not line:
                continue

            # Mode switches: these markers introduce the lines that follow.
            if "SYMBOL TABLE:" in line:
                in_symbol_table = True
                continue
            elif "Disassembly of section" in line:
                in_disassembly = True
                continue
            elif line.startswith("Contents of section"):
                # Raw section-contents dumps are skipped (not parsed).
                continue

            # Match each pattern at most once per line (the original matched
            # every pattern twice: once in the condition, once to bind).
            symbol_match = (
                self.symbol_pattern.match(line) if in_symbol_table else None
            )
            disasm_match = (
                self.disasm_pattern.match(line) if in_disassembly else None
            )

            if symbol_match:
                objdump_data["symbols"].append(
                    Symbol(
                        name=symbol_match.group(6),
                        address=symbol_match.group(1),
                        type=symbol_match.group(3),
                        section=symbol_match.group(4),
                    )
                )
            elif disasm_match:
                objdump_data["disassembly"].append(
                    {
                        "address": disasm_match.group(1),
                        "bytes": disasm_match.group(2).strip(),
                        "instruction": disasm_match.group(3),
                    }
                )
            elif line.startswith(("Program Header:", "Section Headers:")):
                # Collect header-table markers verbatim.
                objdump_data["headers"].append(line)

        objdump_data["summary"] = {
            "symbol_count": len(objdump_data["symbols"]),
            "disasm_count": len(objdump_data["disassembly"]),
            "section_count": len(objdump_data["sections"]),
            "header_count": len(objdump_data["headers"]),
        }

        return objdump_data
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
from typing import Dict, List, Any
10+
from .base_parser import BaseParser
11+
from ..models import FileType, ParsedFile
12+
13+
14+
class PGOProfileParser(BaseParser):
    """Parse textual PGO (profile-guided optimization) profile dumps.

    Performs lightweight line classification: function markers and counter
    lines are collected, and every non-empty line is kept verbatim in
    "raw_lines" for downstream inspection.
    """

    def __init__(self):
        super().__init__(FileType.PGO_PROFILE)

    def parse(self, file_path: str) -> ParsedFile:
        """Read *file_path* and return a ParsedFile with the profile data.

        On unreadable/oversized files or any parse error, returns a
        ParsedFile whose metadata carries an "error" entry instead of raising.
        """
        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, {}, {"error": "File too large or unreadable"}
            )

        try:
            profile_data = {"functions": [], "counters": [], "raw_lines": []}

            for line in content.split("\n"):
                line = line.strip()
                if not line:
                    continue

                profile_data["raw_lines"].append(line)

                # Simple pattern matching for PGO profile data; the original
                # also tracked the current function in a local that was never
                # read, which has been removed.
                if line.startswith(("# Func Hash:", "Function:")):
                    profile_data["functions"].append(line)
                elif line.startswith("# Num Counters:") or line.isdigit():
                    profile_data["counters"].append(line)

            metadata = {
                "total_functions": len(profile_data["functions"]),
                "total_counters": len(profile_data["counters"]),
                # Counts non-empty lines only (raw_lines skips blanks).
                "total_lines": len(profile_data["raw_lines"]),
                "file_size": self.get_file_size(file_path),
            }

            return self.create_parsed_file(file_path, profile_data, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, {}, {"error": str(e)})
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
import json
10+
from typing import List, Dict, Any
11+
from .base_parser import BaseParser
12+
from ..models import FileType, ParsedFile, Diagnostic, SourceLocation
13+
14+
15+
class SARIFParser(BaseParser):
    """Parse SARIF (Static Analysis Results Interchange Format) JSON files
    into a flat list of Diagnostic objects."""

    def __init__(self):
        super().__init__(FileType.STATIC_ANALYSIS_SARIF)

    def parse(self, file_path: str) -> ParsedFile:
        """Read *file_path*, decode it as SARIF JSON, and return a ParsedFile
        whose data is the list of extracted diagnostics.

        On unreadable/oversized files or any decode/parse error, returns a
        ParsedFile whose metadata carries an "error" entry instead of raising.
        """
        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, [], {"error": "File too large or unreadable"}
            )

        try:
            sarif_data = json.loads(content)
            diagnostics = []

            # A SARIF log is a list of "runs", each holding "results".
            runs = sarif_data.get("runs", [])
            for run in runs:
                for result in run.get("results", []):
                    diagnostic = self._parse_sarif_result(result, run)
                    if diagnostic:
                        diagnostics.append(diagnostic)

            metadata = {
                "total_results": len(diagnostics),
                "file_size": self.get_file_size(file_path),
                # The SARIF spec stores the format version in the top-level
                # "version" property; the original reported the "$schema" URI
                # instead, which is kept as a fallback for older dumps.
                "sarif_version": sarif_data.get(
                    "version", sarif_data.get("$schema", "")
                ),
                "runs_count": len(runs),
            }

            return self.create_parsed_file(file_path, diagnostics, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, [], {"error": str(e)})

    def _parse_sarif_result(
        self, result: Dict[str, Any], run: Dict[str, Any]
    ) -> "Diagnostic | None":
        """Convert one SARIF result object into a Diagnostic.

        Returns None when the result is malformed (original annotated the
        return as plain Diagnostic despite this None path). The *run* context
        is currently unused but kept for interface stability.
        """
        try:
            message = result.get("message", {}).get("text", "")
            rule_id = result.get("ruleId", "")

            # SARIF defaults the level to a warning-ish severity; "info" is
            # used here when the result omits it.
            level = result.get("level", "info")

            # Only the first location is used; SARIF allows several.
            location = None
            locations = result.get("locations", [])
            if locations:
                physical_location = locations[0].get("physicalLocation", {})
                artifact_location = physical_location.get("artifactLocation", {})
                region = physical_location.get("region", {})

                if artifact_location.get("uri"):
                    location = SourceLocation(
                        file=artifact_location.get("uri"),
                        line=region.get("startLine"),
                        column=region.get("startColumn"),
                    )

            return Diagnostic(
                level=level, message=message, location=location, code=rule_id
            )

        except Exception:
            return None
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
import re
10+
from typing import List, Dict, Any
11+
from .base_parser import BaseParser
12+
from ..models import FileType, ParsedFile, Diagnostic, SourceLocation
13+
14+
15+
class StaticAnalyzerParser(BaseParser):
    """Parse clang-style static analyzer text output.

    Lines of the form "<file>:<line>:<col>: <level>: <message>" become
    Diagnostic objects; everything else is kept as a raw-line dict so no
    output is lost.
    """

    def __init__(self):
        super().__init__(FileType.STATIC_ANALYZER)
        # "<file>:<line>:<col>: <level>: <message>" diagnostics.
        # NOTE(review): "[^:]+" stops at the first colon, so Windows drive
        # paths ("C:\...") will not match — confirm inputs are POSIX paths.
        self.analyzer_pattern = re.compile(
            r"(?P<file>[^:]+):(?P<line>\d+):(?P<column>\d+):\s*(?P<level>\w+):\s*(?P<message>.+)"
        )

    def parse(self, file_path: str) -> ParsedFile:
        """Read *file_path* and return a ParsedFile whose data mixes
        Diagnostic objects and {"type": "raw", ...} dicts.

        On unreadable/oversized files or any parse error, returns a
        ParsedFile whose metadata carries an "error" entry instead of raising.
        """
        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, [], {"error": "File too large or unreadable"}
            )

        try:
            results = []

            for line in content.split("\n"):
                line = line.strip()
                if not line:
                    continue

                # Structured diagnostic if it matches, raw line otherwise.
                diagnostic = self._parse_analyzer_line(line)
                if diagnostic:
                    results.append(diagnostic)
                else:
                    results.append({"type": "raw", "content": line})

            # Split the total into diagnostics vs. raw lines for metadata.
            diagnostic_count = sum(1 for r in results if isinstance(r, Diagnostic))
            raw_count = len(results) - diagnostic_count

            metadata = {
                "total_results": len(results),
                "diagnostic_count": diagnostic_count,
                "raw_count": raw_count,
                "file_size": self.get_file_size(file_path),
            }

            return self.create_parsed_file(file_path, results, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, [], {"error": str(e)})

    def _parse_analyzer_line(self, line: str) -> "Diagnostic | None":
        """Parse a single analyzer line into a Diagnostic.

        Returns None when the line does not look like a diagnostic (the
        original annotated the return as plain Diagnostic despite this path).
        """
        match = self.analyzer_pattern.match(line)
        if match is None:
            return None
        try:
            location = SourceLocation(
                file=match.group("file"),
                # \d+ guarantees digits, so these int() calls should not
                # raise; the guard below is kept as defensive belt-and-braces.
                line=int(match.group("line")),
                column=int(match.group("column")),
            )

            return Diagnostic(
                level=match.group("level"),
                message=match.group("message"),
                location=location,
            )
        except ValueError:
            return None

0 commit comments

Comments
 (0)