Skip to content

Commit a79cb5a

Browse files
committed
[llvm-advisor] Add preprocessing and dependency analysis parsers
- implement preprocessed source parser for macro analysis - add macro expansion parser for preprocessing inspection - add include tree parser for dependency visualization - add build dependencies parser for project structure analysis - add debug information parser for DWARF data processing
1 parent bfda88d commit a79cb5a

File tree

5 files changed

+596
-0
lines changed

5 files changed

+596
-0
lines changed
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
import re
10+
from typing import Dict, List, Any
11+
from .base_parser import BaseParser
12+
from ..models import FileType, ParsedFile
13+
14+
15+
class DebugParser(BaseParser):
    """Parse compiler debug-information dumps (DWARF entries, line tables).

    The parsed payload is a dict with "dwarf_entries", "line_info",
    "sections" (keyed by ".debug_*" header) and a "summary" of counts.
    """

    def __init__(self):
        super().__init__(FileType.DEBUG)
        # Rows of the form "<depth><hexoffset>: content" from DWARF dumps.
        self.dwarf_pattern = re.compile(r"^\s*<(\d+)><([0-9a-fA-F]+)>:\s*(.+)")
        # Rows of the form "Line N, column M, rest" from debug line tables.
        self.debug_line_pattern = re.compile(
            r"^\s*Line\s+(\d+),\s*column\s+(\d+),\s*(.+)"
        )

    def parse(self, file_path: str) -> ParsedFile:
        """Read *file_path* and return a ParsedFile with extracted debug data.

        Failures (unreadable file, parse error) are reported through an
        "error" key in metadata instead of raising.
        """
        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, {}, {"error": "File too large or unreadable"}
            )

        try:
            lines = content.split("\n")
            debug_data = self._parse_debug_info(lines)

            metadata = {
                "file_size": self.get_file_size(file_path),
                "total_lines": len(lines),
            }
            metadata.update(debug_data["summary"])

            return self.create_parsed_file(file_path, debug_data, metadata)
        except Exception as e:
            return self.create_parsed_file(file_path, {}, {"error": str(e)})

    def _parse_debug_info(self, lines: List[str]) -> Dict[str, Any]:
        """Walk dump lines and bucket them into entries, line info and sections."""
        result: Dict[str, Any] = {
            "dwarf_entries": [],
            "line_info": [],
            "sections": {},
            "summary": {},
        }
        section = None  # name of the ".debug_*" section currently open

        for raw in lines:
            stripped = raw.strip()
            if not stripped:
                continue

            # A bare ".debug_*" header opens a new section bucket.
            if stripped.startswith(".debug_"):
                section = stripped
                result["sections"][section] = []
                continue

            # DWARF DIE rows; the pattern tolerates leading whitespace itself.
            m = self.dwarf_pattern.match(raw)
            if m:
                entry = {
                    "depth": int(m.group(1)),
                    "offset": m.group(2),
                    "content": m.group(3),
                }
                result["dwarf_entries"].append(entry)
                if section:
                    result["sections"][section].append(entry)
                continue

            # Debug line-table rows.
            m = self.debug_line_pattern.match(raw)
            if m:
                info = {
                    "line": int(m.group(1)),
                    "column": int(m.group(2)),
                    "info": m.group(3),
                }
                result["line_info"].append(info)
                if section:
                    result["sections"][section].append(info)
                continue

            # Anything else is kept verbatim under the current section.
            if section:
                result["sections"][section].append({"raw": stripped})

        result["summary"] = {
            "dwarf_entry_count": len(result["dwarf_entries"]),
            "line_info_count": len(result["line_info"]),
            "section_count": len(result["sections"]),
        }

        return result
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
from typing import Dict, List, Any
10+
from .base_parser import BaseParser
11+
from ..models import FileType, ParsedFile, Dependency
12+
13+
14+
class DependenciesParser(BaseParser):
    """Parse build-dependency listings into Dependency edges.

    Supports two notations:
      * make-style rules ("target: src1 src2 \\" plus indented continuation
        lines), as emitted into .d files by compilers;
      * simple arrow edges ("source -> target" or "source => target").
    """

    def __init__(self):
        super().__init__(FileType.DEPENDENCIES)

    def parse(self, file_path: str) -> ParsedFile:
        """Read *file_path* and return a ParsedFile whose data is a list of
        Dependency objects, with source/target counts in metadata.

        Failures are reported via an "error" metadata key instead of raising.
        """
        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, [], {"error": "File too large or unreadable"}
            )

        try:
            lines = content.split("\n")
            dependencies = self._parse_dependencies(lines)

            # Calculate statistics over the edge endpoints.
            sources = set()
            targets = set()
            for dep in dependencies:
                sources.add(dep.source)
                targets.add(dep.target)

            metadata = {
                "file_size": self.get_file_size(file_path),
                "total_dependencies": len(dependencies),
                "unique_sources": len(sources),
                "unique_targets": len(targets),
                "unique_files": len(sources.union(targets)),
            }

            return self.create_parsed_file(file_path, dependencies, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, [], {"error": str(e)})

    def _parse_dependencies(self, lines: List[str]) -> List[Dependency]:
        """Extract Dependency edges from the raw dependency-file lines."""
        dependencies: List[Dependency] = []
        current_target = None

        for raw in lines:
            line = raw.strip()
            if not line:
                continue

            # BUG FIX: indentation must be tested on the raw line. The old
            # code stripped first and then called line.startswith(" "/"\t"),
            # which is always False after strip() — so make-style
            # continuation lines were never recognized and their sources
            # were silently dropped (current_target was reset instead).
            indented = raw.startswith((" ", "\t"))

            # Handle make-style rule headers (target: source1 source2 ...)
            if ":" in line and not indented:
                target, _, sources = line.partition(":")
                current_target = target.strip()
                for source in sources.split():
                    if source != "\\":  # skip line-continuation backslashes
                        dependencies.append(
                            Dependency(
                                source=source,
                                target=current_target,
                                type="dependency",
                            )
                        )

            # Handle indented continuation lines of the previous rule
            elif indented and current_target:
                for source in line.split():
                    if source != "\\":
                        dependencies.append(
                            Dependency(
                                source=source,
                                target=current_target,
                                type="dependency",
                            )
                        )

            # Handle simple dependency edges (one per line)
            elif "->" in line or "=>" in line:
                sep = "->" if "->" in line else "=>"
                source, _, target = line.partition(sep)
                dependencies.append(
                    Dependency(
                        source=source.strip(),
                        target=target.strip(),
                        type="dependency",
                    )
                )

            # Any other non-indented line ends the current rule's scope.
            elif not indented:
                current_target = None

        return dependencies
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
import re
10+
from typing import Dict, List, Any
11+
from .base_parser import BaseParser
12+
from ..models import FileType, ParsedFile, Dependency
13+
14+
15+
class IncludeTreeParser(BaseParser):
    """Parse an indentation-based include tree into edges and a flat tree list."""

    def __init__(self):
        super().__init__(FileType.INCLUDE_TREE)
        # Captures: (leading whitespace, file token, optional "(extra info)").
        self.include_pattern = re.compile(r"^(\s*)(\S+)\s*(?:\(([^)]+)\))?")

    def parse(self, file_path: str) -> ParsedFile:
        """Read *file_path* and return a ParsedFile describing the include tree.

        Failures are reported via an "error" metadata key instead of raising.
        """
        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, [], {"error": "File too large or unreadable"}
            )

        try:
            lines = content.split("\n")
            include_data = self._parse_include_tree(lines)

            metadata = {
                "file_size": self.get_file_size(file_path),
                "total_includes": len(include_data["dependencies"]),
                "unique_files": len(include_data["files"]),
                "max_depth": include_data["max_depth"],
            }

            return self.create_parsed_file(file_path, include_data, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, [], {"error": str(e)})

    def _parse_include_tree(self, lines: List[str]) -> Dict[str, Any]:
        """Turn indented include-tree lines into edges, a file set and tree rows."""
        tree: Dict[str, Any] = {
            "dependencies": [],
            "files": set(),
            "tree": [],
            "max_depth": 0,
        }
        ancestors: List[str] = []  # chain of parent files above the current line

        for line in lines:
            if not line.strip():
                continue

            m = self.include_pattern.match(line)
            if not m:
                continue

            indent_width = len(m.group(1))
            path = m.group(2)
            extra = m.group(3)

            depth = indent_width // 2  # Assuming 2 spaces per indent level
            if depth > tree["max_depth"]:
                tree["max_depth"] = depth

            # Discard ancestors at or below this line's depth.
            del ancestors[depth:]

            tree["files"].add(path)

            # Record the edge from the nearest ancestor, if any.
            parent = ancestors[-1] if ancestors else None
            if parent is not None:
                tree["dependencies"].append(
                    Dependency(source=parent, target=path, type="include")
                )

            tree["tree"].append(
                {
                    "file": path,
                    "depth": depth,
                    "parent": parent,
                    "extra_info": extra,
                }
            )

            # This file becomes the parent for deeper lines that follow.
            ancestors.append(path)

        # Sets don't serialize to JSON; hand back a list instead.
        tree["files"] = list(tree["files"])

        return tree

0 commit comments

Comments
 (0)