Skip to content

Commit bfda88d

Browse files
committed
[llvm-advisor] Add code analysis and binary inspection parsers
- implement AST JSON parser for syntax tree analysis
- add LLVM IR parser for intermediate representation analysis
- add assembly parser for machine code inspection
- add binary size parser for executable size optimization
- add symbols parser for debugging and analysis support
1 parent 4bf0e36 commit bfda88d

File tree

5 files changed

+609
-0
lines changed

5 files changed

+609
-0
lines changed
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
import re
10+
from typing import Dict, List, Any
11+
from .base_parser import BaseParser
12+
from ..models import FileType, ParsedFile
13+
14+
15+
class AssemblyParser(BaseParser):
    """Parser for assembly listings: collects labels, per-mnemonic
    instruction counts, and section directives."""

    # Cap on how many lines of a large file are scanned.
    _LARGE_FILE_LINE_LIMIT = 5000

    def __init__(self):
        super().__init__(FileType.ASSEMBLY)
        # Label at column 0, e.g. "main:".
        self.label_pattern = re.compile(r"^(\w+):")
        # Instructions are indented; the pattern relies on leading whitespace.
        self.instruction_pattern = re.compile(r"^\s+(\w+)")
        # Common section directives (.text/.data/.bss/.rodata).
        self.section_pattern = re.compile(r"^\s*\.(text|data|bss|rodata)")

    def parse(self, file_path: str) -> ParsedFile:
        """Parse the assembly file at *file_path*.

        Large files (per ``self.is_large_file``) are only partially scanned.
        Errors are reported through the returned ParsedFile's metadata
        rather than raised.
        """
        if self.is_large_file(file_path):
            return self._parse_large_assembly(file_path)

        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, {}, {"error": "File too large or unreadable"}
            )

        try:
            lines = content.split("\n")
            asm_data = self._analyze_assembly_content(lines)

            metadata = {
                "file_size": self.get_file_size(file_path),
                "total_lines": len(lines),
                **asm_data["summary"],
            }

            return self.create_parsed_file(file_path, asm_data, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, {}, {"error": str(e)})

    def _parse_large_assembly(self, file_path: str) -> ParsedFile:
        """Partially parse a large file by analyzing only its first
        ``_LARGE_FILE_LINE_LIMIT`` lines.

        Bug fix: the previous inline scan matched the instruction pattern
        (which requires leading whitespace) against *stripped* lines, so
        instructions were never counted for large files. Delegating to
        _analyze_assembly_content fixes that and keeps both paths consistent.
        """
        try:
            lines: List[str] = []
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                for i, line in enumerate(f):
                    if i >= self._LARGE_FILE_LINE_LIMIT:
                        break
                    lines.append(line.rstrip("\n"))

            asm_data = self._analyze_assembly_content(lines)
            # Mark the result as a truncated view of the file.
            asm_data["summary"]["analyzed_lines"] = len(lines)
            asm_data["summary"]["is_partial"] = True

            metadata = {
                "file_size": self.get_file_size(file_path),
                **asm_data["summary"],
            }

            return self.create_parsed_file(file_path, asm_data, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, {}, {"error": str(e)})

    def _analyze_assembly_content(self, lines: List[str]) -> Dict[str, Any]:
        """Scan *lines* and collect labels, instruction counts, and sections.

        Returns a dict with "labels" (list), "instructions" (mnemonic ->
        count), "sections" (list, may repeat), and a computed "summary".
        """
        asm_data: Dict[str, Any] = {
            "labels": [],
            "instructions": {},
            "sections": [],
            "summary": {},
        }

        for original_line in lines:
            line = original_line.strip()

            # Skip blanks and full-line comments ("#" or ";").
            if not line or line.startswith("#") or line.startswith(";"):
                continue

            label_match = self.label_pattern.match(line)
            if label_match:
                asm_data["labels"].append(label_match.group(1))
                continue

            # Match against the unstripped line: the instruction pattern
            # distinguishes instructions from labels by their indentation.
            inst_match = self.instruction_pattern.match(original_line)
            if inst_match:
                inst = inst_match.group(1)
                asm_data["instructions"][inst] = (
                    asm_data["instructions"].get(inst, 0) + 1
                )
                continue

            section_match = self.section_pattern.match(line)
            if section_match:
                asm_data["sections"].append(section_match.group(1))

        asm_data["summary"] = {
            "label_count": len(asm_data["labels"]),
            "instruction_types": len(asm_data["instructions"]),
            "total_instructions": sum(asm_data["instructions"].values()),
            # Sections may repeat; count distinct names.
            "section_count": len(set(asm_data["sections"])),
        }

        return asm_data
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
import json
10+
from typing import Dict, Any
11+
from .base_parser import BaseParser
12+
from ..models import FileType, ParsedFile
13+
14+
15+
class ASTParser(BaseParser):
    """Parser for AST dumps serialized as JSON."""

    # How many bytes of a large file to inspect for the root node.
    _CHUNK_SIZE = 10000

    def __init__(self):
        super().__init__(FileType.AST_JSON)

    def parse(self, file_path: str) -> ParsedFile:
        """Parse the JSON AST at *file_path*.

        Large files (per ``self.is_large_file``) get a best-effort partial
        parse. Errors are reported via the returned ParsedFile's metadata.
        """
        if self.is_large_file(file_path):
            return self._parse_large_ast(file_path)

        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, {}, {"error": "File too large or unreadable"}
            )

        try:
            ast_data = json.loads(content)

            # Extract summary information for the metadata.
            summary = self._extract_ast_summary(ast_data)

            metadata = {
                "file_size": self.get_file_size(file_path),
                "ast_summary": summary,
            }

            return self.create_parsed_file(file_path, ast_data, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, {}, {"error": str(e)})

    def _parse_large_ast(self, file_path: str) -> ParsedFile:
        """Best-effort parse of a large AST file.

        Reads an initial chunk and tries to decode the first balanced
        top-level JSON object, so at least the root node is summarized.
        Falls back to a "too large" error entry when that fails.
        """
        try:
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                chunk = f.read(self._CHUNK_SIZE)

            if chunk.startswith("{"):
                # Track brace depth to find the first balanced object.
                depth = 0
                for i, char in enumerate(chunk):
                    if char == "{":
                        depth += 1
                    elif char == "}":
                        depth -= 1
                        if depth == 0:
                            try:
                                partial_data = json.loads(chunk[: i + 1])
                            except json.JSONDecodeError:
                                # Bug fix: was a bare `except:`. Only a JSON
                                # decode failure should abandon the partial
                                # parse; anything else propagates to the
                                # outer handler.
                                break

                            summary = self._extract_ast_summary(
                                partial_data, partial=True
                            )
                            metadata = {
                                "file_size": self.get_file_size(file_path),
                                "ast_summary": summary,
                                "is_partial": True,
                            }
                            return self.create_parsed_file(
                                file_path, partial_data, metadata
                            )

            metadata = {
                "file_size": self.get_file_size(file_path),
                "error": "File too large to parse completely",
            }

            return self.create_parsed_file(file_path, {}, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, {}, {"error": str(e)})

    def _extract_ast_summary(
        self, ast_data: Dict[str, Any], partial: bool = False
    ) -> Dict[str, Any]:
        """Summarize the root node: kind, id, and child node-kind counts.

        *partial* marks summaries built from a truncated parse.
        """
        summary: Dict[str, Any] = {
            "root_kind": ast_data.get("kind", "unknown"),
            "root_id": ast_data.get("id", "unknown"),
            "has_inner": "inner" in ast_data,
            "is_partial": partial,
        }

        inner = ast_data.get("inner")
        if isinstance(inner, list):
            summary["inner_count"] = len(inner)

            # Frequency of each child node kind.
            node_types: Dict[str, int] = {}
            for node in inner:
                if isinstance(node, dict) and "kind" in node:
                    kind = node["kind"]
                    node_types[kind] = node_types.get(kind, 0) + 1

            summary["node_types"] = node_types

        return summary
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# ===----------------------------------------------------------------------===//
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===----------------------------------------------------------------------===//
8+
9+
import re
10+
from typing import Dict, List, Any
11+
from .base_parser import BaseParser
12+
from ..models import FileType, ParsedFile, BinarySize
13+
14+
15+
class BinarySizeParser(BaseParser):
    """Parser for binary-size reports: size(1) columns, nm-style rows,
    or a generic "section: size" text format."""

    def __init__(self):
        super().__init__(FileType.BINARY_SIZE)
        # Pattern for size output like: "1234 5678 90 12345 section_name"
        self.size_pattern = re.compile(r"^\s*(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(.+)$")
        # Pattern for nm-style output with size
        self.nm_pattern = re.compile(
            r"^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+([A-Za-z])\s+(.+)$"
        )

    def parse(self, file_path: str) -> ParsedFile:
        """Parse *file_path* into a list of BinarySize entries.

        Errors are reported via the returned ParsedFile's metadata.
        """
        content = self.read_file_safe(file_path)
        if content is None:
            return self.create_parsed_file(
                file_path, [], {"error": "File too large or unreadable"}
            )

        try:
            size_data = self._parse_size_output(content.split("\n"))

            metadata = {
                "file_size": self.get_file_size(file_path),
                "total_sections": len(size_data),
                "total_binary_size": sum(item.size for item in size_data),
            }

            return self.create_parsed_file(file_path, size_data, metadata)

        except Exception as e:
            return self.create_parsed_file(file_path, [], {"error": str(e)})

    def _parse_size_output(self, lines: List[str]) -> List[BinarySize]:
        """Convert report lines to BinarySize entries with percentages.

        Each line is tried against three formats in order: size(1)
        columns, nm output (hex size), and a "section: size" fallback.
        Percentages are relative to the accumulated total.
        """
        size_data: List[BinarySize] = []
        total_size = 0

        for line in lines:
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            # size(1) format: text/data/bss columns plus a total column.
            size_match = self.size_pattern.match(line)
            if size_match:
                text_size = int(size_match.group(1))
                data_size = int(size_match.group(2))
                bss_size = int(size_match.group(3))
                total = int(size_match.group(4))
                name = size_match.group(5)

                # Record only non-empty sub-sections.
                for suffix, section_size in (
                    (".text", text_size),
                    (".data", data_size),
                    (".bss", bss_size),
                ):
                    if section_size > 0:
                        size_data.append(
                            BinarySize(section=f"{name}{suffix}", size=section_size)
                        )

                # NOTE(review): the total column feeds the percentage base
                # and may differ from the sum of the recorded sub-sections.
                total_size += total
                continue

            # nm-style format; the size column (group 2) is hexadecimal.
            # Bug fix: the address and symbol-type groups were bound to
            # unused locals; they are now simply not extracted.
            nm_match = self.nm_pattern.match(line)
            if nm_match:
                size = int(nm_match.group(2), 16)
                size_data.append(BinarySize(section=nm_match.group(4), size=size))
                total_size += size
                continue

            # Generic "section: size" fallback; first integer after the
            # colon is taken as the size.
            if ":" in line:
                section_name, _, size_part = line.partition(":")
                size_numbers = re.findall(r"\d+", size_part)
                if size_numbers:
                    size = int(size_numbers[0])
                    size_data.append(
                        BinarySize(section=section_name.strip(), size=size)
                    )
                    total_size += size

        # Attach each entry's share of the overall total.
        if total_size > 0:
            for item in size_data:
                item.percentage = (item.size / total_size) * 100

        return size_data

0 commit comments

Comments
 (0)