|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +""" |
| 4 | +ELF Dependency Analyzer for Ubuntu |
| 5 | +
|
| 6 | +This script analyzes ELF (Executable and Linkable Format) binaries to determine their runtime |
| 7 | +package dependencies on Ubuntu systems. It can process individual files or recursively analyze directories. |
| 8 | +
|
| 9 | +The script provides information about: |
| 10 | +- Required packages and their versions |
| 11 | +- Custom or non-APT libraries |
| 12 | +- Core system libraries |
| 13 | +- Missing libraries |
| 14 | +- Other special cases |
| 15 | +
|
| 16 | +Usage: |
| 17 | + python3 elf_dependency_analyzer.py [file_or_directory] [file_or_directory] ... |
| 18 | +
|
| 19 | +Requirements: |
| 20 | +- Python 3.6+ |
| 21 | +- prettytable (pip install prettytable) |
| 22 | +- ldd (usually pre-installed on Linux systems) |
| 23 | +- file (usually pre-installed on Linux systems) |
| 24 | +- dpkg (pre-installed on Ubuntu) |
| 25 | +""" |
| 26 | + |
| 27 | +import os |
| 28 | +import subprocess |
| 29 | +import re |
| 30 | +import sys |
| 31 | +import argparse |
| 32 | +from collections import defaultdict |
| 33 | +from prettytable import PrettyTable |
| 34 | + |
def run_command(command):
    """
    Execute an external command and return its combined stdout/stderr.

    The command is run without a shell. Both a non-zero exit status and a
    failure to start the process (for example, the executable is not
    installed) are reported on stdout and signalled by returning None, so
    callers only have to handle a single failure value.

    Args:
        command (list): The command to execute as a list of strings.

    Returns:
        str: The decoded output of the command, or None if an error occurred.
    """
    try:
        raw_output = subprocess.check_output(command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # The command ran but exited with a non-zero status.
        details = (e.output or b'').decode('utf-8', errors='replace').strip()
        print(f"Error running command {' '.join(command)}: {details}")
        return None
    except OSError as e:
        # The command could not be started at all (missing binary,
        # permission denied, ...). FileNotFoundError is a subclass of OSError.
        print(f"Error running command {' '.join(command)}: {e}")
        return None

    # Tool output may contain bytes that are not valid UTF-8 (e.g. odd file
    # names); replace them instead of raising UnicodeDecodeError.
    return raw_output.decode('utf-8', errors='replace')
| 50 | + |
def get_package_info(lib_path):
    """
    Resolve a library path to the package that provides it.

    Lookup order:
      1. Paths under /usr/local/cloudberry-db are reported as Cloudberry
         custom libraries without consulting dpkg.
      2. `dpkg -S` is asked which package owns the path.
      3. The file name is matched against a table of well-known core
         libraries.
      4. `file` is used to describe the path as a generic system library.

    Args:
        lib_path (str): The path to the library.

    Returns:
        tuple: (package_name, details) for the first lookup that succeeds,
            or None when every lookup fails.
    """
    if lib_path.startswith('/usr/local/cloudberry-db'):
        return "cloudberry-custom", f"Cloudberry custom library: {lib_path}"

    owner_line = run_command(['dpkg', '-S', lib_path])
    if owner_line:
        # dpkg prints "<package>[:<arch>]: <path>"; keep the part before the
        # first colon as the package name.
        return owner_line.split(':')[0], owner_line.strip()

    # Core system libraries that might not be individually tracked by dpkg,
    # keyed by file-name prefix.
    core_libs = {
        'libc.so': 'libc6',
        'libm.so': 'libc6',
        'libdl.so': 'libc6',
        'libpthread.so': 'libc6',
        'libresolv.so': 'libc6',
        'librt.so': 'libc6',
        'libgcc_s.so': 'libgcc-s1',
        'libstdc++.so': 'libstdc++6',
        'libz.so': 'zlib1g',
        'libbz2.so': 'libbz2-1.0',
        'libpam.so': 'libpam0g',
        'libaudit.so': 'libaudit1',
        'libcap-ng.so': 'libcap-ng0',
        'libkeyutils.so': 'libkeyutils1',
        'liblzma.so': 'liblzma5',
        'libcom_err.so': 'libcomerr2'
    }

    base_name = os.path.basename(lib_path)
    core_package = next(
        (pkg for prefix, pkg in core_libs.items() if base_name.startswith(prefix)),
        None,
    )
    if core_package is not None:
        return core_package, f"Core system library: {lib_path}"

    # Not a recognized core library: fall back to whatever `file` reports.
    description = run_command(['file', lib_path])
    if not description:
        return None
    return "system-library", f"System library: {lib_path} - {description.strip()}"
| 100 | + |
def print_summary(packages, special_cases, missing_libraries, binary_path):
    """
    Print the dependency report for a single binary.

    A table with one row per distinct (package, details) pair is printed
    first, followed by bullet lists of missing libraries and special cases
    when those lists are non-empty.

    Args:
        packages (list): List of package tuples (package_name, full_package_name).
        special_cases (list): List of special case strings.
        missing_libraries (list): List of missing library names.
        binary_path (str): Path to the binary being analyzed (not used in
            the output).
    """
    print("\nSummary of runtime dependencies:")

    headers = ['Category', 'Package/Library', 'Details']
    table = PrettyTable(headers)
    for header in headers:
        table.align[header] = 'l'

    # Pseudo-package names that get their own category label; everything
    # else is shown as a regular system package.
    category_labels = {
        'cloudberry-custom': 'Cloudberry Custom',
        'system-library': 'System Library',
    }

    for package_name, full_package_name in sorted(set(packages)):
        label = category_labels.get(package_name, 'System Package')
        table.add_row([label, package_name, full_package_name])

    print(table)

    bullet_sections = (
        ("\nMISSING LIBRARIES:", missing_libraries),
        ("\nSPECIAL CASES:", special_cases),
    )
    for heading, entries in bullet_sections:
        if not entries:
            continue
        print(heading)
        for entry in entries:
            print(f" - {entry}")
| 137 | + |
def process_binary(binary_path):
    """
    Process a single binary file to determine its dependencies.

    Runs `ldd` on the binary, maps every resolved library to a package via
    get_package_info(), prints the per-library results and a summary, and
    returns the collected data.

    Args:
        binary_path (str): Path to the binary file.

    Returns:
        tuple: (packages, special_cases, missing_libraries) where packages is
            a list of (package_name, details) tuples, special_cases is a list
            of strings and missing_libraries is a list of library names. All
            three are empty when `ldd` fails for the binary.
    """
    print(f"Binary: {binary_path}\n")
    print("Libraries and their corresponding packages:")
    packages, special_cases, missing_libraries = [], [], []

    ldd_output = run_command(['ldd', binary_path])
    if ldd_output is None:
        return packages, special_cases, missing_libraries

    for line in ldd_output.splitlines():
        if "=>" not in line:
            continue

        lib_name, _, target = line.partition('=>')
        lib_name = lib_name.strip()
        target = target.strip()

        # ldd reports unresolved libraries as "<name> => not found". This has
        # to be checked on the raw text: passing "not" through
        # os.path.realpath() would turn it into "<cwd>/not" and hide the
        # missing library.
        if target.startswith("not found"):
            missing_libraries.append(lib_name)
            print(f"MISSING: {line.strip()}")
            continue

        # Drop the trailing load address "(0x...)" while keeping paths that
        # contain spaces intact.
        if target.endswith(')') and ' (' in target:
            target = target.rsplit(' (', 1)[0].strip()

        # Nothing left, or only an address (e.g. "linux-vdso.so.1 => (0x...)"
        # printed by older ldd versions): there is no file to look up.
        if not target or target.startswith('('):
            continue

        lib_path = os.path.realpath(target)
        package_info = get_package_info(lib_path)
        if package_info:
            print(f"{lib_path} => {package_info[1]}")
            packages.append(package_info)
        else:
            special_case = f"{lib_path} is not found and might be a special case"
            special_cases.append(special_case)
            print(f"{lib_path} => Not found, might be a special case")

    print_summary(packages, special_cases, missing_libraries, binary_path)
    print("-------------------------------------------")
    return packages, special_cases, missing_libraries
| 181 | + |
def is_elf_binary(file_path):
    """
    Check if a file is an ELF executable or shared object.

    The decision is based on the description printed by `file`. Brief mode
    (`-b`) is used so that the file name itself is not part of the inspected
    text; otherwise a name such as "ELF_executable.txt" would be classified
    as an ELF binary.

    Args:
        file_path (str): Path to the file.

    Returns:
        bool: True if the file is an ELF binary, False otherwise (including
            when `file` could not be run on the path).
    """
    file_output = run_command(['file', '-b', file_path])
    if not file_output:
        # run_command() returns None on failure; treat that as "not ELF"
        # instead of raising TypeError on the membership test below.
        return False
    return 'ELF' in file_output and (
        'executable' in file_output or 'shared object' in file_output
    )
| 194 | + |
def print_grand_summary(grand_summary, grand_special_cases, grand_missing_libraries):
    """
    Print the combined report for every binary that was analyzed.

    Nothing is printed when all three collections are empty. Otherwise the
    package table is always printed, the missing-library table is printed
    when there are missing libraries, and the special-case section prints
    either a table or a "none found" line.

    Args:
        grand_summary (dict): Maps package name to a set of detail strings.
        grand_special_cases (list): List of (case, binary_path) tuples.
        grand_missing_libraries (dict): Maps missing library name to the set
            of binaries that reference it.
    """
    if not (grand_summary or grand_special_cases or grand_missing_libraries):
        return

    def left_aligned_table(*headers):
        # Build a PrettyTable whose columns are all left-aligned.
        built = PrettyTable(list(headers))
        for header in headers:
            built.align[header] = 'l'
        return built

    print("\nGrand Summary of runtime packages required across all binaries:")
    package_table = left_aligned_table('Package', 'Included Packages')
    for package_name, full_package_names in sorted(grand_summary.items()):
        package_table.add_row([package_name, '\n'.join(sorted(full_package_names))])
    print(package_table)

    if grand_missing_libraries:
        print("\nGrand Summary of MISSING LIBRARIES across all binaries:")
        missing_table = left_aligned_table('Missing Library', 'Referenced By')
        for lib, binaries in sorted(grand_missing_libraries.items()):
            missing_table.add_row([lib, '\n'.join(sorted(binaries))])
        print(missing_table)

    print("\nGrand Summary of special cases across all binaries:")
    if not grand_special_cases:
        print("No special cases found.")
        return

    special_table = left_aligned_table('Library/Case', 'Referenced By', 'Category')
    for case, binary in sorted(set(grand_special_cases)):
        if "system library" in case:
            category = "System Library"
        else:
            category = "Other"
        # The library path is the text before " is " in the case message.
        library = case.split(" is ")[0] if " is " in case else case
        special_table.add_row([library, binary, category])
    print(special_table)
| 236 | + |
def analyze_path(path, grand_summary, grand_special_cases, grand_missing_libraries):
    """
    Analyze a file, or every file below a directory, and accumulate results.

    Each candidate that is_elf_binary() accepts is passed to
    process_binary(); its results are merged into the three accumulator
    arguments, which are modified in place. A path that is neither a file
    nor a directory only produces an error message.

    Args:
        path (str): Path to the file or directory to analyze.
        grand_summary (dict): Maps package name to a set of detail strings.
        grand_special_cases (list): Receives (case, binary_path) tuples.
        grand_missing_libraries (dict): Maps missing library name to the set
            of binaries that reference it.
    """
    if os.path.isfile(path):
        candidates = [path]
    elif os.path.isdir(path):
        # Lazily yield every file below the directory in os.walk() order.
        candidates = (
            os.path.join(folder, entry)
            for folder, _subdirs, entries in os.walk(path)
            for entry in entries
        )
    else:
        print(f"Error: {path} is neither a valid file nor a directory.")
        return

    for candidate in candidates:
        if not is_elf_binary(candidate):
            continue

        found_packages, found_cases, found_missing = process_binary(candidate)
        for package_name, full_package_name in found_packages:
            grand_summary[package_name].add(full_package_name)
        grand_special_cases.extend((case, candidate) for case in found_cases)
        for lib in found_missing:
            grand_missing_libraries[lib].add(candidate)
| 268 | + |
def main():
    """
    Parse the command line, analyze every given path and print the totals.

    At least one path argument is required. Results from all paths are
    accumulated and reported once at the end by print_grand_summary().
    """
    parser = argparse.ArgumentParser(description="ELF Dependency Analyzer for Ubuntu")
    parser.add_argument('paths', nargs='+', help="Paths to files or directories to analyze")
    targets = parser.parse_args().paths

    # Accumulators shared across every analyzed path.
    grand_summary, grand_missing_libraries = defaultdict(set), defaultdict(set)
    grand_special_cases = []

    for target in targets:
        analyze_path(target, grand_summary, grand_special_cases, grand_missing_libraries)

    print_grand_summary(grand_summary, grand_special_cases, grand_missing_libraries)


if __name__ == '__main__':
    main()
0 commit comments