#!/usr/bin/env python3
22
3- import os
4- import subprocess
5- import re
6- import sys
7- import json
"""
ELF Dependency Analyzer

This script analyzes ELF (Executable and Linkable Format) binaries to determine their runtime
package dependencies. It can process individual files or recursively analyze directories.

The script provides information about:
- Required packages and their versions
- Missing libraries
- Custom or non-RPM libraries
- Other special cases

It also groups packages by their high-level dependencies, which can be cached for performance.

Usage:
    python3 elf_dependency_analyzer.py [--rebuild-cache] <file_or_directory> [<file_or_directory> ...]

The script will automatically determine if each argument is a file or directory and process accordingly.
Use --rebuild-cache to force rebuilding of the high-level packages cache.

Requirements:
- Python 3.6+
- prettytable (pip install prettytable)
- python-dateutil (pip install python-dateutil)
- ldd (usually pre-installed on Linux systems)
- file (usually pre-installed on Linux systems)
- rpm (usually pre-installed on RPM-based Linux distributions)
- repoquery (part of yum-utils package)

Functions:
- check_requirements(): Checks if all required commands are available.
- run_command(command): Executes a shell command and returns its output.
- parse_ldd_line(line): Parses a line of ldd output to extract the library name.
- find_library_in_ld_library_path(lib_name): Searches for a library in LD_LIBRARY_PATH.
- get_package_info(lib_path): Gets package information for a given library.
- get_package_dependencies(package): Gets dependencies of a package using repoquery.
- build_high_level_packages(grand_summary): Builds a mapping of high-level packages to their dependencies.
- load_or_build_high_level_packages(grand_summary, force_rebuild): Loads or builds the high-level packages cache.
- print_summary(packages, special_cases, missing_libraries, binary_path): Prints a summary for a single binary.
- process_binary(binary_path): Processes a single binary file.
- is_elf_binary(file_path): Checks if a file is an ELF binary.
- print_grand_summary(...): Prints a grand summary of all processed binaries.
- analyze_path(path, grand_summary, grand_special_cases, grand_missing_libraries): Analyzes a file or directory.
- main(): Main function to handle command-line arguments and initiate the analysis.

This script is designed to help system administrators and developers understand the dependencies
of ELF binaries in their systems, which can be useful for troubleshooting, optimizing, or
preparing deployment packages.
"""
52+
import argparse
import json
import os
import re
import shutil
import subprocess
import sys
from collections import defaultdict
from datetime import datetime, timedelta

from dateutil import parser
from prettytable import PrettyTable

# Path of the JSON cache holding the high-level package mapping
# (see load_or_build_high_level_packages).
CACHE_FILE = 'high_level_packages_cache.json'
# Rebuild the cache automatically once it is older than this many days.
CACHE_EXPIRY_DAYS = 7
1762
def check_requirements():
    """Verify that all required external commands are on PATH.

    Checks for ``ldd``, ``file``, ``rpm`` and ``repoquery``.  If any are
    missing, prints an error listing them and aborts the program; otherwise
    returns None.
    """
    required_commands = ['ldd', 'file', 'rpm', 'repoquery']
    # shutil.which returns None when the command is not found on PATH.
    missing_commands = [cmd for cmd in required_commands if shutil.which(cmd) is None]
    if missing_commands:
        print("Error: The following required commands are missing:")
        # NOTE(review): the tail of this function was lost in the diff; the
        # per-command listing and the exit below are reconstructed from the
        # visible loop header and the function's purpose — confirm against
        # the original file.
        for cmd in missing_commands:
            print(f"  - {cmd}")
        sys.exit(1)
@@ -57,7 +97,6 @@ def get_package_info(lib_path):
5797 lib_path = find_library_in_ld_library_path (lib_name )
5898 if not lib_path :
5999 return None
60-
61100 try :
62101 full_package_name = run_command (['rpm' , '-qf' , lib_path ])
63102 if full_package_name :
def build_high_level_packages(grand_summary):
    """Map each base package name seen in *grand_summary* to the base names
    of its dependencies.

    Args:
        grand_summary: mapping whose values are iterables of full package
            names (e.g. ``glibc-2.28-164.el8``).

    Returns:
        dict mapping base package name -> list of base names of its
        dependencies, as reported by ``get_package_dependencies`` (repoquery).
        Packages whose dependency query returns nothing are omitted.
    """
    # NOTE(review): splitting on the first '-' truncates names that themselves
    # contain dashes (e.g. 'python3-libs' -> 'python3'); preserved as-is since
    # downstream lookups rely on the same truncation.
    all_packages = set()
    for packages in grand_summary.values():
        all_packages.update(package.split('-')[0] for package in packages)
    high_level_packages = {}
    for package in all_packages:
        deps = get_package_dependencies(package)
        if deps:
            high_level_packages[package] = [dep.split('-')[0] for dep in deps]
    return high_level_packages
90127
def load_or_build_high_level_packages(grand_summary, force_rebuild=False):
    """Return the high-level package mapping, using a JSON file cache.

    Args:
        grand_summary: mapping of package name -> set of full package names,
            forwarded to build_high_level_packages() on a cache miss.
        force_rebuild: when True, ignore any existing cache and rebuild.

    Returns:
        dict as produced by build_high_level_packages().
    """
    # NOTE(review): the cache-existence guard was lost in the diff; this
    # condition is reconstructed from the force_rebuild parameter and the
    # cache-reading code that follows — confirm against the original file.
    if not force_rebuild and os.path.exists(CACHE_FILE):
        try:
            with open(CACHE_FILE) as f:
                cache_data = json.load(f)
            # Honor the cache only while it is younger than CACHE_EXPIRY_DAYS.
            if datetime.now() - parser.parse(cache_data['timestamp']) < timedelta(days=CACHE_EXPIRY_DAYS):
                return cache_data['packages']
        except (OSError, ValueError, KeyError):
            # A corrupt or unreadable cache must never abort the run;
            # fall through and rebuild it.
            pass
    packages = build_high_level_packages(grand_summary)
    with open(CACHE_FILE, 'w') as f:
        json.dump({'timestamp': datetime.now().isoformat(), 'packages': packages}, f)
    return packages
105138
106139def print_summary (packages , special_cases , missing_libraries , binary_path ):
107140 print ("\n Summary of unique runtime packages required:" )
108141 table = PrettyTable (['Package Name' , 'Full Package Name' ])
109142 table .align ['Package Name' ] = 'l'
110143 table .align ['Full Package Name' ] = 'l'
111-
112144 unique_packages = sorted (set (packages ))
113145 for package_name , full_package_name in unique_packages :
114146 table .add_row ([package_name , full_package_name ])
115147 print (table )
116-
117148 if missing_libraries :
118149 print ("\n MISSING LIBRARIES:" )
119150 missing_table = PrettyTable (['Missing Library' , 'Referenced By' ])
@@ -122,7 +153,6 @@ def print_summary(packages, special_cases, missing_libraries, binary_path):
122153 for lib in missing_libraries :
123154 missing_table .add_row ([lib , binary_path ])
124155 print (missing_table )
125-
126156 if special_cases :
127157 print ("\n SPECIAL CASES:" )
128158 special_table = PrettyTable (['Library/Case' , 'Referenced By' , 'Category' ])
@@ -140,23 +170,16 @@ def print_summary(packages, special_cases, missing_libraries, binary_path):
140170def process_binary (binary_path ):
141171 print (f"Binary: { binary_path } \n " )
142172 print ("Libraries and their corresponding packages:" )
143-
144- packages = []
145- special_cases = []
146- missing_libraries = []
173+ packages , special_cases , missing_libraries = [], [], []
147174 known_special_cases = ['linux-vdso.so.1' , 'ld-linux-x86-64.so.2' ]
148-
149175 ldd_output = run_command (['ldd' , binary_path ])
150176 if ldd_output is None :
151177 return packages , special_cases , missing_libraries
152-
153178 for line in ldd_output .splitlines ():
154179 if any (special in line for special in known_special_cases ):
155180 continue
156-
157181 parts = line .split ('=>' )
158182 lib_name = parts [0 ].strip ()
159-
160183 if "not found" in line :
161184 missing_libraries .append (lib_name )
162185 print (f"MISSING: { line .strip ()} " )
@@ -185,14 +208,12 @@ def process_binary(binary_path):
185208 special_case = f"{ line .strip ()} is a special case or built-in library"
186209 special_cases .append (special_case )
187210 print (f"{ line .strip ()} => Special case or built-in library" )
188-
189211 if special_cases :
190212 print (f"Special cases found for { binary_path } :" )
191213 for case in special_cases :
192214 print (f" - { case } " )
193215 else :
194216 print (f"No special cases found for { binary_path } " )
195-
196217 print_summary (packages , special_cases , missing_libraries , binary_path )
197218 print ("-------------------------------------------" )
198219 return packages , special_cases , missing_libraries
@@ -205,20 +226,16 @@ def print_grand_summary(grand_summary, grand_special_cases, grand_missing_librar
205226 if grand_summary or grand_special_cases or grand_missing_libraries :
206227 print ("\n Grand Summary of high-level runtime packages required across all binaries:" )
207228 high_level_summary = defaultdict (set )
208-
209229 for package_name , full_package_names in grand_summary .items ():
210230 high_level_package = PACKAGE_TO_HIGH_LEVEL .get (package_name .split ('-' )[0 ], package_name .split ('-' )[0 ])
211231 high_level_summary [high_level_package ].update (full_package_names )
212-
213232 table = PrettyTable (['High-Level Package' , 'Included Packages' ])
214233 table .align ['High-Level Package' ] = 'l'
215234 table .align ['Included Packages' ] = 'l'
216-
217235 for high_level_package , full_package_names in sorted (high_level_summary .items ()):
218236 included_packages = '\n ' .join (sorted (full_package_names ))
219237 table .add_row ([high_level_package , included_packages ])
220238 print (table )
221-
222239 if grand_missing_libraries :
223240 print ("\n Grand Summary of MISSING LIBRARIES across all binaries:" )
224241 missing_table = PrettyTable (['Missing Library' , 'Referenced By' ])
@@ -227,19 +244,16 @@ def print_grand_summary(grand_summary, grand_special_cases, grand_missing_librar
227244 for lib , binaries in sorted (grand_missing_libraries .items ()):
228245 missing_table .add_row ([lib , '\n ' .join (sorted (binaries ))])
229246 print (missing_table )
230-
231247 print ("\n Grand Summary of special cases across all binaries:" )
232248 if grand_special_cases :
233249 special_table = PrettyTable (['Library/Case' , 'Referenced By' , 'Category' ])
234250 special_table .align ['Library/Case' ] = 'l'
235251 special_table .align ['Referenced By' ] = 'l'
236252 special_table .align ['Category' ] = 'l'
237-
238253 for case , binary in sorted (set (grand_special_cases )):
239254 category = "Custom/Non-RPM" if "custom or non-RPM library" in case else "Other"
240255 library = case .split (" is " )[0 ] if " is " in case else case
241256 special_table .add_row ([library , binary , category ])
242-
243257 print (special_table )
244258 else :
245259 print ("No special cases found." )
@@ -265,7 +279,6 @@ def analyze_path(path, grand_summary, grand_special_cases, grand_missing_librari
265279 grand_missing_libraries [lib ].add (file_path )
266280 else :
267281 print (f"Error: { path } is neither a valid file nor a directory." )
268-
269282 if grand_special_cases :
270283 print (f"Accumulated special cases after processing { path } :" )
271284 for case , binary in grand_special_cases :
@@ -275,22 +288,17 @@ def analyze_path(path, grand_summary, grand_special_cases, grand_missing_librari
275288
def main():
    """Parse command-line arguments and run the dependency analysis.

    Processes each given path (file or directory), accumulates per-binary
    results, then prints the grand summary grouped by high-level packages.
    """
    check_requirements()

    # Renamed from 'parser': the original local shadowed the module-level
    # 'parser' imported from dateutil.
    arg_parser = argparse.ArgumentParser(description="ELF Dependency Analyzer")
    arg_parser.add_argument('paths', nargs='+', help="Paths to files or directories to analyze")
    arg_parser.add_argument('--rebuild-cache', action='store_true', help="Force rebuild of the high-level packages cache")
    args = arg_parser.parse_args()

    grand_summary = defaultdict(set)
    grand_special_cases = []
    grand_missing_libraries = defaultdict(set)
    for path in args.paths:
        analyze_path(path, grand_summary, grand_special_cases, grand_missing_libraries)

    # Locals renamed to snake_case: the original UPPER_CASE names read as
    # module constants (PEP 8).
    high_level_packages = load_or_build_high_level_packages(grand_summary, args.rebuild_cache)
    # Invert the mapping: each low-level base name -> its high-level package.
    # If a low-level name appears under several high-level packages, the
    # last one wins (same as the original dict comprehension).
    package_to_high_level = {low: high for high, lows in high_level_packages.items() for low in lows}
    print_grand_summary(grand_summary, grand_special_cases, grand_missing_libraries, high_level_packages, package_to_high_level)
295303
296304if __name__ == '__main__' :
0 commit comments