Skip to content
This repository was archived by the owner on Sep 12, 2025. It is now read-only.

Commit 44b1425

Browse files
committed
Update docs
1 parent 57faae9 commit 44b1425

File tree

1 file changed: +54 additions, −46 deletions

scripts/elf_dependency_analyzer.py

Lines changed: 54 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,68 @@
11
#!/usr/bin/env python3
"""
ELF Dependency Analyzer

This script analyzes ELF (Executable and Linkable Format) binaries to determine their runtime
package dependencies. It can process individual files or recursively analyze directories.

The script provides information about:
- Required packages and their versions
- Missing libraries
- Custom or non-RPM libraries
- Other special cases

It also groups packages by their high-level dependencies, which can be cached for performance.

Usage:
    python3 elf_dependency_analyzer.py [--rebuild-cache] <file_or_directory> [<file_or_directory> ...]

The script automatically determines whether each argument is a file or a directory and
processes it accordingly. Use --rebuild-cache to force rebuilding of the high-level
packages cache.

Requirements:
- Python 3.6+
- prettytable (pip install prettytable)
- python-dateutil (pip install python-dateutil)
- ldd (usually pre-installed on Linux systems)
- file (usually pre-installed on Linux systems)
- rpm (usually pre-installed on RPM-based Linux distributions)
- repoquery (part of the yum-utils package)

Functions:
- check_requirements(): Checks if all required commands are available.
- run_command(command): Executes a shell command and returns its output.
- parse_ldd_line(line): Parses a line of ldd output to extract the library name.
- find_library_in_ld_library_path(lib_name): Searches for a library in LD_LIBRARY_PATH.
- get_package_info(lib_path): Gets package information for a given library.
- get_package_dependencies(package): Gets dependencies of a package using repoquery.
- build_high_level_packages(grand_summary): Builds a mapping of high-level packages to their dependencies.
- load_or_build_high_level_packages(grand_summary, force_rebuild): Loads or builds the high-level packages cache.
- print_summary(packages, special_cases, missing_libraries, binary_path): Prints a summary for a single binary.
- process_binary(binary_path): Processes a single binary file.
- is_elf_binary(file_path): Checks if a file is an ELF binary.
- print_grand_summary(...): Prints a grand summary of all processed binaries.
- analyze_path(path, grand_summary, grand_special_cases, grand_missing_libraries): Analyzes a file or directory.
- main(): Main function to handle command-line arguments and initiate the analysis.

This script is designed to help system administrators and developers understand the dependencies
of ELF binaries in their systems, which can be useful for troubleshooting, optimizing, or
preparing deployment packages.
"""
52+
53+
import os, subprocess, re, sys, json, shutil
854
from collections import defaultdict
955
from datetime import datetime, timedelta
1056
import argparse
1157
from prettytable import PrettyTable
1258
from dateutil import parser
13-
import shutil
1459

1560
CACHE_FILE = 'high_level_packages_cache.json'
1661
CACHE_EXPIRY_DAYS = 7
1762

1863
def check_requirements():
1964
required_commands = ['ldd', 'file', 'rpm', 'repoquery']
20-
missing_commands = []
21-
22-
for cmd in required_commands:
23-
if shutil.which(cmd) is None:
24-
missing_commands.append(cmd)
25-
65+
missing_commands = [cmd for cmd in required_commands if shutil.which(cmd) is None]
2666
if missing_commands:
2767
print("Error: The following required commands are missing:")
2868
for cmd in missing_commands:
@@ -57,7 +97,6 @@ def get_package_info(lib_path):
5797
lib_path = find_library_in_ld_library_path(lib_name)
5898
if not lib_path:
5999
return None
60-
61100
try:
62101
full_package_name = run_command(['rpm', '-qf', lib_path])
63102
if full_package_name:
def build_high_level_packages(grand_summary):
    """Map each base package name seen in *grand_summary* to its dependencies' base names.

    The base name is everything before the first '-' in the full package name.
    Packages for which repoquery reports no dependencies are omitted from the
    result.

    Args:
        grand_summary: mapping whose values are iterables of full package names.

    Returns:
        dict mapping base package name -> list of base names of its dependencies.
    """
    # Collect every distinct base package name across all binaries.
    base_names = {
        full_name.split('-')[0]
        for names in grand_summary.values()
        for full_name in names
    }

    mapping = {}
    for base_name in base_names:
        dependencies = get_package_dependencies(base_name)
        if dependencies:
            mapping[base_name] = [dep.split('-')[0] for dep in dependencies]
    return mapping
90127

91128
def load_or_build_high_level_packages(grand_summary, force_rebuild=False):
@@ -94,26 +131,20 @@ def load_or_build_high_level_packages(grand_summary, force_rebuild=False):
94131
cache_data = json.load(f)
95132
if datetime.now() - parser.parse(cache_data['timestamp']) < timedelta(days=CACHE_EXPIRY_DAYS):
96133
return cache_data['packages']
97-
98134
packages = build_high_level_packages(grand_summary)
99135
with open(CACHE_FILE, 'w') as f:
100-
json.dump({
101-
'timestamp': datetime.now().isoformat(),
102-
'packages': packages
103-
}, f)
136+
json.dump({'timestamp': datetime.now().isoformat(), 'packages': packages}, f)
104137
return packages
105138

106139
def print_summary(packages, special_cases, missing_libraries, binary_path):
107140
print("\nSummary of unique runtime packages required:")
108141
table = PrettyTable(['Package Name', 'Full Package Name'])
109142
table.align['Package Name'] = 'l'
110143
table.align['Full Package Name'] = 'l'
111-
112144
unique_packages = sorted(set(packages))
113145
for package_name, full_package_name in unique_packages:
114146
table.add_row([package_name, full_package_name])
115147
print(table)
116-
117148
if missing_libraries:
118149
print("\nMISSING LIBRARIES:")
119150
missing_table = PrettyTable(['Missing Library', 'Referenced By'])
@@ -122,7 +153,6 @@ def print_summary(packages, special_cases, missing_libraries, binary_path):
122153
for lib in missing_libraries:
123154
missing_table.add_row([lib, binary_path])
124155
print(missing_table)
125-
126156
if special_cases:
127157
print("\nSPECIAL CASES:")
128158
special_table = PrettyTable(['Library/Case', 'Referenced By', 'Category'])
@@ -140,23 +170,16 @@ def print_summary(packages, special_cases, missing_libraries, binary_path):
140170
def process_binary(binary_path):
141171
print(f"Binary: {binary_path}\n")
142172
print("Libraries and their corresponding packages:")
143-
144-
packages = []
145-
special_cases = []
146-
missing_libraries = []
173+
packages, special_cases, missing_libraries = [], [], []
147174
known_special_cases = ['linux-vdso.so.1', 'ld-linux-x86-64.so.2']
148-
149175
ldd_output = run_command(['ldd', binary_path])
150176
if ldd_output is None:
151177
return packages, special_cases, missing_libraries
152-
153178
for line in ldd_output.splitlines():
154179
if any(special in line for special in known_special_cases):
155180
continue
156-
157181
parts = line.split('=>')
158182
lib_name = parts[0].strip()
159-
160183
if "not found" in line:
161184
missing_libraries.append(lib_name)
162185
print(f"MISSING: {line.strip()}")
@@ -185,14 +208,12 @@ def process_binary(binary_path):
185208
special_case = f"{line.strip()} is a special case or built-in library"
186209
special_cases.append(special_case)
187210
print(f"{line.strip()} => Special case or built-in library")
188-
189211
if special_cases:
190212
print(f"Special cases found for {binary_path}:")
191213
for case in special_cases:
192214
print(f" - {case}")
193215
else:
194216
print(f"No special cases found for {binary_path}")
195-
196217
print_summary(packages, special_cases, missing_libraries, binary_path)
197218
print("-------------------------------------------")
198219
return packages, special_cases, missing_libraries
@@ -205,20 +226,16 @@ def print_grand_summary(grand_summary, grand_special_cases, grand_missing_librar
205226
if grand_summary or grand_special_cases or grand_missing_libraries:
206227
print("\nGrand Summary of high-level runtime packages required across all binaries:")
207228
high_level_summary = defaultdict(set)
208-
209229
for package_name, full_package_names in grand_summary.items():
210230
high_level_package = PACKAGE_TO_HIGH_LEVEL.get(package_name.split('-')[0], package_name.split('-')[0])
211231
high_level_summary[high_level_package].update(full_package_names)
212-
213232
table = PrettyTable(['High-Level Package', 'Included Packages'])
214233
table.align['High-Level Package'] = 'l'
215234
table.align['Included Packages'] = 'l'
216-
217235
for high_level_package, full_package_names in sorted(high_level_summary.items()):
218236
included_packages = '\n'.join(sorted(full_package_names))
219237
table.add_row([high_level_package, included_packages])
220238
print(table)
221-
222239
if grand_missing_libraries:
223240
print("\nGrand Summary of MISSING LIBRARIES across all binaries:")
224241
missing_table = PrettyTable(['Missing Library', 'Referenced By'])
@@ -227,19 +244,16 @@ def print_grand_summary(grand_summary, grand_special_cases, grand_missing_librar
227244
for lib, binaries in sorted(grand_missing_libraries.items()):
228245
missing_table.add_row([lib, '\n'.join(sorted(binaries))])
229246
print(missing_table)
230-
231247
print("\nGrand Summary of special cases across all binaries:")
232248
if grand_special_cases:
233249
special_table = PrettyTable(['Library/Case', 'Referenced By', 'Category'])
234250
special_table.align['Library/Case'] = 'l'
235251
special_table.align['Referenced By'] = 'l'
236252
special_table.align['Category'] = 'l'
237-
238253
for case, binary in sorted(set(grand_special_cases)):
239254
category = "Custom/Non-RPM" if "custom or non-RPM library" in case else "Other"
240255
library = case.split(" is ")[0] if " is " in case else case
241256
special_table.add_row([library, binary, category])
242-
243257
print(special_table)
244258
else:
245259
print("No special cases found.")
@@ -265,7 +279,6 @@ def analyze_path(path, grand_summary, grand_special_cases, grand_missing_librari
265279
grand_missing_libraries[lib].add(file_path)
266280
else:
267281
print(f"Error: {path} is neither a valid file nor a directory.")
268-
269282
if grand_special_cases:
270283
print(f"Accumulated special cases after processing {path}:")
271284
for case, binary in grand_special_cases:
@@ -275,22 +288,17 @@ def analyze_path(path, grand_summary, grand_special_cases, grand_missing_librari
275288

276289
def main():
    """Entry point: verify tooling, parse CLI arguments, analyze each path,
    then print the grand summary across all processed binaries.

    Command line:
        paths            one or more files or directories to analyze
        --rebuild-cache  force a rebuild of the high-level packages cache
    """
    check_requirements()

    # Named arg_parser (not `parser`) so it does not shadow the module-level
    # `from dateutil import parser` import.
    arg_parser = argparse.ArgumentParser(description="ELF Dependency Analyzer")
    arg_parser.add_argument('paths', nargs='+', help="Paths to files or directories to analyze")
    arg_parser.add_argument('--rebuild-cache', action='store_true', help="Force rebuild of the high-level packages cache")
    args = arg_parser.parse_args()

    # Accumulators shared across all analyzed paths.
    grand_summary = defaultdict(set)
    grand_special_cases = []
    grand_missing_libraries = defaultdict(set)

    for path in args.paths:
        analyze_path(path, grand_summary, grand_special_cases, grand_missing_libraries)

    # Lower-cased: these are function locals, not module constants.
    high_level_packages = load_or_build_high_level_packages(grand_summary, args.rebuild_cache)
    # Invert the mapping: each low-level dependency points back at its high-level package.
    package_to_high_level = {low: high for high, lows in high_level_packages.items() for low in lows}

    print_grand_summary(grand_summary, grand_special_cases, grand_missing_libraries, high_level_packages, package_to_high_level)
295303

296304
if __name__ == '__main__':

0 commit comments

Comments
 (0)