Skip to content

Commit f0686d1

Browse files
authored
Merge pull request #3839 from Flamefire/findPythonDeps
Add script to find dependencies of Python packages
2 parents ba19b7a + 7f8ab8d commit f0686d1

File tree

1 file changed

+196
-0
lines changed

1 file changed

+196
-0
lines changed
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
#!/usr/bin/env python
2+
3+
import argparse
4+
import json
5+
import os
6+
import re
7+
import shutil
8+
import subprocess
9+
import sys
10+
import tempfile
11+
from contextlib import contextmanager
12+
from pprint import pprint
13+
try:
14+
import pkg_resources
15+
except ImportError as e:
16+
print('pkg_resources could not be imported: %s\nYou might need to install setuptools!' % e)
17+
sys.exit(1)
18+
19+
20+
@contextmanager
def temporary_directory(*args, **kwargs):
    """Context manager that creates a directory with tempfile.mkdtemp and removes it on exit.

    All positional and keyword arguments are forwarded to tempfile.mkdtemp.
    The path of the created directory is the value bound by the ``with`` statement.
    """
    tmp_path = tempfile.mkdtemp(*args, **kwargs)
    try:
        yield tmp_path
    finally:
        # Runs whether or not the with-body raised, so the directory never outlives the block
        shutil.rmtree(tmp_path)
28+
29+
30+
def extract_pkg_name(package_spec):
    """Return the bare package name contained in a pip requirement spec.

    The name is everything before the first character that cannot be part of a
    package name: a version comparison operator (``<``, ``>``, ``=``, ``~``, ``!``),
    the start of an extras list (``[``), an environment marker (``;``),
    a direct reference (``@``) or whitespace.

    :param package_spec: requirement spec, e.g. 'tensorflow==2.3.4'
    :return: the package name, e.g. 'tensorflow'
    """
    # Split the argument itself (not the global command line arguments), so the function
    # works for any spec and also when no arguments were parsed.
    # maxsplit is passed by keyword as the positional form is deprecated in newer Python versions.
    return re.split(r'[<>=~!\[;@\s]', package_spec.strip(), maxsplit=1)[0]
32+
33+
34+
def can_run(cmd, argument):
    """Tell whether running `cmd` with the single `argument` succeeds.

    All output of the command is discarded.

    :return: True if the command exited with code 0,
             False if it exited with another code or could not be started
    """
    command = [cmd, argument]
    with open(os.devnull, 'w') as devnull:
        try:
            exit_code = subprocess.call(command, stdout=devnull, stderr=subprocess.STDOUT)
        except (subprocess.CalledProcessError, OSError):
            # e.g. the executable does not exist or is not executable
            return False
    return exit_code == 0
41+
42+
43+
def run_cmd(arguments, action_desc, capture_stderr=True, **kwargs):
    """Run the command and return its output.

    :param arguments: command to run, passed to subprocess.Popen as-is
                      (a list, or a string when shell=True is given)
    :param action_desc: description of what the command does, used in the error message
    :param capture_stderr: if True stderr is merged into the returned output,
                           otherwise stderr is kept out of the returned output
                           and only reported when the command fails
    :param kwargs: additional keyword arguments for subprocess.Popen
    :return: the output of the command
    :raises RuntimeError: if the command exits with a non-zero exit code
    """
    extra_args = dict(kwargs)
    if sys.version_info[0] >= 3:
        # Get text instead of bytes on Python 3
        extra_args['universal_newlines'] = True
    stderr = subprocess.STDOUT if capture_stderr else subprocess.PIPE
    p = subprocess.Popen(arguments, stdout=subprocess.PIPE, stderr=stderr, **extra_args)
    # err is None when stderr was merged into stdout
    out, err = p.communicate()
    if p.returncode != 0:
        details = out
        if err:
            # Don't drop the separately collected stderr: it usually explains the failure
            details = '%s\n%s' % (out, err) if out else err
        raise RuntimeError('Failed to %s: %s' % (action_desc, details))
    return out
54+
55+
56+
def run_in_venv(cmd, venv_path, action_desc):
    """Execute a shell command with the virtualenv located at `venv_path` activated.

    The activation script is sourced first, then `cmd` is run in the same bash shell.
    Output and failure handling are those of run_cmd.
    """
    activated_cmd = 'source %s/bin/activate && %s' % (venv_path, cmd)
    # 'source' is a bash builtin, hence the explicit shell executable
    return run_cmd(activated_cmd, action_desc, executable='/bin/bash', shell=True)
60+
61+
62+
def get_dep_tree(package_spec, verbose):
    """Get the dep-tree for installing the given Python package spec

    The package is installed into a fresh virtualenv inside a temporary directory,
    then pipdeptree is installed there and queried for the package.

    :param package_spec: pip requirement spec, e.g. 'tensorflow==2.3.4'
    :param verbose: print progress information
    :return: the parsed JSON output of pipdeptree
    :raises RuntimeError: if any of the executed commands fails
    """
    package_name = extract_pkg_name(package_spec)
    with temporary_directory(suffix=package_name + '-deps') as tmp_dir:
        # prevent pip from (ab)using $HOME/.cache/pip
        # Remember the previous value so the environment can be restored afterwards
        # instead of leaving it pointing to the (then deleted) temporary directory
        old_cache_home = os.environ.get('XDG_CACHE_HOME')
        os.environ['XDG_CACHE_HOME'] = os.path.join(tmp_dir, 'pip-cache')
        try:
            venv_dir = os.path.join(tmp_dir, 'venv')
            if verbose:
                print('Creating virtualenv at ' + venv_dir)
            run_cmd(['virtualenv', '--system-site-packages', venv_dir], action_desc='create virtualenv')
            if verbose:
                print('Updating pip in virtualenv')
            run_in_venv('pip install --upgrade pip', venv_dir, action_desc='update pip')
            if verbose:
                print('Installing %s into virtualenv' % package_spec)
            out = run_in_venv('pip install "%s"' % package_spec, venv_dir, action_desc='install ' + package_spec)
            print('%s installed: %s' % (package_spec, out))
            # install pipdeptree, figure out dependency tree for installed package
            run_in_venv('pip install pipdeptree', venv_dir, action_desc='install pipdeptree')
            dep_tree = run_in_venv('pipdeptree -j -p "%s"' % package_name,
                                   venv_dir, action_desc='collect dependencies')
        finally:
            if old_cache_home is None:
                os.environ.pop('XDG_CACHE_HOME', None)
            else:
                os.environ['XDG_CACHE_HOME'] = old_cache_home
    return json.loads(dep_tree)
84+
85+
86+
def find_deps(pkgs, dep_tree):
    """Recursively resolve dependencies of the given package(s) and return them

    Names are compared in normalized form (lower case, runs of '-', '_' and '.' treated as equal),
    so e.g. 'Foo_Bar' finds the installed package 'foo-bar'.

    :param pkgs: iterable of package names to resolve
    :param dep_tree: list of entries, each a dict with a 'package' dict
                     (keys 'package_name', 'key', 'installed_version')
                     and a 'dependencies' list of dicts with a 'package_name' key
    :return: list of (package_name, installed_version) tuples in depth-first order:
             each package is followed by its (recursive) dependencies.
             Packages required multiple times appear multiple times.
    :raises RuntimeError: if a package matches no entry or more than one entry in dep_tree
    """
    def normalize(name):
        """Return the name in a form that is equal for all spellings of the same package"""
        return re.sub(r'[-_.]+', '-', name).lower()

    def resolve(names, ancestors):
        """Resolve names, `ancestors` holds the ids of the entries currently being resolved"""
        res = []
        for pkg in names:
            wanted = normalize(pkg)
            matching_entries = [entry for entry in dep_tree
                                if wanted in (normalize(entry['package']['package_name']),
                                              normalize(entry['package']['key']))]
            if not matching_entries:
                raise RuntimeError("Found no installed package for '%s' in %s" % (pkg, dep_tree))
            if len(matching_entries) > 1:
                raise RuntimeError("Found multiple installed packages for '%s' in %s" % (pkg, dep_tree))
            entry = matching_entries[0]
            if id(entry) in ancestors:
                # Circular dependency: this package is already part of the result
                # and descending again would never terminate
                continue
            res.append((entry['package']['package_name'], entry['package']['installed_version']))
            deps = [dep['package_name'] for dep in entry['dependencies']]
            res.extend(resolve(deps, ancestors | frozenset([id(entry)])))
        return res

    return resolve(pkgs, frozenset())
101+
102+
103+
def print_deps(package, verbose):
    """Determine the dependencies of the given package spec and print them.

    Dependencies that are already installed in the current environment are left out.
    Two lists are printed: one in (likely) install order and a sorted one.
    """
    if verbose:
        print('Getting dep tree of ' + package)
    tree = get_dep_tree(package, verbose)
    if verbose:
        print('Extracting dependencies of ' + package)
    all_deps = find_deps([extract_pkg_name(package)], tree)

    installed_modules = set(mod.project_name for mod in pkg_resources.working_set)
    if verbose:
        print("Installed modules: %s" % installed_modules)

    # Walk the dependencies back to front so dependencies come before the packages requiring them.
    # Duplicates are dropped on the way and so is everything the current environment already provides.
    seen = set()
    missing = []
    for name_and_version in reversed(all_deps):
        if name_and_version in seen:
            continue
        seen.add(name_and_version)
        if name_and_version[0] not in installed_modules:
            missing.append(name_and_version)
        elif verbose:
            print("Skipping installed module '%s'" % name_and_version[0])

    print("List of dependencies in (likely) install order:")
    pprint(missing, indent=4)
    print("Sorted list of dependencies:")
    pprint(sorted(missing), indent=4)
132+
133+
134+
# The manual steps which --ec automates, joined into a single shell command for the help text
_equivalent_steps = [
    'eb TensorFlow-2.3.4.eb --dump-env',
    'source TensorFlow-2.3.4.env',
    sys.argv[0] + ' tensorflow==2.3.4',
]
examples = [
    'Example usage with EasyBuild (after installing dependency modules):',
    '\t' + sys.argv[0] + ' --ec TensorFlow-2.3.4.eb tensorflow==2.3.4',
    'Which is the same as:',
    '\t' + ' && '.join(_equivalent_steps),
]

# The raw formatter keeps the line breaks and tabs of the examples in the epilog
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description='Find dependencies of Python packages by installing it in a temporary virtualenv. ',
    epilog='\n'.join(examples),
)
parser.add_argument(
    'package',
    metavar='python-pkg-spec',
    help='Python package spec, e.g. tensorflow==2.3.4',
)
parser.add_argument(
    '--ec',
    metavar='easyconfig',
    help='EasyConfig to use as the build environment. '
         'You need to have dependency modules installed already!',
)
parser.add_argument('--verbose', action='store_true', help='Verbose output')
args = parser.parse_args()
154+
155+
# With --ec: verify that all dependency modules of the easyconfig are installed, let EasyBuild
# dump the build environment and run this script again (without --ec) inside that environment.
# Without --ec: determine and print the dependencies using the current environment.
if args.ec:
    if not can_run('eb', '--version'):
        print('EasyBuild not found or executable. Make sure it is in your $PATH when using --ec!')
        sys.exit(1)
    if args.verbose:
        print('Checking with EasyBuild for missing dependencies')
    missing_dep_out = run_cmd(['eb', args.ec, '--missing'],
                              capture_stderr=False,
                              action_desc='Get missing dependencies'
                              )
    # Lines starting with '*' are taken as missing modules; the one for the easyconfig itself is ignored
    missing_deps = [dep for dep in missing_dep_out.split('\n')
                    if dep.startswith('*') and '(%s)' % args.ec not in dep
                    ]
    if missing_deps:
        print('You need to install all modules on which %s depends first!' % args.ec)
        print('\n\t'.join(['Missing:'] + missing_deps))
        sys.exit(1)

    # EasyBuild is run from within the temporary directory below,
    # so a path to an existing easyconfig file must not be relative to the current directory
    ec_arg = os.path.abspath(args.ec) if os.path.isfile(args.ec) else args.ec

    with temporary_directory() as tmp_dir:
        if args.verbose:
            print('Running EasyBuild to get build environment')
        # Set the working directory only for the eb process (instead of os.chdir) such that
        # the working directory of this script is unchanged even if eb fails
        run_cmd(['eb', ec_arg, '--dump-env', '--force'], action_desc='Dump build environment', cwd=tmp_dir)

        cmd = 'source %s/*.env && %s %s "%s"' % (tmp_dir, sys.executable, sys.argv[0], args.package)
        if args.verbose:
            cmd += ' --verbose'
            print('Restarting script in new build environment')

        out = run_cmd(cmd, action_desc='Run in new environment', shell=True, executable='/bin/bash')
        print(out)
else:
    if not can_run('virtualenv', '--version'):
        print('Virtualenv not found or executable. ' +
              'Make sure it is installed (e.g. in the currently loaded Python module)!')
        sys.exit(1)
    if 'PIP_PREFIX' in os.environ:
        print("$PIP_PREFIX is set. Unsetting it as it doesn't work well with virtualenv.")
        del os.environ['PIP_PREFIX']
    print_deps(args.package, args.verbose)

0 commit comments

Comments
 (0)