13
13
from scipy .stats import kendalltau , gamma
14
14
import pickle
15
15
import pickletools
16
+ from collections .abc import Iterable
17
+
18
def flatten(nestedList):
    '''Walk an arbitrarily nested list and yield its leaf values in order.

    Strings (and bytes) are treated as leaf values, so they are yielded
    whole rather than being expanded into their individual characters.

    Parameters
    ----------
    nestedList : list
        A list whose items are either leaf values (e.g. strings) or further
        iterables of such items.

    Yields
    ------
    object
        Each leaf value, in depth-first, left-to-right order.
    '''
    for element in nestedList:
        # A leaf is anything that cannot be iterated, plus str/bytes, which
        # are iterable but must be kept intact.
        isLeaf = isinstance(element, (str, bytes)) or not isinstance(element, Iterable)
        if isLeaf:
            yield element
            continue
        yield from flatten(element)
16
37
17
38
class JSONFiles :
18
39
@classmethod
@@ -997,13 +1018,14 @@ def createRequirementsJSON(cls, jPath=Path.cwd()):
997
1018
picklePackages = []
998
1019
pickleFiles = cls .getPickleFile (jPath )
999
1020
for pickleFile in pickleFiles :
1000
- picklePackages .append (cls .getDependenciesFromPickleFile (pickleFile ))
1001
-
1002
- codeDependencies = cls .getCodeDependencies (jPath )
1021
+ picklePackages .append (cls .getDependenciesFromPickleFile (cls , pickleFile ))
1003
1022
1004
- packageList = picklePackages + codeDependencies
1005
- packageAndVersion = cls .getLocalPackageVersion (list (set (packageList )))
1023
+ codeDependencies = cls .getCodeDependencies (cls , jPath )
1006
1024
1025
+ packageList = list (picklePackages ) + codeDependencies
1026
+ packageList = list (set (list (flatten (packageList ))))
1027
+ packageList = cls .removeStdlibPackages (packageList )
1028
+ packageAndVersion = cls .getLocalPackageVersion (packageList )
1007
1029
# Identify packages with missing versions
1008
1030
missingPackageVersions = [item [0 ] for item in packageAndVersion if not item [1 ]]
1009
1031
@@ -1012,7 +1034,7 @@ def createRequirementsJSON(cls, jPath=Path.cwd()):
1012
1034
jsonStep = json .dumps (
1013
1035
[
1014
1036
{
1015
- "Warning" : "The versions for the following packages could not be determined" ,
1037
+ "Warning" : "The versions for the following packages could not be determined: " ,
1016
1038
"Packages" : ", " .join (missingPackageVersions )
1017
1039
}
1018
1040
],
@@ -1042,10 +1064,10 @@ def createRequirementsJSON(cls, jPath=Path.cwd()):
1042
1064
)
1043
1065
file .write (jsonStep )
1044
1066
1045
- def getLocalPackageVersion (self , packageList ):
1046
- '''Get package versions from the local environment. For Python versions
1047
- < 3.8, if the package does not contain an attribute of "__version__",
1048
- "version", or " VERSION", no package version will be found.
1067
+ def getLocalPackageVersion (packageList ):
1068
+ '''Get package versions from the local environment. If the package
1069
+ does not contain an attribute of "__version__", "version", or
1070
+ "VERSION", no package version will be found.
1049
1071
1050
1072
Parameters
1051
1073
----------
@@ -1057,20 +1079,16 @@ def getLocalPackageVersion(self, packageList):
1057
1079
list
1058
1080
Nested list of Python package names and found versions.
1059
1081
'''
1060
- packageAndVersion = []
1061
- if sys .version_info [1 ] >= 8 :
1062
- from importlib .metadata import version
1063
- for package in packageList :
1064
- try :
1065
- packageAndVersion .append ([package ,version (package )])
1066
- except PackageNotFoundError :
1067
- print ("Warning: Package {} was not found in the local environment, so a version could not be determined." .format (package ))
1068
- print ("The pip installation command will not include a version number for {}." ).format (package )
1069
- packageAndVersion .append ([package , None ])
1082
+ def packageNotFoundOutput (package , packageAndVersion ):
1083
+ print ("Warning: Package {} was not found in the local environment, so a version could not be determined." .format (package ))
1084
+ print ("The pip installation command will not include a version number for {}." .format (package ))
1085
+ packageAndVersion .append ([package , None ])
1070
1086
return packageAndVersion
1071
- else :
1072
- import importlib
1073
- for package in packageList :
1087
+
1088
+ packageAndVersion = []
1089
+ import importlib
1090
+ for package in packageList :
1091
+ try :
1074
1092
name = importlib .import_module (package )
1075
1093
try :
1076
1094
packageAndVersion .append ([package , name .__version__ ])
@@ -1082,12 +1100,13 @@ def getLocalPackageVersion(self, packageList):
1082
1100
try :
1083
1101
packageAndVersion .append ([package , name .VERSION ])
1084
1102
except AttributeError :
1085
- print ("Warning: Package {} was not found in the local environment, so a version could not be determined." .format (package ))
1086
- print ("The pip installation command will not include a version number for {}." .format (package ))
1087
- packageAndVersion .append ([package , None ])
1088
- return packageAndVersion
1103
+ packageAndVersion = packageNotFoundOutput (package , packageAndVersion )
1104
+ except ModuleNotFoundError :
1105
+ packageAndVersion = packageNotFoundOutput (package , packageAndVersion )
1089
1106
1090
- def getCodeDependencies (self , jPath = Path .cwd ()):
1107
+ return packageAndVersion
1108
+
1109
+ def getCodeDependencies (cls , jPath = Path .cwd ()):
1091
1110
'''Get the package dependencies for all Python scripts in the
1092
1111
provided directory path. Note that currently this functionality
1093
1112
only works for .py files.
@@ -1108,12 +1127,11 @@ def getCodeDependencies(self, jPath=Path.cwd()):
1108
1127
1109
1128
importInfo = []
1110
1129
for file in fileNames :
1111
- importInfo .append (self .findImports (file ))
1112
- importInfo = list (set (importInfo ))
1113
-
1130
+ importInfo .append (cls .findImports (file ))
1131
+ importInfo = list (set (flatten (importInfo )))
1114
1132
return importInfo
1115
1133
1116
- def findImports (self , fPath ):
1134
+ def findImports (fPath ):
1117
1135
'''Find import calls in provided Python code path. Ignores
1118
1136
built in Python modules.
1119
1137
@@ -1156,7 +1174,7 @@ def findImports(self, fPath):
1156
1174
except ValueError :
1157
1175
return modules
1158
1176
1159
- def getPickleFile (self , pPath ):
1177
+ def getPickleFile (pPath = Path . cwd () ):
1160
1178
"""
1161
1179
Given a file path, retrieve the pickle file(s).
1162
1180
@@ -1176,7 +1194,7 @@ def getPickleFile(self, pPath):
1176
1194
fileNames .extend (sorted (Path (pPath ).glob ("*.pickle" )))
1177
1195
return fileNames
1178
1196
1179
- def getDependenciesFromPickleFile (self , pickleFile ):
1197
+ def getDependenciesFromPickleFile (cls , pickleFile ):
1180
1198
"""
1181
1199
Reads the pickled byte stream from a file object, serializes the pickled byte
1182
1200
stream as a bytes object, and inspects the bytes object for all Python modules
@@ -1189,19 +1207,20 @@ def getDependenciesFromPickleFile(self, pickleFile):
1189
1207
1190
1208
Returns
1191
1209
-------
1192
- set
1193
- A set of modules obtained from the pickle stream.
1210
+ list
1211
+ A list of modules obtained from the pickle stream. Duplicates are removed and
1212
+ Python built-in modules are removed.
1194
1213
"""
1195
1214
1196
1215
with (open (pickleFile , "rb" )) as openfile :
1197
1216
obj = pickle .load (openfile )
1198
1217
dumps = pickle .dumps (obj )
1199
1218
1200
- modules = {mod .split ("." )[0 ] for mod , _ in self .getPackageNames (dumps )}
1219
+ modules = {mod .split ("." )[0 ] for mod , _ in cls .getPackageNames (dumps )}
1201
1220
modules .discard ("builtins" )
1202
- return modules
1221
+ return list ( modules )
1203
1222
1204
- def getPackageNames (self , stream ):
1223
+ def getPackageNames (stream ):
1205
1224
"""
1206
1225
Generates (module, class_name) tuples from a pickle stream. Extracts all class names referenced
1207
1226
by GLOBAL and STACK_GLOBAL opcodes.
@@ -1259,4 +1278,57 @@ def getPackageNames(self, stream):
1259
1278
if len (after ) == 1 and opcode .arg is not None :
1260
1279
stack .append (arg )
1261
1280
else :
1262
- stack .extend (after )
1281
+ stack .extend (after )
1282
+
1283
def removeStdlibPackages(packageList):
    '''Remove any packages from the required list of installed packages that are part of the Python
    Standard Library.

    The filter is the union of a fixed snapshot of the Python 3.10 standard library module names
    and, when the running interpreter exposes it, ``sys.stdlib_module_names``. The second source
    keeps standard library modules added after 3.10 from being reported as installable packages.

    Parameters
    ----------
    packageList : list
        List of package names (strings) found that are not Python built-in packages.

    Returns
    -------
    list
        The entries of packageList, in their original order, that are not part of the Python
        Standard Library.
    '''
    import sys

    # Snapshot of the Python 3.10 standard library. Stored as a frozenset so each membership
    # test is a hash lookup instead of a linear scan.
    py10stdlib = frozenset((
        '_aix_support', '_heapq', 'lzma', 'gc', 'mailcap', 'winsound', 'sre_constants', 'netrc', 'audioop',
        'xdrlib', 'code', '_pyio', '_gdbm', 'unicodedata', 'pwd', 'xml', '_symtable', 'pkgutil', '_decimal',
        '_compat_pickle', '_frozen_importlib_external', '_signal', 'fcntl', 'wsgiref', 'uu', 'textwrap',
        '_codecs_iso2022', 'keyword', 'distutils', 'binascii', 'email', 'reprlib', 'cmd', 'cProfile',
        'dataclasses', '_sha512', 'ntpath', 'readline', 'signal', '_elementtree', 'dis', 'rlcompleter',
        '_json', '_ssl', '_sha3', '_winapi', 'telnetlib', 'pyexpat', '_lzma', 'http', 'poplib', 'tokenize',
        '_dbm', '_io', 'linecache', 'json', 'faulthandler', 'hmac', 'aifc', '_csv', '_codecs_hk', 'selectors',
        '_random', '_pickle', '_lsprof', 'turtledemo', 'cgitb', '_sitebuiltins', 'binhex', 'fnmatch',
        'sysconfig', 'datetime', 'quopri', 'copyreg', '_pydecimal', 'pty', 'stringprep', 'bisect', '_abc',
        '_codecs_jp', '_md5', 'errno', 'compileall', '_threading_local', 'dbm', 'builtins', 'difflib',
        'imghdr', '__future__', '_statistics', 'getopt', 'xmlrpc', '_sqlite3', '_sha1', 'shelve',
        '_posixshmem', 'struct', 'timeit', 'ensurepip', 'pathlib', 'ctypes', '_multiprocessing', 'tty',
        '_weakrefset', 'sqlite3', 'tracemalloc', 'venv', 'unittest', '_blake2', 'mailbox', 'resource',
        'shutil', 'winreg', '_opcode', '_codecs_tw', '_operator', 'imp', '_string', 'os', 'opcode',
        '_zoneinfo', '_posixsubprocess', 'copy', 'symtable', 'itertools', 'sre_parse', '_bisect', '_imp', 're',
        'ast', 'zlib', 'fractions', 'pickle', 'profile', 'sys', 'ssl', 'cgi', 'enum', 'modulefinder',
        'py_compile', '_curses', '_functools', 'cmath', '_crypt', 'contextvars', 'math', 'uuid', 'argparse',
        '_frozen_importlib', 'inspect', 'posix', 'statistics', 'marshal', 'nis', '_bz2', 'pipes',
        'socketserver', 'pstats', 'site', 'trace', 'lib2to3', 'zipapp', 'runpy', 'sre_compile', 'time',
        'pprint', 'base64', '_stat', '_ast', 'pdb', '_markupbase', '_bootsubprocess', '_collections', '_sre',
        'msilib', 'crypt', 'gettext', 'mimetypes', '_overlapped', 'asyncore', 'zipimport', 'chunk', 'atexit',
        'graphlib', '_multibytecodec', 'gzip', 'io', 'logging', 'nntplib', 'genericpath', 'syslog', 'token',
        '_msi', 'idlelib', '_hashlib', 'threading', 'select', 'doctest', 'getpass', '_sha256', 'importlib',
        '_tracemalloc', 'multiprocessing', 'calendar', '_codecs_cn', '_tkinter', '_uuid', 'socket',
        'antigravity', 'string', '_locale', '_thread', 'grp', 'this', 'zoneinfo', 'abc', 'operator', 'colorsys',
        'tabnanny', '_weakref', 'imaplib', 'concurrent', 'subprocess', '_compression', 'pyclbr', 'tarfile',
        'numbers', 'queue', 'posixpath', 'smtpd', 'webbrowser', 'asynchat', 'weakref', 'filecmp', 'decimal',
        '_py_abc', 'collections', 'tempfile', '_collections_abc', 'sched', 'locale', 'secrets', 'msvcrt',
        'asyncio', 'array', '_codecs_kr', '_scproxy', '_strptime', 'heapq', '_socket', 'sndhdr', 'types', 'nt',
        '_datetime', 'shlex', 'tkinter', 'curses', 'encodings', 'pickletools', 'html', '_codecs', 'codeop',
        '_ctypes', 'bz2', 'contextlib', 'platform', 'termios', '_asyncio', 'ftplib', 'pydoc_data',
        '_contextvars', 'codecs', 'traceback', 'pydoc', 'fileinput', 'ossaudiodev', 'urllib', 'csv', 'sunau',
        '_curses_panel', 'wave', 'mmap', 'warnings', 'functools', 'ipaddress', 'nturl2path', 'optparse', '_queue',
        'turtle', 'spwd', 'stat', 'configparser', '_warnings', 'bdb', '_osx_support', 'typing', 'zipfile', 'glob',
        'random', 'smtplib', 'plistlib', 'hashlib', '_struct',
    ))

    # sys.stdlib_module_names only exists on Python 3.10+; on older interpreters fall back to
    # the snapshot alone.
    stdlibNames = py10stdlib | frozenset(getattr(sys, 'stdlib_module_names', ()))

    return [package for package in packageList if package not in stdlibNames]
0 commit comments