@@ -1009,11 +1009,11 @@ def create_requirements_json(cls, json_path=Path.cwd()):
1009
1009
----------
1010
1010
json_path : str, optional
1011
1011
The path to a Python project, by default the current working directory.
1012
+
1012
1013
Yields
1013
1014
------
1014
1015
requirements.json : file
1015
- JSON file used to create a specific Python environment in a SAS Model Manager published
1016
- container.
1016
+ JSON file used to create a specific Python environment in a SAS Model Manager published container.
1017
1017
"""
1018
1018
1019
1019
pickle_packages = []
@@ -1037,13 +1037,15 @@ def create_requirements_json(cls, json_path=Path.cwd()):
1037
1037
json_step = json .dumps (
1038
1038
[
1039
1039
{
1040
- "Warning" : "The versions for the following packages could not be determined:" ,
1040
+ "Warning" : "The existence and/or versions for the following packages could not be "
1041
+ "determined:" ,
1041
1042
"Packages" : ", " .join (missing_package_versions ),
1042
1043
}
1043
1044
],
1044
1045
indent = 4 ,
1045
1046
)
1046
1047
file .write (json_step )
1048
+
1047
1049
for package , version in package_and_version :
1048
1050
if version :
1049
1051
json_step = json .dumps (
@@ -1055,16 +1057,6 @@ def create_requirements_json(cls, json_path=Path.cwd()):
1055
1057
],
1056
1058
indent = 4 ,
1057
1059
)
1058
- else :
1059
- json_step = json .dumps (
1060
- [
1061
- {
1062
- "step" : "install " + package ,
1063
- "command" : "pip install " + package ,
1064
- }
1065
- ],
1066
- indent = 4 ,
1067
- )
1068
1060
file .write (json_step )
1069
1061
1070
1062
@classmethod
@@ -1087,11 +1079,10 @@ def get_local_package_version(cls, package_list):
1087
1079
1088
1080
def package_not_found_output (package_name , package_versions ):
1089
1081
print (
1090
- f"Warning: Package { package_name } was not found in the local environment, so a version could not be "
1091
- "determined."
1092
- )
1093
- print (
1094
- f"The pip installation command will not include a version number for { package_name } ."
1082
+ f"Warning: Package { package_name } was not found in the local environment. Either { package_name } is not "
1083
+ f"a valid Python package, or the package is not present in this environment. The requirements.json file"
1084
+ f" will include a commented out version of the pip installation command at the bottom of the file. "
1085
+ f"Please review the file and verify that the package exists and input the version needed."
1095
1086
)
1096
1087
package_versions .append ([package_name , None ])
1097
1088
return package_versions
@@ -1234,16 +1225,17 @@ def get_pickle_dependencies(cls, pickle_file):
1234
1225
obj = pickle .load (open_file )
1235
1226
dumps = pickle .dumps (obj )
1236
1227
1237
- modules = {mod .split ("." )[0 ] for mod , _ in cls .get_package_names (dumps )}
1238
- modules .discard ("builtins" )
1239
- return list (modules )
1228
+ modules = cls .get_package_names (dumps )
1229
+ return modules
1240
1230
1241
1231
@classmethod
1242
1232
def get_package_names (cls , stream ):
1243
1233
"""
1244
- Generates (module, class_name) tuples from a pickle stream. Extracts all class names referenced
1245
- by GLOBAL and STACK_GLOBAL opcodes.
1234
+ Generates a list of found `package` names from a pickle stream. In most cases, the `packages` returned by the
1235
+ function will be valid Python packages. A check is made in get_local_package_version to ensure that the package
1236
+ is in fact a valid Python package.
1246
1237
1238
+ This code has been adapted from the following Stack Overflow example and utilizes the pickletools module.
1247
1239
Credit: modified from https://stackoverflow.com/questions/64850179/inspecting-a-pickle-dump-for-dependencies
1248
1240
More information here: https://github.com/python/cpython/blob/main/Lib/pickletools.py
1249
1241
@@ -1252,54 +1244,48 @@ def get_package_names(cls, stream):
1252
1244
stream : bytes or str
1253
1245
A file-like object or string containing the pickle.
1254
1246
1255
- Yields
1256
- ------
1257
- tuple
1258
- Generated ( module, class_name) tuples .
1247
+ Returns
1248
+ -------
1249
+ list
1250
+ List of package names found as module dependencies in the pickle file.
1259
1251
"""
1252
+ # Collect all the opcodes, arguments, and position values from the pickle stream into three lists
1253
+ opcode , arg , pos = [], [], []
1254
+ for o , a , p in pickletools .genops (stream ):
1255
+ opcode .append (o .name )
1256
+ arg .append (a )
1257
+ pos .append (p )
1258
+
1259
+ # Convert to a pandas dataframe for ease of conditional filtering
1260
+ df_pickle = pd .DataFrame ({"opcode" : opcode , "arg" : arg , "pos" : pos })
1261
+
1262
+ # For all opcodes labelled GLOBAL or STACK_GLOBAL pull out the package names
1263
+ global_stack = df_pickle [
1264
+ (df_pickle .opcode == "GLOBAL" ) | (df_pickle .opcode == "STACK_GLOBAL" )
1265
+ ]
1266
+ # From the argument column, keep the part before the first whitespace, then split that string of the form `X.Y.Z` by `.` and return only the unique `X` values
1267
+ stack_packages = (
1268
+ global_stack .arg .str .split ().str [0 ].str .split ("." ).str [0 ].unique ().tolist ()
1269
+ )
1260
1270
1261
- stack , mark_stack , memo = [], [], []
1262
- mark = pickletools .markobject
1263
-
1264
- # Step through the pickle stack and retrieve names used by STACK_GLOBAL
1265
- for opcode , arg , pos in pickletools .genops (stream ):
1266
-
1267
- before , after = opcode .stack_before , opcode .stack_after
1268
- number_to_pop = len (before )
1269
-
1270
- if opcode .name == "GLOBAL" :
1271
- yield tuple (arg .split (1 , None ))
1272
- elif opcode .name == "STACK_GLOBAL" :
1273
- yield stack [- 2 ], stack [- 1 ]
1274
- elif mark in before or (
1275
- opcode .name == "POP" and stack and stack [- 1 ] is mark
1276
- ):
1277
- mark_stack .pop ()
1278
- while stack [- 1 ] is not mark :
1279
- stack .pop ()
1280
- stack .pop ()
1281
- try :
1282
- number_to_pop = before .index (mark )
1283
- except ValueError :
1284
- number_to_pop = 0
1285
- elif opcode .name in {"PUT" , "BINPUT" , "LONG_BINPUT" , "MEMOIZE" }:
1286
- if opcode .name == "MEMOIZE" :
1287
- memo .append (stack [- 1 ])
1288
- else :
1289
- memo [arg ] = stack [- 1 ]
1290
- number_to_pop , after = 0 , [] # memoize and put; do not pop the stack
1291
- elif opcode .name in {"GET" , "BINGET" , "LONG_BINGET" }:
1292
- arg = memo [arg ]
1293
-
1294
- if number_to_pop :
1295
- del stack [- number_to_pop :]
1296
- if mark in after :
1297
- mark_stack .append (pos )
1298
-
1299
- if len (after ) == 1 and opcode .arg is not None :
1300
- stack .append (arg )
1301
- else :
1302
- stack .extend (after )
1271
+ # For all opcodes labelled BINUNICODE or SHORT_BINUNICODE pull out the package names
1272
+ binunicode = df_pickle [
1273
+ (df_pickle .opcode == "BINUNICODE" )
1274
+ | (df_pickle .opcode == "SHORT_BINUNICODE" )
1275
+ ]
1276
+ # From the argument column, split the string by `.`, then return only unique cells with at least one split
1277
+ arg_binunicode = binunicode .arg .str .split ("." )
1278
+ unicode_packages = (
1279
+ arg_binunicode .loc [arg_binunicode .str .len () > 1 ].str [0 ].unique ().tolist ()
1280
+ )
1281
+ # Remove invalid `package` names from the list
1282
+ unicode_packages = [x for x in unicode_packages if x .isidentifier ()]
1283
+
1284
+ # Combine the two package lists and remove any duplicates
1285
+ packages = list (set (stack_packages + unicode_packages ))
1286
+
1287
+ # Return the package list without any None values
1288
+ return [x for x in packages if x ]
1303
1289
1304
1290
@classmethod
1305
1291
def remove_standard_library_packages (cls , package_list ):
0 commit comments