import math
import numpy as np
from scipy.stats import kendalltau, gamma
-import types
import pickle
import pickletools
-import os

-# %%
class JSONFiles:
    @classmethod
    def writeVarJSON(cls, inputData, isInput=True, jPath=Path.cwd()):
@@ -305,7 +302,7 @@ def writeFileMetadataJSON(cls, modelPrefix, jPath=Path.cwd(), isH2OModel=False):
    @classmethod
    def writeBaseFitStat(
-        self, csvPath=None, jPath=Path.cwd(), userInput=False, tupleList=None
+        cls, csvPath=None, jPath=Path.cwd(), userInput=False, tupleList=None
    ):
        """
        Writes a JSON file to display fit statistics for the model in SAS Open Model Manager.
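
# A minimal usage sketch for the method above (the statistic names, values, and
# data roles are hypothetical, not taken from this diff):
#
#   JSONFiles.writeBaseFitStat(
#       tupleList=[("GAMMA", 1.5, "TRAIN"), ("MCE", 0.21, "TEST")],
#       jPath="path/to/model",
#   )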
@@ -376,7 +373,7 @@ def writeBaseFitStat(
        ]

        nullJSONPath = Path(__file__).resolve().parent / "null_dmcas_fitstat.json"
-        nullJSONDict = self.readJSONFile(nullJSONPath)
+        nullJSONDict = cls.readJSONFile(nullJSONPath)

        dataMap = [{}, {}, {}]
        for i in range(3):
@@ -386,19 +383,19 @@ def writeBaseFitStat(
        for paramTuple in tupleList:
            # ignore incorrectly formatted input arguments
            if type(paramTuple) == tuple and len(paramTuple) == 3:
-                paramName = self.formatParameter(paramTuple[0])
+                paramName = cls.formatParameter(paramTuple[0])
                if paramName not in validParams:
                    continue
                if type(paramTuple[2]) == str:
-                    dataRole = self.convertDataRole(paramTuple[2])
+                    dataRole = cls.convertDataRole(paramTuple[2])
                else:
                    dataRole = paramTuple[2]
                dataMap[dataRole - 1]["dataMap"][paramName] = paramTuple[1]

        if userInput:
            while True:
                paramName = input("Parameter name: ")
-                paramName = self.formatParameter(paramName)
+                paramName = cls.formatParameter(paramName)
                if paramName not in validParams:
                    print("Not a valid parameter. Please see documentation.")
                    if input("More parameters? (Y/N)") == "N":
@@ -408,7 +405,7 @@ def writeBaseFitStat(
                dataRole = input("Data role: ")

                if type(dataRole) is str:
-                    dataRole = self.convertDataRole(dataRole)
+                    dataRole = cls.convertDataRole(dataRole)
                dataMap[dataRole - 1]["dataMap"][paramName] = paramValue

                if input("More parameters? (Y/N)") == "N":
@@ -418,11 +415,11 @@ def writeBaseFitStat(
            csvData = pd.read_csv(csvPath)
            for i, row in enumerate(csvData.values):
                paramName, paramValue, dataRole = row
-                paramName = self.formatParameter(paramName)
+                paramName = cls.formatParameter(paramName)
                if paramName not in validParams:
                    continue
                if type(dataRole) is str:
-                    dataRole = self.convertDataRole(dataRole)
+                    dataRole = cls.convertDataRole(dataRole)
                dataMap[dataRole - 1]["dataMap"][paramName] = paramValue

        outJSON = nullJSONDict
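
# The CSV branch above assumes a three-column file, one row per statistic
# (these example rows are illustrative, not from the diff):
#
#   paramName,paramValue,dataRole
#   GAMMA,1.5,TRAIN
#   MCE,0.21,TEST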
@@ -439,7 +436,7 @@ def writeBaseFitStat(

    @classmethod
    def calculateFitStat(
-        self, validateData=None, trainData=None, testData=None, jPath=Path.cwd()
+        cls, validateData=None, trainData=None, testData=None, jPath=Path.cwd()
    ):
        """
        Calculates fit statistics from user data and predictions and then writes to
@@ -499,7 +496,7 @@ def calculateFitStat(
            )

        nullJSONPath = Path(__file__).resolve().parent / "null_dmcas_fitstat.json"
-        nullJSONDict = self.readJSONFile(nullJSONPath)
+        nullJSONDict = cls.readJSONFile(nullJSONPath)

        dataSets = [[[None], [None]], [[None], [None]], [[None], [None]]]
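
# A minimal usage sketch (the (actual, predicted) pair structure is an
# assumption based on the dataSets placeholders above):
#
#   JSONFiles.calculateFitStat(
#       trainData=(yTrainActual, yTrainPredicted),
#       testData=(yTestActual, yTestPredicted),
#   )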
@@ -598,7 +595,7 @@ def calculateFitStat(

    @classmethod
    def generateROCLiftStat(
-        self,
+        cls,
        targetName,
        targetValue,
        swatConn,
@@ -656,10 +653,10 @@ def generateROCLiftStat(
            )

        nullJSONROCPath = Path(__file__).resolve().parent / "null_dmcas_roc.json"
-        nullJSONROCDict = self.readJSONFile(nullJSONROCPath)
+        nullJSONROCDict = cls.readJSONFile(nullJSONROCPath)

        nullJSONLiftPath = Path(__file__).resolve().parent / "null_dmcas_lift.json"
-        nullJSONLiftDict = self.readJSONFile(nullJSONLiftPath)
+        nullJSONLiftDict = cls.readJSONFile(nullJSONLiftPath)

        dataSets = [pd.DataFrame(), pd.DataFrame(), pd.DataFrame()]
        columns = ["actual", "predict"]
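
# A minimal usage sketch (assumes an active swat.CAS session; the host, port,
# and target details are hypothetical, and the remaining arguments are elided
# by the hunk above):
#
#   import swat
#   conn = swat.CAS("cas-host.example.com", 5570)
#   JSONFiles.generateROCLiftStat("BAD", 1, conn, ...)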
@@ -965,25 +962,85 @@ def convertDataRole(self, dataRole):
        return conversion

-    def getCurrentScopedImports(self):
+    @classmethod
+    def createRequirementsJSON(cls, jPath=Path.cwd()):
        """
-        Gets the Python modules from the current scope's global variables.
+        Searches the model directory for Python scripts and pickle files and determines
+        their Python package dependencies. Each dependency found is then matched to the
+        package version installed in the current working environment, and the package
+        and version are written to a requirements.json file.
+
+        WARNING:
+        The methods used by this function can determine package dependencies from the
+        provided scripts and pickle files, but they CANNOT determine the required
+        package versions unless they are run in the environment in which the model was
+        originally developed.
+
+        This function works best when run in the model development environment; run
+        anywhere else, it is likely to throw errors and/or produce incorrect package
+        versions. When using this function outside of the model development
+        environment, it is recommended that the user adjust the package versions in
+        requirements.json to match the model development environment.
+
+        Parameters
+        ----------
+        jPath : str, optional
+            The path to a Python project, by default Path.cwd().

        Yields
-        -------
-        str
-            Name of the package that is generated.
+        ------
+        requirements.json : file
+            JSON file used to create a specific Python environment in a SAS Model
+            Manager published container.
        """

-        for name, val in globals().items():
-            if isinstance(val, types.ModuleType):
-                # Split ensures you get the root package, not just the imported function
-                name = val.__name__.split(".")[0]
-                yield name
-            elif isinstance(val, type):
-                name = val.__module__.split(".")[0]
-                yield name
+        picklePackages = []
+        pickleFiles = cls.getPickleFile(jPath)
+        for pickleFile in pickleFiles:
+            picklePackages.append(cls.getDependenciesFromPickleFile(pickleFile))
+
+        codeDependencies = cls.getCodeDependencies(jPath)
+
+        packageList = picklePackages + codeDependencies
+        packageAndVersion = cls.getLocalPackageVersion()
+
+        # Create (or overwrite) the requirements.json file
+        with open(Path(jPath) / "requirements.json", "w") as file:
+            for package, version in packageAndVersion:
+                jsonStep = json.dumps(
+                    [
+                        {
+                            "step": "install " + package,
+                            "command": "pip install " + package + "==" + version,
+                        }
+                    ],
+                    indent=4,
+                )
+                file.write(jsonStep)
+
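
# For reference, each entry written above has this shape (the package and
# version shown are examples, not from this diff):
#
#   [
#       {
#           "step": "install numpy",
#           "command": "pip install numpy==1.21.2"
#       }
#   ]
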
+    @classmethod
+    def getCodeDependencies(cls, jPath, debug=False):
+        from ..utils import functionInspector
+        import inspect
+
+        fileNames = []
+        fileNames.extend(sorted(Path(jPath).glob("*.py")))
+
+        # Concatenate every Python script found in the project directory
+        strScoreCode = ""
+        for file in fileNames:
+            with open(file, "r") as code:
+                strScoreCode = strScoreCode + code.read()
+
+        stringFunctionInspector = inspect.getsource(functionInspector)
+
+        # Append a driver that runs the function inspector over the collected code
+        execCode = strScoreCode + stringFunctionInspector + '''import logging
+if __name__ == "__main__":
+    debug = {}
+    logLevel = logging.DEBUG if debug else logging.INFO
+    logging.basicConfig(level=logLevel, format="%(levelname)s: %(message)s")
+
+    symbols, dependencies = findDependencies()
+    print(dependencies)
+'''.format(debug)
+
+        # Execute the assembled script as if it were a standalone module and
+        # collect the dependency list the driver produces
+        execNamespace = {"__name__": "__main__"}
+        exec(execCode, execNamespace)
+        return execNamespace.get("dependencies", [])
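
# Design note: the method above executes the project's own scripts together
# with the functionInspector source as one synthetic module. This is why the
# createRequirementsJSON docstring warns that dependency versions are only
# reliable inside the original model development environment.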

    def getPickleFile(self, pPath):
        """
        Given a file path, retrieve the pickle file(s).
@@ -1025,60 +1082,11 @@ def getDependenciesFromPickleFile(self, pickleFile):
        obj = pickle.load(openfile)
        dumps = pickle.dumps(obj)

-        modules = {mod.split(".")[0] for mod, _ in self.getNames(dumps)}
+        modules = {mod.split(".")[0] for mod, _ in self.getPackageNames(dumps)}
+        modules.discard("builtins")
        return modules
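
# A minimal usage sketch (the pickle path is hypothetical):
#
#   deps = JSONFiles().getDependenciesFromPickleFile(Path("model.pickle"))
#   # e.g. {"sklearn", "numpy"} for a pickled scikit-learn model
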
-    @classmethod
-    def createRequirementsJSON(self, jPath=Path.cwd()):
-        """
-        Searches the root of the project for all Python modules and writes them to a requirements.json file.
-
-        Parameters
-        ----------
-        jPath : str, optional
-            The path to a Python project, by default Path.cwd().
-        """
-
-        module_version_map = {}
-        pickle_files = self.get_pickle_file(jPath)
-        requirements_txt_file = os.path.join(jPath, "requirements.txt")
-        with open(requirements_txt_file, "r") as f:
-            modules_requirements_txt = set()
-            for pickle_file in pickle_files:
-                modules_pickle = self.get_modules_from_pickle_file(pickle_file)
-            for line in f:
-                module_parts = line.rstrip().split("==")
-                module = module_parts[0]
-                version = module_parts[1]
-                module_version_map[module] = version
-                modules_requirements_txt.add(module)
-            pip_name_list = list(modules_requirements_txt.union(modules_pickle))
-
-        for item in pip_name_list:
-            if item in module_version_map:
-                if module_version_map[item] == "0.0.0":
-                    print(
-                        "Warning: No pip install name found for package: "
-                        + item.split("==")[0]
-                    )
-                    pip_name_list.remove(item)
-
-        j = json.dumps(
-            [
-                {
-                    "step": "install " + i,
-                    "command": "pip install " + i + "==" + module_version_map[i],
-                }
-                if i in module_version_map
-                else {"step": "install " + i, "command": "pip install " + i}
-                for i in pip_name_list
-            ],
-            indent=4,
-        )
-        with open(os.path.join(jPath, "requirements.json"), "w") as file:
-            print(j, file=file)
-
-    def getNames(self, stream):
+    def getPackageNames(self, stream):
        """
        Generates (module, class_name) tuples from a pickle stream. Extracts all class names referenced
        by GLOBAL and STACK_GLOBAL opcodes.
@@ -1092,50 +1100,48 @@ def getNames(self, stream):
            A file like object or string containing the pickle.

        Yields
-        -------
+        ------
        tuple
            Generated (module, class_name) tuples.
        """

        stack, markstack, memo = [], [], []
-        mo = pickletools.markobject
+        mark = pickletools.markobject

-        for op, arg, pos in pickletools.genops(stream):
-            # simulate the pickle stack and marking scheme, insofar
-            # necessary to allow us to retrieve the names used by STACK_GLOBAL
+        # Step through the pickle stack and retrieve names used by STACK_GLOBAL
+        for opcode, arg, pos in pickletools.genops(stream):

-            before, after = op.stack_before, op.stack_after
+            before, after = opcode.stack_before, opcode.stack_after
            numtopop = len(before)

-            if op.name == "GLOBAL":
+            if opcode.name == "GLOBAL":
                yield tuple(arg.split(None, 1))
-            elif op.name == "STACK_GLOBAL":
+            elif opcode.name == "STACK_GLOBAL":
                yield (stack[-2], stack[-1])
-
-            elif mo in before or (op.name == "POP" and stack and stack[-1] is mo):
+            elif mark in before or (opcode.name == "POP" and stack and stack[-1] is mark):
                markpos = markstack.pop()
-                while stack[-1] is not mo:
+                while stack[-1] is not mark:
                    stack.pop()
                stack.pop()
                try:
-                    numtopop = before.index(mo)
+                    numtopop = before.index(mark)
                except ValueError:
                    numtopop = 0
-            elif op.name in {"PUT", "BINPUT", "LONG_BINPUT", "MEMOIZE"}:
-                if op.name == "MEMOIZE":
+            elif opcode.name in {"PUT", "BINPUT", "LONG_BINPUT", "MEMOIZE"}:
+                if opcode.name == "MEMOIZE":
                    memo.append(stack[-1])
                else:
                    memo[arg] = stack[-1]
-                numtopop, after = 0, []  # memoize and put do not pop the stack
-            elif op.name in {"GET", "BINGET", "LONG_BINGET"}:
+                numtopop, after = 0, []  # MEMOIZE and PUT do not pop the stack
+            elif opcode.name in {"GET", "BINGET", "LONG_BINGET"}:
                arg = memo[arg]

            if numtopop:
                del stack[-numtopop:]
-            if mo in after:
+            if mark in after:
                markstack.append(pos)

-            if len(after) == 1 and op.arg is not None:
+            if len(after) == 1 and opcode.arg is not None:
                stack.append(arg)
            else:
                stack.extend(after)
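
# A minimal, self-contained sanity check for getPackageNames (the object
# pickled here is illustrative):
#
#   import collections
#   payload = pickle.dumps(collections.OrderedDict(a=1))
#   names = set(JSONFiles().getPackageNames(payload))
#   # names should include ("collections", "OrderedDict")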