1
+ # Copyright (c) 2020, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from pathlib import Path
5
+ import json
6
+
7
class mlflowModel:
    """Reader for the metadata stored in an MLflow ``MLmodel`` file."""

    def readMLmodelFile(self, mPath=Path.cwd()):
        """Parse an MLflow ``MLmodel`` file into metadata and signature data.

        The file is scanned line by line rather than parsed as YAML, so only
        the layout handled below is supported.

        Parameters
        ----------
        mPath : str or pathlib.Path
            Location of the ``MLmodel`` file. The default is the working
            directory captured when the class was defined; it is a directory,
            not a file, so callers are expected to pass an explicit path.

        Returns
        -------
        varDict : dict
            The values of ``python_version``, ``serialization_format``,
            ``run_id`` and ``model_path`` as strings, plus ``mlflowPath``
            holding ``mPath`` as it was passed in.
        inputsDict
            The decoded JSON value of the signature's ``inputs`` entry.
        outputsDict
            The decoded JSON value of the signature's ``outputs`` entry.

        Raises
        ------
        ValueError
            If one of the required metadata keys is missing, or if the file
            has no ``inputs:``/``outputs:`` signature entries.
        """
        with open(mPath, 'r') as mFile:
            mLines = mFile.readlines()

        # TODO: More verbose substring acceptance is needed for each possible
        # model type (e.g. reading 'loader_module' and '<package>_version').
        # For now, stick with those models which are based on pickle files and
        # don't report model type or version.
        varDict = {}
        for varString in ('python_version', 'serialization_format',
                          'run_id', 'model_path'):
            # Only the first line mentioning the key is used.
            matches = [s for s in mLines if varString in s]
            if not matches:
                raise ValueError('This MLFlow model type is not currently supported.')
            # "key: value" -> "value" (the token after the first single space).
            varDict[varString] = matches[0].strip().split(' ')[1]
        varDict['mlflowPath'] = mPath

        indIn = [i for i, s in enumerate(mLines) if 'inputs:' in s]
        indOut = [i for i, s in enumerate(mLines) if 'outputs:' in s]
        if not indIn or not indOut:
            raise ValueError('The MLmodel file does not contain a model '
                             'signature with inputs and outputs.')

        # The inputs entry runs up to the outputs entry; the outputs entry runs
        # up to, but not including, the last line of the file.
        # NOTE(review): presumably the last line is a trailing key such as
        # utc_time_created - confirm against the MLmodel files in use.
        inputs = mLines[indIn[0]:indOut[0]]
        outputs = mLines[indOut[0]:-1]

        # Drop the leading "inputs: '" (9 characters) / "outputs: '"
        # (10 characters) and the closing quote, leaving the JSON text.
        # Assumes the value is a single-quoted JSON string.
        inputsDict = json.loads(''.join([s.strip() for s in inputs])[9:-1])
        outputsDict = json.loads(''.join([s.strip() for s in outputs])[10:-1])

        return varDict, inputsDict, outputsDict
0 commit comments