Skip to content

Commit b157042

Browse files
Merge pull request #189 from Thilakraj1998/main
Major changes (Save & load)
2 parents 8455d6c + 7cdd7fa commit b157042

File tree

6 files changed

+49
-48
lines changed

6 files changed

+49
-48
lines changed

blobcity/code_gen/PyMeta.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,5 +82,5 @@ class PyComments:
8282
'datasplit':"\n### Train & Test\n# The train-test split is a procedure for evaluating the performance of an algorithm.\n# The procedure involves taking a dataset and dividing it into two subsets.\n# The first subset is utilized to fit/train the model.\n# The second subset is used for prediction.\n# The main motive is to estimate the performance of the model on new data.\n",
8383
'metrics':"\n### Accuracy Metrics\n# Performance metrics are a part of every machine learning pipeline. \n# They tell you if you're making progress, and put a number on it. All machine learning models,\n# whether it's linear regression, or a SOTA technique like BERT, need a metric to judge performance.\n",
8484
'x&y':"\n### Feature Selection\n# It is the process of reducing the number of input variables when developing a predictive model.\n# Used to reduce the number of input variables to reduce the computational cost of modelling and,\n# in some cases,to improve the performance of the model.\n",
85-
'cor_matrix': "### Correlation Matrix\n# In order to check the correlation between the features, we will plot a correlation matrix.\n# It is effective in summarizing a large amount of data where the goal is to see patterns."
85+
'cor_matrix': "### Correlation Matrix\n# In order to check the correlation between the features, we will plot a correlation matrix.\n# It is effective in summarizing a large amount of data where the goal is to see patterns.\n"
8686
}

blobcity/config/tuner.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -95,8 +95,8 @@ def classification_metrics(y_true,y_pred):
9595
"""
9696
result=dict()
9797
result['F1-Score']=f1_score(y_true, y_pred, average="weighted")
98-
result['precision']=precision_score(y_true, y_pred,average="weighted")
99-
result['recall']=recall_score(y_true, y_pred,average="weighted")
98+
result['Precision']=precision_score(y_true, y_pred,average="weighted")
99+
result['Recall']=recall_score(y_true, y_pred,average="weighted")
100100
return result
101101

102102
def metricResults(y_true,y_pred,ptype):

blobcity/main/driver.py

Lines changed: 29 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -12,18 +12,20 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
16-
import pickle
15+
import os
16+
import dill
1717
import numpy as np
1818
import pandas as pd
19+
import autokeras as ak
20+
import tensorflow as tf
1921
from blobcity.store import DictClass
2022
from blobcity.utils import get_dataframe_type,dataCleaner
2123
from blobcity.utils import AutoFeatureSelection as AFS
2224
from blobcity.main.modelSelection import model_search
2325
from blobcity.code_gen import yml_reader,code_generator
2426
from sklearn.preprocessing import MinMaxScaler
2527
from sklearn.feature_selection import SelectKBest,f_regression,f_classif
26-
def train(file=None, df=None, target=None,features=None,accuracy_criteria=0.99):
28+
def train(file=None, df=None, target=None,features=None,use_neural=False,accuracy_criteria=0.99):
2729
"""
2830
param1: string: dataset file path
2931
@@ -56,35 +58,41 @@ def train(file=None, df=None, target=None,features=None,accuracy_criteria=0.99):
5658
CleanedDF=dataCleaner(dataframe,features,target,dict_class)
5759
#model search space
5860
accuracy_criteria= accuracy_criteria if accuracy_criteria<=1.0 else (accuracy_criteria/100)
59-
modelClass = model_search(CleanedDF,target,dict_class,use_neural=False,accuracy_criteria=accuracy_criteria)
61+
modelClass = model_search(CleanedDF,target,dict_class,use_neural=use_neural,accuracy_criteria=accuracy_criteria)
6062
modelClass.yamldata=dict_class.getdict()
6163
modelClass.feature_importance_=dict_class.feature_importance if(features==None) else calculate_feature_importance(CleanedDF.drop(target,axis=1),CleanedDF[target],dict_class)
6264
dict_class.resetVar()
6365
return modelClass
6466

65-
def load(modelFile,h5_path=None):
67+
def load(model_path=None):
6668
"""
6769
param1: string: (required) the filepath to the stored model. Supports .pkl models.
68-
param2: string: the filepath to the stored h5 file, provide only if saved h5 file.
6970
returns: Model file
7071
71-
function loads the serialized model from .pkl or .h5 format to usable format.
72+
function loads the serialized model from .pkl format to usable format.
7273
"""
73-
path_components = modelFile.split('.')
74-
extension = path_components[1] if len(path_components)<=2 else path_components[-1]
75-
76-
if extension == 'pkl' and h5_path in [None,""]:
77-
model = pickle.load(open(modelFile, 'rb'))
78-
79-
""" elif os.path.splitext(h5_path)[1] == '.h5' and h5_path!=None:
80-
print("pkl path: {}, h5 path : {}".format(os.path.splitext(modelFile),os.path.splitext(h5_path)))
81-
if os.path.splitext(h5_path)[0] == os.path.splitext(modelFile)[0]:
82-
tfmodel = tf.keras.models.load_model(h5_path)
83-
model=pickle.load(open(modelFile, 'rb'))
84-
model.model=tfmodel
74+
if model_path not in [None,""]:
75+
path_components = model_path.split('.')
76+
extension = path_components[1] if len(path_components)<=2 else path_components[-1]
77+
base_path=os.path.splitext(model_path)[0]
78+
if extension == 'pkl':
79+
model = dill.load(open(model_path, 'rb'))
80+
if model.yamldata['model']['type'] in ['TF','tf','Tensorflow']:
81+
if model.yamldata['model']['save_type']=='h5':
82+
h5_path=base_path+".h5"
83+
if os.path.isfile(h5_path):model.model=tf.keras.models.load_model(h5_path)
84+
else: raise FileNotFoundError(f"{h5_path} file doest exists in the directory")
85+
elif model.yamldata['model']['save_type']=='pb':
86+
if os.path.isdir(base_path):model.model=tf.keras.models.load_model(base_path, custom_objects=ak.CUSTOM_OBJECTS)
87+
else: raise FileNotFoundError(f"{base_path} Folder doest exists")
88+
else:
89+
raise TypeError(f"{model.yamldata['model']['save_type']}, not supported save format")
90+
return model
8591
else:
86-
raise ValueError("file name for pickle and h5 file should be same") """
87-
return model
92+
raise TypeError(f"{extension}, file type must be .pkl")
93+
else:
94+
raise TypeError(f"{model_path}, path can't be None or Null")
95+
8896

8997
def spill(filepath,yaml_path=None,doc=None):
9098
"""

blobcity/store/Model.py

Lines changed: 15 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,7 @@
1313
# limitations under the License.
1414

1515
import os
16-
import pickle
17-
import time
16+
import dill
1817
import numpy as np
1918
import pandas as pd
2019
import seaborn as sns
@@ -141,30 +140,22 @@ def save(self, model_path=None):
141140
if model_path not in [None,""]:
142141
path_components = model_path.split('.')
143142
extension = path_components[1] if len(path_components)<=2 else path_components[-1]
144-
145-
if extension == '/':
146-
final_path = os.path.join(model_path, 'autoaimodel.pkl')
147-
pickle.dump(self, open(final_path, 'wb'))
148-
print("The model is stored at {}".format(final_path))
149-
elif extension == 'pkl':
143+
if extension == 'pkl' and self.yamldata['model']['type'] not in ['TF','tf','Tensorflow']:
150144
final_path = model_path
151-
pickle.dump(self, open(final_path, 'wb'))
145+
dill.dump(self, open(final_path, 'wb'))
152146
print("The model is stored at {}".format(final_path))
153-
154-
"""
155-
elif extension == 'h5' or self.yamldata['model']['type'] in ['TF','tf']:
156-
model_path = model_path if model_path!="./" else os.path.join(model_path, 'autoaimodel.h5')
157-
class_path = model_path if model_path!="./" else os.path.join(model_path, 'autoaimodel.pkl')
158-
try:
159-
tfmodel_temp=self.model
160-
self.model.save(model_path)
161-
self.model=None
162-
pickle.dump(self, open(class_path, 'wb'))
163-
self.model=tfmodel_temp
164-
print("The model is stored at {}".format(model_path))
165-
return model_path
166-
except:
167-
raise TypeError("Your model is not a Keras model of type .h5. Try .pkl extension.") """
147+
elif extension=='pkl' and self.yamldata['model']['type'] in ['TF','tf','Tensorflow']:
148+
base_path=os.path.splitext(model_path)[0]
149+
tmp=self.model
150+
if self.yamldata['problem']['type']=="Classification":
151+
tmp.export_model().save(base_path+".h5")
152+
elif self.yamldata['problem']['type']=="Regression":
153+
tmp.export_model().save(base_path, save_format="tf")
154+
else:
155+
raise TypeError("Wrong problem type identified")
156+
self.model=None
157+
dill.dump(self, open(model_path, 'wb'))
158+
self.model=tmp
168159
else:
169160
raise TypeError(f"{extension} file type must be .pkl")
170161
else:

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
dill>=0.3.4
12
cliff>=3.6.0
23
joblib>=1.0.0
34
numpy>=1.21.0

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ classifiers =
1919
packages = find:
2020
python_requires = >=3.6
2121
install_requires=
22+
dill>=0.3.4
2223
cliff>=3.6.0
2324
joblib>=1.0.0
2425
numpy>=1.21.0

0 commit comments

Comments
 (0)