22from __future__ import division
33import os , sys
44import time
5- import random
65from progressbar import ProgressBar
76
87import numpy as np
98import pandas as pd
109
11- from sklearn .ensemble import RandomForestRegressor
10+ from sklearn .ensemble import RandomForestRegressor , RandomForestClassifier
1211from sklearn .model_selection import train_test_split
1312from sklearn .metrics import mean_absolute_error , r2_score
1413
@@ -82,13 +81,17 @@ def mpds_get_data(prop_id, descriptor_kappa):
8281 (props ['Value' ] > prop_semantics [prop_id ]['interval' ][0 ]) & \
8382 (props ['Value' ] < prop_semantics [prop_id ]['interval' ][1 ])
8483 ]
84+
8585 if prop_id not in ['m' , 'd' ]:
8686 to_drop = props [
8787 (props ['Cname' ] == 'Temperature' ) & (props ['Cunits' ] == 'K' ) & ((props ['Cvalue' ] < 200 ) | (props ['Cvalue' ] > 400 ))
8888 ]
8989 print ("Rows to neglect by temperature: %s" % len (to_drop ))
9090 props .drop (to_drop .index , inplace = True )
9191
92+ if prop_id == 't' :
93+ props ['Value' ] *= 100000 # normalization 10**5
94+
9295 phases_compounds = dict (zip (props ['Phase' ], props ['Compound' ])) # keep the mapping for future
9396 avgprops = props .groupby ('Phase' )['Value' ].mean ().to_frame ().reset_index ().rename (columns = {'Value' : 'Avgvalue' })
9497 phases = np .unique (avgprops ['Phase' ].astype (int )).tolist ()
@@ -110,7 +113,8 @@ def mpds_get_data(prop_id, descriptor_kappa):
110113 phases = phases
111114 )):
112115 crystal = MPDSDataRetrieval .compile_crystal (item , 'ase' )
113- if not crystal : continue
116+ if not crystal :
117+ continue
114118 descriptor = get_descriptor (crystal , kappa = descriptor_kappa )
115119
116120 if len (descriptor ) < min_descriptor_len :
@@ -187,7 +191,7 @@ def tune_model(data_file):
187191 results .sort (key = lambda x : (- x [1 ], x [2 ]))
188192
189193 print ("Best result:" , results [- 1 ])
190- parameter_b = results [- 1 ][ 0 ]
194+ parameter_b , avg_mae , avg_r2 = results [- 1 ]
191195
192196 print ("a = %s b = %s" % (parameter_a , parameter_b ))
193197
0 commit comments