Add direct band gap property and tune property settings

blokhin · blokhin · commit b117c869fb42 · 2018-03-21T00:05:55.000+01:00
diff --git a/mpds_ml_labs/prediction.py b/mpds_ml_labs/prediction.py
@@ -7,13 +7,13 @@
 
 
 prop_semantics = {
-    #'w': {
-    #    'name': 'band gap for direct transition',
-    #    'units': 'eV',
-    #    'symbol': 'e<sub>dir.</sub>',
-    #    'rounding': 1,
-    #    'interval': [0.01, 20]
-    #},
+    'w': {
+        'name': 'band gap for direct transition',
+        'units': 'eV',
+        'symbol': 'e<sub>dir.</sub>',
+        'rounding': 1,
+        'interval': [0.01, 20]
+    },
     'z': {
         'name': 'isothermal bulk modulus',
         'units': 'GPa',
@@ -59,8 +59,8 @@
     't': {
         'name': 'linear thermal expansion coefficient',
         'units': 'K-1',
-        'symbol': '&Theta;<sub>D</sub>',
-        'rounding': 6,
+        'symbol': '&Theta;<sub>D</sub>(10<sup>5</sup>)',
+        'rounding': 2,
         'interval': [-0.001, 0.001]
     }
 }
diff --git a/train_model.py b/train_model.py
@@ -2,13 +2,12 @@
 from __future__ import division
 import os, sys
 import time
-import random
 from progressbar import ProgressBar
 
 import numpy as np
 import pandas as pd
 
-from sklearn.ensemble import RandomForestRegressor
+from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_absolute_error, r2_score
 
@@ -82,13 +81,17 @@ def mpds_get_data(prop_id, descriptor_kappa):
         (props['Value'] > prop_semantics[prop_id]['interval'][0]) & \
         (props['Value'] < prop_semantics[prop_id]['interval'][1])
     ]
+
     if prop_id not in ['m', 'd']:
         to_drop = props[
             (props['Cname'] == 'Temperature') & (props['Cunits'] == 'K') & ((props['Cvalue'] < 200) | (props['Cvalue'] > 400))
         ]
         print("Rows to neglect by temperature: %s" % len(to_drop))
         props.drop(to_drop.index, inplace=True)
 
+    if prop_id == 't':
+        props['Value'] *= 100000 # normalization 10**5
+
     phases_compounds = dict(zip(props['Phase'], props['Compound'])) # keep the mapping for future
     avgprops = props.groupby('Phase')['Value'].mean().to_frame().reset_index().rename(columns={'Value': 'Avgvalue'})
     phases = np.unique(avgprops['Phase'].astype(int)).tolist()
@@ -110,7 +113,8 @@ def mpds_get_data(prop_id, descriptor_kappa):
         phases=phases
     )):
         crystal = MPDSDataRetrieval.compile_crystal(item, 'ase')
-        if not crystal: continue
+        if not crystal:
+            continue
         descriptor = get_descriptor(crystal, kappa=descriptor_kappa)
 
         if len(descriptor) < min_descriptor_len:
@@ -187,7 +191,7 @@ def tune_model(data_file):
     results.sort(key=lambda x: (-x[1], x[2]))
 
     print("Best result:", results[-1])
-    parameter_b = results[-1][0]
+    parameter_b, avg_mae, avg_r2 = results[-1]
 
     print("a = %s b = %s" % (parameter_a, parameter_b))