
Commit 2ddf820

add links to Sergei's results
1 parent a7cb9d2 commit 2ddf820

2 files changed: +130 -6 lines changed


P1B2/README.md

Lines changed: 20 additions & 6 deletions
@@ -103,15 +103,29 @@ Best model saved to: model.A=sigmoid.B=64.D=None.E=20.L1=1024.L2=512.L3=256.P=1e
 Evaluation on test data: {'accuracy': 0.5500}
 ```
 
-### Running the XGBoost classifier
+### Preliminary performance
+
+The XGBoost classifier below achieves ~55% average accuracy on
+validation data in the five-fold cross-validation experiment. This
+suggests there may be a low ceiling for the MLP results; there may not
+be enough information in this set of SNP data to classify cancer types
+accurately.
 
 ```
 cd P1B2
 python p1b2_xgboost.py
+```
+
+David Anthony Brewster, Jr., Yashasvi Sharma, and Sergei Maslov have
+achieved 60.2% accuracy in their experiment. See the following links for details:
+
+* [DOE-NCI Confluence P1B2 scratch pad](https://doe-nci.atlassian.net/wiki/display/PROJECT/P1B2+Networks)
+* [David Brewster's GitHub repo](https://github.com/ljeabmreosn/NeuralNetwork/tree/master/benchmarks/P1B2)
+
+Try the following commands to run a replicate of their network (modified for the P1B2 data loader).
 
 ```
-The XGBoost classifier achieves ~55% average accuracy on validation
-data in the five-fold cross validation experiment. This suggests there
-may be a low ceiling for the MLP results; there may not be enough
-information in this set of SNP data to classify cancer types
-accurately.
+cd P1B2
+python contrib/David_Brewster/p1b2_ljeabmreosn_mlp.py
+```
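Note that `p1b2_xgboost.py` itself is not shown in this commit. The following is a minimal sketch of the kind of five-fold cross-validation experiment the paragraph above describes, assuming the `p1b2` loader is importable directly and that `xgboost` and `scikit-learn` are installed; the use of `XGBClassifier` with `cross_val_score` is an illustration, not the benchmark's actual implementation.

```
# Hypothetical sketch only: p1b2_xgboost.py's real implementation is not shown here.
import numpy as np
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier

import p1b2  # assumes the P1B2 data loader is on the Python path

# The loader returns one-hot labels (the MLP script below reads output_dim
# from y_train.shape[1]), so collapse them to class ids for XGBoost.
(X_train, y_train), (X_test, y_test) = p1b2.load_data_from_url(n_cols=10000)
y = np.argmax(y_train, axis=1)

scores = cross_val_score(XGBClassifier(), X_train, y, cv=5, scoring='accuracy')
print('Mean validation accuracy over five folds: {:.4f}'.format(scores.mean()))
```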
P1B2/contrib/David_Brewster/p1b2_ljeabmreosn_mlp.py

Lines changed: 110 additions & 0 deletions

@@ -0,0 +1,110 @@
from __future__ import print_function

import argparse

from keras.models import Sequential
from keras.layers import Dense, advanced_activations
from keras.callbacks import EarlyStopping, ModelCheckpoint

from ..datasets import p1b2  # P1B2 data loader

args = None

def set_args():
    global args
    parser = argparse.ArgumentParser(description='Train an MLP on the P1B2 SNP data.')
    parser.add_argument('--train', dest='training_file', help='input training data')
    parser.add_argument('--test', dest='testing_file', help='input testing data')
    parser.add_argument('--trainvar', dest='training_variable', help='variable you want to train')
    args = parser.parse_args()


BEST_MODEL_PATH = 'benchmarks/P1B2/best.hdf5'


(X_train, y_train), (X_test, y_test) = (None, None), (None, None)
input_dim = None
output_dim = None


def create_model():
    # advanced activation not used yet
    srelu = advanced_activations.SReLU(
        t_left_init='zero',
        a_left_init='glorot_uniform',
        t_right_init='glorot_uniform',
        a_right_init='one'
    )

    # create and return the model: two 256-unit sigmoid hidden layers
    model = Sequential()
    model.add(Dense(256, input_dim=input_dim, activation='sigmoid'))
    model.add(Dense(256, activation='sigmoid'))
    model.add(Dense(output_dim, activation='sigmoid'))
    return model


def train_model(model):
    opt = 'rmsprop'
    # checkpoint the weights with the best validation accuracy seen so far
    model_checkpoint = ModelCheckpoint(
        filepath=BEST_MODEL_PATH,
        monitor='val_acc',
        verbose=0,
        save_best_only=True,
        mode='auto'
    )
    # stop early if validation accuracy has not improved for 5 epochs
    overfitting_stopper = EarlyStopping(
        monitor='val_acc',
        min_delta=0,
        patience=5,
        verbose=1,
        mode='auto'
    )
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    model.fit(X_train, y_train,
              batch_size=64,
              nb_epoch=50,  # Keras 1.x name for epochs
              validation_split=0.2,
              callbacks=[overfitting_stopper, model_checkpoint])


def load_model_from_path(weights_path):
    model = create_model()
    model.load_weights(weights_path)
    return model


def save_weights(model):
    # debugging helper: step through and print each layer's weights
    for i, layer in enumerate(model.layers):
        weights = layer.get_weights()
        print('LAYER {}'.format(i))
        print(weights)
        print([w.shape for w in weights])
        input()


##################### START OF PROGRAM ##############################
set_args()
# gather input
if args is not None and args.training_file and args.testing_file and args.training_variable:
    try:
        (X_train, y_train), (X_test, y_test) = p1b2.load_data_from_file(
            train=args.training_file,
            test=args.testing_file,
            trainvar=args.training_variable)
    except Exception as e:
        print(e)
        print('(Hint: Are you sure the input files are in valid format?)')
else:
    (X_train, y_train), (X_test, y_test) = p1b2.load_data_from_url(n_cols=10000)

input_dim = X_train.shape[1]
output_dim = y_train.shape[1]

# train our model, then reload the checkpointed best weights and evaluate
train_model(create_model())
best_model = load_model_from_path(BEST_MODEL_PATH)
y_pred = best_model.predict(X_test)
accuracy = p1b2.evaluate(y_test, y_pred)
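The comment in `create_model` notes that the SReLU advanced activation is constructed but never wired into the network. For reference, here is a minimal sketch of how the hidden sigmoid activations could be swapped for SReLU under the same Keras 1.x API; this is an illustration, not part of the commit.

```
# Hypothetical variant (not part of this commit): use the SReLU activation
# in the hidden layers. In Keras 1.x, advanced activations are added as
# standalone layers after a Dense layer with no built-in activation.
from keras.models import Sequential
from keras.layers import Dense, advanced_activations

def create_srelu_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(256, input_dim=input_dim))
    model.add(advanced_activations.SReLU(
        t_left_init='zero',
        a_left_init='glorot_uniform',
        t_right_init='glorot_uniform',
        a_right_init='one'))
    model.add(Dense(256))
    model.add(advanced_activations.SReLU())
    model.add(Dense(output_dim, activation='sigmoid'))  # output layer unchanged
    return model
```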
