
Commit 13a8739

Author: Thomas Brettin
Message: single node, multi XPU
Parent: a6cc8b0

File tree

1 file changed: +271 −0 lines


examples/ADRP/reg_go3.py

Lines changed: 271 additions & 0 deletions
@@ -0,0 +1,271 @@
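# Single-node, multi-XPU training for the ADRP regression model.
# Example invocation (input path is a placeholder):
#   python reg_go3.py --in <data.parquet> --ep 400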
import argparse
import os

import matplotlib
import numpy as np
import pandas as pd

matplotlib.use("Agg")

import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import (
    CSVLogger,
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau,
)
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Model, model_from_json
from tensorflow.keras.optimizers import SGD

file_path = os.path.dirname(os.path.realpath(__file__))

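# MirroredStrategy replicates the model on each listed device and
# all-reduces gradients across replicas every step. Uncomment more
# '/xpu:N' entries to scale across the XPUs on the node.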
strategy = tf.distribute.MirroredStrategy([
    '/xpu:0'
    # ,'/xpu:1'
    # ,'/xpu:2'
    # ,'/xpu:3'
    # ,'/xpu:4'
    # ,'/xpu:5'
    # ,'/xpu:6','/xpu:7'
    # ,'/xpu:8','/xpu:9'
    # ,'/xpu:10','/xpu:11'
])
print('tensorflow version: {}'.format(tf.__version__))
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

# Parse command-line arguments
psr = argparse.ArgumentParser(description="input data file (csv or parquet)")
psr.add_argument("--in", default="in_file")
psr.add_argument("--ep", type=int, default=400)
args = vars(psr.parse_args())

EPOCH = args["ep"]
BATCH = 32  # per-replica batch size
GLOBAL_BATCH_SIZE = BATCH * strategy.num_replicas_in_sync
DR = 0.1  # Dropout rate
data_path = args["in"]
print(args)

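# Coefficient of determination (R^2) as a Keras metric; K.epsilon()
# guards against division by zero when the targets are constant.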
def r2(y_true, y_pred):
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - SS_res / (SS_tot + K.epsilon())

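# Load features/target from a headered CSV: column 0 is the target,
# columns 1..PL-1 are the features.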
def load_data():
    data_path = args["in"]

    df = pd.read_csv(data_path, skiprows=1).values.astype("float32")
    df_y = df[:, 0].astype("float32")
    # NOTE: PL (one past the last feature column) is not defined anywhere in
    # this script; set it before calling this loader (e.g. PL = df.shape[1]).
    df_x = df[:, 1:PL].astype(np.float32)
    print('df_y: {}\n{}'.format(df_y.shape, df_y))
    print('df_x: {}\n{}'.format(df_x.shape, df_x))

    scaler = StandardScaler()
    df_x = scaler.fit_transform(df_x)

    X_train, X_test, Y_train, Y_test = train_test_split(
        df_x, df_y, test_size=0.20, random_state=42
    )

    return X_train, Y_train, X_test, Y_test

def load_data_from_parquet():
    data_path = args["in"]

    df = pd.read_parquet(data_path)
    df_y = df['reg'].values.astype("float32")  # regression target
    df_x = df.iloc[:, 6:].values.astype("float32")  # feature columns start at 6
    print('df_y: {}\n{}'.format(df_y.shape, df_y))
    print('df_x: {}\n{}'.format(df_x.shape, df_x))

    # Note: the scaler is fit on the full dataset, before the train/test split.
    scaler = StandardScaler()
    df_x = scaler.fit_transform(df_x)

    X_train, X_test, Y_train, Y_test = train_test_split(
        df_x, df_y, test_size=0.20, random_state=42
    )

    return X_train, Y_train, X_test, Y_test

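# Load the dataset (the CSV loader above is kept for reference).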
#X_train, Y_train, X_test, Y_test = load_data()
X_train, Y_train, X_test, Y_test = load_data_from_parquet()
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("Y_train shape:", Y_train.shape)
print("Y_test shape:", Y_test.shape)

# Steps per epoch, in units of global batches
steps = X_train.shape[0] // GLOBAL_BATCH_SIZE
validation_steps = X_test.shape[0] // GLOBAL_BATCH_SIZE
print('samples {}, global_batch_size {}, steps {}'.format(X_train.shape[0], GLOBAL_BATCH_SIZE, steps))
print('val samples {}, global_batch_size {}, val_steps {}'.format(X_test.shape[0], GLOBAL_BATCH_SIZE, validation_steps))

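# Batch at the global batch size; drop_remainder=True keeps every batch
# full so it splits evenly across replicas. Each dataset is repeated for
# EPOCH epochs because model.fit drains it with steps_per_epoch.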
train_ds = tf.data.Dataset.from_tensor_slices((X_train, Y_train)).batch(
    GLOBAL_BATCH_SIZE,
    drop_remainder=True,
    num_parallel_calls=None,
    deterministic=None,
).repeat(EPOCH)
val_ds = tf.data.Dataset.from_tensor_slices((X_test, Y_test)).batch(
    GLOBAL_BATCH_SIZE,
    drop_remainder=True,
    num_parallel_calls=None,
    deterministic=None,
).repeat(EPOCH)

# Shard by DATA rather than FILE: the datasets come from in-memory tensors,
# so there are no input files to shard across replicas.
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
train_ds = train_ds.with_options(options)
val_ds = val_ds.with_options(options)

train_dist = strategy.experimental_distribute_dataset(train_ds)
val_dist = strategy.experimental_distribute_dataset(val_ds)

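# Build and compile the model inside strategy.scope() so its variables
# are created as mirrored variables on every XPU.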
with strategy.scope():
    #inputs = Input(shape=(PS,))
    inputs = Input(shape=(1826,))
    x = Dense(250, activation="relu")(inputs)
    x = Dropout(DR)(x)
    x = Dense(125, activation="relu")(x)
    x = Dropout(DR)(x)
    x = Dense(60, activation="relu")(x)
    x = Dropout(DR)(x)
    x = Dense(30, activation="relu")(x)
    x = Dropout(DR)(x)
    outputs = Dense(1, activation="relu")(x)  # ReLU keeps predictions non-negative

    model = Model(inputs=inputs, outputs=outputs)
    model.summary()
    model.compile(
        loss="mean_squared_error",
        optimizer=SGD(learning_rate=0.0001, momentum=0.9),
        metrics=["mae", r2],
    )

# Set up callbacks to run during training.

checkpointer = ModelCheckpoint(
    filepath="reg_go.autosave.model.h5",
    verbose=1,
    save_weights_only=False,
    save_best_only=True,
)
csv_logger = CSVLogger("reg_go.training.log")
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.75,
    patience=20,
    verbose=1,
    mode="auto",
    min_delta=0.0001,
    cooldown=3,
    min_lr=1e-9,
)
early_stop = EarlyStopping(monitor="val_loss", patience=100, verbose=1, mode="auto")

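# The datasets are already batched, so model.fit is not given a batch_size;
# steps_per_epoch/validation_steps tell it how many global batches per epoch.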
from datetime import datetime as dt
print("{} calling model.fit".format(dt.now().strftime("%D %H:%M:%S.%f")))
history = model.fit(
    #X_train,
    #Y_train,
    train_dist,
    steps_per_epoch=int(steps),
    epochs=EPOCH,
    verbose=1,
    #validation_data=(X_test, Y_test),
    validation_data=val_dist,
    validation_steps=validation_steps,
    callbacks=[checkpointer, csv_logger, reduce_lr, early_stop],
)
print("{} done calling model.fit".format(dt.now().strftime("%D %H:%M:%S.%f")))

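# Final evaluation on the held-out test split; returns [loss, mae, r2].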
score = model.evaluate(X_test, Y_test, verbose=0)

print(score)

print(history.history.keys())
# dict_keys(['val_loss', 'val_mae', 'val_r2', 'loss', 'mae', 'r2', 'lr'])

# summarize history for MAE
# plt.plot(history.history['mean_absolute_error'])
plt.plot(history.history["mae"])
# plt.plot(history.history['val_mean_absolute_error'])
plt.plot(history.history["val_mae"])

plt.title("Model Mean Absolute Error")
plt.ylabel("mae")
plt.xlabel("epoch")
plt.legend(["train", "test"], loc="upper left")

plt.savefig("reg_go.mae.png", bbox_inches="tight")
plt.savefig("reg_go.mae.pdf", bbox_inches="tight")

plt.close()

# summarize history for loss
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.title("Model Loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(["train", "test"], loc="upper left")

plt.savefig("reg_go.loss.png", bbox_inches="tight")
plt.savefig("reg_go.loss.pdf", bbox_inches="tight")

plt.close()

print("Test val_loss:", score[0])
print("Test val_mae:", score[1])

# serialize model to JSON
model_json = model.to_json()
with open("reg_go.model.json", "w") as json_file:
    json_file.write(model_json)

# serialize weights to HDF5
model.save_weights("reg_go.model.h5")
print("Saved model to disk")

# load json and create model
with open("reg_go.model.json", "r") as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# load weights into new model
loaded_model.load_weights("reg_go.model.h5")
print("Loaded json model from disk")

# evaluate json-loaded model on test data
loaded_model.compile(
    loss="mean_squared_error", optimizer="SGD", metrics=["mean_absolute_error"]
)
score_json = loaded_model.evaluate(X_test, Y_test, verbose=0)

print("json Validation loss:", score_json[0])
print("json Validation mae:", score_json[1])

predict_json_train = loaded_model.predict(X_train)

predict_json_test = loaded_model.predict(X_test)

pred_train = predict_json_train[:, 0]
pred_test = predict_json_test[:, 0]

np.savetxt("pred_train.csv", pred_train, delimiter=",", newline="\n", fmt="%.3f")
np.savetxt("pred_test.csv", pred_test, delimiter=",", newline="\n", fmt="%.3f")

print("Correlation prediction on test and Y_test:", np.corrcoef(pred_test, Y_test))
print("Correlation prediction on train and Y_train:", np.corrcoef(pred_train, Y_train))
