
Commit ba993e1

committed
added horovod functions and run scripts
1 parent 3560450

File tree: 3 files changed, +523 −0 lines changed

Pilot1/ST1/clr_callback.py

Lines changed: 133 additions & 0 deletions
@@ -0,0 +1,133 @@
from tensorflow.keras.callbacks import Callback
from tensorflow.keras import backend as K
import numpy as np


class CyclicLR(Callback):
    """This callback implements a cyclical learning rate policy (CLR).

    The method cycles the learning rate between two boundaries at a
    constant frequency, as detailed in the paper
    (https://arxiv.org/abs/1506.01186). The amplitude of the cycle can
    be scaled on a per-iteration or per-cycle basis.

    This class has three built-in policies, as put forth in the paper:

    "triangular":
        A basic triangular cycle with no amplitude scaling.
    "triangular2":
        A basic triangular cycle that halves the initial amplitude each cycle.
    "exp_range":
        A cycle that scales the initial amplitude by gamma**(cycle iterations)
        at each cycle iteration.

    For more detail, please see the paper.

    # Example
    ```python
    clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                   step_size=2000., mode='triangular')
    model.fit(X_train, Y_train, callbacks=[clr])
    ```

    The class also supports custom scaling functions:
    ```python
    clr_fn = lambda x: 0.5 * (1 + np.sin(x * np.pi / 2.))
    clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                   step_size=2000., scale_fn=clr_fn,
                   scale_mode='cycle')
    model.fit(X_train, Y_train, callbacks=[clr])
    ```

    # Arguments
        base_lr: initial learning rate, which is the
            lower boundary of the cycle.
        max_lr: upper boundary of the cycle. Functionally,
            it defines the cycle amplitude (max_lr - base_lr).
            The lr at any iteration is the sum of base_lr
            and some scaling of the amplitude; therefore
            max_lr may not actually be reached, depending on the
            scaling function.
        step_size: number of training iterations per
            half cycle. The authors suggest setting step_size to
            2-8x the number of training iterations per epoch.
        mode: one of {'triangular', 'triangular2', 'exp_range'}.
            Default 'triangular'.
            Values correspond to the policies detailed above.
            If scale_fn is not None, this argument is ignored.
        gamma: constant in the 'exp_range' scaling function:
            gamma**(cycle iterations).
        scale_fn: custom scaling policy defined by a single-argument
            lambda function, where 0 <= scale_fn(x) <= 1 for all x >= 0.
            If set, the mode parameter is ignored.
        scale_mode: one of {'cycle', 'iterations'}.
            Defines whether scale_fn is evaluated on
            cycle number or cycle iterations (training
            iterations since the start of the cycle). Default is 'cycle'.
    """

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1 / (2. ** (x - 1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma ** x
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Resets cycle iterations.
        Optional boundary/step size adjustment.
        """
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.

    def clr(self):
        cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size))
        x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1)
        if self.scale_mode == 'cycle':
            return self.base_lr + (self.max_lr - self.base_lr) * np.maximum(0, (1 - x)) * self.scale_fn(cycle)
        else:
            return self.base_lr + (self.max_lr - self.base_lr) * np.maximum(0, (1 - x)) * self.scale_fn(self.clr_iterations)

    def on_train_begin(self, logs=None):
        logs = logs or {}

        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1

        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        K.set_value(self.model.optimizer.lr, self.clr())
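
To make the triangular policy concrete, here is a minimal sketch (not part of the commit) that evaluates the same arithmetic as `CyclicLR.clr()` by hand, using the `base_lr`, `max_lr`, and `step_size` values from the docstring example:

```python
import numpy as np

base_lr, max_lr, step_size = 0.001, 0.006, 2000.

def triangular_lr(it):
    # same formula as CyclicLR.clr() when scale_fn(x) == 1 ('triangular' mode)
    cycle = np.floor(1 + it / (2 * step_size))
    x = np.abs(it / step_size - 2 * cycle + 1)
    return base_lr + (max_lr - base_lr) * np.maximum(0, 1 - x)

for it in (0, 1000, 2000, 3000, 4000):
    print(it, triangular_lr(it))
# 0 -> 0.001, 1000 -> 0.0035, 2000 -> 0.006 (peak),
# 3000 -> 0.0035, 4000 -> 0.001 (one full cycle = 2 * step_size iterations)
```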
Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
############# Module Loading ##############
import argparse
import os
import numpy as np
import matplotlib
import pandas as pd

matplotlib.use("Agg")

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import layers
from tensorflow.keras.callbacks import (
    CSVLogger,
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau,
)

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import sequence, text
import horovod.keras as hvd  # Horovod enables data-parallel training in a later step
import keras_tuner

from clr_callback import *
from smiles_regress_transformer_funcs_hvd import *

####### Argument parsing #############

file_path = os.path.dirname(os.path.realpath(__file__))

# psr and args take input from outside the script and assign:
# (1) file paths for data_path_train and data_path_vali
# (2) the number of training epochs

psr = argparse.ArgumentParser(description="input csv file")
psr.add_argument("--in_train", default="in_train")
psr.add_argument("--in_vali", default="in_vali")
psr.add_argument("--ep", type=int, default=400)
psr.add_argument("--num_heads", type=int, default=16)
psr.add_argument("--DR_TB", type=float, default=0.1)
psr.add_argument("--DR_ff", type=float, default=0.1)
psr.add_argument("--activation", default="activation")
# caution: argparse's type=bool treats any non-empty string as True,
# so "--drop_post_MHA False" still parses as True
psr.add_argument("--drop_post_MHA", type=bool, default=True)
psr.add_argument("--lr", type=float, default=1e-5)
psr.add_argument("--loss_fn", default="mean_squared_error")
psr.add_argument("--hvd_switch", type=bool, default=True)

args = vars(psr.parse_args())  # returns a dictionary mapping of the arguments

######## Set hyperparameters ########

EPOCH = args["ep"]
num_heads = args["num_heads"]
DR_TB = args["DR_TB"]
DR_ff = args["DR_ff"]
activation = args["activation"]
dropout1 = args["drop_post_MHA"]
lr = args["lr"]
loss_fn = args["loss_fn"]
BATCH = 32  # batch size used for training
vocab_size = 40000
maxlen = 250
# act_fn = 'elu'
# embed_dim = 128  # embedding size for each token
# num_heads = 16  # number of attention heads
# ff_dim = 128  # hidden layer size in the feed-forward network inside the transformer
checkpt_file = "smile_regress.autosave.model.h5"
csv_file = "smile_regress.training.log"
patience_red_lr = 20
patience_early_stop = 100
hvd_switch = args["hvd_switch"]

######## Create training and validation data ########

# x: tokenized sequence data, y: single-valued docking score
data_path_train = args["in_train"]
data_path_vali = args["in_vali"]

data_train = pd.read_csv(data_path_train)
data_vali = pd.read_csv(data_path_vali)

data_train.head()  # no-op outside a notebook; kept from the original
# The dataset has "type" and "smiles" as its two fields.
# Reshaping: y is formatted as [[y_1], [y_2], ...] with floats.
y_train = data_train["type"].values.reshape(-1, 1) * 1.0
y_val = data_vali["type"].values.reshape(-1, 1) * 1.0

tokenizer = text.Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(data_train["smiles"])

# prep_text comes from smiles_regress_transformer_funcs_hvd
x_train = prep_text(data_train["smiles"], tokenizer, maxlen)
x_val = prep_text(data_vali["smiles"], tokenizer, maxlen)

######## Implement horovod if necessary ########
# initialize_hvd and implement_hvd come from
# smiles_regress_transformer_funcs_hvd; see the sketch after this listing
if hvd_switch:
    lr, x_train, y_train = initialize_hvd(lr, x_train, y_train)
    x_train, y_train = implement_hvd(x_train, y_train)


######## Build model #############

model = build_model(num_heads, DR_TB, DR_ff, activation, dropout1, lr, loss_fn, hvd_switch)

####### Set callbacks ##############
callbacks = callback_setting(
    hvd_switch,
    checkpt_file,
    lr,
    csv_file,
    patience_red_lr,
    patience_early_stop,
)

####### Train model! #########

history = model.fit(
    x_train,
    y_train,
    batch_size=BATCH,
    epochs=EPOCH,
    verbose=1,
    validation_data=(x_val, y_val),
    callbacks=callbacks,
)
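
The helpers imported from smiles_regress_transformer_funcs_hvd (prep_text, initialize_hvd, implement_hvd, build_model, callback_setting) are defined in a file that is not part of this diff. As a rough guide, here is a hedged sketch of what the two Horovod helpers plausibly do, following standard horovod.keras usage; the function bodies are assumptions, not the committed implementation:

```python
import horovod.keras as hvd

def initialize_hvd(lr, x_train, y_train):
    # assumed behavior: start Horovod and scale the learning rate by the
    # worker count, a common convention for synchronous data parallelism
    hvd.init()
    return lr * hvd.size(), x_train, y_train

def implement_hvd(x_train, y_train):
    # assumed behavior: shard the training data so each rank trains on a
    # distinct slice (rank r takes every size-th sample starting at r)
    return x_train[hvd.rank()::hvd.size()], y_train[hvd.rank()::hvd.size()]
```

In the same spirit, build_model presumably wraps the Adam optimizer in hvd.DistributedOptimizer when hvd_switch is set, and callback_setting presumably prepends hvd.callbacks.BroadcastGlobalVariablesCallback(0) so all ranks start from identical weights; both are standard horovod.keras APIs. A typical launch via the run scripts mentioned in the commit message would resemble `horovodrun -np 4 python <training_script>.py --in_train train.csv --in_vali vali.csv`, where the script and file names are hypothetical.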