
Commit af5dc34

Merge pull request #99 from andreped/tf-fix
Added model wrapper test for tf<2.8 + refactored tests
2 parents 0bc538c + 783cf71 commit af5dc34

16 files changed: +576 -232 lines changed
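All of the test diffs below revolve around the same wrapper API, so a minimal usage sketch may help when reading them. The model and the wrapper call are taken directly from the tests; the loss passed to compile() is an assumption, since the hunks cut off before it:

```python
import tensorflow as tf

from gradient_accumulator import GradientAccumulateModel

# toy model, as in the refactored tests
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(10),
    ]
)

# wrap the model so gradients accumulate over 4 batches per weight update,
# emulating a 4x larger batch size
model = GradientAccumulateModel(
    accum_steps=4, inputs=model.input, outputs=model.output
)

model.compile(
    optimizer=tf.keras.optimizers.SGD(1e-2),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),  # assumed
)
```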

.github/workflows/test.yml

Lines changed: 42 additions & 0 deletions
```diff
@@ -98,3 +98,45 @@ jobs:
           pytest -v tests/test_mp_batch_norm.py
           pytest -v tests/test_optimizer_distribute.py
           pytest -v tests/test_model_distribute.py
+
+
+  tf-compability:
+    needs: build
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-20.04]
+        python-version: ["3.6"]
+        tf-version: [2.2.0, 2.3.0, 2.4.0, 2.5.0, 2.6.2]
+
+    steps:
+      - uses: actions/checkout@v1
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: pip install wheel setuptools flake8 pytest-cov
+
+      - name: Install tensorflow-datasets
+        run: |
+          pip install tensorflow==${{ matrix.tf-version }} "tensorflow-datasets<=4.8.2"
+          pip install "protobuf<=3.20" --force-reinstall
+
+      - name: Download artifact
+        uses: actions/download-artifact@master
+        with:
+          name: "Python wheel"
+
+      - name: Install wheel
+        run: pip install --find-links=${{github.workspace}} gradient_accumulator
+
+      - name: Debug pip deps
+        run: pip list
+
+      - name: Test library accessibility
+        run: python -c "from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer"
+
+      - name: Run tests
+        run: pytest -v tests/test_model_expected_result.py
```
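The new `tf-compability` job pins old TensorFlow releases (2.2 through 2.6.2 on Python 3.6) and runs only tests/test_model_expected_result.py against each of them; the forced `protobuf<=3.20` reinstall most likely works around the breakage that protobuf 4.x causes in those older TensorFlow builds. The tests themselves branch on the installed minor version, using the check visible in tests/test_expected_result.py below. A minimal sketch of that gating pattern, with placeholder branch bodies:

```python
import tensorflow as tf

# minor version of the installed TensorFlow, e.g. 6 for 2.6.2
# (same check the tests use; only meaningful for 2.x releases)
tf_version = int(tf.version.VERSION.split(".")[1])

if tf_version < 8:
    print(f"tf 2.{tf_version}: exercising the legacy (<2.8) code path")
else:
    print(f"tf 2.{tf_version}: exercising the current (>=2.8) code path")
```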

tests/test_adaptive_gradient_clipping.py

Lines changed: 30 additions & 12 deletions
```diff
@@ -1,16 +1,24 @@
+import os
+
 import tensorflow as tf
 import tensorflow_datasets as tfds
+from tensorflow.keras import mixed_precision
 from tensorflow.keras.models import load_model
+
 from gradient_accumulator import GradientAccumulateModel
 from gradient_accumulator import unitwise_norm
-from tensorflow.keras import mixed_precision
-import os
+
 from .utils import normalize_img


 def test_unitwise_norm():
     for i in range(7):
-        x = tf.zeros([1,] * i)
+        x = tf.zeros(
+            [
+                1,
+            ]
+            * i
+        )
         try:
             unitwise_norm(x)
         except ValueError as e:
@@ -22,8 +30,8 @@ def test_unitwise_norm():
 def test_train_mnist():
     # load dataset
     (ds_train, ds_test), ds_info = tfds.load(
-        'mnist',
-        split=['train', 'test'],
+        "mnist",
+        split=["train", "test"],
         shuffle_files=True,
         as_supervised=True,
         with_info=True,
@@ -35,7 +43,7 @@ def test_train_mnist():
     # build train pipeline
     ds_train = ds_train.map(normalize_img)
     ds_train = ds_train.cache()
-    ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
+    ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
     ds_train = ds_train.batch(100)  # multiplum of 8
     ds_train = ds_train.prefetch(1)

@@ -46,14 +54,24 @@ def test_train_mnist():
     ds_test = ds_test.prefetch(1)

     # create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Flatten(input_shape=(28, 28)),
-        tf.keras.layers.Dense(32, activation='relu'),  # 32 multiplum of 8
-        tf.keras.layers.Dense(10, dtype='float32')  # output not numerically stable with float16
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Flatten(input_shape=(28, 28)),
+            tf.keras.layers.Dense(32, activation="relu"),  # 32 multiplum of 8
+            tf.keras.layers.Dense(
+                10, dtype="float32"
+            ),  # output not numerically stable with float16
+        ]
+    )

     # wrap model to use gradient accumulation
-    model = GradientAccumulateModel(accum_steps=4, mixed_precision=False, use_agc=True, inputs=model.input, outputs=model.output)
+    model = GradientAccumulateModel(
+        accum_steps=4,
+        mixed_precision=False,
+        use_agc=True,
+        inputs=model.input,
+        outputs=model.output,
+    )

     # need to scale optimizer for mixed precision
     opt = tf.keras.optimizers.SGD(1e-2)
```
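For reference, test_unitwise_norm only probes tensors of rank 0 through 6 and accepts a ValueError for unsupported shapes, while use_agc=True above enables adaptive gradient clipping inside the wrapper. A rough sketch of the AGC idea follows; it is simplified to whole-tensor norms (the library's unitwise_norm computes the norm per output unit), and the clip factor is an assumed value:

```python
import tensorflow as tf


def agc_clip(param, grad, clip_factor=0.01, eps=1e-3):
    # clip the gradient where its norm exceeds clip_factor times the
    # parameter norm, leaving well-scaled gradients untouched
    p_norm = tf.maximum(tf.norm(param), eps)
    g_norm = tf.norm(grad)
    max_norm = clip_factor * p_norm
    return tf.where(g_norm > max_norm, grad * (max_norm / g_norm), grad)
```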

tests/test_batch_norm.py

Lines changed: 30 additions & 20 deletions
```diff
@@ -1,27 +1,33 @@
+import os
+import random as python_random
+
+import numpy as np
 import tensorflow as tf
 import tensorflow_datasets as tfds
 from tensorflow.keras.models import load_model
+
 from gradient_accumulator import GradientAccumulateModel
 from gradient_accumulator.layers import AccumBatchNormalization
-import random as python_random
-import numpy as np
-import os
-from .utils import reset, normalize_img
+
+from .utils import normalize_img
+from .utils import reset


-def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epochs:int = 3):
+def run_experiment(
+    custom_bn: bool = True, bs: int = 100, accum_steps: int = 1, epochs: int = 3
+):
     # load dataset
     (ds_train, ds_test), ds_info = tfds.load(
-        'mnist',
-        split=['train', 'test'],
+        "mnist",
+        split=["train", "test"],
         shuffle_files=True,
         as_supervised=True,
         with_info=True,
     )

     # build train pipeline
     ds_train = ds_train.map(normalize_img)
-    ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
+    ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
     ds_train = ds_train.batch(bs)
     ds_train = ds_train.prefetch(1)

@@ -39,17 +45,21 @@ def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epo
         normalization_layer = tf.keras.layers.Activation("linear")

     # create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Flatten(input_shape=(28, 28)),
-        tf.keras.layers.Dense(32),
-        normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Dense(10)
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Flatten(input_shape=(28, 28)),
+            tf.keras.layers.Dense(32),
+            normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Dense(10),
+        ]
+    )

     # wrap model to use gradient accumulation
     if accum_steps > 1:
-        model = GradientAccumulateModel(accum_steps=accum_steps, inputs=model.input, outputs=model.output)
+        model = GradientAccumulateModel(
+            accum_steps=accum_steps, inputs=model.input, outputs=model.output
+        )

     # compile model
     model.compile(
@@ -79,10 +89,10 @@ def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epo
 def test_compare_bn_layers():
     # set seed
     reset()
-
+
     # custom BN without accum
     result1 = run_experiment(custom_bn=True, accum_steps=1, epochs=3)[1]
-
+
     # reset before second run to get "identical" results
     reset()

@@ -98,10 +108,10 @@ def test_compare_bn_layers():
 def test_compare_accum_bn_expected_result():
     # set seed
     reset()
-
+
     # custom BN without accum
     result1 = run_experiment(custom_bn=True, accum_steps=4, bs=25)[1]
-
+
     # reset before second run to get "identical" results
     reset()
```
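The custom_bn branch falls outside these hunks, but given the AccumBatchNormalization import above it presumably swaps that layer in for the linear placeholder. A minimal sketch of that swap; the accum_steps argument is an assumption based on the layer's purpose of keeping batch-norm statistics consistent across accumulated steps:

```python
import tensorflow as tf

from gradient_accumulator.layers import AccumBatchNormalization

accum_steps = 4

# batch normalization that defers its statistics updates so they stay
# consistent when gradients are accumulated over several mini-batches
normalization_layer = AccumBatchNormalization(accum_steps=accum_steps)

model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(32),
        normalization_layer,
        tf.keras.layers.Activation("relu"),
        tf.keras.layers.Dense(10),
    ]
)
```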

tests/test_bn_convnd.py

Lines changed: 38 additions & 25 deletions
```diff
@@ -1,11 +1,14 @@
+import numpy as np
 import tensorflow as tf
 from tensorflow.keras.models import load_model
+
 from gradient_accumulator import GradientAccumulateModel
 from gradient_accumulator.layers import AccumBatchNormalization
-import numpy as np


-def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
+def test_bn_conv2d(
+    custom_bn: bool = True, accum_steps: int = 1, epochs: int = 1
+):
     # make toy dataset
     data = np.random.randint(2, size=(16, 8, 8, 1))
     gt = np.expand_dims(np.random.randint(2, size=16), axis=-1)
@@ -19,20 +22,24 @@ def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
         normalization_layer = tf.keras.layers.Activation("linear")

     # create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Conv2D(4, 3, input_shape=(8, 8, 1)),
-        normalization_layer,
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Flatten(),
-        tf.keras.layers.Dense(4),
-        normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Dense(1, activation="sigmoid"),
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Conv2D(4, 3, input_shape=(8, 8, 1)),
+            normalization_layer,
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Flatten(),
+            tf.keras.layers.Dense(4),
+            normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Dense(1, activation="sigmoid"),
+        ]
+    )

     # wrap model to use gradient accumulation
     if accum_steps > 1:
-        model = GradientAccumulateModel(accum_steps=accum_steps, inputs=model.input, outputs=model.output)
+        model = GradientAccumulateModel(
+            accum_steps=accum_steps, inputs=model.input, outputs=model.output
+        )

     # compile model
     model.compile(
@@ -60,7 +67,9 @@ def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
     return result


-def test_bn_conv3d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
+def test_bn_conv3d(
+    custom_bn: bool = True, accum_steps: int = 1, epochs: int = 1
+):
     # make toy dataset
     data = np.random.randint(2, size=(16, 8, 8, 8, 1))
     gt = np.expand_dims(np.random.randint(2, size=16), axis=-1)
@@ -74,20 +83,24 @@ def test_bn_conv3d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
         normalization_layer = tf.keras.layers.Activation("linear")

     # create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Conv3D(4, 3, input_shape=(8, 8, 8, 1)),
-        normalization_layer,
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Flatten(),
-        tf.keras.layers.Dense(4),
-        normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Dense(1, activation="sigmoid"),
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Conv3D(4, 3, input_shape=(8, 8, 8, 1)),
+            normalization_layer,
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Flatten(),
+            tf.keras.layers.Dense(4),
+            normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Dense(1, activation="sigmoid"),
+        ]
+    )

     # wrap model to use gradient accumulation
     if accum_steps > 1:
-        model = GradientAccumulateModel(accum_steps=accum_steps, inputs=model.input, outputs=model.output)
+        model = GradientAccumulateModel(
+            accum_steps=accum_steps, inputs=model.input, outputs=model.output
+        )

     # compile model
     model.compile(
```
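One detail worth flagging in both of these tests: the same normalization_layer object is placed at two positions in the Sequential list, so both positions share a single layer instance. That is harmless for the stateless linear Activation used here, but a stateful layer such as batch normalization would share weights and running statistics between the two positions. A sketch with independent instances, should separate state be intended:

```python
import tensorflow as tf


def make_norm():
    # fresh layer per call, so each position in the model owns its state
    return tf.keras.layers.Activation("linear")


model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Conv2D(4, 3, input_shape=(8, 8, 1)),
        make_norm(),
        tf.keras.layers.Activation("relu"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(4),
        make_norm(),
        tf.keras.layers.Activation("relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)
```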

tests/test_expected_result.py

Lines changed: 17 additions & 10 deletions
```diff
@@ -1,12 +1,17 @@
+import os
+import random as python_random
+
 import numpy as np
 import tensorflow as tf
-import random as python_random
-import os
-from .utils import get_opt, normalize_img, reset
 import tensorflow_datasets as tfds
 from tensorflow.keras.models import load_model
-from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer

+from gradient_accumulator import GradientAccumulateModel
+from gradient_accumulator import GradientAccumulateOptimizer
+
+from .utils import get_opt
+from .utils import normalize_img
+from .utils import reset

 # get current tf minor version
 tf_version = int(tf.version.VERSION.split(".")[1])
@@ -15,8 +20,8 @@
 def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
     # load dataset
     (ds_train, ds_test), ds_info = tfds.load(
-        'mnist',
-        split=['train', 'test'],
+        "mnist",
+        split=["train", "test"],
         shuffle_files=True,
         as_supervised=True,
         with_info=True,
@@ -35,7 +40,7 @@ def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
     # create model
     input = tf.keras.layers.Input(shape=(28, 28))
     x = tf.keras.layers.Flatten(input_shape=(28, 28))(input)
-    x = tf.keras.layers.Dense(128, activation='relu')(x)
+    x = tf.keras.layers.Dense(128, activation="relu")(x)
     output = tf.keras.layers.Dense(10)(x)

     opt = get_opt(opt_name="SGD", tf_version=tf_version)
@@ -45,14 +50,16 @@ def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
     else:
         if modeloropt == "model":
             # wrap model to use gradient accumulation
-            model = GradientAccumulateModel(accum_steps=accum_steps, inputs=input, outputs=output)
+            model = GradientAccumulateModel(
+                accum_steps=accum_steps, inputs=input, outputs=output
+            )
         else:
             # wrap optimizer to use gradient accumulation
             opt = GradientAccumulateOptimizer(opt, accum_steps=accum_steps)

     # compile model
     model = tf.keras.Model(inputs=input, outputs=output)
-
+
     # compile model
     model.compile(
         optimizer=opt,
@@ -91,7 +98,7 @@ def test_expected_result():

     # run again with different batch size and number of accumulations
     result2 = run_experiment(bs=50, accum_steps=2, epochs=2, modeloropt="opt")
-
+
     # reset again
     reset()
```
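The modeloropt switch exercises both integration points of the library: wrapping the model (as in the earlier tests) or wrapping the optimizer. The optimizer route, using the same call as in the diff, looks roughly like this; the loss is an assumption, as the compile() body is cut off in the hunk:

```python
import tensorflow as tf

from gradient_accumulator import GradientAccumulateOptimizer

# plain Keras model; no model wrapper needed on this route
inputs = tf.keras.layers.Input(shape=(28, 28))
x = tf.keras.layers.Flatten()(inputs)
x = tf.keras.layers.Dense(128, activation="relu")(x)
outputs = tf.keras.layers.Dense(10)(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# accumulate gradients over 2 steps inside the optimizer instead
opt = GradientAccumulateOptimizer(tf.keras.optimizers.SGD(1e-2), accum_steps=2)

model.compile(
    optimizer=opt,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),  # assumed
)
```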
