
Commit af5dc34

Merge pull request #99 from andreped/tf-fix
Added model wrapper test for tf<2.8 + refactored tests
2 parents 0bc538c + 783cf71 commit af5dc34

16 files changed: +576 -232 lines changed
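All of the test diffs below revolve around the same wrapper API, so a minimal usage sketch may help when reading them. The model and the wrapper call are taken directly from the tests; the loss passed to compile() is an assumption, since the hunks cut off before it:

```python
import tensorflow as tf

from gradient_accumulator import GradientAccumulateModel

# toy model, as in the refactored tests
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(10),
    ]
)

# wrap the model so gradients accumulate over 4 batches per weight update,
# emulating a 4x larger batch size
model = GradientAccumulateModel(
    accum_steps=4, inputs=model.input, outputs=model.output
)

model.compile(
    optimizer=tf.keras.optimizers.SGD(1e-2),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),  # assumed
)
```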

.github/workflows/test.yml

Lines changed: 42 additions & 0 deletions
```diff
@@ -98,3 +98,45 @@ jobs:
           pytest -v tests/test_mp_batch_norm.py
           pytest -v tests/test_optimizer_distribute.py
           pytest -v tests/test_model_distribute.py
+
+
+  tf-compability:
+    needs: build
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-20.04]
+        python-version: ["3.6"]
+        tf-version: [2.2.0, 2.3.0, 2.4.0, 2.5.0, 2.6.2]
+
+    steps:
+      - uses: actions/checkout@v1
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: pip install wheel setuptools flake8 pytest-cov
+
+      - name: Install tensorflow-datasets
+        run: |
+          pip install tensorflow==${{ matrix.tf-version }} "tensorflow-datasets<=4.8.2"
+          pip install "protobuf<=3.20" --force-reinstall
+
+      - name: Download artifact
+        uses: actions/download-artifact@master
+        with:
+          name: "Python wheel"
+
+      - name: Install wheel
+        run: pip install --find-links=${{github.workspace}} gradient_accumulator
+
+      - name: Debug pip deps
+        run: pip list
+
+      - name: Test library accessibility
+        run: python -c "from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer"
+
+      - name: Run tests
+        run: pytest -v tests/test_model_expected_result.py
```
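The new `tf-compability` job pins old TensorFlow releases (2.2 through 2.6.2 on Python 3.6) and runs only tests/test_model_expected_result.py against each of them; the forced `protobuf<=3.20` reinstall most likely works around the breakage that protobuf 4.x causes in those older TensorFlow builds. The tests themselves branch on the installed minor version, using the check visible in tests/test_expected_result.py below. A minimal sketch of that gating pattern, with placeholder branch bodies:

```python
import tensorflow as tf

# minor version of the installed TensorFlow, e.g. 6 for 2.6.2
# (same check the tests use; only meaningful for 2.x releases)
tf_version = int(tf.version.VERSION.split(".")[1])

if tf_version < 8:
    print(f"tf 2.{tf_version}: exercising the legacy (<2.8) code path")
else:
    print(f"tf 2.{tf_version}: exercising the current (>=2.8) code path")
```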

tests/test_adaptive_gradient_clipping.py

Lines changed: 30 additions & 12 deletions
```diff
@@ -1,16 +1,24 @@
+import os
+
 import tensorflow as tf
 import tensorflow_datasets as tfds
+from tensorflow.keras import mixed_precision
 from tensorflow.keras.models import load_model
+
 from gradient_accumulator import GradientAccumulateModel
 from gradient_accumulator import unitwise_norm
-from tensorflow.keras import mixed_precision
-import os
+
 from .utils import normalize_img


 def test_unitwise_norm():
     for i in range(7):
-        x = tf.zeros([1,] * i)
+        x = tf.zeros(
+            [
+                1,
+            ]
+            * i
+        )
         try:
             unitwise_norm(x)
         except ValueError as e:
@@ -22,8 +30,8 @@ def test_unitwise_norm():
 def test_train_mnist():
     # load dataset
     (ds_train, ds_test), ds_info = tfds.load(
-        'mnist',
-        split=['train', 'test'],
+        "mnist",
+        split=["train", "test"],
         shuffle_files=True,
         as_supervised=True,
         with_info=True,
@@ -35,7 +43,7 @@ def test_train_mnist():
     # build train pipeline
     ds_train = ds_train.map(normalize_img)
     ds_train = ds_train.cache()
-    ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
+    ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
     ds_train = ds_train.batch(100)  # multiplum of 8
     ds_train = ds_train.prefetch(1)

@@ -46,14 +54,24 @@ def test_train_mnist():
     ds_test = ds_test.prefetch(1)

     # create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Flatten(input_shape=(28, 28)),
-        tf.keras.layers.Dense(32, activation='relu'),  # 32 multiplum of 8
-        tf.keras.layers.Dense(10, dtype='float32')  # output not numerically stable with float16
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Flatten(input_shape=(28, 28)),
+            tf.keras.layers.Dense(32, activation="relu"),  # 32 multiplum of 8
+            tf.keras.layers.Dense(
+                10, dtype="float32"
+            ),  # output not numerically stable with float16
+        ]
+    )

     # wrap model to use gradient accumulation
-    model = GradientAccumulateModel(accum_steps=4, mixed_precision=False, use_agc=True, inputs=model.input, outputs=model.output)
+    model = GradientAccumulateModel(
+        accum_steps=4,
+        mixed_precision=False,
+        use_agc=True,
+        inputs=model.input,
+        outputs=model.output,
+    )

     # need to scale optimizer for mixed precision
     opt = tf.keras.optimizers.SGD(1e-2)
```
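For reference, test_unitwise_norm only probes tensors of rank 0 through 6 and accepts a ValueError for unsupported shapes, while use_agc=True above enables adaptive gradient clipping inside the wrapper. A rough sketch of the AGC idea follows; it is simplified to whole-tensor norms (the library's unitwise_norm computes the norm per output unit), and the clip factor is an assumed value:

```python
import tensorflow as tf


def agc_clip(param, grad, clip_factor=0.01, eps=1e-3):
    # clip the gradient where its norm exceeds clip_factor times the
    # parameter norm, leaving well-scaled gradients untouched
    p_norm = tf.maximum(tf.norm(param), eps)
    g_norm = tf.norm(grad)
    max_norm = clip_factor * p_norm
    return tf.where(g_norm > max_norm, grad * (max_norm / g_norm), grad)
```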

tests/test_batch_norm.py

Lines changed: 30 additions & 20 deletions
```diff
@@ -1,27 +1,33 @@
+import os
+import random as python_random
+
+import numpy as np
 import tensorflow as tf
 import tensorflow_datasets as tfds
 from tensorflow.keras.models import load_model
+
 from gradient_accumulator import GradientAccumulateModel
 from gradient_accumulator.layers import AccumBatchNormalization
-import random as python_random
-import numpy as np
-import os
-from .utils import reset, normalize_img
+
+from .utils import normalize_img
+from .utils import reset


-def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epochs:int = 3):
+def run_experiment(
+    custom_bn: bool = True, bs: int = 100, accum_steps: int = 1, epochs: int = 3
+):
     # load dataset
     (ds_train, ds_test), ds_info = tfds.load(
-        'mnist',
-        split=['train', 'test'],
+        "mnist",
+        split=["train", "test"],
         shuffle_files=True,
         as_supervised=True,
         with_info=True,
     )

     # build train pipeline
     ds_train = ds_train.map(normalize_img)
-    ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
+    ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
     ds_train = ds_train.batch(bs)
     ds_train = ds_train.prefetch(1)

@@ -39,17 +45,21 @@ def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epo
         normalization_layer = tf.keras.layers.Activation("linear")

     # create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Flatten(input_shape=(28, 28)),
-        tf.keras.layers.Dense(32),
-        normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Dense(10)
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Flatten(input_shape=(28, 28)),
+            tf.keras.layers.Dense(32),
+            normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Dense(10),
+        ]
+    )

     # wrap model to use gradient accumulation
     if accum_steps > 1:
-        model = GradientAccumulateModel(accum_steps=accum_steps, inputs=model.input, outputs=model.output)
+        model = GradientAccumulateModel(
+            accum_steps=accum_steps, inputs=model.input, outputs=model.output
+        )

     # compile model
     model.compile(
@@ -79,10 +89,10 @@ def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epo
 def test_compare_bn_layers():
     # set seed
     reset()
-
+
     # custom BN without accum
     result1 = run_experiment(custom_bn=True, accum_steps=1, epochs=3)[1]
-
+
     # reset before second run to get "identical" results
     reset()

@@ -98,10 +108,10 @@ def test_compare_bn_layers():
 def test_compare_accum_bn_expected_result():
     # set seed
     reset()
-
+
     # custom BN without accum
     result1 = run_experiment(custom_bn=True, accum_steps=4, bs=25)[1]
-
+
     # reset before second run to get "identical" results
     reset()
```
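The custom_bn branch falls outside these hunks, but given the AccumBatchNormalization import above it presumably swaps that layer in for the linear placeholder. A minimal sketch of that swap; the accum_steps argument is an assumption based on the layer's purpose of keeping batch-norm statistics consistent across accumulated steps:

```python
import tensorflow as tf

from gradient_accumulator.layers import AccumBatchNormalization

accum_steps = 4

# batch normalization that defers its statistics updates so they stay
# consistent when gradients are accumulated over several mini-batches
normalization_layer = AccumBatchNormalization(accum_steps=accum_steps)

model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(32),
        normalization_layer,
        tf.keras.layers.Activation("relu"),
        tf.keras.layers.Dense(10),
    ]
)
```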

tests/test_bn_convnd.py

Lines changed: 38 additions & 25 deletions
```diff
@@ -1,11 +1,14 @@
+import numpy as np
 import tensorflow as tf
 from tensorflow.keras.models import load_model
+
 from gradient_accumulator import GradientAccumulateModel
 from gradient_accumulator.layers import AccumBatchNormalization
-import numpy as np


-def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
+def test_bn_conv2d(
+    custom_bn: bool = True, accum_steps: int = 1, epochs: int = 1
+):
     # make toy dataset
     data = np.random.randint(2, size=(16, 8, 8, 1))
     gt = np.expand_dims(np.random.randint(2, size=16), axis=-1)
@@ -19,20 +22,24 @@ def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
         normalization_layer = tf.keras.layers.Activation("linear")

     # create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Conv2D(4, 3, input_shape=(8, 8, 1)),
-        normalization_layer,
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Flatten(),
-        tf.keras.layers.Dense(4),
-        normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Dense(1, activation="sigmoid"),
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Conv2D(4, 3, input_shape=(8, 8, 1)),
+            normalization_layer,
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Flatten(),
+            tf.keras.layers.Dense(4),
+            normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Dense(1, activation="sigmoid"),
+        ]
+    )

     # wrap model to use gradient accumulation
     if accum_steps > 1:
-        model = GradientAccumulateModel(accum_steps=accum_steps, inputs=model.input, outputs=model.output)
+        model = GradientAccumulateModel(
+            accum_steps=accum_steps, inputs=model.input, outputs=model.output
+        )

     # compile model
     model.compile(
@@ -60,7 +67,9 @@ def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
     return result


-def test_bn_conv3d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
+def test_bn_conv3d(
+    custom_bn: bool = True, accum_steps: int = 1, epochs: int = 1
+):
     # make toy dataset
     data = np.random.randint(2, size=(16, 8, 8, 8, 1))
     gt = np.expand_dims(np.random.randint(2, size=16), axis=-1)
@@ -74,20 +83,24 @@ def test_bn_conv3d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
         normalization_layer = tf.keras.layers.Activation("linear")

     # create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Conv3D(4, 3, input_shape=(8, 8, 8, 1)),
-        normalization_layer,
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Flatten(),
-        tf.keras.layers.Dense(4),
-        normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Dense(1, activation="sigmoid"),
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Conv3D(4, 3, input_shape=(8, 8, 8, 1)),
+            normalization_layer,
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Flatten(),
+            tf.keras.layers.Dense(4),
+            normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Dense(1, activation="sigmoid"),
+        ]
+    )

     # wrap model to use gradient accumulation
     if accum_steps > 1:
-        model = GradientAccumulateModel(accum_steps=accum_steps, inputs=model.input, outputs=model.output)
+        model = GradientAccumulateModel(
+            accum_steps=accum_steps, inputs=model.input, outputs=model.output
+        )

     # compile model
     model.compile(
```
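One detail worth flagging in both of these tests: the same normalization_layer object is placed at two positions in the Sequential list, so both positions share a single layer instance. That is harmless for the stateless linear Activation used here, but a stateful layer such as batch normalization would share weights and running statistics between the two positions. A sketch with independent instances, should separate state be intended:

```python
import tensorflow as tf


def make_norm():
    # fresh layer per call, so each position in the model owns its state
    return tf.keras.layers.Activation("linear")


model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Conv2D(4, 3, input_shape=(8, 8, 1)),
        make_norm(),
        tf.keras.layers.Activation("relu"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(4),
        make_norm(),
        tf.keras.layers.Activation("relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)
```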

tests/test_expected_result.py

Lines changed: 17 additions & 10 deletions
```diff
@@ -1,12 +1,17 @@
+import os
+import random as python_random
+
 import numpy as np
 import tensorflow as tf
-import random as python_random
-import os
-from .utils import get_opt, normalize_img, reset
 import tensorflow_datasets as tfds
 from tensorflow.keras.models import load_model
-from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer

+from gradient_accumulator import GradientAccumulateModel
+from gradient_accumulator import GradientAccumulateOptimizer
+
+from .utils import get_opt
+from .utils import normalize_img
+from .utils import reset

 # get current tf minor version
 tf_version = int(tf.version.VERSION.split(".")[1])
@@ -15,8 +20,8 @@
 def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
     # load dataset
     (ds_train, ds_test), ds_info = tfds.load(
-        'mnist',
-        split=['train', 'test'],
+        "mnist",
+        split=["train", "test"],
         shuffle_files=True,
         as_supervised=True,
         with_info=True,
@@ -35,7 +40,7 @@ def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
     # create model
     input = tf.keras.layers.Input(shape=(28, 28))
     x = tf.keras.layers.Flatten(input_shape=(28, 28))(input)
-    x = tf.keras.layers.Dense(128, activation='relu')(x)
+    x = tf.keras.layers.Dense(128, activation="relu")(x)
     output = tf.keras.layers.Dense(10)(x)

     opt = get_opt(opt_name="SGD", tf_version=tf_version)
@@ -45,14 +50,16 @@ def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
     else:
         if modeloropt == "model":
             # wrap model to use gradient accumulation
-            model = GradientAccumulateModel(accum_steps=accum_steps, inputs=input, outputs=output)
+            model = GradientAccumulateModel(
+                accum_steps=accum_steps, inputs=input, outputs=output
+            )
         else:
             # wrap optimizer to use gradient accumulation
             opt = GradientAccumulateOptimizer(opt, accum_steps=accum_steps)

     # compile model
     model = tf.keras.Model(inputs=input, outputs=output)
-
+
     # compile model
     model.compile(
         optimizer=opt,
@@ -91,7 +98,7 @@ def test_expected_result():

     # run again with different batch size and number of accumulations
     result2 = run_experiment(bs=50, accum_steps=2, epochs=2, modeloropt="opt")
-
+
     # reset again
     reset()
```
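The modeloropt switch exercises both integration points of the library: wrapping the model (as in the earlier tests) or wrapping the optimizer. The optimizer route, using the same call as in the diff, looks roughly like this; the loss is an assumption, as the compile() body is cut off in the hunk:

```python
import tensorflow as tf

from gradient_accumulator import GradientAccumulateOptimizer

# plain Keras model; no model wrapper needed on this route
inputs = tf.keras.layers.Input(shape=(28, 28))
x = tf.keras.layers.Flatten()(inputs)
x = tf.keras.layers.Dense(128, activation="relu")(x)
outputs = tf.keras.layers.Dense(10)(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# accumulate gradients over 2 steps inside the optimizer instead
opt = GradientAccumulateOptimizer(tf.keras.optimizers.SGD(1e-2), accum_steps=2)

model.compile(
    optimizer=opt,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),  # assumed
)
```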
