@@ -1863,7 +1863,12 @@ class Check(Callback):
         def on_epoch_start(self, trainer, *_):
             assert isinstance(trainer.training_type_plugin.model, DistributedDataParallel)

-    initial = torch.cuda.memory_allocated(0)
+    def current_memory():
+        # before measuring the memory force release any leftover allocations, including CUDA tensors
+        gc.collect()
+        return torch.cuda.memory_allocated(0)
+
+    initial = current_memory()

     model = TestModel()
     trainer_kwargs = dict(
@@ -1881,22 +1886,13 @@ def on_epoch_start(self, trainer, *_):
     assert list(trainer.optimizers[0].state.values())[0]["exp_avg_sq"].device == torch.device("cpu")
     assert trainer.callback_metrics["train_loss"].device == torch.device("cpu")

-    # before measuring the memory force release any leftover allocations, including CUDA tensors
-    gc.collect()
-    memory_1 = torch.cuda.memory_allocated(0)
-    assert memory_1 == initial
+    assert current_memory() <= initial

     deepcopy(trainer)

-    # before measuring the memory force release any leftover allocations, including CUDA tensors
-    gc.collect()
-    memory_2 = torch.cuda.memory_allocated(0)
-    assert memory_2 == initial
+    assert current_memory() <= initial

     trainer_2 = Trainer(**trainer_kwargs)
     trainer_2.fit(model)

-    # before measuring the memory force release any leftover allocations, including CUDA tensors
-    gc.collect()
-    memory_3 = torch.cuda.memory_allocated(0)
-    assert memory_3 == initial
+    assert current_memory() <= initial
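The change above collapses the repeated `gc.collect()` + `torch.cuda.memory_allocated(0)` bookkeeping into a single `current_memory()` helper and relaxes the checks from `==` to `<=`. A minimal standalone sketch of the same measurement pattern (a hypothetical script for illustration, not part of this commit) could look like:

```python
import gc

import torch


def current_memory(device: int = 0) -> int:
    # Force Python to drop unreachable objects (including CUDA tensors)
    # before reading the allocator counter, so stale references do not
    # inflate the measurement.
    gc.collect()
    return torch.cuda.memory_allocated(device)


if torch.cuda.is_available():
    initial = current_memory()

    # Allocate and then drop a tensor; afterwards the allocated bytes
    # should not exceed the initial baseline.
    x = torch.randn(1024, 1024, device="cuda:0")
    del x

    assert current_memory() <= initial
```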