Commit d058190

Run standalone tests in batches (#13673)
1 parent 0449e86 commit d058190

2 files changed: +36 / -17 lines

tests/tests_pytorch/run_standalone_tests.sh

Lines changed: 32 additions & 7 deletions
```diff
@@ -18,7 +18,7 @@ set -e
 # this environment variable allows special tests to run
 export PL_RUN_STANDALONE_TESTS=1
 # python arguments
-defaults='-m coverage run --source pytorch_lightning --append -m pytest --capture=no'
+defaults='-m coverage run --source pytorch_lightning --append -m pytest --no-header'
 
 # find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster
 grep_output=$(grep --recursive --word-regexp . --regexp 'standalone=True' --include '*.py')
@@ -40,22 +40,47 @@ parametrizations_arr=($parametrizations)
 # tests to skip - space separated
 blocklist='profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx utilities/test_warnings.py'
 report=''
+test_batch_size=6
+
+rm -f standalone_test_output.txt  # in case it exists, remove it
+function show_batched_output {
+  if [ -f standalone_test_output.txt ]; then  # if exists
+    cat standalone_test_output.txt
+    rm standalone_test_output.txt
+  fi
+}
+trap show_batched_output EXIT  # show the output on exit
 
 for i in "${!parametrizations_arr[@]}"; do
   parametrization=${parametrizations_arr[$i]}
 
   # check blocklist
   if echo $blocklist | grep -F "${parametrization}"; then
     report+="Skipped\t$parametrization\n"
-    continue
+    # do not continue the loop because we might need to wait for batched jobs
+  else
+    echo "Running $parametrization"
+    # execute the test in the background
+    # redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them
+    # output to std{out,err} because the outputs would be garbled together
+    python ${defaults} "$parametrization" &>> standalone_test_output.txt &
+    # save the PID in an array
+    pids[${i}]=$!
+    # add row to the final report
+    report+="Ran\t$parametrization\n"
   fi
 
-  # run the test
-  echo "Running $parametrization"
-  python ${defaults} "$parametrization"
-
-  report+="Ran\t$parametrization\n"
+  if ((($i + 1) % $test_batch_size == 0)); then
+    # wait for running tests
+    for pid in ${pids[*]}; do wait $pid; done
+    unset pids  # empty the array
+    show_batched_output
+  fi
 done
+# wait for leftover tests
+for pid in ${pids[*]}; do wait $pid; done
+show_batched_output
+echo "Batched mode finished. Continuing with the rest of standalone tests."
 
 if nvcc --version; then
   nvprof --profile-from-start off -o trace_name.prof -- python ${defaults} profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx
```
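
The core change above is a batch-and-wait pattern: each standalone test is launched as a background job, the script waits for the whole batch after every `test_batch_size` (6) jobs, and all output is appended to `standalone_test_output.txt` so concurrent jobs cannot interleave their writes on std{out,err}. Below is a minimal, self-contained Python sketch of the same idiom, for illustration only; it is not part of the commit, and `run_in_batches` and its parameters are made-up names.

```python
import subprocess

def run_in_batches(commands, batch_size=6, log_path="standalone_test_output.txt"):
    """Hypothetical sketch of the script's batching idiom: start jobs in the
    background, wait after every `batch_size` of them, and funnel all output
    into one log file so concurrent writers cannot garble the console."""
    procs = []
    with open(log_path, "ab") as log:
        for i, cmd in enumerate(commands):
            # like `python ${defaults} "$parametrization" &>> standalone_test_output.txt &`
            procs.append(subprocess.Popen(cmd, stdout=log, stderr=subprocess.STDOUT))
            if (i + 1) % batch_size == 0:
                for p in procs:  # like `for pid in ${pids[*]}; do wait $pid; done`
                    p.wait()
                procs.clear()  # like `unset pids`
        for p in procs:  # wait for the leftover, partially filled batch
            p.wait()

# toy usage: eight trivial jobs in batches of three
run_in_batches([["python", "-c", f"print({n})"] for n in range(8)], batch_size=3)
```

Note how the script pairs this with `trap show_batched_output EXIT`: since `set -e` aborts the script on the first failing `wait`, the trap ensures the buffered log is still printed when a batch fails.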

tests/tests_pytorch/strategies/test_deepspeed_strategy.py

Lines changed: 4 additions & 10 deletions
```diff
@@ -26,7 +26,7 @@
 from torch.utils.data import DataLoader
 from torchmetrics import Accuracy
 
-from pytorch_lightning import LightningDataModule, LightningModule, seed_everything, Trainer
+from pytorch_lightning import LightningDataModule, LightningModule, Trainer
 from pytorch_lightning.callbacks import Callback, LearningRateMonitor, ModelCheckpoint
 from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset
 from pytorch_lightning.plugins import DeepSpeedPrecisionPlugin
@@ -712,7 +712,6 @@ def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config
 @pytest.mark.parametrize(("accumulate_grad_batches", "automatic_optimization"), [(1, False), (2, True)])
 @RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
 def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization, accumulate_grad_batches):
-    seed_everything(1)
     if automatic_optimization:
         model = ModelParallelClassificationModel()
     else:
@@ -734,9 +733,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization
     trainer.fit(model, datamodule=dm)
 
     results = trainer.test(datamodule=dm)
-    assert results[0]["test_acc"] > 0.7
     saved_results = trainer.test(ckpt_path=ck.best_model_path, datamodule=dm)
-    assert saved_results[0]["test_acc"] > 0.7
     assert saved_results == results
 
     if automatic_optimization:
@@ -752,9 +749,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization
         enable_progress_bar=False,
         enable_model_summary=False,
     )
-
-    results = trainer.test(model, datamodule=dm, ckpt_path=ck.best_model_path)
-    assert results[0]["test_acc"] > 0.7
+    trainer.test(model, datamodule=dm, ckpt_path=ck.best_model_path)
 
 
 @RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
@@ -861,7 +856,6 @@ def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) ->
 @RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
 def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_optimizer):
     """Test to ensure with Stage 2 and multiple GPUs, accumulated grad batches works."""
-    seed_everything(42)
 
     class VerificationCallback(Callback):
         def __init__(self):
@@ -1109,7 +1103,7 @@ def test_dataloader(self):
 @pytest.mark.parametrize("max_epoch", [2])
 @pytest.mark.parametrize("limit_train_batches", [2])
 @RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
-def test_scheduler_step_count(mock_step, max_epoch, limit_train_batches, interval):
+def test_scheduler_step_count(mock_step, tmpdir, max_epoch, limit_train_batches, interval):
     """Test to ensure that the scheduler is called the correct amount of times during training when scheduler is
     set to step or epoch."""
 
@@ -1124,7 +1118,7 @@ def configure_optimizers(self):
 
     model = TestModel()
     trainer = Trainer(
-        default_root_dir=os.getcwd(),
+        default_root_dir=tmpdir,
         limit_train_batches=limit_train_batches,
         limit_val_batches=0,
         max_epochs=max_epoch,
```
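
On the test side, the commit removes the `seed_everything` calls and per-run `test_acc` assertions from these standalone DeepSpeed tests, and points `test_scheduler_step_count` at pytest's built-in `tmpdir` fixture instead of `os.getcwd()`, so test artifacts land in an isolated per-test directory. A minimal sketch of what that fixture provides, for illustration only (not from the repository):

```python
# Hypothetical example: pytest injects `tmpdir` as a fresh, unique directory
# for every test, so files a test writes (checkpoints, logs) no longer pollute
# the working directory the way `default_root_dir=os.getcwd()` did.
def test_writes_to_isolated_dir(tmpdir):
    checkpoint = tmpdir.join("best.ckpt")  # `tmpdir` is a py.path.local object
    checkpoint.write("fake checkpoint contents")
    assert checkpoint.read() == "fake checkpoint contents"
```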
