
Commit 2a8789e

ci/tests: cleaning standalone script (#19141)
* tests: cleaning standalone script
* switch
* from tests
* -m
* collect
* array
* tests_fabric/
* ..
* path prefix
* pl
* cleaning
* test_pytorch_profiler_nested_emit_nvtx
* Apply suggestions from code review
* Apply suggestions from code review
* todo
1 parent 2e77862 commit 2a8789e

File tree: 12 files changed (+99, -107 lines)


.azure/gpu-tests-fabric.yml

Lines changed: 8 additions & 10 deletions
@@ -49,6 +49,7 @@ jobs:
     DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
     FREEZE_REQUIREMENTS: "1"
     PIP_CACHE_DIR: "/var/tmp/pip"
+    PL_RUN_CUDA_TESTS: "1"
   container:
     image: $(image)
     # default shm size is 64m. Increase it to avoid:
@@ -126,19 +127,16 @@ jobs:
       condition: and(succeeded(), eq(variables['PACKAGE_NAME'], 'fabric'))
       displayName: "Adjust tests & examples"
 
-    - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest -v --durations=50
-      workingDirectory: tests/tests_fabric
-      env:
-        PL_RUN_CUDA_TESTS: "1"
+    - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest . -v --durations=50
+      workingDirectory: tests/tests_fabric/
       displayName: "Testing: fabric standard"
       timeoutInMinutes: "10"
 
-    - bash: bash run_standalone_tests.sh
-      workingDirectory: tests/tests_fabric
+    - bash: bash ../run_standalone_tests.sh "."
+      workingDirectory: tests/tests_fabric/
       env:
-        PL_RUN_CUDA_TESTS: "1"
         PL_STANDALONE_TESTS_SOURCE: $(COVERAGE_SOURCE)
-      displayName: "Testing: fabric standalone tests"
+      displayName: "Testing: fabric standalone"
       timeoutInMinutes: "10"
 
     - bash: |
@@ -152,12 +150,12 @@ jobs:
         ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
           --flags=gpu,pytest,${COVERAGE_SOURCE} --name="GPU-coverage" --env=linux,azure
         ls -l
-      workingDirectory: tests/tests_fabric
+      workingDirectory: tests/tests_fabric/
       displayName: "Statistics"
 
     - script: |
        set -e
        bash run_fabric_examples.sh --accelerator=cuda --devices=1
        bash run_fabric_examples.sh --accelerator=cuda --devices=2 --strategy ddp
-      workingDirectory: examples
+      workingDirectory: examples/
       displayName: "Testing: fabric examples"

.azure/gpu-tests-pytorch.yml

Lines changed: 2 additions & 5 deletions
@@ -59,6 +59,7 @@ jobs:
     DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
     FREEZE_REQUIREMENTS: "1"
     PIP_CACHE_DIR: "/var/tmp/pip"
+    PL_RUN_CUDA_TESTS: "1"
   container:
     image: $(image)
     # default shm size is 64m. Increase it to avoid:
@@ -154,16 +155,13 @@ jobs:
 
     - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest -v --durations=50
       workingDirectory: tests/tests_pytorch
-      env:
-        PL_RUN_CUDA_TESTS: "1"
       displayName: "Testing: PyTorch standard"
       timeoutInMinutes: "35"
 
-    - bash: bash run_standalone_tests.sh
+    - bash: bash ../run_standalone_tests.sh "."
       workingDirectory: tests/tests_pytorch
       env:
         PL_USE_MOCKED_MNIST: "1"
-        PL_RUN_CUDA_TESTS: "1"
         PL_STANDALONE_TESTS_SOURCE: $(COVERAGE_SOURCE)
       displayName: "Testing: PyTorch standalone tests"
       timeoutInMinutes: "35"
@@ -172,7 +170,6 @@ jobs:
       workingDirectory: tests/tests_pytorch
       env:
         PL_USE_MOCKED_MNIST: "1"
-        PL_RUN_CUDA_TESTS: "1"
       displayName: "Testing: PyTorch standalone tasks"
       timeoutInMinutes: "10"

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -269,7 +269,7 @@ addopts = [
     "--ignore=legacy/checkpoints",
 ]
 markers = [
-    "cloud:Run the cloud tests for example",
+    "cloud: Run the cloud tests for example",
 ]
 filterwarnings = [
     "error::FutureWarning",

src/lightning/pytorch/utilities/testing/_runif.py

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@
 from lightning_utilities.core.imports import RequirementCache
 
 from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_2_0
-from lightning.fabric.utilities.testing import _runif_reasons as FabricRunIf
+from lightning.fabric.utilities.testing import _runif_reasons as fabric_run_if
 from lightning.pytorch.accelerators.cpu import _PSUTIL_AVAILABLE
 from lightning.pytorch.callbacks.progress.rich_progress import _RICH_AVAILABLE
 from lightning.pytorch.core.module import _ONNX_AVAILABLE
@@ -68,7 +68,7 @@ def _runif_reasons(
 
     """
 
-    reasons, kwargs = FabricRunIf(
+    reasons, kwargs = fabric_run_if(
         min_cuda_gpus=min_cuda_gpus,
         min_torch=min_torch,
         max_torch=max_torch,
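The rename only switches the import alias from the class-styled `FabricRunIf` to the snake_case `fabric_run_if`, reflecting that `_runif_reasons` is a plain function returning a `(reasons, marker_kwargs)` pair. As a hedged sketch (illustrative only, not necessarily the repo's actual helper), such a pair is typically turned into a skip marker like this:

import pytest

from lightning.pytorch.utilities.testing._runif import _runif_reasons  # module shown in this diff


def run_if(**conditions):
    # Illustrative wrapper: evaluate the requested conditions and skip when any reason applies.
    reasons, marker_kwargs = _runif_reasons(**conditions)
    return pytest.mark.skipif(
        condition=len(reasons) > 0,
        reason=f"Requires: [{' + '.join(reasons)}]",
        **marker_kwargs,
    )

# usage sketch:
# @run_if(min_cuda_gpus=2, standalone=True)
# def test_multi_gpu_standalone(): ...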

tests/tests_pytorch/run_standalone_tests.sh renamed to tests/run_standalone_tests.sh

Lines changed: 24 additions & 35 deletions
@@ -23,25 +23,19 @@ source="${PL_STANDALONE_TESTS_SOURCE:-"lightning"}"
 # this environment variable allows special tests to run
 export PL_RUN_STANDALONE_TESTS=1
 # python arguments
-defaults="-m coverage run --source ${source} --append -m pytest --no-header -v -s --timeout 120"
+defaults=" -m coverage run --source ${source} --append -m pytest --no-header -v -s --timeout 120 "
 echo "Using defaults: ${defaults}"
 
-# find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster
-grep_output=$(grep --recursive --word-regexp . --regexp 'standalone=True' --include '*.py')
+# get the testing location as the first argument
+test_path=$1
+printf "source path: $test_path\n"
 
-# file paths, remove duplicates
-files=$(echo "$grep_output" | cut -f1 -d: | sort | uniq)
-
-# get the list of parametrizations. we need to call them separately. the last two lines are removed.
-# note: if there's a syntax error, this will fail with some garbled output
-if [[ "$OSTYPE" == "darwin"* ]]; then
-  parametrizations=$(python3 -m pytest $files --collect-only --quiet "$@" | tail -r | sed -e '1,3d' | tail -r)
-else
-  parametrizations=$(python3 -m pytest $files --collect-only --quiet "$@" | head -n -2)
-fi
-# remove the "tests/tests_pytorch/" path suffixes
-path_suffix=$(basename "$(dirname "$(pwd)")")/$(basename "$(pwd)")"/" # https://stackoverflow.com/a/8223345
-parametrizations=${parametrizations//$path_suffix/}
+# collect all tests with parametrization based filtering with PL_RUN_STANDALONE_TESTS
+standalone_tests=$(python -m pytest $test_path -q --collect-only --pythonwarnings ignore)
+printf "Collected tests: \n $standalone_tests"
+# match only lines with tests
+parametrizations=$(grep -oP '\S+::test_\S+' <<< "$standalone_tests")
+# convert the list to be array
 parametrizations_arr=($parametrizations)
 
 report=''
@@ -61,30 +55,25 @@ function show_batched_output {
 }
 trap show_batched_output EXIT # show the output on exit
 
+# remove the "tests/tests_pytorch/" path suffixes
+path_prefix=$(basename "$(dirname "$(pwd)")")/$(basename "$(pwd)")"/" # https://stackoverflow.com/a/8223345
 for i in "${!parametrizations_arr[@]}"; do
-  parametrization=${parametrizations_arr[$i]}
+  parametrization=${parametrizations_arr[$i]//$path_prefix/}
   prefix="$((i+1))/${#parametrizations_arr[@]}"
 
-  # check blocklist
-  if [[ "${parametrization}" == *"test_pytorch_profiler_nested_emit_nvtx"* ]]; then
-    echo "$prefix: Skipping $parametrization"
-    report+="Skipped\t$parametrization\n"
-    # do not continue the loop because we might need to wait for batched jobs
-  else
-    echo "$prefix: Running $parametrization"
+  echo "$prefix: Running $parametrization"
 
-    # fix the port to avoid race condition when batched distributed tests select the port randomly
-    export MASTER_PORT=$((29500 + $i % $test_batch_size))
+  # fix the port to avoid race condition when batched distributed tests select the port randomly
+  export MASTER_PORT=$((29500 + $i % $test_batch_size))
 
-    # execute the test in the background
-    # redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them
-    # output to std{out,err} because the outputs would be garbled together
-    python3 ${defaults} "$parametrization" &>> standalone_test_output.txt &
-    # save the PID in an array
-    pids[${i}]=$!
-    # add row to the final report
-    report+="Ran\t$parametrization\n"
-  fi
+  # execute the test in the background
+  # redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them
+  # output to std{out,err} because the outputs would be garbled together
+  python ${defaults} "$parametrization" &>> standalone_test_output.txt &
+  # save the PID in an array
+  pids[${i}]=$!
+  # add row to the final report
+  report+="Ran\t$parametrization\n"
 
   if ((($i + 1) % $test_batch_size == 0)); then
     # wait for running tests
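The rewritten script no longer greps source files for `standalone=True`; it collects through pytest (so the conftest filtering driven by `PL_RUN_STANDALONE_TESTS` applies) and then extracts test node IDs with `grep -oP '\S+::test_\S+'`. A small Python sketch of that extraction step, run on made-up collector output (the node IDs below are hypothetical):

import re

# What `python -m pytest <path> -q --collect-only` roughly prints: one node ID per
# line, followed by a summary line that must not be mistaken for a test.
collected = """\
tests_fabric/strategies/test_ddp.py::test_ddp_spawn[2-gpu]
tests_fabric/strategies/test_fsdp.py::test_fsdp_save_checkpoint
2 tests collected in 0.12s
"""

# Same idea as `grep -oP '\S+::test_\S+'`: keep only tokens shaped like
# `<file>::test_<name>[params]`, dropping pytest's summary and warning lines.
parametrizations = re.findall(r"\S+::test_\S+", collected)
print(parametrizations)
# ['tests_fabric/strategies/test_ddp.py::test_ddp_spawn[2-gpu]',
#  'tests_fabric/strategies/test_fsdp.py::test_fsdp_save_checkpoint']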

tests/tests_fabric/conftest.py

Lines changed: 17 additions & 16 deletions
@@ -192,22 +192,23 @@ def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.C
 
     for kwarg, env_var in options.items():
         # this will compute the intersection of all tests selected per environment variable
-        if os.getenv(env_var, "0") == "1":
-            conditions.append(env_var)
-            for i, test in reversed(list(enumerate(items))):  # loop in reverse, since we are going to pop items
-                already_skipped = any(marker.name == "skip" for marker in test.own_markers)
-                if already_skipped:
-                    # the test was going to be skipped anyway, filter it out
-                    items.pop(i)
-                    skipped += 1
-                    continue
-                has_runif_with_kwarg = any(
-                    marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers
-                )
-                if not has_runif_with_kwarg:
-                    # the test has `@RunIf(kwarg=True)`, filter it out
-                    items.pop(i)
-                    filtered += 1
+        if os.getenv(env_var, "0") != "1":
+            continue
+        conditions.append(env_var)
+        for i, test in reversed(list(enumerate(items))):  # loop in reverse, since we are going to pop items
+            already_skipped = any(marker.name == "skip" for marker in test.own_markers)
+            if already_skipped:
+                # the test was going to be skipped anyway, filter it out
+                items.pop(i)
+                skipped += 1
+                continue
+            has_runif_with_kwarg = any(
+                marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers
+            )
+            if not has_runif_with_kwarg:
+                # the test has `@RunIf(kwarg=True)`, filter it out
+                items.pop(i)
+                filtered += 1
 
     if config.option.verbose >= 0 and (filtered or skipped):
         writer = config.get_terminal_writer()
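The change is a pure guard-clause refactor: `if os.getenv(env_var, "0") != "1": continue` replaces the nested `if ... == "1":` block, de-indenting the filtering loop without changing behaviour. A self-contained sketch of the same pattern, using a stand-in item type instead of `pytest.Function` (a simplification for illustration, not the real conftest):

import os
from dataclasses import dataclass, field
from typing import List


@dataclass
class FakeTest:
    """Stand-in for pytest.Function, carrying only a name and its marker names."""

    name: str
    markers: List[str] = field(default_factory=list)


def filter_items(items: List[FakeTest], options: dict) -> None:
    """Keep only items carrying the marker tied to each enabled environment variable."""
    for marker_name, env_var in options.items():
        if os.getenv(env_var, "0") != "1":  # guard clause, as in the new conftest code
            continue
        for i, test in reversed(list(enumerate(items))):  # pop from the end so indices stay valid
            if marker_name not in test.markers:
                items.pop(i)


items = [FakeTest("test_cpu"), FakeTest("test_cuda", ["cuda"])]
os.environ["PL_RUN_CUDA_TESTS"] = "1"
filter_items(items, {"cuda": "PL_RUN_CUDA_TESTS"})
print([t.name for t in items])  # ['test_cuda']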

tests/tests_fabric/run_standalone_tests.sh

Lines changed: 0 additions & 1 deletion
This file was deleted.

tests/tests_fabric/run_tpu_tests.sh

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ cd tests/tests_fabric
 PL_RUN_TPU_TESTS=1 python3 -m coverage run --source=lightning -m pytest -vv --durations=0 --timeout 60 ./
 
 echo "--- Running standalone Fabric tests ---"
-PL_RUN_TPU_TESTS=1 PL_STANDALONE_TESTS_BATCH_SIZE=1 bash run_standalone_tests.sh
+PL_RUN_TPU_TESTS=1 PL_STANDALONE_TESTS_BATCH_SIZE=1 bash ../run_standalone_tests.sh "."
 
 echo "--- Generating coverage ---"
 python3 -m coverage xml

tests/tests_pytorch/conftest.py

Lines changed: 17 additions & 16 deletions
@@ -318,22 +318,23 @@ def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.C
 
     for kwarg, env_var in options.items():
         # this will compute the intersection of all tests selected per environment variable
-        if os.getenv(env_var, "0") == "1":
-            conditions.append(env_var)
-            for i, test in reversed(list(enumerate(items))):  # loop in reverse, since we are going to pop items
-                already_skipped = any(marker.name == "skip" for marker in test.own_markers)
-                if already_skipped:
-                    # the test was going to be skipped anyway, filter it out
-                    items.pop(i)
-                    skipped += 1
-                    continue
-                has_runif_with_kwarg = any(
-                    marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers
-                )
-                if not has_runif_with_kwarg:
-                    # the test has `@RunIf(kwarg=True)`, filter it out
-                    items.pop(i)
-                    filtered += 1
+        if os.getenv(env_var, "0") != "1":
+            continue
+        conditions.append(env_var)
+        for i, test in reversed(list(enumerate(items))):  # loop in reverse, since we are going to pop items
+            already_skipped = any(marker.name == "skip" for marker in test.own_markers)
+            if already_skipped:
+                # the test was going to be skipped anyway, filter it out
+                items.pop(i)
+                skipped += 1
+                continue
+            has_runif_with_kwarg = any(
+                marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers
+            )
+            if not has_runif_with_kwarg:
+                # the test has `@RunIf(kwarg=True)`, filter it out
+                items.pop(i)
+                filtered += 1
 
     if config.option.verbose >= 0 and (filtered or skipped):
         writer = config.get_terminal_writer()

tests/tests_pytorch/profilers/test_profiler.py

Lines changed: 16 additions & 14 deletions
@@ -471,20 +471,22 @@ def look_for_trace(trace_dir):
     assert look_for_trace(tmpdir / "lightning_logs" / "version_0")
 
 
-@RunIf(min_cuda_gpus=1, standalone=True)
-def test_pytorch_profiler_nested_emit_nvtx():
-    """This test check emit_nvtx is correctly supported."""
-    profiler = PyTorchProfiler(use_cuda=True, emit_nvtx=True)
-    model = BoringModel()
-    trainer = Trainer(
-        fast_dev_run=True,
-        profiler=profiler,
-        accelerator="gpu",
-        devices=1,
-        enable_progress_bar=False,
-        enable_model_summary=False,
-    )
-    trainer.fit(model)
+# Todo: this test has not been running as all our CI GPU runners have higher capacity
+# @RunIf(min_cuda_gpus=1, standalone=True)
+# @pytest.mark.skipif(torch.cuda.get_device_capability()[0] >= 8)
+# def test_pytorch_profiler_nested_emit_nvtx():
+#     """This test check emit_nvtx is correctly supported."""
+#     profiler = PyTorchProfiler(use_cuda=True, emit_nvtx=True)
+#     model = BoringModel()
+#     trainer = Trainer(
+#         fast_dev_run=True,
+#         profiler=profiler,
+#         accelerator="gpu",
+#         devices=1,
+#         enable_progress_bar=False,
+#         enable_model_summary=False,
+#     )
+#     trainer.fit(model)
 
 
 def test_register_record_function(tmpdir):
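The commented-out `@pytest.mark.skipif(...)` above passes a boolean condition without a `reason`, which pytest rejects, so the decorator would need adjusting if the test were ever revived. For reference, a hedged sketch of how a capability-gated skip is normally expressed (the capability-8 threshold is taken from the decorator above; the exact condition is an assumption about intent, and the test body is a hypothetical smoke check, not the original test):

import pytest
import torch


@pytest.mark.skipif(
    # Guard collection on machines without CUDA, then skip on compute capability >= 8,
    # mirroring the capability check hinted at in the commented-out decorator.
    not torch.cuda.is_available() or torch.cuda.get_device_capability()[0] >= 8,
    reason="requires an older-generation CUDA GPU (compute capability < 8)",
)
def test_emit_nvtx_smoke():
    # emit_nvtx only produces useful output under nvprof; here it is just a smoke check.
    with torch.autograd.profiler.emit_nvtx():
        torch.ones(1, device="cuda").mul_(2)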
