From 2182f8145d0cd1737f5a365a74ee7a3aedcdb017 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 31 Jan 2025 07:29:47 +1100 Subject: [PATCH 1/7] restrict where we run test_torchinductor tests --- recipe/meta.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index e8c57a42..849b36b2 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -450,7 +450,9 @@ outputs: "test/test_nn.py", "test/test_torch.py", "test/test_xnnpack_integration.py", - ] + (cuda_compiler_version != "None") * ["test/inductor/test_torchinductor.py"]) %} + ]) %} + # test_torchinductor adds >4h in test runtime in emulation; not supported yet for 3.13 + {% set tests = tests ~ " test/inductor/test_torchinductor.py" %} # [py<313 and not aarch64] {% set skips = "(TestTorch and test_print)" %} # tolerance violation with openblas From 7c4fed7ccac5f221d91e49ae532f9a8ac48ea394 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 31 Jan 2025 07:35:54 +1100 Subject: [PATCH 2/7] temporary: skip CPU build --- recipe/meta.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 849b36b2..6d199f4d 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -76,6 +76,8 @@ build: number: {{ build }} # cuda 11.8 was dropped due to maintenance effort, see discussion in #177 skip: true # [cuda_compiler_version == "11.8"] + # temporary skip to avoid wasting resources while unbreak CUDA builds + skip: true # [cuda_compiler_version == "None"] # This logic allows two rc variants to be defined in the conda_build_config, but only one to actually be built. # We want to be able to define two variants in the cbc so we can assign different labels to each in the upload channel # (by zipping is_rc with channel_targets). This prevents rc builds being used unless specifically requested. From ef1444368302254a012920a823dcfb00de9c0778 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 31 Jan 2025 10:34:54 +1100 Subject: [PATCH 3/7] skip two more failing tests --- recipe/meta.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 6d199f4d..40143700 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -484,8 +484,12 @@ outputs: {% set skips = skips ~ " or test_ctc_loss_cudnn_tensor_cuda " %} # [unix and cuda_compiler_version != "None"] {% set skips = skips ~ " or (TestTorch and test_index_add_correctness)" %} # [unix and cuda_compiler_version != "None"] # These tests require higher-resource or more recent GPUs than the CI provides - {% set skips = skips ~ " or (TritonCodeGenTests and test_sdpa_inference_mode_aot_compile)" %} # [unix and cuda_compiler_version != "None"] - {% set skips = skips ~ " or (TestNN and test_grid_sample)" %} # [unix and cuda_compiler_version != "None"] + {% set skips = skips ~ " or test_sdpa_inference_mode_aot_compile" %} # [linux and cuda_compiler_version != "None"] + {% set skips = skips ~ " or (TestNN and test_grid_sample)" %} # [linux and cuda_compiler_version != "None"] + # don't mess with tests that rely on GPU failure handling + {% set skips = skips ~ " or test_indirect_device_assert" %} # [linux and cuda_compiler_version != "None"] + # test that fails to find temporary resource + {% set skips = skips ~ " or (GPUTests and test_scatter_reduce2)" %} # [linux and cuda_compiler_version != "None"] # MKL problems {% set skips = skips ~ " or (TestLinalgCPU and test_inverse_errors_large_cpu)" %} # [unix and blas_impl == "mkl" and cuda_compiler_version != "None"] # these tests are failing with low -n values From 902ee87914089f14f5f3f3d775078d2291149f52 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 31 Jan 2025 22:26:17 +1100 Subject: [PATCH 4/7] add back CUDA compiler to pytorch testing; torch.compile needs it --- recipe/meta.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 40143700..d8d56668 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -365,6 +365,8 @@ outputs: requires: - {{ compiler('c') }} - {{ compiler('cxx') }} + # for torch.compile tests + - {{ compiler('cuda') }} # [cuda_compiler_version != "None"] - ninja - boto3 - hypothesis From 0b65ac042c54746b59d0e3fbe9d6365944b360c0 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 1 Feb 2025 06:46:01 +1100 Subject: [PATCH 5/7] increase timeout yet again --- .github/workflows/conda-build.yml | 2 +- conda-forge.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index 40a005a9..3c036ad9 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -16,7 +16,7 @@ jobs: build: name: ${{ matrix.CONFIG }} runs-on: ${{ matrix.runs_on }} - timeout-minutes: 900 + timeout-minutes: 1080 strategy: fail-fast: false matrix: diff --git a/conda-forge.yml b/conda-forge.yml index 47ae951d..e8a1366f 100644 --- a/conda-forge.yml +++ b/conda-forge.yml @@ -16,7 +16,7 @@ github: tooling_branch_name: main github_actions: self_hosted: true - timeout_minutes: 900 + timeout_minutes: 1080 triggers: - push - pull_request From 95db499db3c12a12472050abf12cfdd26383ac71 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 1 Feb 2025 07:04:43 +1100 Subject: [PATCH 6/7] run test_torchinductor tests for only one python version --- recipe/meta.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index d8d56668..86a5e42b 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -443,7 +443,6 @@ outputs: #- python ./smoke_test/smoke_test.py --package torchonly # a reasonably safe subset of tests that should run under 15 minutes - # The inductor tests test torch.compile {% set tests = " ".join([ "test/test_autograd.py", "test/test_autograd_fallback.py", @@ -455,8 +454,9 @@ outputs: "test/test_torch.py", "test/test_xnnpack_integration.py", ]) %} - # test_torchinductor adds >4h in test runtime in emulation; not supported yet for 3.13 - {% set tests = tests ~ " test/inductor/test_torchinductor.py" %} # [py<313 and not aarch64] + # tests torch.compile; avoid on aarch because it adds >4h in test runtime in emulation; + # they add a lot of runtime (15->60min on windows), so run them for only one python version + {% set tests = tests ~ " test/inductor/test_torchinductor.py" %} # [py==312 and not aarch64] {% set skips = "(TestTorch and test_print)" %} # tolerance violation with openblas From d04bba891c9aee352136b8257c09ca8965f82718 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 1 Feb 2025 07:22:17 +1100 Subject: [PATCH 7/7] temporary: also skip aarch+CUDA, which is already passing --- recipe/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 86a5e42b..223e370d 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -77,7 +77,7 @@ build: # cuda 11.8 was dropped due to maintenance effort, see discussion in #177 skip: true # [cuda_compiler_version == "11.8"] # temporary skip to avoid wasting resources while unbreak CUDA builds - skip: true # [cuda_compiler_version == "None"] + skip: true # [cuda_compiler_version == "None" or aarch64] # This logic allows two rc variants to be defined in the conda_build_config, but only one to actually be built. # We want to be able to define two variants in the cbc so we can assign different labels to each in the upload channel # (by zipping is_rc with channel_targets). This prevents rc builds being used unless specifically requested.