diff --git a/.github/workflows/GPU.yml b/.github/workflows/GPU.yml index 79b209fc..b0d43c58 100644 --- a/.github/workflows/GPU.yml +++ b/.github/workflows/GPU.yml @@ -19,13 +19,15 @@ concurrency: jobs: cuda-tests: name: "CUDA GPU Tests" - runs-on: [self-hosted, Linux, X64, gpu] + runs-on: [self-hosted, gpu-v100] timeout-minutes: 240 steps: - uses: actions/checkout@v6 - uses: julia-actions/setup-julia@v2 with: version: "1" + - name: Check GPU status + run: nvidia-smi - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 env: @@ -39,7 +41,7 @@ jobs: gpu-docs: name: "Documentation" - runs-on: [self-hosted, Linux, X64, gpu] + runs-on: [self-hosted, gpu-v100] timeout-minutes: 240 if: github.event_name == 'push' || !github.event.pull_request.draft steps: diff --git a/LocalPreferences.toml b/LocalPreferences.toml new file mode 100644 index 00000000..b65c691f --- /dev/null +++ b/LocalPreferences.toml @@ -0,0 +1,7 @@ +[CUDA_Runtime_jll] +version = "12.6" + +[CUDA_Driver_jll] +# Disable forward-compat driver — V100 runners need the system driver +# since CUDA_Driver_jll v13+ drops compute capability 7.0 support +compat = "false" diff --git a/Project.toml b/Project.toml index 26bd4891..11ff1f8f 100644 --- a/Project.toml +++ b/Project.toml @@ -24,6 +24,8 @@ SteadyStateDiffEq = "9672c7b4-1e72-59bd-8a11-6ac3964bc41f" [compat] ADTypes = "1" Aqua = "0.8" +CUDA_Driver_jll = "0, 13" +CUDA_Runtime_jll = "0" ChainRulesCore = "1" CommonSolve = "0.2" ConcreteStructs = "0.2" @@ -38,6 +40,7 @@ SafeTestsets = "0.1" InteractiveUtils = "<0.0.1, 1" Lux = "1" LuxCore = "1" +LuxCUDA = "0.3" LuxTestUtils = "1, 2" MLDataDevices = "1" NLsolve = "4" @@ -64,6 +67,9 @@ ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" +CUDA_Driver_jll = "4ee394cb-3365-5eb0-8335-949819d2adfc" +CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" +LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LuxTestUtils = "ac9de150-d08f-4546-94fb-7472b5760531" @@ -78,4 +84,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Aqua", "Documenter", "ExplicitImports", "ForwardDiff", "Functors", "GPUArraysCore", "InteractiveUtils", "LuxTestUtils", "MLDataDevices", "NLsolve", "NonlinearSolve", "OrdinaryDiffEq", "Pkg", "SafeTestsets", "SciMLSensitivity", "StableRNGs", "Test", "Zygote"] +test = ["Aqua", "CUDA_Driver_jll", "CUDA_Runtime_jll", "Documenter", "ExplicitImports", "ForwardDiff", "Functors", "GPUArraysCore", "InteractiveUtils", "LuxCUDA", "LuxTestUtils", "MLDataDevices", "NLsolve", "NonlinearSolve", "OrdinaryDiffEq", "Pkg", "SafeTestsets", "SciMLSensitivity", "StableRNGs", "Test", "Zygote"] diff --git a/docs/LocalPreferences.toml b/docs/LocalPreferences.toml new file mode 100644 index 00000000..b65c691f --- /dev/null +++ b/docs/LocalPreferences.toml @@ -0,0 +1,7 @@ +[CUDA_Runtime_jll] +version = "12.6" + +[CUDA_Driver_jll] +# Disable forward-compat driver — V100 runners need the system driver +# since CUDA_Driver_jll v13+ drops compute capability 7.0 support +compat = "false" diff --git a/docs/Project.toml b/docs/Project.toml index 20417b8d..e5d98ce3 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,4 +1,6 @@ [deps] +CUDA_Driver_jll = "4ee394cb-3365-5eb0-8335-949819d2adfc" +CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DeepEquilibriumNetworks = "6748aba7-0e9b-415e-a410-ae3cc0ecb334" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" diff --git a/docs/make.jl b/docs/make.jl index a6232983..7f66c5bd 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -14,6 +14,7 @@ makedocs(; clean = true, doctest = false, # Tested in CI linkcheck = true, + warnonly = [:example_block], format = Documenter.HTML(; assets = ["assets/favicon.ico"], canonical = "https://docs.sciml.ai/DeepEquilibriumNetworks/stable/"