diff --git a/ci/cscs-gh200.yml b/ci/cscs-gh200.yml
new file mode 100644
index 00000000..afca96d8
--- /dev/null
+++ b/ci/cscs-gh200.yml
@@ -0,0 +1,43 @@
+# CI pipeline with two jobs: a CUDA unit-test job and a multi-rank MPI
+# performance job. The job templates referenced via `extends` are not defined
+# in this file; presumably they come from the included remote template.
+include:
+  - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'
+
+# Runs the Chmy test suite with the CUDA backend on 1 node, 1 task, 1 GPU.
+unit_test:
+  extends: .uenv-runner-daint-gh200
+  image: julia/25.5:v1
+  script:
+    - export MPICH_GPU_SUPPORT_ENABLED=1
+    # Install the project's dependencies before the test step runs.
+    - julia -e 'println("Instantiating project");
+                using Pkg;
+                Pkg.activate(pwd());
+                Pkg.instantiate()'
+    - julia -e 'println("Running tests");
+                using Pkg;
+                Pkg.activate(pwd());
+                Pkg.test("Chmy"; test_args=["--backends=CUDA"])'
+  variables:
+    WITH_UENV_VIEW: 'juliaup'
+    SLURM_JOB_NUM_NODES: 1
+    SLURM_NTASKS_PER_NODE: 1
+    SLURM_GPUS_PER_TASK: 1
+    SLURM_TIMELIMIT: "00:15:00"
+
+# Runs the MPI Stokes example on 2 nodes x 4 tasks per node, 1 GPU per task.
+perf_test:
+  extends: .baremetal-runner-daint-gh200
+  script:
+    - echo "Preparing the test environment (single rank)"
+    - export MPICH_GPU_SUPPORT_ENABLED=1
+    - srun -n 1 --uenv julia/25.5:v1 --view=juliaup julia --project=. -e 'using Pkg; Pkg.instantiate()'
+    - srun -n 1 --uenv julia/25.5:v1 --view=juliaup julia --project=. -e 'using Pkg; Pkg.add("CUDA")'
+    - echo "Running the reference test (multiple ranks)"
+    - srun --uenv julia/25.5:v1 --view=juliaup julia --project=. examples/stokes_3d_inc_ve_T_mpi_perf.jl
+  variables:
+    SLURM_JOB_NUM_NODES: 2
+    SLURM_NTASKS_PER_NODE: 4
+    SLURM_GPUS_PER_TASK: 1
+    SLURM_TIMELIMIT: "00:10:00"
diff --git a/examples/stokes_3d_inc_ve_T_mpi_perf.jl b/examples/stokes_3d_inc_ve_T_mpi_perf.jl
index 7b03e5de..dbabb9e4 100644
--- a/examples/stokes_3d_inc_ve_T_mpi_perf.jl
+++ b/examples/stokes_3d_inc_ve_T_mpi_perf.jl
@@ -1,13 +1,13 @@
 using Chmy
 using KernelAbstractions
 using Printf
-using JSON
+# using JSON
 # using CairoMakie
 
-using AMDGPU
-AMDGPU.allowscalar(false)
-# using CUDA
-# CUDA.allowscalar(false)
+# using AMDGPU
+# AMDGPU.allowscalar(false)
+using CUDA
+CUDA.allowscalar(false)
 
 using MPI
 MPI.Init()
@@ -86,8 +86,8 @@ end
 end
 
 @views function main(backend=CPU(); nxyz_l=(126, 126, 126))
-    # arch = Arch(backend, MPI.COMM_WORLD, (0, 0, 0); device_id=1)
-    arch = Arch(backend, MPI.COMM_WORLD, (0, 0, 0))
+    arch = Arch(backend, MPI.COMM_WORLD, (0, 0, 0); device_id=1)
+    # arch = Arch(backend, MPI.COMM_WORLD, (0, 0, 0))
     topo = topology(arch)
     me = global_rank(topo)
     # geometry
@@ -237,12 +237,13 @@ end
     return
 end
 
-input = open(JSON.parse, joinpath(@__DIR__, "params.json"))
-params = NamedTuple(Symbol.(keys(input)) .=> values(input))
-res = params.res
-# res = 640
+# input = open(JSON.parse, joinpath(@__DIR__, "params.json"))
+# params = NamedTuple(Symbol.(keys(input)) .=> values(input))
+# res = params.res
+res = 512
 
-main(ROCBackend(); nxyz_l=(res, res, res) .- 2)
+# main(ROCBackend(); nxyz_l=(res, res, res) .- 2)
+main(CUDABackend(); nxyz_l=(res, res, res) .- 2)
 # main(; nxyz_l=(254, 254, 254))
 
 MPI.Finalize()