-
Notifications
You must be signed in to change notification settings - Fork 183
Offloading CUDA #4166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Offloading CUDA #4166
Changes from 16 commits
0134ed4
28ab1b5
42d2e01
8daaf5d
0995127
38fd7ba
fb9be7f
b6c6d4c
69508ab
c120d5b
bbd958f
f6ed362
a46028e
57b7a97
74e4cfb
65bceb0
c20a3e7
383529a
25ea718
3350bde
763fe6b
fc13aa2
af7e6ce
c988926
d075453
1765f5b
af69f87
84f8851
b00c615
9e9fe08
7a3c5da
7f69823
c23f37e
13498cf
58db56b
a380acf
18f4daa
319aa19
80b8e2b
bbf1825
07a2a20
97e41fa
25d356b
6ff1e91
86ae6a8
c96c15b
61f65b1
64f67dc
c57acf0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
# CI workflow: builds a CUDA-enabled PETSc, installs Firedrake against it in a
# fresh virtual environment, and runs the Firedrake smoke tests on a
# self-hosted Linux runner.
name: Install and test Firedrake (CUDA)

on:
  push:
    branches:
      - master
  pull_request:

concurrency:
  # Cancels jobs running if new commits are pushed
  group: >
    ${{ github.workflow }}-
    ${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  test:
    name: Install and test Firedrake (Linux)
    strategy:
      # We want to know all of the tests which fail, so don't kill real if
      # complex fails and vice-versa
      fail-fast: false
      matrix:
        arch: [default]
    runs-on: [self-hosted, Linux]
    container:
      image: firedrakeproject/firedrake-env:latest
    env:
      FIREDRAKE_CI: 1
      OMP_NUM_THREADS: 1
    steps:
      - name: Pre-run cleanup
        # Make sure the current directory is empty
        run: find . -delete

      - uses: actions/checkout@v4
        with:
          path: firedrake-repo

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            $(python3 ./firedrake-repo/scripts/firedrake-configure --arch ${{ matrix.arch }} --show-system-packages)

      - name: Install PETSc
        run: |
          git clone --depth 1 https://github.com/firedrakeproject/petsc.git
          cd petsc
          # TODO update configure file
          # The *OPTFLAGS values contain spaces and must be quoted, otherwise
          # the shell passes -march/-mtune to configure as separate arguments.
          ./configure \
            --with-make-np=12 \
            --with-c2html=0 \
            --with-debugging=0 \
            --with-fortran-bindings=0 \
            --with-shared-libraries=1 \
            --with-strict-petscerrorcode \
            PETSC_ARCH=arch-firedrake-default \
            --COPTFLAGS='-O3 -march=native -mtune=native' \
            --CXXOPTFLAGS='-O3 -march=native -mtune=native' \
            --FOPTFLAGS='-O3 -march=native -mtune=native' \
            --download-bison \
            --download-fftw \
            --download-hdf5 \
            --download-hwloc \
            --download-metis \
            --download-mumps \
            --download-netcdf \
            --download-pnetcdf \
            --download-ptscotch \
            --download-scalapack \
            --download-suitesparse \
            --download-superlu_dist \
            --download-zlib \
            --download-hypre \
            --with-cuda \
            --with-cuda-dir=/usr/local/cuda \
            CUDAPPFLAGS=-Wno-deprecated-gpu-targets \
            --download-openmpi
          make
          # TODO: This fails for some reason
          # make check

      - name: Install Firedrake
        id: install
        run: |
          # TODO update configure file for the exports
          # export $(python3 ./firedrake-repo/scripts/firedrake-configure --arch ${{ matrix.arch }} --show-env)
          # Use an absolute path so builds that run from another directory
          # (e.g. pip building from a temporary directory) still find PETSc.
          export PETSC_DIR=$PWD/petsc
          export PETSC_ARCH=arch-firedrake-default
          export MPI_HOME=$PETSC_DIR/$PETSC_ARCH
          export CC=$PETSC_DIR/$PETSC_ARCH/bin/mpicc
          export CXX=$PETSC_DIR/$PETSC_ARCH/bin/mpicxx
          export MPICC=$CC
          export PATH=$PETSC_DIR/$PETSC_ARCH/bin:$PATH
          export HDF5_MPI=ON
          python3 -m venv venv
          . venv/bin/activate
          : # Force a rebuild of petsc4py as the cached one will not link to the fresh
          : # install of PETSc. A similar trick may be needed for compiled dependencies
          : # like h5py or mpi4py if changing HDF5/MPI libraries.
          pip cache remove petsc4py
          pip install --verbose --no-binary h5py './firedrake-repo[ci]'
          firedrake-clean
          : # Extra test dependencies
          pip install matplotlib vtk
          pip list

      - name: Run smoke tests
        run: |
          . venv/bin/activate
          firedrake-check
        timeout-minutes: 10

      - name: Post-run cleanup
        if: always()
        run: find . -delete
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| Main Stage 366614 | ||
| Main Stage;firedrake 44369 | ||
| Main Stage;firedrake;firedrake.solving.solve 86 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve 196 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve 140 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval 736 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;ParLoopExecute 212 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;ParLoopExecute;Parloop_Cells_wrap_form0_cell_integral 112 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;ParLoopExecute;Parloop_Cells_wrap_form0_cell_integral;pyop2.global_kernel.GlobalKernel.compile 415552 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;firedrake.tsfc_interface.compile_form 42597 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval 866 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval;ParLoopExecute 149 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval;ParLoopExecute;Parloop_Cells_wrap_form00_cell_integral 136 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval;ParLoopExecute;Parloop_Cells_wrap_form00_cell_integral;pyop2.global_kernel.GlobalKernel.compile 407506 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__ 1771 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__;firedrake.tsfc_interface.compile_form 56423 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__;firedrake.tsfc_interface.compile_form;firedrake.formmanipulation.split_form 1907 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__;firedrake.solving_utils._SNESContext.__init__ 618 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__ 145 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__;firedrake.ufl_expr.action 4387 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__;firedrake.variational_solver.NonlinearVariationalProblem.__init__ 332 | ||
| Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__;firedrake.variational_solver.NonlinearVariationalProblem.__init__;firedrake.ufl_expr.adjoint 2798 | ||
| Main Stage;firedrake;firedrake.function.Function.interpolate 342 | ||
| Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble 5644 | ||
| Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate 29 | ||
| Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute 298 | ||
| Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel 204 | ||
| Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel;pyop2.global_kernel.GlobalKernel.compile 682292 | ||
| Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.make_interpolator 40658 | ||
| Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write 2473 | ||
| Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate 303 | ||
| Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble 1080 | ||
| Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate 23 | ||
| Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute 328 | ||
| Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel 165 | ||
| Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel;pyop2.global_kernel.GlobalKernel.compile 663410 | ||
| Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.make_interpolator 55147 | ||
| Main Stage;firedrake;firedrake.__init__ 495196 | ||
| Main Stage;firedrake;firedrake.assemble.assemble 949 | ||
| Main Stage;firedrake;firedrake.assemble.assemble;ParLoopExecute 310 | ||
| Main Stage;firedrake;firedrake.assemble.assemble;ParLoopExecute;Parloop_Cells_wrap_form_cell_integral 95 | ||
| Main Stage;firedrake;firedrake.assemble.assemble;ParLoopExecute;Parloop_Cells_wrap_form_cell_integral;pyop2.global_kernel.GlobalKernel.compile 355507 | ||
| Main Stage;firedrake;firedrake.assemble.assemble;firedrake.tsfc_interface.compile_form 20219 | ||
| Main Stage;firedrake;CreateFunctionSpace 919 | ||
| Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace 79 | ||
| Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__ 165 | ||
| Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data 13 | ||
| Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data;firedrake.functionspacedata.FunctionSpaceData.__init__ 825 | ||
| Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data;firedrake.functionspacedata.FunctionSpaceData.__init__;FunctionSpaceData: CreateElement 1274 | ||
| Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data;firedrake.functionspacedata.FunctionSpaceData.__init__;firedrake.mesh.MeshTopology._facets 789 | ||
| Main Stage;firedrake;CreateFunctionSpace;CreateMesh 147 | ||
| Main Stage;firedrake;CreateFunctionSpace;CreateMesh;Mesh: numbering 376 | ||
| Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh 12 | ||
| Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh 11 | ||
| Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh;firedrake.utility_meshes.RectangleMesh 834 | ||
| Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh;firedrake.utility_meshes.RectangleMesh;CreateMesh 676 | ||
| Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh;firedrake.utility_meshes.RectangleMesh;DMPlexInterp 382 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,120 @@ | ||
| from firedrake.preconditioners.base import PCBase | ||
| from firedrake.functionspace import FunctionSpace, MixedFunctionSpace | ||
| from firedrake.petsc import PETSc | ||
| from firedrake.ufl_expr import TestFunction, TrialFunction | ||
| import firedrake.dmhooks as dmhooks | ||
| from firedrake.dmhooks import get_function_space | ||
|
|
||
| __all__ = ("OffloadPC",) | ||
|
|
||
|
|
||
class OffloadPC(PCBase):
    """Offload PC from CPU to GPU and back.

    The preconditioning operator of the outer PC is converted to a CUDA
    sparse matrix and handed to an inner PETSc PC. Each application of the
    preconditioner builds CUDA vectors from the input and output vectors,
    applies the inner PC, and copies the result back.

    Internally this makes a PETSc PC object that can be controlled by
    options using the extra options prefix ``offload_``.
    """

    _prefix = "offload_"

    def initialize(self, pc):
        """Create and configure the inner PC that operates on the GPU matrix.

        :arg pc: the outer PETSc PC this Python preconditioner is attached to.
        :raises ValueError: if the preconditioning matrix is a Python matrix
            whose context is not a diagonal block.
        """
        with PETSc.Log.Event("Event: initialize offload"):
            A, P = pc.getOperators()

            appctx = self.get_appctx(pc)
            fcp = appctx.get("form_compiler_parameters")

            V = get_function_space(pc.getDM())
            if len(V) == 1:
                V = FunctionSpace(V.mesh(), V.ufl_element())
            else:
                V = MixedFunctionSpace(list(V))
            test = TestFunction(V)
            trial = TrialFunction(V)

            (a, bcs) = self.form(pc, test, trial)

            # Use the same matrix-type test as ``form``: only a "python"
            # matrix carries a Python context that can be queried.
            if P.getType() == "python":
                context = P.getPythonContext()
                # It only makes sense to precondition/invert a diagonal
                # block in general. That's all we're going to allow.
                if not context.on_diag:
                    raise ValueError("Only makes sense to invert diagonal block")

            # The outer PC may have no options prefix at all.
            prefix = pc.getOptionsPrefix() or ""
            options_prefix = prefix + self._prefix

            mat_type = PETSc.Options().getString(options_prefix + "mat_type", "cusparse")

            # Convert matrix to aijcusparse
            # NOTE(review): the ``mat_type`` option read above is only
            # forwarded to the SNES context below; the conversion target is
            # still hard-coded (original author's todo).
            with PETSc.Log.Event("Event: matrix offload"):
                P_cu = P.convert(mat_type='aijcusparse')  # todo

            # Transfer nullspace
            P_cu.setNullSpace(P.getNullSpace())
            tnullsp = P.getTransposeNullSpace()
            if tnullsp.handle != 0:
                P_cu.setTransposeNullSpace(tnullsp)
            P_cu.setNearNullSpace(P.getNearNullSpace())

            # PC object set-up
            inner_pc = PETSc.PC().create(comm=pc.comm)
            inner_pc.incrementTabLevel(1, parent=pc)

            # We set a DM and an appropriate SNESContext on the constructed PC
            # so one can do e.g. multigrid or patch solves.
            dm = pc.getDM()
            self._ctx_ref = self.new_snes_ctx(
                pc, a, bcs, mat_type,
                fcp=fcp, options_prefix=options_prefix
            )

            inner_pc.setDM(dm)
            inner_pc.setOptionsPrefix(options_prefix)
            inner_pc.setOperators(A, P_cu)
            self.pc = inner_pc
            with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref, save=False):
                inner_pc.setFromOptions()

    def update(self, pc):
        """Copy the outer preconditioning matrix into the inner PC's matrix.

        :arg pc: the outer PETSc PC.
        """
        _, P = pc.getOperators()
        _, P_cu = self.pc.getOperators()
        P.copy(P_cu)

    # NOTE(review): a reviewer on the pull request pointed out that this
    # method is already inherited (presumably from the base class); confirm
    # and consider removing this override.
    def form(self, pc, test, trial):
        """Return the bilinear form and boundary conditions to precondition.

        :arg pc: the outer PETSc PC.
        :arg test: test function (not used by this implementation).
        :arg trial: trial function (not used by this implementation).
        :returns: a tuple ``(form, bcs)``.
        """
        _, P = pc.getOperators()
        if P.getType() == "python":
            context = P.getPythonContext()
            return (context.a, context.row_bcs)
        else:
            context = dmhooks.get_appctx(pc.getDM())
            return (context.Jp or context.J, context._problem.bcs)

    # Convert vectors to CUDA, solve and get solution on CPU back
    def apply(self, pc, x, y):
        """Apply the inner PC using CUDA vectors built from ``x`` and ``y``.

        :arg pc: the outer PETSc PC.
        :arg x: input vector.
        :arg y: output vector.
        """
        with PETSc.Log.Event("Event: apply offload"):
            dm = pc.getDM()
            with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref):
                with PETSc.Log.Event("Event: vectors offload"):
                    y_cu = PETSc.Vec()
                    # presumably this shares y's host storage -- TODO confirm
                    y_cu.createCUDAWithArrays(y)
                    x_cu = PETSc.Vec()
                    # Passing a vec into another vec doesn't work because
                    # the original is locked
                    x_cu.createCUDAWithArrays(x.array_r)
                with PETSc.Log.Event("Event: solve"):
                    self.pc.apply(x_cu, y_cu)
                    # Calling data to synchronize vector
                    tmp = y_cu.array_r  # noqa: F841
                with PETSc.Log.Event("Event: vectors copy back"):
                    # NOTE(review): petsc4py's ``Vec.copy(result)`` copies
                    # *self* into ``result``; confirm that ``y`` -> ``y_cu``
                    # is the intended direction here.
                    y.copy(y_cu)

    def applyTranspose(self, pc, X, Y):
        """Transpose application is not supported.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError("applyTranspose is not implemented for OffloadPC")

    def view(self, pc, viewer=None):
        """View this preconditioner and, if it has been set up, the inner PC.

        :arg pc: the outer PETSc PC.
        :arg viewer: optional PETSc viewer.
        """
        super().view(pc, viewer)
        if hasattr(self, "pc"):
            # ``viewer`` defaults to None, so only print the header when
            # there is a viewer to print to.
            if viewer is not None:
                viewer.printfASCII("PC to solve on GPU\n")
            self.pc.view(viewer)
Uh oh!
There was an error while loading. Please reload this page.