diff --git a/docs/source/_tutorial.rst b/docs/source/_tutorial.rst index 0bcc62418..99958ffcd 100644 --- a/docs/source/_tutorial.rst +++ b/docs/source/_tutorial.rst @@ -33,6 +33,7 @@ Neural Operator Learning - `Introductory Tutorial: Neural Operator Learning with PINA `_ - `Modeling 2D Darcy Flow with the Fourier Neural Operator `_ - `Solving the Kuramoto-Sivashinsky Equation with Averaging Neural Operator `_ +- `Advection Equation with data driven DeepONet `_ Supervised Learning ------------------- @@ -42,4 +43,4 @@ Supervised Learning - `Reduced Order Model with Graph Neural Networks for Unstructured Domains `_ - `Data-driven System Identification with SINDy `_ - `Unstructured Convolutional Autoencoders with Continuous Convolution `_ -- `Reduced Order Modeling with POD-RBF and POD-NN Approaches for Fluid Dynamics `_ +- `Reduced Order Modeling with POD-RBF and POD-NN Approaches for Fluid Dynamics `_ \ No newline at end of file diff --git a/pina/model/deeponet.py b/pina/model/deeponet.py index 6da161665..c65f6b316 100644 --- a/pina/model/deeponet.py +++ b/pina/model/deeponet.py @@ -52,7 +52,8 @@ def __init__( :param reduction: The reduction to be used to reduce the aggregated result of the modules in ``networks`` to the desired output dimension. Available reductions include: sum: ``+``, product: ``*``, - mean: ``mean``, min: ``min``, max: ``max``. Default is ``+``. + mean: ``mean``, min: ``min``, max: ``max``, identity: "id". + Default is ``+``. :type reduction: str or Callable :param bool scale: If ``True``, the final output is scaled before being returned in the forward pass. Default is ``True``. 
@@ -122,18 +123,8 @@ def __init__( check_consistency(scale, bool) check_consistency(translation, bool) - # check trunk branch nets consistency - shapes = [] - for key, value in networks.items(): + for value in networks.values(): check_consistency(value, (str, int)) - check_consistency(key, torch.nn.Module) - input_ = torch.rand(10, len(value)) - shapes.append(key(input_).shape[-1]) - - if not all(map(lambda x: x == shapes[0], shapes)): - raise ValueError( - "The passed networks have not the same output dimension." - ) # assign trunk and branch net with their input indeces self.models = torch.nn.ModuleList(networks.keys()) @@ -171,6 +162,7 @@ def _symbol_functions(**kwargs): "mean": partial(torch.mean, **kwargs), "min": lambda x: torch.min(x, **kwargs).values, "max": lambda x: torch.max(x, **kwargs).values, + "id": lambda x: x, } def _init_aggregator(self, aggregator): @@ -181,7 +173,7 @@ def _init_aggregator(self, aggregator): :type aggregator: str or Callable :raises ValueError: If the aggregator is not supported. 
""" - aggregator_funcs = self._symbol_functions(dim=2) + aggregator_funcs = self._symbol_functions(dim=-1) if aggregator in aggregator_funcs: aggregator_func = aggregator_funcs[aggregator] elif isinstance(aggregator, nn.Module) or is_function(aggregator): @@ -264,13 +256,9 @@ def forward(self, x): # reduce output_ = self._reduction(aggregated) if self._reduction_type in self._symbol_functions(dim=-1): - output_ = output_.reshape(-1, 1) - - # scale and translate - output_ *= self._scale - output_ += self._trasl + output_ = output_.reshape(*output_.shape, 1) - return output_ + return self._scale * output_ + self._trasl @property def aggregator(self): diff --git a/tests/test_model/test_deeponet.py b/tests/test_model/test_deeponet.py index 8917811c5..4daa55af4 100644 --- a/tests/test_model/test_deeponet.py +++ b/tests/test_model/test_deeponet.py @@ -9,7 +9,7 @@ data = torch.rand((20, 3)) input_vars = ["a", "b", "c"] input_ = LabelTensor(data, input_vars) -symbol_funcs_red = DeepONet._symbol_functions(dim=-1) +symbol_funcs_red = DeepONet._symbol_functions() output_dims = [1, 5, 10, 20] @@ -26,20 +26,6 @@ def test_constructor(): ) -def test_constructor_fails_when_invalid_inner_layer_size(): - branch_net = FeedForward(input_dimensions=1, output_dimensions=10) - trunk_net = FeedForward(input_dimensions=2, output_dimensions=8) - with pytest.raises(ValueError): - DeepONet( - branch_net=branch_net, - trunk_net=trunk_net, - input_indeces_branch_net=["a"], - input_indeces_trunk_net=["b", "c"], - reduction="+", - aggregator="*", - ) - - def test_forward_extract_str(): branch_net = FeedForward(input_dimensions=1, output_dimensions=10) trunk_net = FeedForward(input_dimensions=2, output_dimensions=10) diff --git a/tests/test_model/test_mionet.py b/tests/test_model/test_mionet.py index 4d59433bf..6e6f57934 100644 --- a/tests/test_model/test_mionet.py +++ b/tests/test_model/test_mionet.py @@ -18,15 +18,6 @@ def test_constructor(): MIONet(networks=networks, reduction="+", 
aggregator="*") -def test_constructor_fails_when_invalid_inner_layer_size(): - branch_net1 = FeedForward(input_dimensions=1, output_dimensions=10) - branch_net2 = FeedForward(input_dimensions=2, output_dimensions=10) - trunk_net = FeedForward(input_dimensions=1, output_dimensions=12) - networks = {branch_net1: ["x"], branch_net2: ["x", "y"], trunk_net: ["z"]} - with pytest.raises(ValueError): - MIONet(networks=networks, reduction="+", aggregator="*") - - def test_forward_extract_str(): branch_net1 = FeedForward(input_dimensions=1, output_dimensions=10) branch_net2 = FeedForward(input_dimensions=1, output_dimensions=10) diff --git a/tutorials/README.md b/tutorials/README.md index 62150ee67..949f7d6e2 100644 --- a/tutorials/README.md +++ b/tutorials/README.md @@ -36,6 +36,8 @@ Learning Bifurcating PDE Solutions with Physics-Informed Deep Ensembles|[[.ipynb Introductory Tutorial: Neural Operator Learning with PINA |[[.ipynb](tutorial21/tutorial.ipynb),[.py](tutorial21/tutorial.py),[.html](http://mathlab.github.io/PINA/tutorial21/tutorial.html)]| Modeling 2D Darcy Flow with the Fourier Neural Operator |[[.ipynb](tutorial5/tutorial.ipynb),[.py](tutorial5/tutorial.py),[.html](http://mathlab.github.io/PINA/tutorial5/tutorial.html)]| Solving the Kuramoto–Sivashinsky Equation with Averaging Neural Operator |[[.ipynb](tutorial10/tutorial.ipynb),[.py](tutorial10/tutorial.py),[.html](http://mathlab.github.io/PINA/tutorial10/tutorial.html)]| +Advection Equation with data driven DeepONet| [[.ipynb](tutorial24/tutorial.ipynb),[.py](tutorial24/tutorial.py),[.html](http://mathlab.github.io/PINA/tutorial24/tutorial.html)]| + ## Supervised Learning | Description | Tutorial | @@ -46,4 +48,3 @@ Reduced Order Model with Graph Neural Networks for Unstructured Domains| [[.ipyn Data-driven System Identification with SINDy| [[.ipynb](tutorial23/tutorial.ipynb),[.py](tutorial23/tutorial.py),[.html](http://mathlab.github.io/PINA/tutorial23/tutorial.html)]| Unstructured Convolutional 
Autoencoders with Continuous Convolution |[[.ipynb](tutorial4/tutorial.ipynb),[.py](tutorial4/tutorial.py),[.html](http://mathlab.github.io/PINA/tutorial4/tutorial.html)]| Reduced Order Modeling with POD-RBF and POD-NN Approaches for Fluid Dynamics| [[.ipynb](tutorial8/tutorial.ipynb),[.py](tutorial8/tutorial.py),[.html](http://mathlab.github.io/PINA/tutorial8/tutorial.html)]| - diff --git a/tutorials/static/deeponet.png b/tutorials/static/deeponet.png new file mode 100644 index 000000000..acab017de Binary files /dev/null and b/tutorials/static/deeponet.png differ diff --git a/tutorials/tutorial24/data/advection_input_testing.pt b/tutorials/tutorial24/data/advection_input_testing.pt new file mode 100644 index 000000000..127330052 Binary files /dev/null and b/tutorials/tutorial24/data/advection_input_testing.pt differ diff --git a/tutorials/tutorial24/data/advection_input_training.pt b/tutorials/tutorial24/data/advection_input_training.pt new file mode 100644 index 000000000..b643278c5 Binary files /dev/null and b/tutorials/tutorial24/data/advection_input_training.pt differ diff --git a/tutorials/tutorial24/data/advection_output_testing.pt b/tutorials/tutorial24/data/advection_output_testing.pt new file mode 100644 index 000000000..2e9f16ded Binary files /dev/null and b/tutorials/tutorial24/data/advection_output_testing.pt differ diff --git a/tutorials/tutorial24/data/advection_output_training.pt b/tutorials/tutorial24/data/advection_output_training.pt new file mode 100644 index 000000000..41d134bc2 Binary files /dev/null and b/tutorials/tutorial24/data/advection_output_training.pt differ diff --git a/tutorials/tutorial24/tutorial.ipynb b/tutorials/tutorial24/tutorial.ipynb new file mode 100644 index 000000000..71717f17a --- /dev/null +++ b/tutorials/tutorial24/tutorial.ipynb @@ -0,0 +1,490 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tutorial: Advection Equation with data driven DeepONet\n", + "\n", + 
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mathLab/PINA/blob/master/tutorials/tutorial24/tutorial.ipynb)\n", + "\n", + "\n", + "> ##### ⚠️ ***Before starting:***\n", + "> We assume you are already familiar with the concepts covered in the [Getting started with PINA](https://mathlab.github.io/PINA/_tutorial.html#getting-started-with-pina) tutorials. If not, we strongly recommend reviewing them before exploring this advanced topic.\n", + "\n", + "In this tutorial, we demonstrate how to solve the advection operator learning problem using `DeepONet`. We follow the original formulation of Lu *et al.* in [*DeepONet: Learning nonlinear operators for identifying differential equations based on the universal approximation theorem of operator*](https://arxiv.org/abs/1910.03193).\n", + "\n", + "We begin by importing the necessary modules." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "## routine needed to run the notebook on Google Colab\n", + "try:\n", + " import google.colab\n", + "\n", + " IN_COLAB = True\n", + "except:\n", + " IN_COLAB = False\n", + "if IN_COLAB:\n", + " !pip install \"pina-mathlab[tutorial]\"\n", + " # get the data\n", + " !mkdir \"data\"\n", + " !wget \"https://github.com/mathLab/PINA/raw/refs/heads/master/tutorials/tutorial24/data/advection_input_testing.pt\" -O \"data/advection_input_testing.pt\"\n", + " !wget \"https://github.com/mathLab/PINA/raw/refs/heads/master/tutorials/tutorial24/data/advection_input_training.pt\" -O \"data/advection_input_training.pt\"\n", + " !wget \"https://github.com/mathLab/PINA/raw/refs/heads/master/tutorials/tutorial24/data/advection_output_testing.pt\" -O \"data/advection_output_testing.pt\"\n", + " !wget \"https://github.com/mathLab/PINA/raw/refs/heads/master/tutorials/tutorial24/data/advection_output_training.pt\" -O \"data/advection_output_training.pt\"\n", + "\n", + "import 
matplotlib.pyplot as plt\n", + "import torch\n", + "import warnings\n", + "from functools import partial\n", + "\n", + "\n", + "from pina import Trainer, LabelTensor\n", + "from pina.model import FeedForward, DeepONet\n", + "from pina.solver import SupervisedSolver\n", + "from pina.problem.zoo import SupervisedProblem\n", + "from pina.loss import LpLoss\n", + "\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Advection problem and data preparation\n", + "\n", + "We consider the 1D advection equation\n", + "$$\n", + "\\frac{\\partial u}{\\partial t} + \\frac{\\partial u}{\\partial x} = 0, \n", + "\\quad x \\in [0,2], \\; t \\in [0,1],\n", + "$$\n", + "with periodic boundary conditions. The initial condition is chosen as a Gaussian pulse centered at a random location\n", + "$\\mu \\sim U(0.05, 1)$ and with variance $\\sigma^2 = 0.02$:\n", + "$$\n", + "u_0(x) = \\frac{1}{\\sqrt{\\pi\\sigma^2}} e^{-\\frac{(x - \\mu)^2}{2\\sigma^2}}, \n", + "\\quad x \\in [0,2].\n", + "$$\n", + "\n", + "Our goal is to learn the operator\n", + "$$\n", + "\\mathcal{G}: u_0(x) \\mapsto u(x, t = \\delta) = u_0(x - \\delta),\n", + "$$\n", + "with $\\delta = 0.5$ for this tutorial. In practice, this means learning a mapping from the initial condition to the solution at a fixed later time. \n", + "The dataset therefore consists of trajectories where inputs are initial profiles and outputs are the same profiles shifted by $\\delta$.\n", + "\n", + "The data has shape `[T, Nx, D]`, where:\n", + "- `T` — number of trajectories (100 for training, 1000 for testing),\n", + "- `Nx` — number of spatial grid points (fixed at 100),\n", + "- `D = 1` — single scalar field value `u`.\n", + "\n", + "We now load the dataset and visualize sample trajectories." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# loading training data\n", + "data_0_training = LabelTensor(\n", + " torch.load(\"data/advection_input_training.pt\", weights_only=False),\n", + " labels=\"u0\",\n", + ")\n", + "data_dt_training = LabelTensor(\n", + " torch.load(\"data/advection_output_training.pt\", weights_only=False),\n", + " labels=\"u\",\n", + ")\n", + "\n", + "# loading testing data\n", + "data_0_testing = LabelTensor(\n", + " torch.load(\"data/advection_input_testing.pt\", weights_only=False),\n", + " labels=\"u0\",\n", + ")\n", + "data_dt_testing = LabelTensor(\n", + " torch.load(\"data/advection_output_testing.pt\", weights_only=False),\n", + " labels=\"u\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data are loaded, let's visualize a few of the initial conditions!" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# storing the discretization in space:\n", + "Nx = data_0_training.shape[1]\n", + "\n", + "for idx, i in enumerate(torch.randint(0, data_0_training.shape[0]-1, (3,))):\n", + " u0 = data_0_training[int(i)].extract('u0')\n", + " u = data_dt_training[int(i)].extract('u')\n", + " x = torch.linspace(0, 2, Nx) # the discretization in the spatial dimension is fixed\n", + " plt.subplot(3, 1, idx+1)\n", + " plt.plot(x, u0.flatten(), label=fr'$u_0(x)$')\n", + " plt.plot(x, u.flatten(), label=fr'$u(x, t=\\delta)$')\n", + " plt.xlabel(fr'$x$')\n", + " plt.tight_layout()\n", + " plt.legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great — we have generated a traveling wave and visualized a few samples. Next, we will use this data to train a `DeepONet`.\n", + "\n", + "## DeepONet\n", + "\n", + "The standard `DeepONet` architecture consists of two subnetworks: a **branch** network and a **trunk** network (see figure below).\n", + "\n", + "
\n", + "\"image\n", + "
\n", + "
\n", + "Image source: Moya & Lin (2022)\n", + "
\n", + "\n", + "In our setting:\n", + "- The **branch network** receives the initial condition of each trajectory, with input shape `[B, Nx]` — where `B` is the batch size and `Nx` the spatial discretization points of the field at $t = 0$.\n", + "- The **trunk network** takes input of shape `[B, 1]`, corresponding to the point at which we evaluate the solution (in this tutorial, the fixed time $t = 0.5$).\n", + "\n", + "Together, these networks learn the mapping from the initial field to the solution at a later time.\n", + "\n", + "We now define and train the model for the advection problem." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "problem = SupervisedProblem(\n", + " input_=data_0_training,\n", + " output_=data_dt_training,\n", + " input_variables=data_0_training.labels,\n", + " output_variables=data_dt_training.labels,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now proceed to create the trunk and branch networks." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# create Trunk model\n", + "class TrunkNet(torch.nn.Module):\n", + " def __init__(self, **kwargs):\n", + " super().__init__()\n", + " self.trunk = FeedForward(**kwargs)\n", + " def forward(self, x):\n", + " t = torch.zeros(size=(x.shape[0], 1), requires_grad=False) + 0.5 # create an input of only 0.5\n", + " return self.trunk(t)\n", + "\n", + "\n", + "# create Branch model\n", + "class BranchNet(torch.nn.Module):\n", + " def __init__(self, **kwargs):\n", + " super().__init__()\n", + " self.branch = FeedForward(**kwargs)\n", + "\n", + " def forward(self, x):\n", + " return self.branch(x.flatten(1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `TrunkNet` is implemented as a standard `FeedForward` network with a slightly modified `forward` method. 
In this case, the trunk network is simply fed a tensor filled with the value $0.5$, repeated for each trajectory — corresponding to evaluating the solution at time $t = 0.5$.\n", + "\n", + "The `BranchNet` is also a `FeedForward` network, but its `forward` pass first flattens every dimension of the input after the batch dimension. This produces a vector of length `Nx`, representing the sampled initial condition at the sensor locations.\n", + "\n", + "With both subnetworks defined, we can now instantiate the DeepONet model using the `DeepONet` class from `pina.model`." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# initialize trunk and branch net\n", + "trunk = TrunkNet(\n", + " layers=[256] * 4,\n", + " output_dimensions=Nx,\n", + " input_dimensions=1, # time variable dimension\n", + " func=torch.nn.ReLU,\n", + ")\n", + "branch = BranchNet(\n", + " layers=[256] * 4,\n", + " output_dimensions=Nx,\n", + " input_dimensions=Nx, # spatial variable dimension\n", + " func=torch.nn.ReLU,\n", + ")\n", + "\n", + "# initialize the DeepONet model\n", + "model = DeepONet(\n", + " branch_net=branch,\n", + " trunk_net=trunk,\n", + " input_indeces_branch_net=[\"u0\"],\n", + " input_indeces_trunk_net=[\"u0\"],\n", + " reduction=\"id\",\n", + " aggregator=\"*\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The aggregation and reduction functions combine the outputs of the branch and trunk networks. In this example, their outputs are multiplied element-wise, and no reduction is applied — meaning the final output has the same dimensionality as each network’s output.\n", + "\n", + "We train the model using a `SupervisedSolver` with an `MSE` loss. Below, we first define the solver and then the trainer used to run the optimization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n", + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "89ff4d5ab3784a9bac31831e1edd263f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | | 0/? [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for i in [1, 2, 3]:\n", + " plt.subplot(3, 1, i)\n", + " plt.plot(torch.linspace(0, 2, Nx), solver(data_0_training)[10*i].detach().flatten(), label=r'$u_{NN}$')\n", + " plt.plot(torch.linspace(0, 2, Nx), data_dt_training[10*i].extract('u').flatten(), label=r'$u$')\n", + " plt.xlabel(r'$x$')\n", + " plt.legend(loc='upper right')\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see, they are almost indistinguishable. To better understand the difference, we now plot the residuals, i.e. the difference between the exact solution and the predicted one. " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for i in [1, 2, 3]:\n", + " plt.subplot(3, 1, i)\n", + " plt.plot(torch.linspace(0, 2, Nx), data_dt_training[10*i].extract('u').flatten() - solver(data_0_training)[10*i].detach().flatten(), label=r'$u - u_{NN}$')\n", + " plt.xlabel(r'$x$')\n", + " plt.tight_layout()\n", + " plt.legend(loc='upper right')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What's Next?\n", + "\n", + "We have seen a simple example of using `DeepONet` to learn the advection operator. This only scratches the surface of what neural operators can do. Here are some suggested directions to continue your exploration:\n", + "\n", + "1. **Train on more complex PDEs**: Extend beyond the advection equation to more challenging operators, such as diffusion or nonlinear conservation laws.\n", + "\n", + "2. **Increase training scope**: Experiment with larger datasets, deeper networks, and longer training schedules to unlock the full potential of neural operator learning.\n", + "\n", + "3. **Generalize to the full advection operator**: Train the model to learn the general operator $\\mathcal{G}_t: u_0(x) \\mapsto u(x,t) = u_0(x - t)$ so the network predicts solutions for arbitrary times, not just a single fixed horizon.\n", + "\n", + "4. **Investigate architectural variations**: Compare different operator learning architectures (e.g., Fourier Neural Operators, Physics-Informed DeepONets) to see how they perform on similar problems.\n", + "\n", + "5. **...and much more!**: From adding noise robustness to testing on real scientific datasets, the space of possibilities is wide open.\n", + "\n", + "For more resources and tutorials, check out the [PINA Documentation](https://mathlab.github.io/PINA/)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pina", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.18" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}