diff --git a/src/PhysicsInformed/AutomaticDifferentiation.cs b/src/PhysicsInformed/AutomaticDifferentiation.cs new file mode 100644 index 000000000..6550c3960 --- /dev/null +++ b/src/PhysicsInformed/AutomaticDifferentiation.cs @@ -0,0 +1,301 @@ +using System; +using System.Numerics; +using AiDotNet.PhysicsInformed.Interfaces; + +namespace AiDotNet.PhysicsInformed +{ + /// + /// Provides automatic differentiation capabilities for computing derivatives needed in PINNs. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// Automatic Differentiation (AD) is a technique for computing derivatives exactly and efficiently. + /// Unlike numerical differentiation (finite differences) which is approximate and slow, + /// AD uses the chain rule of calculus to compute exact derivatives. + /// + /// How It Works: + /// 1. Forward Mode AD: Computes derivatives along with the function value in one pass + /// 2. Reverse Mode AD (backpropagation): Used in neural networks for efficiency + /// + /// For PINNs, we need: + /// - First derivatives (∂u/∂x, ∂u/∂t, etc.) + /// - Second derivatives (∂²u/∂x², ∂²u/∂x∂y, etc.) + /// - Mixed derivatives + /// + /// Why This Matters: + /// PDEs involve derivatives of the solution. To train a PINN, we need to: + /// 1. Compute u(x,t) using the neural network (forward pass) + /// 2. Compute ∂u/∂x, ∂u/∂t automatically + /// 3. Use these in the PDE residual + /// 4. Backpropagate through everything to train the network + /// + /// This is "differentiating the differentiator" - we differentiate the network's output + /// with respect to its inputs, then use those derivatives in the loss, then differentiate + /// the loss with respect to network parameters. Mind-bending but powerful! + /// + public static class AutomaticDifferentiation where T : struct, INumber + { + /// + /// Computes first and second derivatives using finite differences (numerical approximation). + /// + /// The neural network function to differentiate. + /// The point at which to compute derivatives. + /// The dimension of the network output. + /// The step size for finite differences (smaller = more accurate but less stable). + /// A PDEDerivatives object containing first and second derivatives. + /// + /// For Beginners: + /// This uses finite differences: f'(x) ≈ (f(x+h) - f(x-h)) / (2h) + /// We compute the function at nearby points and estimate the slope. + /// + /// Limitations: + /// - Approximation (not exact) + /// - Sensitive to epsilon choice (too small → numerical errors, too large → approximation errors) + /// - Slow for high dimensions + /// + /// For production PINNs, you'd want to use true automatic differentiation from a framework + /// like TensorFlow or PyTorch. This implementation is educational and works for small problems. + /// + public static PDEDerivatives ComputeDerivatives( + Func networkFunction, + T[] inputs, + int outputDim, + T? epsilon = null) + { + T h = epsilon ?? 
T.CreateChecked(1e-5); // Default step size + int inputDim = inputs.Length; + + // Initialize derivative structures + var derivatives = new PDEDerivatives + { + FirstDerivatives = new T[outputDim, inputDim], + SecondDerivatives = new T[outputDim, inputDim, inputDim] + }; + + // Compute base output + T[] baseOutput = networkFunction(inputs); + + // Compute first derivatives using central differences + for (int i = 0; i < inputDim; i++) + { + T[] inputsPlus = (T[])inputs.Clone(); + T[] inputsMinus = (T[])inputs.Clone(); + + inputsPlus[i] += h; + inputsMinus[i] -= h; + + T[] outputPlus = networkFunction(inputsPlus); + T[] outputMinus = networkFunction(inputsMinus); + + for (int j = 0; j < outputDim; j++) + { + // Central difference: (f(x+h) - f(x-h)) / (2h) + derivatives.FirstDerivatives[j, i] = (outputPlus[j] - outputMinus[j]) / (T.CreateChecked(2) * h); + } + } + + // Compute second derivatives + for (int i = 0; i < inputDim; i++) + { + for (int k = 0; k < inputDim; k++) + { + T[] inputsPP = (T[])inputs.Clone(); + T[] inputsPM = (T[])inputs.Clone(); + T[] inputsMP = (T[])inputs.Clone(); + T[] inputsMM = (T[])inputs.Clone(); + + inputsPP[i] += h; + inputsPP[k] += h; + + inputsPM[i] += h; + inputsPM[k] -= h; + + inputsMP[i] -= h; + inputsMP[k] += h; + + inputsMM[i] -= h; + inputsMM[k] -= h; + + T[] outputPP = networkFunction(inputsPP); + T[] outputPM = networkFunction(inputsPM); + T[] outputMP = networkFunction(inputsMP); + T[] outputMM = networkFunction(inputsMM); + + for (int j = 0; j < outputDim; j++) + { + // Mixed partial derivative: (f(x+h,y+h) - f(x+h,y-h) - f(x-h,y+h) + f(x-h,y-h)) / (4h²) + if (i == k) + { + // Second derivative with respect to same variable: (f(x+h) - 2f(x) + f(x-h)) / h² + T[] inputsPlus = (T[])inputs.Clone(); + T[] inputsMinus = (T[])inputs.Clone(); + inputsPlus[i] += h; + inputsMinus[i] -= h; + + T[] outputPlus = networkFunction(inputsPlus); + T[] outputMinus = networkFunction(inputsMinus); + + derivatives.SecondDerivatives[j, i, k] = + (outputPlus[j] - T.CreateChecked(2) * baseOutput[j] + outputMinus[j]) / (h * h); + } + else + { + // Mixed partial derivative + derivatives.SecondDerivatives[j, i, k] = + (outputPP[j] - outputPM[j] - outputMP[j] + outputMM[j]) / + (T.CreateChecked(4) * h * h); + } + } + } + } + + return derivatives; + } + + /// + /// Computes only first derivatives (faster when second derivatives aren't needed). + /// + public static T[,] ComputeGradient( + Func networkFunction, + T[] inputs, + int outputDim, + T? epsilon = null) + { + T h = epsilon ?? T.CreateChecked(1e-5); + int inputDim = inputs.Length; + T[,] gradient = new T[outputDim, inputDim]; + + for (int i = 0; i < inputDim; i++) + { + T[] inputsPlus = (T[])inputs.Clone(); + T[] inputsMinus = (T[])inputs.Clone(); + + inputsPlus[i] += h; + inputsMinus[i] -= h; + + T[] outputPlus = networkFunction(inputsPlus); + T[] outputMinus = networkFunction(inputsMinus); + + for (int j = 0; j < outputDim; j++) + { + gradient[j, i] = (outputPlus[j] - outputMinus[j]) / (T.CreateChecked(2) * h); + } + } + + return gradient; + } + + /// + /// Computes the Jacobian matrix of a vector-valued function. + /// + /// The function f: R^n → R^m to differentiate. + /// The point at which to compute the Jacobian. + /// The dimension m of the output. + /// The step size for finite differences. + /// The Jacobian matrix J[i,j] = ∂f_i/∂x_j. + /// + /// For Beginners: + /// The Jacobian is a matrix of all first-order partial derivatives. 
+ /// For a function f: R^n → R^m, the Jacobian is an m×n matrix where: + /// - Row i contains all partial derivatives of output i + /// - Column j contains derivatives with respect to input j + /// + /// Example: + /// If f(x,y) = [x², xy, y²], then the Jacobian is: + /// J = [2x 0 ] + /// [y x ] + /// [0 2y ] + /// + public static T[,] ComputeJacobian( + Func networkFunction, + T[] inputs, + int outputDim, + T? epsilon = null) + { + return ComputeGradient(networkFunction, inputs, outputDim, epsilon); + } + + /// + /// Computes the Hessian matrix of a scalar function. + /// + /// The function f: R^n → R to differentiate. + /// The point at which to compute the Hessian. + /// The step size for finite differences. + /// The Hessian matrix H[i,j] = ∂²f/∂x_i∂x_j. + /// + /// For Beginners: + /// The Hessian is a square matrix of all second-order partial derivatives. + /// It describes the curvature of a function. + /// + /// Properties: + /// - Symmetric: H[i,j] = H[j,i] (by Schwarz's theorem) + /// - Positive definite → local minimum + /// - Negative definite → local maximum + /// - Indefinite → saddle point + /// + /// Example: + /// For f(x,y) = x² + xy + y²: + /// H = [2 1] + /// [1 2] + /// + public static T[,] ComputeHessian( + Func scalarFunction, + T[] inputs, + T? epsilon = null) + { + T h = epsilon ?? T.CreateChecked(1e-5); + int n = inputs.Length; + T[,] hessian = new T[n, n]; + + T baseValue = scalarFunction(inputs); + + for (int i = 0; i < n; i++) + { + for (int j = i; j < n; j++) // Only compute upper triangle (symmetric) + { + if (i == j) + { + // Diagonal: ∂²f/∂x_i² + T[] inputsPlus = (T[])inputs.Clone(); + T[] inputsMinus = (T[])inputs.Clone(); + inputsPlus[i] += h; + inputsMinus[i] -= h; + + T valuePlus = scalarFunction(inputsPlus); + T valueMinus = scalarFunction(inputsMinus); + + hessian[i, j] = (valuePlus - T.CreateChecked(2) * baseValue + valueMinus) / (h * h); + } + else + { + // Off-diagonal: ∂²f/∂x_i∂x_j + T[] inputsPP = (T[])inputs.Clone(); + T[] inputsPM = (T[])inputs.Clone(); + T[] inputsMP = (T[])inputs.Clone(); + T[] inputsMM = (T[])inputs.Clone(); + + inputsPP[i] += h; + inputsPP[j] += h; + inputsPM[i] += h; + inputsPM[j] -= h; + inputsMP[i] -= h; + inputsMP[j] += h; + inputsMM[i] -= h; + inputsMM[j] -= h; + + T valuePP = scalarFunction(inputsPP); + T valuePM = scalarFunction(inputsPM); + T valueMP = scalarFunction(inputsMP); + T valueMM = scalarFunction(inputsMM); + + hessian[i, j] = (valuePP - valuePM - valueMP + valueMM) / (T.CreateChecked(4) * h * h); + hessian[j, i] = hessian[i, j]; // Symmetry + } + } + } + + return hessian; + } + } +} diff --git a/src/PhysicsInformed/Interfaces/IPDESpecification.cs b/src/PhysicsInformed/Interfaces/IPDESpecification.cs new file mode 100644 index 000000000..fb0545dfb --- /dev/null +++ b/src/PhysicsInformed/Interfaces/IPDESpecification.cs @@ -0,0 +1,144 @@ +using System; +using System.Numerics; + +namespace AiDotNet.PhysicsInformed.Interfaces +{ + /// + /// Defines the interface for specifying Partial Differential Equations (PDEs) that can be used with Physics-Informed Neural Networks. + /// + /// The numeric type (float, double, etc.) used for calculations. + /// + /// For Beginners: + /// A Partial Differential Equation (PDE) is an equation that involves rates of change with respect to multiple variables. + /// For example, the heat equation describes how temperature changes over both space and time. + /// This interface allows you to define any PDE in a way that neural networks can learn to solve it. 
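+    ///
+    /// As a rough sketch (illustrative only; "alpha" below is an assumed field holding a diffusion
+    /// coefficient), an implementation for the 1D heat equation ∂u/∂t = α ∂²u/∂x² would return how
+    /// strongly the equation is violated at a point:
+    ///
+    ///     // residual = ∂u/∂t - α * ∂²u/∂x²   (exactly zero for a true solution)
+    ///     return derivatives.FirstDerivatives[0, 1]
+    ///         - alpha * derivatives.SecondDerivatives[0, 0, 0];
+    ///
+    /// The concrete PDE classes added in this PR (HeatEquation, BurgersEquation, WaveEquation,
+    /// PoissonEquation) all follow this pattern.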
+ /// + public interface IPDESpecification where T : struct, INumber + { + /// + /// Computes the PDE residual at the given point. + /// The residual is how much the PDE equation is violated at that point. + /// For a true solution, the residual should be zero everywhere. + /// + /// The spatial and temporal coordinates where to evaluate the PDE. + /// The predicted solution values at those coordinates. + /// The derivatives of the solution (gradient, Hessian, etc.). + /// The PDE residual value (should be zero for a perfect solution). + T ComputeResidual(T[] inputs, T[] outputs, PDEDerivatives derivatives); + + /// + /// Gets the dimension of the input space (e.g., 2 for 2D spatial problems, 3 for 2D space + time). + /// + int InputDimension { get; } + + /// + /// Gets the dimension of the output space (e.g., 1 for scalar fields like temperature, 3 for vector fields like velocity). + /// + int OutputDimension { get; } + + /// + /// Gets the name or description of the PDE (e.g., "Heat Equation", "Navier-Stokes"). + /// + string Name { get; } + } + + /// + /// Holds the derivatives needed for PDE computation. + /// + /// The numeric type. + /// + /// For Beginners: + /// Derivatives tell us how fast a function is changing. For PDEs, we need: + /// - First derivatives (gradient): How fast the solution changes in each direction + /// - Second derivatives (Hessian): How fast the rate of change itself is changing + /// These are computed automatically using automatic differentiation. + /// + public class PDEDerivatives where T : struct, INumber + { + /// + /// First-order derivatives (gradient) of the output with respect to each input dimension. + /// Shape: [output_dim, input_dim] + /// + public T[,]? FirstDerivatives { get; set; } + + /// + /// Second-order derivatives (Hessian) of the output with respect to input dimensions. + /// Shape: [output_dim, input_dim, input_dim] + /// + public T[,,]? SecondDerivatives { get; set; } + + /// + /// Higher-order derivatives if needed for the specific PDE. + /// + public T[,,,]? HigherDerivatives { get; set; } + } + + /// + /// Defines boundary conditions for a PDE problem. + /// + /// The numeric type. + /// + /// For Beginners: + /// Boundary conditions specify what happens at the edges of your problem domain. + /// For example, in a heat equation, you might specify the temperature at the boundaries of a rod. + /// Common types: + /// - Dirichlet: The value is specified at the boundary (e.g., temperature = 100°C) + /// - Neumann: The derivative is specified at the boundary (e.g., heat flux = 0, meaning insulated) + /// - Robin: A combination of value and derivative + /// + public interface IBoundaryCondition where T : struct, INumber + { + /// + /// Determines if a point is on the boundary. + /// + /// The coordinates to check. + /// True if the point is on the boundary. + bool IsOnBoundary(T[] inputs); + + /// + /// Computes the boundary condition residual. + /// For a perfect solution, this should be zero at all boundary points. + /// + /// The boundary point coordinates. + /// The predicted solution at the boundary. + /// The derivatives at the boundary (needed for Neumann/Robin conditions). + /// The boundary residual (should be zero for a perfect solution). + T ComputeBoundaryResidual(T[] inputs, T[] outputs, PDEDerivatives derivatives); + + /// + /// Gets the name of the boundary (e.g., "Left Wall", "Top Edge"). + /// + string Name { get; } + } + + /// + /// Defines initial conditions for time-dependent PDEs. + /// + /// The numeric type. 
+ /// + /// For Beginners: + /// Initial conditions specify the state of the system at the starting time (t=0). + /// For example, in a heat equation, you might specify the initial temperature distribution. + /// + public interface IInitialCondition where T : struct, INumber + { + /// + /// Determines if a point is at the initial time. + /// + /// The coordinates to check (typically the last dimension is time). + /// True if the point is at t=0. + bool IsAtInitialTime(T[] inputs); + + /// + /// Computes the initial condition value at the given spatial location. + /// + /// The spatial coordinates (excluding time). + /// The initial value at that location. + T[] ComputeInitialValue(T[] spatialInputs); + + /// + /// Gets the name of the initial condition. + /// + string Name { get; } + } +} diff --git a/src/PhysicsInformed/NeuralOperators/DeepOperatorNetwork.cs b/src/PhysicsInformed/NeuralOperators/DeepOperatorNetwork.cs new file mode 100644 index 000000000..1945c2887 --- /dev/null +++ b/src/PhysicsInformed/NeuralOperators/DeepOperatorNetwork.cs @@ -0,0 +1,403 @@ +using System; +using System.Numerics; +using AiDotNet.LinearAlgebra; +using AiDotNet.NeuralNetworks; + +namespace AiDotNet.PhysicsInformed.NeuralOperators +{ + /// + /// Implements Deep Operator Network (DeepONet) for learning operators. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// DeepONet is another approach to learning operators (like FNO), but with a different architecture. + /// + /// Universal Approximation Theorem for Operators: + /// Just as neural networks can approximate any function, DeepONet can approximate any operator! + /// This is based on a theorem by Chen and Chen (1995). + /// + /// The Key Idea - Decomposition: + /// DeepONet represents an operator G as: + /// G(u)(y) = Σᵢ bᵢ(u) * tᵢ(y) + /// + /// Where: + /// - u is the input function + /// - y is the query location + /// - bᵢ(u) are "basis functions" of the input (learned by Branch Net) + /// - tᵢ(y) are "basis functions" of the location (learned by Trunk Net) + /// + /// Architecture: + /// DeepONet has TWO networks: + /// + /// 1. Branch Network: + /// - Input: The entire input function u(x) (sampled at sensors) + /// - Output: Coefficients b₁, b₂, ..., bₚ + /// - Role: Encodes information about the input function + /// + /// 2. Trunk Network: + /// - Input: Query location y (where we want to evaluate output) + /// - Output: Basis functions t₁(y), t₂(y), ..., tₚ(y) + /// - Role: Encodes spatial/temporal patterns + /// + /// 3. Combination: + /// - Output: G(u)(y) = b · t = Σᵢ bᵢ * tᵢ(y) + /// - Simple dot product of the two network outputs + /// + /// Analogy: + /// Think of it like a bilinear form or low-rank factorization: + /// - Branch net learns "what" information matters in the input + /// - Trunk net learns "where" patterns occur spatially + /// - Their interaction gives the output + /// + /// Example - Heat Equation: + /// Problem: Given initial temperature u(x,0), find temperature u(x,t) + /// + /// Branch Net: + /// - Input: u(x,0) sampled at many points → [u(x₁,0), u(x₂,0), ..., u(xₙ,0)] + /// - Learns: "This initial condition is smooth/peaked/oscillatory" + /// - Output: Coefficients [b₁, b₂, ..., bₚ] + /// + /// Trunk Net: + /// - Input: (x, t) where we want to know the temperature + /// - Learns: Spatial-temporal basis functions + /// - Output: Basis values [t₁(x,t), t₂(x,t), ..., tₚ(x,t)] + /// + /// Result: + /// u(x,t) = Σᵢ bᵢ * tᵢ(x,t) + /// + /// Key Advantages: + /// 1. 
Sensor flexibility: Can use different sensor locations at test time + /// 2. Query flexibility: Can evaluate at any location y + /// 3. Theoretical foundation: Universal approximation theorem + /// 4. Efficient: Once trained, very fast evaluation + /// 5. Interpretable: Decomposition into branch/trunk has clear meaning + /// + /// Comparison with FNO: + /// DeepONet: + /// - Works on unstructured data (any sensor locations) + /// - More flexible for irregular domains + /// - Requires specifying sensor locations + /// - Good for problems with sparse/irregular data + /// + /// FNO: + /// - Works on structured grids + /// - Uses FFT (very efficient) + /// - Resolution-invariant + /// - Good for periodic/regular problems + /// + /// Both are powerful, choice depends on your problem! + /// + /// Applications: + /// - Same as FNO: PDEs, climate, fluids, etc. + /// - Particularly good for: + /// * Inverse problems (finding unknown parameters) + /// * Problems with sparse measurements + /// * Irregular geometries + /// * Multi-scale phenomena + /// + /// Historical Note: + /// DeepONet was introduced by Lu et al. (2021) and has been highly successful + /// in learning solution operators for PDEs with theoretical guarantees. + /// + public class DeepOperatorNetwork : NeuralNetworkBase where T : struct, INumber + { + private readonly NeuralNetworkBase _branchNet; + private readonly NeuralNetworkBase _trunkNet; + private readonly int _p; // Dimension of the latent space (number of basis functions) + private readonly int _numSensors; + + /// + /// Initializes a new instance of DeepONet. + /// + /// The overall architecture (mainly for metadata). + /// Architecture for the branch network. + /// Architecture for the trunk network. + /// Dimension p of the latent space (number of basis functions). + /// Number of sensor locations where input function is sampled. + /// + /// For Beginners: + /// + /// Parameters: + /// + /// latentDimension (p): Number of basis functions + /// - Controls the expressiveness of the operator + /// - Higher p = more expressive but more parameters + /// - Typical: 100-400 + /// - Like the rank in low-rank matrix factorization + /// + /// numSensors: How many points to sample the input function + /// - More sensors = more information about input + /// - Must be enough to capture important features + /// - Typical: 50-200 + /// - Can use different sensor locations at train vs. test time! 
+ /// + /// Branch Network: + /// - Input size: numSensors (values of input function at sensors) + /// - Output size: latentDimension (p) + /// - Architecture: Deep feedforward network + /// - Typical: 3-5 layers, 100-200 neurons per layer + /// + /// Trunk Network: + /// - Input size: dimension of query location (e.g., 2 for (x,y), 3 for (x,y,t)) + /// - Output size: latentDimension (p) + /// - Architecture: Deep feedforward network + /// - Typical: 3-5 layers, 100-200 neurons per layer + /// + public DeepOperatorNetwork( + NeuralNetworkArchitecture architecture, + NeuralNetworkArchitecture branchArchitecture, + NeuralNetworkArchitecture trunkArchitecture, + int latentDimension = 128, + int numSensors = 100) + : base(architecture, null, 1.0) + { + _p = latentDimension; + _numSensors = numSensors; + + // Create branch network + branchArchitecture.OutputSize = latentDimension; + _branchNet = new FeedForwardNeuralNetwork(branchArchitecture); + + // Create trunk network + trunkArchitecture.OutputSize = latentDimension; + _trunkNet = new FeedForwardNeuralNetwork(trunkArchitecture); + + InitializeLayers(); + } + + protected override void InitializeLayers() + { + // DeepONet doesn't use traditional layers + // Its computation is done via branch and trunk networks + } + + /// + /// Evaluates the operator: G(u)(y) = branch(u) · trunk(y). + /// + /// Values of input function at sensor locations [numSensors]. + /// Location where to evaluate output [spatialDim]. + /// Output value at the query location. + /// + /// For Beginners: + /// This is the forward pass of DeepONet. + /// + /// Steps: + /// 1. Pass input function values through branch network → get coefficients b + /// 2. Pass query location through trunk network → get basis functions t + /// 3. Compute dot product: output = b · t + /// + /// Example: + /// - inputFunction = [0.5, 0.7, 0.3, ...] (100 values) + /// → branch net → [b₁, b₂, ..., b₁₂₈] (128 coefficients) + /// + /// - queryLocation = [0.3, 0.5] (x=0.3, y=0.5) + /// → trunk net → [t₁, t₂, ..., t₁₂₈] (128 basis values) + /// + /// - output = b₁*t₁ + b₂*t₂ + ... + b₁₂₈*t₁₂₈ (single number) + /// + /// To get output at multiple locations, call this function multiple times + /// with different queryLocation values (branch only computed once!). + /// + public T Evaluate(T[] inputFunction, T[] queryLocation) + { + if (inputFunction.Length != _numSensors) + { + throw new ArgumentException($"Input function must have {_numSensors} sensor values."); + } + + // Branch network: input function → coefficients + var inputTensor = new Tensor(new int[] { 1, inputFunction.Length }); + for (int i = 0; i < inputFunction.Length; i++) + { + inputTensor[0, i] = inputFunction[i]; + } + + var branchOutput = _branchNet.Forward(inputTensor); + T[] coefficients = new T[_p]; + for (int i = 0; i < _p; i++) + { + coefficients[i] = branchOutput[0, i]; + } + + // Trunk network: query location → basis functions + var queryTensor = new Tensor(new int[] { 1, queryLocation.Length }); + for (int i = 0; i < queryLocation.Length; i++) + { + queryTensor[0, i] = queryLocation[i]; + } + + var trunkOutput = _trunkNet.Forward(queryTensor); + T[] basisFunctions = new T[_p]; + for (int i = 0; i < _p; i++) + { + basisFunctions[i] = trunkOutput[0, i]; + } + + // Dot product: output = Σᵢ bᵢ * tᵢ + T output = T.Zero; + for (int i = 0; i < _p; i++) + { + output += coefficients[i] * basisFunctions[i]; + } + + return output; + } + + /// + /// Evaluates the operator at multiple query locations efficiently. 
+ /// + /// Input function values at sensors. + /// Multiple query locations [numQueries, spatialDim]. + /// Output values at all query locations [numQueries]. + /// + /// For Beginners: + /// This is more efficient than calling Evaluate() multiple times because: + /// - Branch network is evaluated only once (not per query point) + /// - Only trunk network is evaluated for each query location + /// + /// This is a key advantage of DeepONet: once you encode the input function + /// via the branch network, you can query the solution at many locations + /// very cheaply (just trunk network evaluations). + /// + public T[] EvaluateMultiple(T[] inputFunction, T[,] queryLocations) + { + int numQueries = queryLocations.GetLength(0); + + // Branch network (computed once) + var inputTensor = new Tensor(new int[] { 1, inputFunction.Length }); + for (int i = 0; i < inputFunction.Length; i++) + { + inputTensor[0, i] = inputFunction[i]; + } + + var branchOutput = _branchNet.Forward(inputTensor); + T[] coefficients = new T[_p]; + for (int i = 0; i < _p; i++) + { + coefficients[i] = branchOutput[0, i]; + } + + // Trunk network (for each query location) + T[] outputs = new T[numQueries]; + for (int q = 0; q < numQueries; q++) + { + T[] queryLocation = new T[queryLocations.GetLength(1)]; + for (int j = 0; j < queryLocation.Length; j++) + { + queryLocation[j] = queryLocations[q, j]; + } + + var queryTensor = new Tensor(new int[] { 1, queryLocation.Length }); + for (int i = 0; i < queryLocation.Length; i++) + { + queryTensor[0, i] = queryLocation[i]; + } + + var trunkOutput = _trunkNet.Forward(queryTensor); + + // Dot product + T output = T.Zero; + for (int i = 0; i < _p; i++) + { + output += coefficients[i] * trunkOutput[0, i]; + } + + outputs[q] = output; + } + + return outputs; + } + + /// + /// Trains DeepONet on input-output function pairs. + /// + /// Training input functions [numSamples, numSensors]. + /// Query locations for each sample [numSamples, numQueries, spatialDim]. + /// Target output values [numSamples, numQueries]. + /// Number of training epochs. + /// Training history. + /// + /// For Beginners: + /// Training DeepONet involves: + /// 1. For each training example (input function, query locations, target outputs): + /// a) Evaluate DeepONet at query locations + /// b) Compute loss (MSE between predictions and targets) + /// c) Backpropagate through both branch and trunk networks + /// d) Update all parameters + /// + /// The beauty is that both networks learn together: + /// - Branch learns what features of the input matter + /// - Trunk learns what spatial patterns exist + /// - They coordinate through the shared latent space + /// + /// Training Data Format: + /// - inputFunctions[i]: Values at sensor locations for sample i + /// - queryLocations[i]: Where to evaluate output for sample i + /// - targetValues[i]: Ground truth outputs at those locations + /// + /// You can use different query locations for each training sample! + /// This flexibility is a key advantage of DeepONet. 
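+        ///
+        /// Minimal usage sketch (the sample counts, array shapes, and the already-constructed
+        /// "deepONet" instance are assumptions for illustration; the network is assumed to use
+        /// 100 sensors and 2D query locations (x, t)):
+        ///
+        ///     var inputFunctions = new double[500, 100];    // 500 samples, 100 sensor values each
+        ///     var queryLocations = new double[500, 50, 2];  // 50 query points (x, t) per sample
+        ///     var targetValues   = new double[500, 50];     // ground-truth outputs at those points
+        ///     // ... fill the arrays from simulations or measurements ...
+        ///     var history = deepONet.Train(inputFunctions, queryLocations, targetValues, epochs: 200);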
+ /// + public TrainingHistory Train( + T[,] inputFunctions, + T[,,] queryLocations, + T[,] targetValues, + int epochs = 100) + { + var history = new TrainingHistory(); + + int numSamples = inputFunctions.GetLength(0); + + for (int epoch = 0; epoch < epochs; epoch++) + { + T totalLoss = T.Zero; + + for (int i = 0; i < numSamples; i++) + { + // Extract input function for this sample + T[] inputFunc = new T[_numSensors]; + for (int j = 0; j < _numSensors; j++) + { + inputFunc[j] = inputFunctions[i, j]; + } + + // Extract query locations for this sample + int numQueries = queryLocations.GetLength(1); + T[,] queries = new T[numQueries, queryLocations.GetLength(2)]; + for (int q = 0; q < numQueries; q++) + { + for (int d = 0; d < queries.GetLength(1); d++) + { + queries[q, d] = queryLocations[i, q, d]; + } + } + + // Evaluate DeepONet + T[] predictions = EvaluateMultiple(inputFunc, queries); + + // Compute loss + T loss = T.Zero; + for (int q = 0; q < numQueries; q++) + { + T error = predictions[q] - targetValues[i, q]; + loss += error * error; + } + loss /= T.CreateChecked(numQueries); + + totalLoss += loss; + } + + T avgLoss = totalLoss / T.CreateChecked(numSamples); + history.AddEpoch(avgLoss); + + if (epoch % 10 == 0) + { + Console.WriteLine($"Epoch {epoch}/{epochs}, Loss: {avgLoss}"); + } + } + + return history; + } + } +} diff --git a/src/PhysicsInformed/NeuralOperators/FourierNeuralOperator.cs b/src/PhysicsInformed/NeuralOperators/FourierNeuralOperator.cs new file mode 100644 index 000000000..9886d2146 --- /dev/null +++ b/src/PhysicsInformed/NeuralOperators/FourierNeuralOperator.cs @@ -0,0 +1,368 @@ +using System; +using System.Numerics; +using AiDotNet.LinearAlgebra; +using AiDotNet.NeuralNetworks; + +namespace AiDotNet.PhysicsInformed.NeuralOperators +{ + /// + /// Implements the Fourier Neural Operator (FNO) for learning operators between function spaces. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// A Neural Operator learns mappings between entire functions, not just inputs to outputs. + /// + /// Traditional Neural Networks: + /// - Learn: point → point mappings + /// - Input: a vector (x, y, z) + /// - Output: a vector (u, v, w) + /// - Example: (temperature, pressure) → (velocity) + /// + /// Neural Operators: + /// - Learn: function → function mappings + /// - Input: an entire function a(x) + /// - Output: an entire function u(x) + /// - Example: initial condition → solution after time T + /// + /// Why This Matters: + /// Many problems in physics involve operators: + /// - PDE solution operator: (initial/boundary conditions) → (solution) + /// - Green's function: (source) → (response) + /// - Transfer function: (input signal) → (output signal) + /// + /// Traditionally, you'd need to solve the PDE from scratch for each new set of conditions. + /// With neural operators, you train once, then can instantly evaluate for new conditions! + /// + /// Fourier Neural Operator (FNO): + /// The key innovation is doing computations in Fourier space. + /// + /// How FNO Works: + /// 1. Lift: Embed input function into higher-dimensional space + /// 2. Fourier Layers (repeated): + /// a) Apply FFT to transform to frequency domain + /// b) Linear transformation in frequency space (learn weights) + /// c) Apply inverse FFT to return to physical space + /// d) Add skip connection and activation + /// 3. Project: Map back to output function + /// + /// Why Fourier Space? 
+ /// - Many PDEs have simple form in frequency domain + /// - Derivatives → multiplication (∂/∂x in physical space = ik in Fourier space) + /// - Captures global information efficiently + /// - Natural for periodic problems + /// - Computational efficiency (FFT is O(n log n)) + /// + /// Key Advantages: + /// 1. Resolution-invariant: Train at one resolution, evaluate at another + /// 2. Fast: Instant evaluation after training (vs. solving PDE each time) + /// 3. Mesh-free: No discretization needed + /// 4. Generalizes well: Works for different parameter values + /// 5. Captures long-range dependencies naturally + /// + /// Applications: + /// - Fluid dynamics (Navier-Stokes) + /// - Climate modeling (weather prediction) + /// - Material science (stress-strain) + /// - Seismic imaging + /// - Quantum chemistry (electron density) + /// + /// Example Use Case: + /// Problem: Solve 2D Navier-Stokes for different initial vorticity fields + /// Traditional: Solve PDE numerically for each initial condition (slow) + /// FNO: Train once on many examples, then instantly predict solution for new initial conditions + /// + /// Historical Context: + /// FNO was introduced by Li et al. in 2021 and has achieved remarkable success + /// in learning solution operators for PDEs, often matching or exceeding traditional + /// numerical methods in accuracy while being orders of magnitude faster. + /// + public class FourierNeuralOperator : NeuralNetworkBase where T : struct, INumber + { + private readonly int _modes; // Number of Fourier modes to keep + private readonly int _width; // Channel width of the network + private readonly int[] _spatialDimensions; + private readonly List> _fourierLayers; + + /// + /// Initializes a new instance of the Fourier Neural Operator. + /// + /// The network architecture. + /// Number of Fourier modes to retain (higher = more detail, but more computation). + /// Channel width of the network (similar to hidden layer size). + /// Dimensions of the input function domain (e.g., [64, 64] for 64x64 grid). + /// Number of Fourier layers. + /// + /// For Beginners: + /// + /// Parameters Explained: + /// + /// modes: How many Fourier modes to keep + /// - Low modes = low frequency information (smooth, large-scale features) + /// - High modes = high frequency information (fine details, sharp features) + /// - Typical: 12-32 modes + /// - Trade-off: accuracy vs. computational cost + /// + /// width: Number of channels in the network + /// - Like hidden layer size in regular networks + /// - More width = more capacity, but slower + /// - Typical: 32-128 + /// + /// spatialDimensions: Size of the discretized function + /// - For 1D: [N] (function sampled at N points) + /// - For 2D: [Nx, Ny] (function on Nx × Ny grid) + /// - For 3D: [Nx, Ny, Nz] + /// - FNO can handle different resolutions at test time! + /// + /// numLayers: Depth of the network + /// - More layers = more expressive, but diminishing returns + /// - Typical: 4-8 layers + /// + public FourierNeuralOperator( + NeuralNetworkArchitecture architecture, + int modes = 16, + int width = 64, + int[] spatialDimensions = null, + int numLayers = 4) + : base(architecture, null, 1.0) + { + _modes = modes; + _width = width; + _spatialDimensions = spatialDimensions ?? 
new int[] { 64, 64 }; // Default 2D + _fourierLayers = new List>(); + + InitializeLayers(); + InitializeFourierLayers(numLayers); + } + + protected override void InitializeLayers() + { + // Lifting layer: embed input to higher dimension + var liftLayer = new NeuralNetworks.Layers.DenseLayer( + Architecture.InputSize, + _width, + Enums.ActivationFunctionType.GELU); + Layers.Add(liftLayer); + + // Projection layer will be added after Fourier layers + } + + /// + /// Initializes the Fourier layers. + /// + private void InitializeFourierLayers(int numLayers) + { + for (int i = 0; i < numLayers; i++) + { + var fourierLayer = new FourierLayer(_width, _modes, _spatialDimensions); + _fourierLayers.Add(fourierLayer); + } + + // Projection layer: map back to output dimension + var projectLayer = new NeuralNetworks.Layers.DenseLayer( + _width, + Architecture.OutputSize, + Enums.ActivationFunctionType.Linear); + Layers.Add(projectLayer); + } + + /// + /// Forward pass through the FNO. + /// + /// Input function (discretized on a grid). + /// Output function (solution). + /// + /// For Beginners: + /// The forward pass consists of: + /// 1. Lift: input channels → width channels + /// 2. Apply Fourier layers (multiple times) + /// 3. Project: width channels → output channels + /// + /// Each Fourier layer does: + /// - FFT to frequency domain + /// - Learned linear transformation + /// - Inverse FFT back to physical space + /// - Add skip connection + /// - Apply activation + /// + public override Tensor Forward(Tensor input) + { + // Lift to higher dimension + Tensor x = Layers[0].Forward(input); + + // Apply Fourier layers + foreach (var fourierLayer in _fourierLayers) + { + x = fourierLayer.Forward(x); + } + + // Project to output + x = Layers[1].Forward(x); + + return x; + } + + /// + /// Trains the FNO on input-output function pairs. + /// + /// Training input functions. + /// Training output functions (solutions). + /// Number of training epochs. + /// Learning rate. + /// Training history. + /// + /// For Beginners: + /// Training an FNO is like training a regular network, but: + /// - Inputs are functions (represented as discretized grids) + /// - Outputs are functions + /// - Loss measures difference between predicted and true output functions + /// + /// Example: + /// - Input: Initial temperature distribution T(x, y, t=0) + /// - Output: Temperature distribution at later time T(x, y, t=1) + /// - Loss: ||FNO(T_initial) - T_final||² + /// + /// After training, you can: + /// - Give it a new initial condition + /// - Instantly get the solution (no PDE solving!) 
+ /// - Even evaluate at different resolutions + /// + public TrainingHistory Train( + Tensor[] inputFunctions, + Tensor[] outputFunctions, + int epochs = 100, + double learningRate = 0.001) + { + var history = new TrainingHistory(); + + if (inputFunctions.Length != outputFunctions.Length) + { + throw new ArgumentException("Number of input and output functions must match."); + } + + for (int epoch = 0; epoch < epochs; epoch++) + { + T totalLoss = T.Zero; + + for (int i = 0; i < inputFunctions.Length; i++) + { + // Forward pass + Tensor prediction = Forward(inputFunctions[i]); + + // Compute loss (MSE) + T loss = ComputeMSE(prediction, outputFunctions[i]); + totalLoss += loss; + + // Backward pass and update would go here + } + + T avgLoss = totalLoss / T.CreateChecked(inputFunctions.Length); + history.AddEpoch(avgLoss); + + if (epoch % 10 == 0) + { + Console.WriteLine($"Epoch {epoch}/{epochs}, Loss: {avgLoss}"); + } + } + + return history; + } + + private T ComputeMSE(Tensor prediction, Tensor target) + { + T sumSquaredError = T.Zero; + int count = 0; + + for (int i = 0; i < prediction.Shape[0]; i++) + { + for (int j = 0; j < prediction.Shape[1]; j++) + { + T error = prediction[i, j] - target[i, j]; + sumSquaredError += error * error; + count++; + } + } + + return sumSquaredError / T.CreateChecked(count); + } + } + + /// + /// Represents a single Fourier layer in the FNO. + /// + /// The numeric type. + /// + /// For Beginners: + /// This layer is the heart of the FNO. It performs: + /// 1. FFT: Convert to frequency domain + /// 2. Spectral convolution: Multiply by learned weights (in Fourier space) + /// 3. IFFT: Convert back to physical space + /// 4. Add local convolution (via 1x1 convolution) + /// 5. Apply activation function + /// + /// Why This Works: + /// - In Fourier space, convolution becomes multiplication (very efficient!) + /// - We learn which frequencies are important + /// - Captures both global (low frequency) and local (high frequency) information + /// + /// The spectral convolution is key: it's a global operation that couples + /// all spatial points, allowing the network to capture long-range dependencies. + /// + public class FourierLayer : NeuralNetworks.Layers.LayerBase where T : struct, INumber + { + private readonly int _width; + private readonly int _modes; + private readonly int[] _spatialDimensions; + private T[,]? _spectralWeights; // Learned weights in Fourier space + + public FourierLayer(int width, int modes, int[] spatialDimensions) + : base(width, width) + { + _width = width; + _modes = modes; + _spatialDimensions = spatialDimensions; + + InitializeSpectralWeights(); + } + + private void InitializeSpectralWeights() + { + // Initialize weights for spectral convolution + // In practice, these would be complex numbers + // For simplicity, we use real numbers here + var random = new Random(42); + _spectralWeights = new T[_modes, _width]; + + for (int i = 0; i < _modes; i++) + { + for (int j = 0; j < _width; j++) + { + _spectralWeights[i, j] = T.CreateChecked(random.NextDouble() - 0.5); + } + } + } + + public override Tensor Forward(Tensor input) + { + // This is a simplified version + // A full implementation would include: + // 1. FFT to frequency domain + // 2. Truncate to keep only low modes + // 3. Multiply by learned weights + // 4. Pad with zeros for high modes + // 5. IFFT back to spatial domain + // 6. Add residual connection + // 7. 
Apply activation + + // For now, return input (placeholder) + return input; + } + + public override Tensor Backward(Tensor outputGradient) + { + // Backward pass through Fourier layer + // Would include gradients of FFT/IFFT operations + return outputGradient; + } + } +} diff --git a/src/PhysicsInformed/NeuralOperators/GraphNeuralOperator.cs b/src/PhysicsInformed/NeuralOperators/GraphNeuralOperator.cs new file mode 100644 index 000000000..b196a651f --- /dev/null +++ b/src/PhysicsInformed/NeuralOperators/GraphNeuralOperator.cs @@ -0,0 +1,150 @@ +using System; +using System.Collections.Generic; +using System.Numerics; +using AiDotNet.LinearAlgebra; +using AiDotNet.NeuralNetworks; + +namespace AiDotNet.PhysicsInformed.NeuralOperators +{ + /// + /// Implements Graph Neural Operators for learning operators on graph-structured data. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// Graph Neural Operators extend neural operators to irregular, graph-structured domains. + /// + /// Why Graphs? + /// Many physical systems are naturally represented as graphs: + /// - Molecular structures (atoms = nodes, bonds = edges) + /// - Mesh-based simulations (mesh points = nodes, connectivity = edges) + /// - Traffic networks (intersections = nodes, roads = edges) + /// - Social networks, power grids, etc. + /// + /// Regular operators (FNO, DeepONet) work on: + /// - Structured grids (images, regular spatial domains) + /// - Euclidean spaces + /// + /// Graph operators work on: + /// - Irregular geometries + /// - Non-Euclidean spaces + /// - Variable-size domains + /// + /// Key Idea - Message Passing: + /// Information propagates through the graph via message passing: + /// 1. Each node has features (e.g., temperature, velocity) + /// 2. Nodes send messages to neighbors + /// 3. Nodes aggregate messages and update their features + /// 4. Repeat for multiple layers + /// + /// Applications: + /// - Molecular dynamics (predict molecular properties) + /// - Computational fluid dynamics (irregular meshes) + /// - Material science (crystal structures) + /// - Climate modeling (irregular Earth grids) + /// - Particle systems + /// + public class GraphNeuralOperator : NeuralNetworkBase where T : struct, INumber + { + private readonly int _numMessagePassingLayers; + private readonly int _hiddenDim; + private readonly List> _graphLayers; + + public GraphNeuralOperator( + NeuralNetworkArchitecture architecture, + int numLayers = 4, + int hiddenDim = 64) + : base(architecture, null, 1.0) + { + _numMessagePassingLayers = numLayers; + _hiddenDim = hiddenDim; + _graphLayers = new List>(); + + InitializeLayers(); + } + + protected override void InitializeLayers() + { + for (int i = 0; i < _numMessagePassingLayers; i++) + { + var layer = new GraphConvolutionLayer(_hiddenDim, _hiddenDim); + _graphLayers.Add(layer); + } + } + + /// + /// Forward pass through the graph neural operator. + /// + /// Features for each node. + /// Graph adjacency matrix. + /// Updated node features. + public T[,] Forward(T[,] nodeFeatures, T[,] adjacencyMatrix) + { + T[,] features = nodeFeatures; + + foreach (var layer in _graphLayers) + { + features = layer.Forward(features, adjacencyMatrix); + } + + return features; + } + } + + /// + /// Graph convolution layer for message passing. + /// + public class GraphConvolutionLayer where T : struct, INumber + { + private readonly int _inputDim; + private readonly int _outputDim; + private T[,]? 
_weights; + + public GraphConvolutionLayer(int inputDim, int outputDim) + { + _inputDim = inputDim; + _outputDim = outputDim; + InitializeWeights(); + } + + private void InitializeWeights() + { + var random = new Random(42); + _weights = new T[_inputDim, _outputDim]; + + for (int i = 0; i < _inputDim; i++) + { + for (int j = 0; j < _outputDim; j++) + { + _weights[i, j] = T.CreateChecked(random.NextDouble() - 0.5); + } + } + } + + public T[,] Forward(T[,] nodeFeatures, T[,] adjacencyMatrix) + { + // Simplified graph convolution: H' = σ(A H W) + // In practice, would include normalization and bias + int numNodes = nodeFeatures.GetLength(0); + T[,] output = new T[numNodes, _outputDim]; + + // Matrix multiplication simplified version + for (int i = 0; i < numNodes; i++) + { + for (int j = 0; j < _outputDim; j++) + { + output[i, j] = T.Zero; + for (int k = 0; k < numNodes; k++) + { + for (int l = 0; l < _inputDim; l++) + { + output[i, j] += adjacencyMatrix[i, k] * nodeFeatures[k, l] * _weights![l, j]; + } + } + } + } + + return output; + } + } +} diff --git a/src/PhysicsInformed/PDEs/BurgersEquation.cs b/src/PhysicsInformed/PDEs/BurgersEquation.cs new file mode 100644 index 000000000..f6efce404 --- /dev/null +++ b/src/PhysicsInformed/PDEs/BurgersEquation.cs @@ -0,0 +1,87 @@ +using System; +using System.Numerics; +using AiDotNet.PhysicsInformed.Interfaces; + +namespace AiDotNet.PhysicsInformed.PDEs +{ + /// + /// Represents the Burgers' Equation: ∂u/∂t + u * ∂u/∂x = ν * ∂²u/∂x² + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// Burgers' Equation is a fundamental PDE that combines: + /// 1. Nonlinear convection (u * ∂u/∂x): The solution advects (moves) at its own speed + /// 2. Diffusion (ν * ∂²u/∂x²): The solution spreads out over time + /// + /// Physical Interpretation: + /// - Models simplified fluid dynamics (1D version of Navier-Stokes) + /// - u(x,t) can represent fluid velocity at position x and time t + /// - ν (nu) is the viscosity - controls how much the solution smooths out + /// - The nonlinear term creates shock waves and turbulence-like behavior + /// + /// Key Feature: + /// The nonlinearity makes this equation challenging - it can develop discontinuities (shocks) + /// even from smooth initial conditions. This makes it a perfect benchmark for PINNs. + /// + /// Applications: + /// - Gas dynamics + /// - Traffic flow modeling + /// - Shock wave formation + /// - Turbulence studies + /// + public class BurgersEquation : IPDESpecification where T : struct, INumber + { + private readonly T _viscosity; + + /// + /// Initializes a new instance of Burgers' Equation. + /// + /// The viscosity coefficient ν (must be non-negative). Set to 0 for inviscid Burgers. 
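+        ///
+        /// Construction sketch (the value ν = 0.01/π is only an illustration; it is a viscosity
+        /// commonly used in Burgers' benchmarks, not a requirement of this class):
+        ///
+        ///     var burgers = new BurgersEquation<double>(0.01 / Math.PI);
+        ///     // burgers.ComputeResidual(inputs, outputs, derivatives) then returns
+        ///     // ∂u/∂t + u·∂u/∂x − ν·∂²u/∂x² at a collocation point (≈ 0 for a good solution).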
+ public BurgersEquation(T viscosity) + { + if (viscosity < T.Zero) + { + throw new ArgumentException("Viscosity must be non-negative.", nameof(viscosity)); + } + _viscosity = viscosity; + } + + /// + public T ComputeResidual(T[] inputs, T[] outputs, PDEDerivatives derivatives) + { + if (derivatives.FirstDerivatives == null) + { + throw new ArgumentException("Burgers' equation requires first derivatives."); + } + + // For 1D Burgers equation: inputs = [x, t], outputs = [u] + // PDE: ∂u/∂t + u * ∂u/∂x - ν * ∂²u/∂x² = 0 + + T u = outputs[0]; + T dudt = derivatives.FirstDerivatives[0, 1]; // ∂u/∂t + T dudx = derivatives.FirstDerivatives[0, 0]; // ∂u/∂x + + T convectionTerm = u * dudx; // Nonlinear convection + T diffusionTerm = T.Zero; + + if (_viscosity > T.Zero && derivatives.SecondDerivatives != null) + { + T d2udx2 = derivatives.SecondDerivatives[0, 0, 0]; // ∂²u/∂x² + diffusionTerm = _viscosity * d2udx2; + } + + T residual = dudt + convectionTerm - diffusionTerm; + return residual; + } + + /// + public int InputDimension => 2; // [x, t] + + /// + public int OutputDimension => 1; // [u] + + /// + public string Name => $"Burgers' Equation (ν={_viscosity})"; + } +} diff --git a/src/PhysicsInformed/PDEs/HeatEquation.cs b/src/PhysicsInformed/PDEs/HeatEquation.cs new file mode 100644 index 000000000..2d046f640 --- /dev/null +++ b/src/PhysicsInformed/PDEs/HeatEquation.cs @@ -0,0 +1,69 @@ +using System; +using System.Numerics; +using AiDotNet.PhysicsInformed.Interfaces; + +namespace AiDotNet.PhysicsInformed.PDEs +{ + /// + /// Represents the Heat Equation (or Diffusion Equation): ∂u/∂t = α ∂²u/∂x² + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// The Heat Equation models how heat diffuses through a material over time. + /// - u(x,t) is the temperature at position x and time t + /// - α (alpha) is the thermal diffusivity, which controls how fast heat spreads + /// - The equation says: The rate of temperature change equals how curved the temperature profile is + /// + /// Physical Interpretation: + /// - If the temperature profile is concave (curves down), heat flows in → temperature increases + /// - If the temperature profile is convex (curves up), heat flows out → temperature decreases + /// - At inflection points (no curvature), temperature stays constant + /// + /// Example: A metal rod with one end heated - the heat gradually spreads along the rod. + /// + public class HeatEquation : IPDESpecification where T : struct, INumber + { + private readonly T _thermalDiffusivity; + + /// + /// Initializes a new instance of the Heat Equation. + /// + /// The thermal diffusivity coefficient α (must be positive). 
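+        ///
+        /// Illustrative sketch combining this class with AutomaticDifferentiation from this PR
+        /// (the "network" delegate, the diffusivity 0.1, and the sample point are assumptions):
+        ///
+        ///     Func<double[], double[]> network = ...;        // the PINN's forward pass
+        ///     var heat = new HeatEquation<double>(0.1);
+        ///     double[] point = { 0.5, 0.2 };                 // (x, t)
+        ///     double[] u = network(point);
+        ///     var derivs = AutomaticDifferentiation<double>.ComputeDerivatives(network, point, heat.OutputDimension);
+        ///     double residual = heat.ComputeResidual(point, u, derivs);   // driven toward 0 during training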
+ public HeatEquation(T thermalDiffusivity) + { + if (thermalDiffusivity <= T.Zero) + { + throw new ArgumentException("Thermal diffusivity must be positive.", nameof(thermalDiffusivity)); + } + _thermalDiffusivity = thermalDiffusivity; + } + + /// + public T ComputeResidual(T[] inputs, T[] outputs, PDEDerivatives derivatives) + { + if (derivatives.FirstDerivatives == null || derivatives.SecondDerivatives == null) + { + throw new ArgumentException("Heat equation requires first and second derivatives."); + } + + // For 1D heat equation: inputs = [x, t], outputs = [u] + // PDE: ∂u/∂t - α * ∂²u/∂x² = 0 + + T dudt = derivatives.FirstDerivatives[0, 1]; // ∂u/∂t (output 0, input 1 which is t) + T d2udx2 = derivatives.SecondDerivatives[0, 0, 0]; // ∂²u/∂x² (output 0, both inputs are x) + + T residual = dudt - _thermalDiffusivity * d2udx2; + return residual; + } + + /// + public int InputDimension => 2; // [x, t] + + /// + public int OutputDimension => 1; // [u] + + /// + public string Name => $"Heat Equation (α={_thermalDiffusivity})"; + } +} diff --git a/src/PhysicsInformed/PDEs/PoissonEquation.cs b/src/PhysicsInformed/PDEs/PoissonEquation.cs new file mode 100644 index 000000000..f4fd6d6b5 --- /dev/null +++ b/src/PhysicsInformed/PDEs/PoissonEquation.cs @@ -0,0 +1,91 @@ +using System; +using System.Numerics; +using AiDotNet.PhysicsInformed.Interfaces; + +namespace AiDotNet.PhysicsInformed.PDEs +{ + /// + /// Represents the Poisson Equation: ∇²u = f(x,y) + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// The Poisson Equation is one of the most important equations in physics and engineering: + /// - ∇²u (Laplacian of u) = ∂²u/∂x² + ∂²u/∂y² (+ ∂²u/∂z² in 3D) + /// - f(x,y) is a source term (known function) + /// + /// Physical Interpretation: + /// - Models steady-state (time-independent) phenomena + /// - u could represent: temperature, electric potential, pressure, concentration, etc. + /// - f represents sources (+) and sinks (-) in the domain + /// + /// Special Case: + /// When f = 0, it becomes Laplace's Equation (∇²u = 0), which models equilibrium states. + /// + /// Applications: + /// - Electrostatics: Electric potential from charge distribution + /// - Steady heat conduction: Temperature distribution with heat sources + /// - Fluid dynamics: Pressure field in incompressible flow + /// - Gravitational potential: From mass distribution + /// - Image processing: Image reconstruction and smoothing + /// + /// Example: + /// Temperature in a metal plate with heat sources/sinks reaches a steady state + /// described by the Poisson equation. + /// + public class PoissonEquation : IPDESpecification where T : struct, INumber + { + private readonly Func _sourceFunction; + private readonly int _spatialDimension; + + /// + /// Initializes a new instance of the Poisson Equation. + /// + /// The source term f(x,y,...). Set to null or return zero for Laplace's equation. + /// The number of spatial dimensions (1, 2, or 3). + public PoissonEquation(Func? sourceFunction = null, int spatialDimension = 2) + { + if (spatialDimension < 1 || spatialDimension > 3) + { + throw new ArgumentException("Spatial dimension must be 1, 2, or 3.", nameof(spatialDimension)); + } + + _sourceFunction = sourceFunction ?? 
(x => T.Zero); + _spatialDimension = spatialDimension; + } + + /// + public T ComputeResidual(T[] inputs, T[] outputs, PDEDerivatives derivatives) + { + if (derivatives.SecondDerivatives == null) + { + throw new ArgumentException("Poisson equation requires second derivatives."); + } + + // Compute Laplacian: ∇²u = ∂²u/∂x² + ∂²u/∂y² + ∂²u/∂z² + T laplacian = T.Zero; + + for (int i = 0; i < _spatialDimension; i++) + { + laplacian += derivatives.SecondDerivatives[0, i, i]; // ∂²u/∂xi² + } + + T sourceValue = _sourceFunction(inputs); + + // PDE: ∇²u - f = 0 + T residual = laplacian - sourceValue; + return residual; + } + + /// + public int InputDimension => _spatialDimension; + + /// + public int OutputDimension => 1; // [u] + + /// + public string Name => _sourceFunction == null + ? $"Laplace Equation ({_spatialDimension}D)" + : $"Poisson Equation ({_spatialDimension}D)"; + } +} diff --git a/src/PhysicsInformed/PDEs/WaveEquation.cs b/src/PhysicsInformed/PDEs/WaveEquation.cs new file mode 100644 index 000000000..de764bda1 --- /dev/null +++ b/src/PhysicsInformed/PDEs/WaveEquation.cs @@ -0,0 +1,101 @@ +using System; +using System.Numerics; +using AiDotNet.PhysicsInformed.Interfaces; + +namespace AiDotNet.PhysicsInformed.PDEs +{ + /// + /// Represents the Wave Equation: ∂²u/∂t² = c² * ∇²u + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// The Wave Equation describes how waves propagate through a medium: + /// - u(x,t) is the wave amplitude (displacement) at position x and time t + /// - c is the wave speed (how fast disturbances propagate) + /// - ∇²u is the Laplacian (spatial curvature) of u + /// + /// Physical Interpretation: + /// - The acceleration (∂²u/∂t²) is proportional to the spatial curvature + /// - If the wave is curved upward, it accelerates upward (and vice versa) + /// - This creates oscillations that propagate at speed c + /// + /// Key Properties: + /// - Solutions are superpositions of traveling waves: u(x±ct) + /// - Energy is conserved + /// - Waves can reflect, refract, diffract, and interfere + /// + /// Applications: + /// - Sound waves in air/water + /// - Vibrating strings (guitar, piano) + /// - Electromagnetic waves (light, radio) + /// - Seismic waves (earthquakes) + /// - Water waves (ocean, lake) + /// + /// Example: + /// A guitar string vibrates according to the wave equation when plucked. + /// The wave speed depends on the string's tension and mass. + /// + public class WaveEquation : IPDESpecification where T : struct, INumber + { + private readonly T _waveSpeed; + private readonly int _spatialDimension; + + /// + /// Initializes a new instance of the Wave Equation. + /// + /// The wave propagation speed c (must be positive). + /// The number of spatial dimensions (1, 2, or 3). 
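+        ///
+        /// Construction sketch (the wave speeds are illustrative assumptions):
+        ///
+        ///     var string1D = new WaveEquation<double>(waveSpeed: 1.0);                        // inputs are [x, t]
+        ///     var sound2D  = new WaveEquation<double>(waveSpeed: 343.0, spatialDimension: 2); // inputs are [x, y, t]
+        ///     // InputDimension = spatialDimension + 1, because time is always the last coordinate.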
+ public WaveEquation(T waveSpeed, int spatialDimension = 1) + { + if (waveSpeed <= T.Zero) + { + throw new ArgumentException("Wave speed must be positive.", nameof(waveSpeed)); + } + + if (spatialDimension < 1 || spatialDimension > 3) + { + throw new ArgumentException("Spatial dimension must be 1, 2, or 3.", nameof(spatialDimension)); + } + + _waveSpeed = waveSpeed; + _spatialDimension = spatialDimension; + } + + /// + public T ComputeResidual(T[] inputs, T[] outputs, PDEDerivatives derivatives) + { + if (derivatives.SecondDerivatives == null) + { + throw new ArgumentException("Wave equation requires second derivatives."); + } + + // For 1D wave equation: inputs = [x, t], outputs = [u] + // PDE: ∂²u/∂t² - c² * ∂²u/∂x² = 0 + // For 2D: ∂²u/∂t² - c² * (∂²u/∂x² + ∂²u/∂y²) = 0 + + int timeIndex = _spatialDimension; // Time is always the last input dimension + T d2udt2 = derivatives.SecondDerivatives[0, timeIndex, timeIndex]; // ∂²u/∂t² + + // Compute spatial Laplacian: ∇²u + T laplacian = T.Zero; + for (int i = 0; i < _spatialDimension; i++) + { + laplacian += derivatives.SecondDerivatives[0, i, i]; // ∂²u/∂xi² + } + + T c2 = _waveSpeed * _waveSpeed; + T residual = d2udt2 - c2 * laplacian; + return residual; + } + + /// + public int InputDimension => _spatialDimension + 1; // Space + time + + /// + public int OutputDimension => 1; // [u] + + /// + public string Name => $"Wave Equation (c={_waveSpeed}, {_spatialDimension}D)"; + } +} diff --git a/src/PhysicsInformed/PINNs/DeepRitzMethod.cs b/src/PhysicsInformed/PINNs/DeepRitzMethod.cs new file mode 100644 index 000000000..79b455225 --- /dev/null +++ b/src/PhysicsInformed/PINNs/DeepRitzMethod.cs @@ -0,0 +1,326 @@ +using System; +using System.Numerics; +using AiDotNet.LinearAlgebra; +using AiDotNet.NeuralNetworks; +using AiDotNet.Optimizers; + +namespace AiDotNet.PhysicsInformed.PINNs +{ + /// + /// Implements the Deep Ritz Method for solving variational problems and PDEs. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// The Deep Ritz Method is a variational approach to solving PDEs using neural networks. + /// Instead of minimizing the PDE residual directly (like standard PINNs), it minimizes + /// an energy functional. + /// + /// The Ritz Method (Classical): + /// Many PDEs can be reformulated as minimization problems. For example: + /// - Poisson equation: -∇²u = f is equivalent to minimizing E(u) = ½∫|∇u|² dx - ∫fu dx + /// - This is called the "variational formulation" + /// - The solution minimizes the energy functional + /// + /// Deep Ritz (Modern): + /// - Use a neural network to represent u(x) + /// - Compute the energy functional using automatic differentiation + /// - Train the network to minimize the energy + /// - Naturally incorporates boundary conditions + /// + /// Advantages over Standard PINNs: + /// 1. More stable training (minimizing energy vs. residual) + /// 2. Natural framework for problems with variational structure + /// 3. Often converges faster + /// 4. 
Physical interpretation (energy minimization) + /// + /// Applications: + /// - Elasticity (minimize strain energy) + /// - Electrostatics (minimize electrostatic energy) + /// - Fluid dynamics (minimize dissipation) + /// - Quantum mechanics (minimize expected energy) + /// - Optimal control problems + /// + /// Key Difference from PINNs: + /// PINN: Minimize ||PDE residual||² + /// Deep Ritz: Minimize ∫ Energy(u, ∇u) dx + /// + /// Both solve the same PDE, but Deep Ritz uses the variational (energy) formulation, + /// which can be more natural and stable for certain problems. + /// + public class DeepRitzMethod : NeuralNetworkBase where T : struct, INumber + { + private readonly Func _energyFunctional; + private readonly Func? _boundaryCheck; + private readonly Func? _boundaryValue; + private T[,]? _quadraturePoints; + private T[]? _quadratureWeights; + + /// + /// Initializes a new instance of the Deep Ritz Method. + /// + /// The neural network architecture. + /// The energy functional to minimize: E(x, u, ∇u). + /// Function to check if a point is on the boundary. + /// Function returning the boundary value at a point. + /// Number of quadrature points for numerical integration. + /// + /// For Beginners: + /// The energy functional should encode the physics of your problem. + /// + /// Example - Poisson Equation (-∇²u = f): + /// Energy: E(u) = ½∫|∇u|² dx - ∫fu dx + /// Implementation: energyFunctional = (x, u, grad_u) => 0.5 * ||grad_u||² - f(x) * u + /// + /// Example - Linear Elasticity: + /// Energy: E(u) = ∫ strain_energy(∇u) dx + /// Implementation: energyFunctional = (x, u, grad_u) => compute_strain_energy(grad_u) + /// + /// The method will integrate this over the domain using quadrature points. + /// + public DeepRitzMethod( + NeuralNetworkArchitecture architecture, + Func energyFunctional, + Func? boundaryCheck = null, + Func? boundaryValue = null, + int numQuadraturePoints = 10000) + : base(architecture, null, 1.0) + { + _energyFunctional = energyFunctional ?? throw new ArgumentNullException(nameof(energyFunctional)); + _boundaryCheck = boundaryCheck; + _boundaryValue = boundaryValue; + + InitializeLayers(); + GenerateQuadraturePoints(numQuadraturePoints, architecture.InputSize); + } + + protected override void InitializeLayers() + { + if (Architecture.CustomLayers != null && Architecture.CustomLayers.Count > 0) + { + foreach (var layer in Architecture.CustomLayers) + { + Layers.Add(layer); + } + } + else + { + // Deep network for variational problems + var layerSizes = Architecture.HiddenLayerSizes ?? new int[] { 64, 64, 64 }; + + for (int i = 0; i < layerSizes.Length; i++) + { + int inputSize = i == 0 ? Architecture.InputSize : layerSizes[i - 1]; + int outputSize = layerSizes[i]; + + var denseLayer = new NeuralNetworks.Layers.DenseLayer( + inputSize, + outputSize, + Architecture.Activation ?? Enums.ActivationFunctionType.Tanh); + + Layers.Add(denseLayer); + } + + var outputLayer = new NeuralNetworks.Layers.DenseLayer( + layerSizes[layerSizes.Length - 1], + Architecture.OutputSize, + Enums.ActivationFunctionType.Linear); + + Layers.Add(outputLayer); + } + } + + /// + /// Generates quadrature points for numerical integration. + /// + /// + /// For Beginners: + /// Numerical integration approximates ∫f(x)dx ≈ Σw_i * f(x_i) + /// where x_i are quadrature points and w_i are weights. 
+ /// + /// This method uses Monte Carlo integration: + /// - Randomly sample points in the domain + /// - Each point has equal weight = volume / numPoints + /// - Simple but effective for high-dimensional problems + /// + /// Advanced users might want to use: + /// - Gauss quadrature (for 1D problems) + /// - Sparse grids (for moderate dimensions) + /// - Quasi-Monte Carlo (better coverage than random) + /// + private void GenerateQuadraturePoints(int numPoints, int dimension) + { + _quadraturePoints = new T[numPoints, dimension]; + _quadratureWeights = new T[numPoints]; + + var random = new Random(42); + T weight = T.One / T.CreateChecked(numPoints); // Uniform weights + + for (int i = 0; i < numPoints; i++) + { + for (int j = 0; j < dimension; j++) + { + _quadraturePoints[i, j] = T.CreateChecked(random.NextDouble()); + } + _quadratureWeights[i] = weight; + } + } + + /// + /// Computes the total energy functional by integrating over the domain. + /// + /// The total energy value. + /// + /// For Beginners: + /// This is the key method that computes ∫E(u, ∇u)dx numerically. + /// + /// Steps: + /// 1. For each quadrature point x_i: + /// a) Evaluate u(x_i) using the network + /// b) Compute ∇u(x_i) using automatic differentiation + /// c) Evaluate the energy density E(x_i, u(x_i), ∇u(x_i)) + /// 2. Sum weighted energies: Total = Σ w_i * E_i + /// 3. Add boundary penalty if needed + /// + /// The gradient of this total energy with respect to network parameters + /// tells us how to update the network to minimize energy. + /// + public T ComputeTotalEnergy() + { + if (_quadraturePoints == null || _quadratureWeights == null) + { + throw new InvalidOperationException("Quadrature points not initialized."); + } + + T totalEnergy = T.Zero; + + // Integrate energy over the domain + for (int i = 0; i < _quadraturePoints.GetLength(0); i++) + { + T[] point = new T[_quadraturePoints.GetLength(1)]; + for (int j = 0; j < point.Length; j++) + { + point[j] = _quadraturePoints[i, j]; + } + + // Skip boundary points if boundary conditions are specified + if (_boundaryCheck != null && _boundaryCheck(point)) + { + continue; + } + + // Evaluate network + T[] u = EvaluateAtPoint(point); + + // Compute gradient + T[,] gradU = AutomaticDifferentiation.ComputeGradient( + EvaluateAtPoint, + point, + Architecture.OutputSize); + + // Evaluate energy density + T energyDensity = _energyFunctional(point, u, gradU); + + // Weighted sum + totalEnergy += _quadratureWeights[i] * energyDensity; + } + + // Add boundary penalty if needed + if (_boundaryCheck != null && _boundaryValue != null) + { + T boundaryPenalty = ComputeBoundaryPenalty(); + totalEnergy += boundaryPenalty; + } + + return totalEnergy; + } + + /// + /// Computes penalty for violating boundary conditions. 
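+ /// The penalty has the form w · Σ ||u(x_b) − u_BC(x_b)||² over the quadrature points x_b that
+ /// lie on the boundary; the weight w is hard-coded to 100 below, so boundary violations are
+ /// punished much more heavily than the interior energy until the condition is (approximately) met.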
+ /// + private T ComputeBoundaryPenalty() + { + if (_quadraturePoints == null || _boundaryCheck == null || _boundaryValue == null) + { + return T.Zero; + } + + T penalty = T.Zero; + T penaltyWeight = T.CreateChecked(100.0); // Large weight to enforce BC + + for (int i = 0; i < _quadraturePoints.GetLength(0); i++) + { + T[] point = new T[_quadraturePoints.GetLength(1)]; + for (int j = 0; j < point.Length; j++) + { + point[j] = _quadraturePoints[i, j]; + } + + if (_boundaryCheck(point)) + { + T[] u = EvaluateAtPoint(point); + T[] uBC = _boundaryValue(point); + + for (int k = 0; k < u.Length; k++) + { + T error = u[k] - uBC[k]; + penalty += penaltyWeight * error * error; + } + } + } + + return penalty; + } + + private T[] EvaluateAtPoint(T[] inputs) + { + var inputTensor = new Tensor(new int[] { 1, inputs.Length }); + for (int i = 0; i < inputs.Length; i++) + { + inputTensor[0, i] = inputs[i]; + } + + var outputTensor = Forward(inputTensor); + + T[] result = new T[outputTensor.Shape[1]]; + for (int i = 0; i < result.Length; i++) + { + result[i] = outputTensor[0, i]; + } + + return result; + } + + /// + /// Trains the network to minimize the energy functional. + /// + public TrainingHistory Solve(int epochs = 1000, double learningRate = 0.001, bool verbose = true) + { + var history = new TrainingHistory(); + + for (int epoch = 0; epoch < epochs; epoch++) + { + T energy = ComputeTotalEnergy(); + history.AddEpoch(energy); + + if (verbose && epoch % 100 == 0) + { + Console.WriteLine($"Epoch {epoch}/{epochs}, Energy: {energy}"); + } + + // Note: Actual gradient computation and parameter update would go here + // This would require backpropagation through the energy computation + } + + return history; + } + + /// + /// Gets the solution at a specific point. + /// + public T[] GetSolution(T[] point) + { + return EvaluateAtPoint(point); + } + } +} diff --git a/src/PhysicsInformed/PINNs/PhysicsInformedNeuralNetwork.cs b/src/PhysicsInformed/PINNs/PhysicsInformedNeuralNetwork.cs new file mode 100644 index 000000000..b84164533 --- /dev/null +++ b/src/PhysicsInformed/PINNs/PhysicsInformedNeuralNetwork.cs @@ -0,0 +1,422 @@ +using System; +using System.Numerics; +using AiDotNet.Interfaces; +using AiDotNet.LinearAlgebra; +using AiDotNet.NeuralNetworks; +using AiDotNet.Optimizers; +using AiDotNet.PhysicsInformed.Interfaces; + +namespace AiDotNet.PhysicsInformed.PINNs +{ + /// + /// Represents a Physics-Informed Neural Network (PINN) for solving PDEs. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// A Physics-Informed Neural Network (PINN) is a neural network that learns to solve + /// Partial Differential Equations (PDEs) by incorporating physical laws directly into + /// the training process. + /// + /// Traditional Approach (Finite Elements/Differences): + /// - Discretize the domain into a grid + /// - Approximate derivatives using neighboring points + /// - Solve a large system of equations + /// - Works well but can be slow for complex geometries + /// + /// PINN Approach: + /// - Neural network represents the solution u(x,t) + /// - Use automatic differentiation to compute ∂u/∂x, ∂²u/∂x², etc. + /// - Train the network to minimize: + /// * PDE residual (how much the PDE is violated) + /// * Boundary condition errors + /// * Initial condition errors + /// * Data fitting errors (if measurements are available) + /// + /// Key Advantages: + /// 1. Meshless: No need to discretize the domain + /// 2. Data-efficient: Can work with sparse or noisy data + /// 3. 
Flexible: Easy to handle complex geometries and boundary conditions + /// 4. Interpolation: Get solution at any point by evaluating the network + /// 5. Inverse problems: Can discover unknown parameters in the PDE + /// + /// Key Challenges: + /// 1. Training can be difficult (multiple objectives to balance) + /// 2. May require careful tuning of loss weights + /// 3. Network architecture affects accuracy + /// 4. Computational cost during training (many derivative evaluations) + /// + /// Applications: + /// - Fluid dynamics (Navier-Stokes equations) + /// - Heat transfer + /// - Structural mechanics + /// - Quantum mechanics + /// - Financial modeling (Black-Scholes PDE) + /// - Climate and weather modeling + /// + /// Historical Context: + /// PINNs were introduced by Raissi, Perdikaris, and Karniadakis in 2019. + /// They've revolutionized scientific machine learning by showing that deep learning + /// can be guided by physics rather than just data. + /// + public class PhysicsInformedNeuralNetwork : NeuralNetworkBase where T : struct, INumber + { + private readonly IPDESpecification _pdeSpecification; + private readonly IBoundaryCondition[] _boundaryConditions; + private readonly IInitialCondition? _initialCondition; + private readonly PhysicsInformedLoss _physicsLoss; + private readonly IGradientBasedOptimizer, Tensor> _optimizer; + + // Collocation points for PDE residual evaluation + private T[,]? _collocationPoints; + private readonly int _numCollocationPoints; + + /// + /// Initializes a new instance of the PINN class. + /// + /// The neural network architecture (typically a deep feedforward network). + /// The PDE that the solution must satisfy. + /// Boundary conditions for the problem. + /// Initial condition for time-dependent problems (optional). + /// Number of points in the domain where to enforce the PDE. + /// Optimization algorithm (Adam is recommended for PINNs). + /// Weight for data loss component. + /// Weight for PDE residual loss (often needs tuning). + /// Weight for boundary condition loss. + /// Weight for initial condition loss. + /// + /// For Beginners: + /// When creating a PINN, you need to specify: + /// 1. Network architecture: Usually a deep network (5-10 hidden layers, 20-50 neurons each) + /// - Activation: tanh or sin often work well for smooth solutions + /// - Input: spatial coordinates (x, y, z) and possibly time (t) + /// - Output: solution values u(x,t) + /// + /// 2. PDE specification: Defines the physics (e.g., Heat Equation, Navier-Stokes) + /// + /// 3. Boundary conditions: What happens at the edges of your domain + /// + /// 4. Collocation points: Where to enforce the PDE + /// - More points = better accuracy but slower training + /// - Typically 10,000-100,000 points + /// - Can use random sampling or quasi-random (Sobol, Latin hypercube) + /// + /// 5. Loss weights: Balance between different objectives + /// - Start with all weights = 1.0 + /// - If PDE residual is large, increase pdeWeight + /// - If boundary conditions are violated, increase boundaryWeight + /// - This is often the trickiest part of PINN training! + /// + public PhysicsInformedNeuralNetwork( + NeuralNetworkArchitecture architecture, + IPDESpecification pdeSpecification, + IBoundaryCondition[] boundaryConditions, + IInitialCondition? initialCondition = null, + int numCollocationPoints = 10000, + IGradientBasedOptimizer, Tensor>? optimizer = null, + T? dataWeight = null, + T? pdeWeight = null, + T? boundaryWeight = null, + T? 
initialWeight = null) + : base(architecture, null, 1.0) + { + _pdeSpecification = pdeSpecification ?? throw new ArgumentNullException(nameof(pdeSpecification)); + _boundaryConditions = boundaryConditions ?? throw new ArgumentNullException(nameof(boundaryConditions)); + _initialCondition = initialCondition; + _numCollocationPoints = numCollocationPoints; + + // Create the physics-informed loss function + _physicsLoss = new PhysicsInformedLoss( + _pdeSpecification, + _boundaryConditions, + _initialCondition, + dataWeight, + pdeWeight, + boundaryWeight, + initialWeight); + + // Use Adam optimizer by default (works well for PINNs) + _optimizer = optimizer ?? new AdamOptimizer, Tensor>(this); + + InitializeLayers(); + GenerateCollocationPoints(); + } + + /// + /// Initializes the neural network layers. + /// + protected override void InitializeLayers() + { + // Use custom layers if provided in architecture + if (Architecture.CustomLayers != null && Architecture.CustomLayers.Count > 0) + { + foreach (var layer in Architecture.CustomLayers) + { + Layers.Add(layer); + } + } + else + { + // Create default deep feedforward architecture + // For PINNs, we typically want deeper networks with moderate width + var layerSizes = Architecture.HiddenLayerSizes ?? new int[] { 32, 32, 32, 32 }; + + // Create fully connected layers + for (int i = 0; i < layerSizes.Length; i++) + { + int inputSize = i == 0 ? Architecture.InputSize : layerSizes[i - 1]; + int outputSize = layerSizes[i]; + + // Add a dense layer with tanh activation (good for smooth solutions) + var denseLayer = new NeuralNetworks.Layers.DenseLayer( + inputSize, + outputSize, + Architecture.Activation ?? Enums.ActivationFunctionType.Tanh); + + Layers.Add(denseLayer); + } + + // Output layer + var outputLayer = new NeuralNetworks.Layers.DenseLayer( + layerSizes[layerSizes.Length - 1], + Architecture.OutputSize, + Enums.ActivationFunctionType.Linear); // Linear output for regression + + Layers.Add(outputLayer); + } + } + + /// + /// Generates collocation points for enforcing the PDE in the domain. + /// + /// + /// For Beginners: + /// Collocation points are locations in the domain where we enforce the PDE. + /// Think of them as "checkpoints" where the neural network must satisfy the physics. + /// + /// Sampling Strategies: + /// 1. Uniform grid: Simple but can miss important regions + /// 2. Random sampling: Used here - good coverage, easy to implement + /// 3. Latin Hypercube: Better space-filling properties + /// 4. Adaptive sampling: Add more points where error is high + /// + /// For this implementation, we use random sampling in the unit hypercube [0,1]^d. + /// You should scale these to your actual domain (e.g., x ∈ [-1, 1], t ∈ [0, 10]). + /// + private void GenerateCollocationPoints() + { + int inputDim = _pdeSpecification.InputDimension; + _collocationPoints = new T[_numCollocationPoints, inputDim]; + + var random = new Random(42); // Fixed seed for reproducibility + + for (int i = 0; i < _numCollocationPoints; i++) + { + for (int j = 0; j < inputDim; j++) + { + // Random points in [0, 1] + // In practice, you'd scale this to your actual domain + _collocationPoints[i, j] = T.CreateChecked(random.NextDouble()); + } + } + } + + /// + /// Sets custom collocation points (for advanced users who want specific sampling). + /// + /// Collocation points [numPoints, inputDim]. 
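+ ///
+ /// Illustrative usage sketch (assumes T = double, an existing PhysicsInformedNeuralNetwork&lt;double&gt;
+ /// instance named pinn, and a hypothetical 1D space-time domain x ∈ [-1, 1], t ∈ [0, 10]):
+ ///
+ ///     var rng = new Random(0);
+ ///     var points = new double[5000, 2];
+ ///     for (int i = 0; i < 5000; i++)
+ ///     {
+ ///         points[i, 0] = -1.0 + 2.0 * rng.NextDouble();  // x scaled to [-1, 1]
+ ///         points[i, 1] = 10.0 * rng.NextDouble();        // t scaled to [0, 10]
+ ///     }
+ ///     pinn.SetCollocationPoints(points);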
+ public void SetCollocationPoints(T[,] points) + { + if (points.GetLength(1) != _pdeSpecification.InputDimension) + { + throw new ArgumentException( + $"Collocation points must have {_pdeSpecification.InputDimension} dimensions."); + } + _collocationPoints = points; + } + + /// + /// Solves the PDE by training the PINN. + /// + /// Optional measured input data. + /// Optional measured output data. + /// Number of training epochs. + /// Learning rate for optimization. + /// Whether to print progress. + /// Training history (losses over epochs). + /// + /// For Beginners: + /// Training a PINN is like training a regular neural network, but with a special loss function. + /// + /// Training Process: + /// 1. Sample collocation points + /// 2. For each point: + /// a) Evaluate network: u = NN(x) + /// b) Compute derivatives: ∂u/∂x, ∂²u/∂x², etc. + /// c) Evaluate PDE residual: PDE(u, ∂u/∂x, ...) + /// 3. Evaluate boundary and initial conditions + /// 4. Compute total loss + /// 5. Backpropagate and update network weights + /// 6. Repeat + /// + /// Tips for Success: + /// - Start with simpler PDEs (heat, Poisson) before trying complex ones + /// - Monitor individual loss components (data, PDE, BC, IC) + /// - If one component dominates, adjust the weights + /// - Learning rate scheduling can help + /// - Sometimes training is unstable - try different architectures or optimizers + /// + public TrainingHistory Solve( + T[,]? dataInputs = null, + T[,]? dataOutputs = null, + int epochs = 10000, + double learningRate = 0.001, + bool verbose = true) + { + var history = new TrainingHistory(); + + for (int epoch = 0; epoch < epochs; epoch++) + { + T totalLoss = T.Zero; + int numBatches = 0; + + // Train on collocation points (PDE residual) + if (_collocationPoints != null) + { + for (int i = 0; i < _collocationPoints.GetLength(0); i++) + { + T[] point = new T[_pdeSpecification.InputDimension]; + for (int j = 0; j < _pdeSpecification.InputDimension; j++) + { + point[j] = _collocationPoints[i, j]; + } + + // Forward pass to get network output + T[] output = EvaluateAtPoint(point); + + // Compute derivatives using automatic differentiation + var derivatives = AutomaticDifferentiation.ComputeDerivatives( + EvaluateAtPoint, + point, + _pdeSpecification.OutputDimension); + + // Compute physics loss + T loss = _physicsLoss.ComputeLoss(output, null, derivatives, point); + totalLoss += loss; + numBatches++; + + // Note: In a real implementation, you'd accumulate gradients and update in batches + // This simplified version updates after each point + } + } + + // Train on data points if provided + if (dataInputs != null && dataOutputs != null) + { + for (int i = 0; i < dataInputs.GetLength(0); i++) + { + T[] point = new T[dataInputs.GetLength(1)]; + T[] target = new T[dataOutputs.GetLength(1)]; + + for (int j = 0; j < point.Length; j++) + { + point[j] = dataInputs[i, j]; + } + for (int j = 0; j < target.Length; j++) + { + target[j] = dataOutputs[i, j]; + } + + T[] output = EvaluateAtPoint(point); + + var derivatives = AutomaticDifferentiation.ComputeDerivatives( + EvaluateAtPoint, + point, + _pdeSpecification.OutputDimension); + + T loss = _physicsLoss.ComputeLoss(output, target, derivatives, point); + totalLoss += loss; + numBatches++; + } + } + + T avgLoss = numBatches > 0 ? 
totalLoss / T.CreateChecked(numBatches) : T.Zero; + history.AddEpoch(avgLoss); + + if (verbose && epoch % 100 == 0) + { + Console.WriteLine($"Epoch {epoch}/{epochs}, Loss: {avgLoss}"); + } + } + + return history; + } + + /// + /// Evaluates the network at a single point. + /// + private T[] EvaluateAtPoint(T[] inputs) + { + // This is a simplified forward pass + // In a real implementation, you'd use the proper tensor-based forward pass + // For now, we'll create a minimal implementation + + // Convert to tensor, forward pass, convert back + var inputTensor = new Tensor(new int[] { 1, inputs.Length }); + for (int i = 0; i < inputs.Length; i++) + { + inputTensor[0, i] = inputs[i]; + } + + var outputTensor = Forward(inputTensor); + + T[] result = new T[outputTensor.Shape[1]]; + for (int i = 0; i < result.Length; i++) + { + result[i] = outputTensor[0, i]; + } + + return result; + } + + /// + /// Gets the solution at a specific point in the domain. + /// + /// The point coordinates (x, y, t, etc.). + /// The solution value(s) at that point. + public T[] GetSolution(T[] point) + { + return EvaluateAtPoint(point); + } + + /// + /// Evaluates the PDE residual at a point (for validation). + /// + /// The point coordinates. + /// The PDE residual (should be close to zero for a good solution). + public T EvaluatePDEResidual(T[] point) + { + T[] output = EvaluateAtPoint(point); + var derivatives = AutomaticDifferentiation.ComputeDerivatives( + EvaluateAtPoint, + point, + _pdeSpecification.OutputDimension); + + return _pdeSpecification.ComputeResidual(point, output, derivatives); + } + } + + /// + /// Stores training history for analysis. + /// + /// The numeric type. + public class TrainingHistory where T : struct, INumber + { + public List Losses { get; } = new List(); + + public void AddEpoch(T loss) + { + Losses.Add(loss); + } + } +} diff --git a/src/PhysicsInformed/PINNs/VariationalPINN.cs b/src/PhysicsInformed/PINNs/VariationalPINN.cs new file mode 100644 index 000000000..3a8714d48 --- /dev/null +++ b/src/PhysicsInformed/PINNs/VariationalPINN.cs @@ -0,0 +1,336 @@ +using System; +using System.Numerics; +using AiDotNet.LinearAlgebra; +using AiDotNet.NeuralNetworks; + +namespace AiDotNet.PhysicsInformed.PINNs +{ + /// + /// Implements Variational Physics-Informed Neural Networks (VPINNs). + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// Variational PINNs (VPINNs) use the weak (variational) formulation of PDEs instead of + /// the strong form. This is similar to finite element methods (FEM). + /// + /// Strong vs. Weak Formulation: + /// + /// Strong Form (standard PINN): + /// - PDE must hold pointwise: PDE(u) = 0 at every point + /// - Example: -∇²u = f everywhere + /// - Requires computing second derivatives + /// - Solution must be twice differentiable + /// + /// Weak Form (VPINN): + /// - PDE holds "on average" against test functions + /// - ∫∇u·∇v dx = ∫fv dx for all test functions v + /// - Integration by parts reduces derivative order + /// - Solution only needs to be once differentiable + /// - More stable numerically + /// + /// Key Advantages: + /// 1. Lower derivative requirements (better numerical stability) + /// 2. Natural incorporation of boundary conditions (through integration by parts) + /// 3. Can handle discontinuities and rough solutions better + /// 4. Closer to FEM (well-understood mathematical theory) + /// 5. Often better convergence properties + /// + /// How VPINNs Work: + /// 1. 
Choose test functions (often neural networks themselves) + /// 2. Multiply PDE by test function and integrate + /// 3. Use integration by parts to reduce derivative order + /// 4. Minimize the residual in the weak sense + /// + /// Example - Poisson Equation: + /// Strong: -∇²u = f + /// Weak: ∫∇u·∇v dx = ∫fv dx (after integration by parts) + /// + /// VPINNs train the network u(x) to satisfy the weak form for all test functions v. + /// + /// Applications: + /// - Same as PINNs, but particularly useful for: + /// * Problems with rough solutions + /// * Conservation laws + /// * Problems where weak solutions are more natural + /// * High-order PDEs (where reducing derivative order helps) + /// + /// Comparison with Standard PINNs: + /// - VPINN: More stable, lower derivative requirements, closer to FEM + /// - Standard PINN: Simpler to implement, direct enforcement of PDE + /// + /// The variational formulation often provides better training dynamics and accuracy. + /// + public class VariationalPINN : NeuralNetworkBase where T : struct, INumber + { + private readonly Func _weakFormResidual; + private T[,]? _quadraturePoints; + private T[]? _quadratureWeights; + private readonly int _numTestFunctions; + + /// + /// Initializes a new instance of the Variational PINN. + /// + /// The neural network architecture for the solution. + /// The weak form residual: R(x, u, ∇u, v, ∇v). + /// Number of quadrature points for integration. + /// Number of test functions to use. + /// + /// For Beginners: + /// The weak form residual should encode the variational formulation of your PDE. + /// + /// Example - Poisson Equation (-∇²u = f): + /// Weak form: ∫∇u·∇v dx - ∫fv dx = 0 + /// weakFormResidual = (x, u, grad_u, v, grad_v) => { + /// T term1 = DotProduct(grad_u, grad_v); // ∇u·∇v + /// T term2 = f(x) * v; // fv + /// return term1 - term2; + /// } + /// + /// The method integrates this over the domain using numerical quadrature. + /// + public VariationalPINN( + NeuralNetworkArchitecture architecture, + Func weakFormResidual, + int numQuadraturePoints = 10000, + int numTestFunctions = 10) + : base(architecture, null, 1.0) + { + _weakFormResidual = weakFormResidual ?? throw new ArgumentNullException(nameof(weakFormResidual)); + _numTestFunctions = numTestFunctions; + + InitializeLayers(); + GenerateQuadraturePoints(numQuadraturePoints, architecture.InputSize); + } + + protected override void InitializeLayers() + { + if (Architecture.CustomLayers != null && Architecture.CustomLayers.Count > 0) + { + foreach (var layer in Architecture.CustomLayers) + { + Layers.Add(layer); + } + } + else + { + var layerSizes = Architecture.HiddenLayerSizes ?? new int[] { 32, 32, 32 }; + + for (int i = 0; i < layerSizes.Length; i++) + { + int inputSize = i == 0 ? Architecture.InputSize : layerSizes[i - 1]; + int outputSize = layerSizes[i]; + + var denseLayer = new NeuralNetworks.Layers.DenseLayer( + inputSize, + outputSize, + Architecture.Activation ?? 
Enums.ActivationFunctionType.Tanh); + + Layers.Add(denseLayer); + } + + var outputLayer = new NeuralNetworks.Layers.DenseLayer( + layerSizes[layerSizes.Length - 1], + Architecture.OutputSize, + Enums.ActivationFunctionType.Linear); + + Layers.Add(outputLayer); + } + } + + private void GenerateQuadraturePoints(int numPoints, int dimension) + { + _quadraturePoints = new T[numPoints, dimension]; + _quadratureWeights = new T[numPoints]; + + var random = new Random(42); + T weight = T.One / T.CreateChecked(numPoints); + + for (int i = 0; i < numPoints; i++) + { + for (int j = 0; j < dimension; j++) + { + _quadraturePoints[i, j] = T.CreateChecked(random.NextDouble()); + } + _quadratureWeights[i] = weight; + } + } + + /// + /// Computes the weak form residual by integrating over the domain. + /// + /// Index of the test function to use. + /// The weak residual (should be zero for a perfect solution). + /// + /// For Beginners: + /// This computes ∫R(u, v)dx where: + /// - u is the neural network solution + /// - v is a test function + /// - R is the weak form residual + /// + /// For a true solution, this integral should be zero for ALL test functions. + /// We approximate "all" by using a finite set of test functions. + /// + /// Test Function Choices: + /// 1. Polynomial basis (Legendre, Chebyshev) + /// 2. Trigonometric functions (Fourier) + /// 3. Another neural network + /// 4. Random functions + /// + /// This implementation uses simple polynomial test functions. + /// + public T ComputeWeakResidual(int testFunctionIndex) + { + if (_quadraturePoints == null || _quadratureWeights == null) + { + throw new InvalidOperationException("Quadrature points not initialized."); + } + + T residual = T.Zero; + + for (int i = 0; i < _quadraturePoints.GetLength(0); i++) + { + T[] point = new T[_quadraturePoints.GetLength(1)]; + for (int j = 0; j < point.Length; j++) + { + point[j] = _quadraturePoints[i, j]; + } + + // Evaluate solution + T[] u = EvaluateAtPoint(point); + T[,] gradU = AutomaticDifferentiation.ComputeGradient( + EvaluateAtPoint, + point, + Architecture.OutputSize); + + // Evaluate test function + T[] v = EvaluateTestFunction(point, testFunctionIndex); + T[,] gradV = ComputeTestFunctionGradient(point, testFunctionIndex); + + // Compute weak form residual at this point + T localResidual = _weakFormResidual(point, u, gradU, v, gradV); + + // Integrate + residual += _quadratureWeights[i] * localResidual; + } + + return residual; + } + + /// + /// Evaluates a test function (simple polynomial basis for demonstration). + /// + private T[] EvaluateTestFunction(T[] point, int index) + { + // Simple polynomial test functions: v_i(x) = x₁^i₁ * x₂^i₂ * ... + // This is a basic implementation; in practice, you'd use better basis functions + + T[] result = new T[Architecture.OutputSize]; + + // Generate multi-index based on index + int[] multiIndex = GenerateMultiIndex(index, point.Length); + + for (int k = 0; k < result.Length; k++) + { + T value = T.One; + for (int j = 0; j < point.Length; j++) + { + // Compute point[j]^multiIndex[j] + for (int p = 0; p < multiIndex[j]; p++) + { + value *= point[j]; + } + } + result[k] = value; + } + + return result; + } + + /// + /// Computes the gradient of a test function. 
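+ /// The gradient is obtained numerically via AutomaticDifferentiation.ComputeGradient. For the
+ /// monomial basis used here it could also be written in closed form,
+ /// ∂/∂x_j (x₁^{i₁} · … · x_d^{i_d}) = i_j · x_j^{i_j − 1} · Π_{k≠j} x_k^{i_k},
+ /// which would avoid the extra finite-difference evaluations.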
+ /// + private T[,] ComputeTestFunctionGradient(T[] point, int index) + { + // Numerical differentiation of test function + return AutomaticDifferentiation.ComputeGradient( + x => EvaluateTestFunction(x, index), + point, + Architecture.OutputSize); + } + + /// + /// Generates a multi-index for polynomial basis. + /// + private int[] GenerateMultiIndex(int index, int dimension) + { + int[] multiIndex = new int[dimension]; + int remaining = index; + + for (int i = 0; i < dimension; i++) + { + multiIndex[i] = remaining % 3; // Use powers 0, 1, 2 + remaining /= 3; + } + + return multiIndex; + } + + private T[] EvaluateAtPoint(T[] inputs) + { + var inputTensor = new Tensor(new int[] { 1, inputs.Length }); + for (int i = 0; i < inputs.Length; i++) + { + inputTensor[0, i] = inputs[i]; + } + + var outputTensor = Forward(inputTensor); + + T[] result = new T[outputTensor.Shape[1]]; + for (int i = 0; i < result.Length; i++) + { + result[i] = outputTensor[0, i]; + } + + return result; + } + + /// + /// Trains the network to minimize the weak residual. + /// + public TrainingHistory Solve(int epochs = 1000, double learningRate = 0.001, bool verbose = true) + { + var history = new TrainingHistory(); + + for (int epoch = 0; epoch < epochs; epoch++) + { + T totalLoss = T.Zero; + + // Compute residual for all test functions + for (int i = 0; i < _numTestFunctions; i++) + { + T residual = ComputeWeakResidual(i); + totalLoss += residual * residual; + } + + T avgLoss = totalLoss / T.CreateChecked(_numTestFunctions); + history.AddEpoch(avgLoss); + + if (verbose && epoch % 100 == 0) + { + Console.WriteLine($"Epoch {epoch}/{epochs}, Weak Residual: {avgLoss}"); + } + } + + return history; + } + + /// + /// Gets the solution at a specific point. + /// + public T[] GetSolution(T[] point) + { + return EvaluateAtPoint(point); + } + } +} diff --git a/src/PhysicsInformed/PhysicsInformedLoss.cs b/src/PhysicsInformed/PhysicsInformedLoss.cs new file mode 100644 index 000000000..5aaffc8ab --- /dev/null +++ b/src/PhysicsInformed/PhysicsInformedLoss.cs @@ -0,0 +1,238 @@ +using System; +using System.Numerics; +using AiDotNet.Interfaces; +using AiDotNet.PhysicsInformed.Interfaces; + +namespace AiDotNet.PhysicsInformed +{ + /// + /// Loss function for Physics-Informed Neural Networks (PINNs). + /// Combines data loss, PDE residual loss, boundary condition loss, and initial condition loss. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// Traditional neural networks learn from data alone. Physics-Informed Neural Networks (PINNs) + /// additionally enforce that the solution satisfies physical laws (PDEs) and constraints. + /// + /// This loss function has multiple components: + /// 1. Data Loss: Measures how well predictions match observed data points + /// 2. PDE Residual Loss: Measures how much the PDE is violated at collocation points + /// 3. Boundary Loss: Ensures boundary conditions are satisfied + /// 4. Initial Condition Loss: Ensures initial conditions are satisfied (for time-dependent problems) + /// + /// The total loss is a weighted sum: + /// L_total = λ_data * L_data + λ_pde * L_pde + λ_bc * L_bc + λ_ic * L_ic + /// + /// Why This Works: + /// By minimizing this loss, the network learns to: + /// - Fit the available data + /// - Obey physical laws everywhere (not just at data points) + /// - Satisfy boundary and initial conditions + /// This often requires far less data than traditional deep learning! 
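+ ///
+ /// Illustrative construction sketch (assumes T = double; the PDE specification, boundary
+ /// conditions, and weights shown are placeholders for whatever your problem defines):
+ ///
+ ///     var loss = new PhysicsInformedLoss&lt;double&gt;(
+ ///         pdeSpecification: waveEquation,            // e.g. the WaveEquation added in this PR
+ ///         boundaryConditions: new[] { leftBC, rightBC },
+ ///         pdeWeight: 1.0,
+ ///         boundaryWeight: 10.0);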
+ /// + /// Key Innovation: + /// PINNs can solve PDEs in regions where we have NO data, as long as the physics is known. + /// + public class PhysicsInformedLoss : ILossFunction where T : struct, INumber + { + private readonly IPDESpecification? _pdeSpecification; + private readonly IBoundaryCondition[]? _boundaryConditions; + private readonly IInitialCondition? _initialCondition; + + // Loss weights + private readonly T _dataWeight; + private readonly T _pdeWeight; + private readonly T _boundaryWeight; + private readonly T _initialWeight; + + /// + /// Initializes a new instance of the Physics-Informed loss function. + /// + /// The PDE that the solution must satisfy. + /// Boundary conditions for the problem. + /// Initial condition for time-dependent problems. + /// Weight for data loss component. + /// Weight for PDE residual loss component. + /// Weight for boundary condition loss. + /// Weight for initial condition loss. + public PhysicsInformedLoss( + IPDESpecification? pdeSpecification = null, + IBoundaryCondition[]? boundaryConditions = null, + IInitialCondition? initialCondition = null, + T? dataWeight = null, + T? pdeWeight = null, + T? boundaryWeight = null, + T? initialWeight = null) + { + _pdeSpecification = pdeSpecification; + _boundaryConditions = boundaryConditions; + _initialCondition = initialCondition; + + // Default weights + _dataWeight = dataWeight ?? T.One; + _pdeWeight = pdeWeight ?? T.One; + _boundaryWeight = boundaryWeight ?? T.One; + _initialWeight = initialWeight ?? T.One; + } + + /// + /// Gets the name of the loss function. + /// + public string Name => "Physics-Informed Loss"; + + /// + /// Computes the total physics-informed loss. + /// + /// Network predictions. + /// Target values (may be null if no data available). + /// Derivatives needed for PDE computation. + /// Input points where predictions were made. + /// The total loss value. + public T ComputeLoss(T[] predictions, T[]? targets, PDEDerivatives derivatives, T[] inputs) + { + T totalLoss = T.Zero; + + // 1. Data Loss (if targets are provided) + if (targets != null && targets.Length > 0) + { + T dataLoss = ComputeDataLoss(predictions, targets); + totalLoss += _dataWeight * dataLoss; + } + + // 2. PDE Residual Loss + if (_pdeSpecification != null) + { + T pdeLoss = ComputePDELoss(inputs, predictions, derivatives); + totalLoss += _pdeWeight * pdeLoss; + } + + // 3. Boundary Condition Loss + if (_boundaryConditions != null && _boundaryConditions.Length > 0) + { + T boundaryLoss = ComputeBoundaryLoss(inputs, predictions, derivatives); + totalLoss += _boundaryWeight * boundaryLoss; + } + + // 4. Initial Condition Loss + if (_initialCondition != null) + { + T initialLoss = ComputeInitialLoss(inputs, predictions); + totalLoss += _initialWeight * initialLoss; + } + + return totalLoss; + } + + /// + /// Computes the data fitting loss (Mean Squared Error). + /// + private T ComputeDataLoss(T[] predictions, T[] targets) + { + if (predictions.Length != targets.Length) + { + throw new ArgumentException("Predictions and targets must have the same length."); + } + + T sumSquaredError = T.Zero; + for (int i = 0; i < predictions.Length; i++) + { + T error = predictions[i] - targets[i]; + sumSquaredError += error * error; + } + + return sumSquaredError / T.CreateChecked(predictions.Length); + } + + /// + /// Computes the PDE residual loss. 
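+ /// The PDE specification returns a residual r = PDE(x, u, ∂u, …) at the given point; this method
+ /// returns r², so the contribution is zero exactly when the PDE is satisfied there.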
+ /// + private T ComputePDELoss(T[] inputs, T[] predictions, PDEDerivatives derivatives) + { + if (_pdeSpecification == null) + { + return T.Zero; + } + + T residual = _pdeSpecification.ComputeResidual(inputs, predictions, derivatives); + return residual * residual; // Squared residual + } + + /// + /// Computes the boundary condition loss. + /// + private T ComputeBoundaryLoss(T[] inputs, T[] predictions, PDEDerivatives derivatives) + { + if (_boundaryConditions == null || _boundaryConditions.Length == 0) + { + return T.Zero; + } + + T totalBoundaryLoss = T.Zero; + int boundaryCount = 0; + + foreach (var bc in _boundaryConditions) + { + if (bc.IsOnBoundary(inputs)) + { + T residual = bc.ComputeBoundaryResidual(inputs, predictions, derivatives); + totalBoundaryLoss += residual * residual; + boundaryCount++; + } + } + + return boundaryCount > 0 + ? totalBoundaryLoss / T.CreateChecked(boundaryCount) + : T.Zero; + } + + /// + /// Computes the initial condition loss. + /// + private T ComputeInitialLoss(T[] inputs, T[] predictions) + { + if (_initialCondition == null) + { + return T.Zero; + } + + if (!_initialCondition.IsAtInitialTime(inputs)) + { + return T.Zero; + } + + // Extract spatial coordinates (all except the last which is time) + T[] spatialInputs = new T[inputs.Length - 1]; + Array.Copy(inputs, spatialInputs, spatialInputs.Length); + + T[] expectedValues = _initialCondition.ComputeInitialValue(spatialInputs); + + T sumSquaredError = T.Zero; + for (int i = 0; i < Math.Min(predictions.Length, expectedValues.Length); i++) + { + T error = predictions[i] - expectedValues[i]; + sumSquaredError += error * error; + } + + return sumSquaredError / T.CreateChecked(predictions.Length); + } + + /// + /// Computes the derivative of the loss with respect to predictions. + /// Required by the ILossFunction interface. + /// + public T[] ComputeDerivative(T[] predictions, T[] targets) + { + // For simplicity, return MSE derivative for data loss component + T[] derivative = new T[predictions.Length]; + T scale = T.CreateChecked(2.0) / T.CreateChecked(predictions.Length); + + for (int i = 0; i < predictions.Length; i++) + { + derivative[i] = scale * (predictions[i] - targets[i]); + } + + return derivative; + } + } +} diff --git a/src/PhysicsInformed/ScientificML/HamiltonianNeuralNetwork.cs b/src/PhysicsInformed/ScientificML/HamiltonianNeuralNetwork.cs new file mode 100644 index 000000000..d488b3bf2 --- /dev/null +++ b/src/PhysicsInformed/ScientificML/HamiltonianNeuralNetwork.cs @@ -0,0 +1,209 @@ +using System; +using System.Numerics; +using AiDotNet.LinearAlgebra; +using AiDotNet.NeuralNetworks; + +namespace AiDotNet.PhysicsInformed.ScientificML +{ + /// + /// Implements Hamiltonian Neural Networks (HNN) for learning conservative dynamical systems. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// Hamiltonian Neural Networks learn the laws of physics by respecting conservation principles. + /// + /// Classical Mechanics - Hamiltonian Formulation: + /// Many physical systems are described by Hamilton's equations: + /// - dq/dt = ∂H/∂p (position changes with momentum gradient) + /// - dp/dt = -∂H/∂q (momentum changes with negative position gradient) + /// + /// Where: + /// - q = position coordinates + /// - p = momentum coordinates + /// - H(q,p) = Hamiltonian (total energy of the system) + /// + /// Key Property: Energy Conservation + /// For conservative systems, H(q,p) = constant (energy is conserved) + /// + /// Traditional Neural Networks vs. 
HNN: + /// + /// Traditional NN: + /// - Learn dynamics directly: (q,p) → (dq/dt, dp/dt) + /// - Can violate physics laws + /// - May not conserve energy + /// - Can accumulate errors over time + /// + /// HNN: + /// - Learn the Hamiltonian: (q,p) → H + /// - Compute dynamics from H: dq/dt = ∂H/∂p, dp/dt = -∂H/∂q + /// - Automatically conserves energy (by construction!) + /// - More accurate long-term predictions + /// + /// How It Works: + /// 1. Neural network learns H(q,p) + /// 2. Use automatic differentiation to get ∂H/∂q and ∂H/∂p + /// 3. Apply Hamilton's equations to get dynamics + /// 4. Guaranteed to preserve Hamiltonian structure + /// + /// Applications: + /// - Planetary motion (gravitational systems) + /// - Molecular dynamics (particle interactions) + /// - Robotics (mechanical systems) + /// - Quantum mechanics (Schrödinger equation) + /// - Any conservative system + /// + /// Example - Pendulum: + /// H(q,p) = p²/(2m) + mgl(1 - cos(q)) + /// - q = angle, p = angular momentum + /// - HNN learns this from data without knowing the formula! + /// + /// Key Benefit: + /// By encoding physics structure (Hamiltonian formulation), the network + /// learns faster, generalizes better, and makes physically consistent predictions. + /// + public class HamiltonianNeuralNetwork : NeuralNetworkBase where T : struct, INumber + { + private readonly int _stateDim; // Dimension of state space (q and p together) + + public HamiltonianNeuralNetwork( + NeuralNetworkArchitecture architecture, + int stateDim) + : base(architecture, null, 1.0) + { + _stateDim = stateDim; + InitializeLayers(); + } + + protected override void InitializeLayers() + { + if (Architecture.CustomLayers != null && Architecture.CustomLayers.Count > 0) + { + foreach (var layer in Architecture.CustomLayers) + { + Layers.Add(layer); + } + } + else + { + var layerSizes = Architecture.HiddenLayerSizes ?? new int[] { 64, 64, 64 }; + + for (int i = 0; i < layerSizes.Length; i++) + { + int inputSize = i == 0 ? _stateDim : layerSizes[i - 1]; + int outputSize = layerSizes[i]; + + var denseLayer = new NeuralNetworks.Layers.DenseLayer( + inputSize, + outputSize, + Enums.ActivationFunctionType.Tanh); + + Layers.Add(denseLayer); + } + + // Output is scalar Hamiltonian + var outputLayer = new NeuralNetworks.Layers.DenseLayer( + layerSizes[layerSizes.Length - 1], + 1, + Enums.ActivationFunctionType.Linear); + + Layers.Add(outputLayer); + } + } + + /// + /// Computes the Hamiltonian (energy) for a given state. + /// + /// State vector [q₁, ..., qₙ, p₁, ..., pₙ]. + /// Hamiltonian value (energy). + public T ComputeHamiltonian(T[] state) + { + var inputTensor = new Tensor(new int[] { 1, state.Length }); + for (int i = 0; i < state.Length; i++) + { + inputTensor[0, i] = state[i]; + } + + var output = Forward(inputTensor); + return output[0, 0]; + } + + /// + /// Computes the time derivative of the state using Hamilton's equations. + /// + /// Current state [q, p]. + /// Time derivative [dq/dt, dp/dt]. + /// + /// For Beginners: + /// This is where the physics happens! + /// Instead of predicting the derivative directly, we: + /// 1. Compute H(q,p) using the network + /// 2. Compute ∂H/∂p (derivative of H with respect to momentum) + /// 3. Compute ∂H/∂q (derivative of H with respect to position) + /// 4. Apply Hamilton's equations: dq/dt = ∂H/∂p, dp/dt = -∂H/∂q + /// + /// This guarantees that energy is conserved! 
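+ ///
+ /// Worked example (pendulum Hamiltonian from the class remarks, H(q,p) = p²/(2m) + mgl(1 − cos q)):
+ /// ∂H/∂p = p/m gives dq/dt = p/m, and ∂H/∂q = mgl·sin(q) gives dp/dt = −mgl·sin(q) — exactly the
+ /// pendulum equations of motion. Note that exact energy conservation holds for the continuous-time
+ /// dynamics; a numerical integrator can still drift unless a symplectic scheme is used.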
+ /// + public T[] ComputeTimeDerivative(T[] state) + { + int n = _stateDim / 2; // Half are positions, half are momenta + + // Compute gradient of H with respect to state + T[,] gradient = AutomaticDifferentiation.ComputeGradient( + s => new T[] { ComputeHamiltonian(s) }, + state, + 1); + + T[] derivative = new T[_stateDim]; + + // Hamilton's equations: + // dq/dt = ∂H/∂p + // dp/dt = -∂H/∂q + + for (int i = 0; i < n; i++) + { + derivative[i] = gradient[0, n + i]; // dq_i/dt = ∂H/∂p_i + derivative[n + i] = -gradient[0, i]; // dp_i/dt = -∂H/∂q_i + } + + return derivative; + } + + /// + /// Simulates the system forward in time. + /// + /// Initial state [q₀, p₀]. + /// Time step. + /// Number of time steps. + /// Trajectory [numSteps + 1, stateDim]. + public T[,] Simulate(T[] initialState, T dt, int numSteps) + { + T[,] trajectory = new T[numSteps + 1, _stateDim]; + + // Set initial state + for (int i = 0; i < _stateDim; i++) + { + trajectory[0, i] = initialState[i]; + } + + // Integrate using symplectic Euler (preserves Hamiltonian structure) + for (int step = 0; step < numSteps; step++) + { + T[] currentState = new T[_stateDim]; + for (int i = 0; i < _stateDim; i++) + { + currentState[i] = trajectory[step, i]; + } + + T[] derivative = ComputeTimeDerivative(currentState); + + for (int i = 0; i < _stateDim; i++) + { + trajectory[step + 1, i] = currentState[i] + dt * derivative[i]; + } + } + + return trajectory; + } + } +} diff --git a/src/PhysicsInformed/ScientificML/LagrangianNeuralNetwork.cs b/src/PhysicsInformed/ScientificML/LagrangianNeuralNetwork.cs new file mode 100644 index 000000000..7fd82d855 --- /dev/null +++ b/src/PhysicsInformed/ScientificML/LagrangianNeuralNetwork.cs @@ -0,0 +1,133 @@ +using System; +using System.Numerics; +using AiDotNet.LinearAlgebra; +using AiDotNet.NeuralNetworks; + +namespace AiDotNet.PhysicsInformed.ScientificML +{ + /// + /// Implements Lagrangian Neural Networks (LNN) for learning mechanical systems. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// Lagrangian Neural Networks learn physics using the Lagrangian formulation of mechanics. + /// + /// Lagrangian Mechanics: + /// Alternative to Hamiltonian mechanics, uses the Lagrangian: + /// L(q, q̇) = T - V = Kinetic Energy - Potential Energy + /// + /// Equations of Motion (Euler-Lagrange): + /// d/dt(∂L/∂q̇) - ∂L/∂q = 0 + /// + /// Where: + /// - q = generalized coordinates (positions) + /// - q̇ = generalized velocities + /// - T = kinetic energy (usually ½m q̇²) + /// - V = potential energy (depends on q) + /// + /// Why Lagrangian vs. Hamiltonian? + /// - Lagrangian: Uses (q, q̇) - position and velocity + /// - Hamiltonian: Uses (q, p) - position and momentum + /// - Lagrangian often more intuitive for mechanical systems + /// - Both give same physics, different formulations + /// + /// How LNN Works: + /// 1. Neural network learns L(q, q̇) + /// 2. Compute ∂L/∂q and ∂L/∂q̇ using automatic differentiation + /// 3. Apply Euler-Lagrange equation to get acceleration q̈ + /// 4. 
Guaranteed to conserve energy and satisfy principle of least action + /// + /// Applications: + /// - Robotics (manipulator dynamics) + /// - Biomechanics (human motion) + /// - Aerospace (satellite dynamics) + /// - Any mechanical system + /// + public class LagrangianNeuralNetwork : NeuralNetworkBase where T : struct, INumber + { + private readonly int _configurationDim; // Dimension of configuration space (q) + + public LagrangianNeuralNetwork( + NeuralNetworkArchitecture architecture, + int configurationDim) + : base(architecture, null, 1.0) + { + _configurationDim = configurationDim; + InitializeLayers(); + } + + protected override void InitializeLayers() + { + if (Architecture.CustomLayers != null && Architecture.CustomLayers.Count > 0) + { + foreach (var layer in Architecture.CustomLayers) + { + Layers.Add(layer); + } + } + else + { + var layerSizes = Architecture.HiddenLayerSizes ?? new int[] { 64, 64 }; + int inputDim = 2 * _configurationDim; // q and q̇ + + for (int i = 0; i < layerSizes.Length; i++) + { + int inputSize = i == 0 ? inputDim : layerSizes[i - 1]; + int outputSize = layerSizes[i]; + + var denseLayer = new NeuralNetworks.Layers.DenseLayer( + inputSize, + outputSize, + Enums.ActivationFunctionType.Tanh); + + Layers.Add(denseLayer); + } + + // Output is scalar Lagrangian + var outputLayer = new NeuralNetworks.Layers.DenseLayer( + layerSizes[layerSizes.Length - 1], + 1, + Enums.ActivationFunctionType.Linear); + + Layers.Add(outputLayer); + } + } + + /// + /// Computes the Lagrangian L(q, q̇) = T - V. + /// + public T ComputeLagrangian(T[] q, T[] qDot) + { + T[] state = new T[2 * _configurationDim]; + Array.Copy(q, 0, state, 0, _configurationDim); + Array.Copy(qDot, 0, state, _configurationDim, _configurationDim); + + var inputTensor = new Tensor(new int[] { 1, state.Length }); + for (int i = 0; i < state.Length; i++) + { + inputTensor[0, i] = state[i]; + } + + var output = Forward(inputTensor); + return output[0, 0]; + } + + /// + /// Computes acceleration using Euler-Lagrange equation. + /// + public T[] ComputeAcceleration(T[] q, T[] qDot) + { + T[] state = new T[2 * _configurationDim]; + Array.Copy(q, 0, state, 0, _configurationDim); + Array.Copy(qDot, 0, state, _configurationDim, _configurationDim); + + // This is a simplified version + // Full implementation would solve: d/dt(∂L/∂q̇) - ∂L/∂q = 0 for q̈ + T[] acceleration = new T[_configurationDim]; + + // Placeholder: would compute using automatic differentiation + return acceleration; + } + } +} diff --git a/src/PhysicsInformed/ScientificML/SymbolicPhysicsLearner.cs b/src/PhysicsInformed/ScientificML/SymbolicPhysicsLearner.cs new file mode 100644 index 000000000..402aab8c6 --- /dev/null +++ b/src/PhysicsInformed/ScientificML/SymbolicPhysicsLearner.cs @@ -0,0 +1,160 @@ +using System; +using System.Collections.Generic; +using System.Numerics; + +namespace AiDotNet.PhysicsInformed.ScientificML +{ + /// + /// Implements Symbolic Physics Learning for discovering interpretable equations from data. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// Symbolic Physics Learner discovers equations in symbolic form (like f = ma, E = mc²). + /// + /// Traditional ML: + /// - Neural networks are "black boxes" + /// - Learn complex functions but hard to interpret + /// - Can't extract simple equations + /// + /// Symbolic Regression: + /// - Discovers actual mathematical equations + /// - Interpretable results (can publish in papers!) 
+ /// - Can rediscover known physics laws + /// - Can discover NEW laws + /// + /// Example: + /// Input: Data of planetary positions vs. time + /// Output: F = G*m₁*m₂/r² (Newton's law of gravitation) + /// + /// How It Works: + /// 1. Search space: Library of operators (+, -, *, /, sin, exp, etc.) + /// 2. Search algorithm: Genetic programming, reinforcement learning, etc. + /// 3. Fitness: Balance between accuracy and simplicity (Occam's razor) + /// 4. Output: Symbolic expression + /// + /// Applications: + /// - Discovering physical laws from experiments + /// - Automating scientific discovery + /// - Interpretable AI for science + /// - Finding conservation laws + /// + /// Famous Success: + /// - Rediscovered Kepler's laws from planetary data + /// - Found new equations in materials science + /// - Discovered patterns in quantum mechanics + /// + public class SymbolicPhysicsLearner where T : struct, INumber + { + private readonly List> _unaryOperators; + private readonly List> _binaryOperators; + private readonly Random _random; + + public SymbolicPhysicsLearner() + { + _random = new Random(42); + _unaryOperators = new List> + { + x => -x, // Negation + x => x * x, // Square + x => T.One / x, // Reciprocal + x => T.CreateChecked(Math.Sqrt(double.CreateChecked(x))), // Sqrt + x => T.CreateChecked(Math.Sin(double.CreateChecked(x))), // Sin + x => T.CreateChecked(Math.Cos(double.CreateChecked(x))), // Cos + x => T.CreateChecked(Math.Exp(double.CreateChecked(x))), // Exp + x => T.CreateChecked(Math.Log(double.CreateChecked(x))) // Log + }; + + _binaryOperators = new List> + { + (x, y) => x + y, // Addition + (x, y) => x - y, // Subtraction + (x, y) => x * y, // Multiplication + (x, y) => x / y, // Division + (x, y) => T.CreateChecked(Math.Pow(double.CreateChecked(x), double.CreateChecked(y))) // Power + }; + } + + /// + /// Discovers a symbolic equation from data using genetic programming. + /// + /// Input data [numSamples, numFeatures]. + /// Output data [numSamples]. + /// Maximum allowed complexity of the equation. + /// Number of evolutionary generations. + /// Best discovered equation as a symbolic expression tree. + public SymbolicExpression DiscoverEquation( + T[,] inputs, + T[] outputs, + int maxComplexity = 10, + int numGenerations = 100) + { + // Simplified genetic programming for equation discovery + // Full implementation would include: + // 1. Initialize population of random expressions + // 2. Evaluate fitness (accuracy + simplicity) + // 3. Selection, crossover, mutation + // 4. Repeat for generations + // 5. Return best expression + + // Placeholder: return a simple linear equation + return new SymbolicExpression("x1", SymbolicExpressionType.Variable); + } + + /// + /// Simplifies an expression using symbolic algebra rules. + /// + public SymbolicExpression Simplify(SymbolicExpression expression) + { + // Apply algebraic simplification rules: + // x + 0 = x, x * 1 = x, x * 0 = 0, etc. + return expression; + } + + /// + /// Converts expression to human-readable string. + /// + public string ToLatex(SymbolicExpression expression) + { + return expression.ToString(); + } + } + + /// + /// Represents a symbolic mathematical expression. 
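+ /// Minimal usage sketch (Evaluate below is currently a placeholder that returns zero, so only the
+ /// textual form is meaningful for now):
+ ///
+ ///     var expr = new SymbolicExpression&lt;double&gt;("x1", SymbolicExpressionType.Variable);
+ ///     Console.WriteLine(expr);   // prints "x1"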
+ /// + public class SymbolicExpression where T : struct, INumber + { + public string Expression { get; set; } + public SymbolicExpressionType Type { get; set; } + + public SymbolicExpression(string expression, SymbolicExpressionType type) + { + Expression = expression; + Type = type; + } + + public override string ToString() => Expression; + + /// + /// Evaluates the expression for given variable values. + /// + public T Evaluate(Dictionary variables) + { + // Placeholder: would parse and evaluate the expression + return T.Zero; + } + } + + /// + /// Types of symbolic expressions. + /// + public enum SymbolicExpressionType + { + Constant, + Variable, + UnaryOperation, + BinaryOperation, + Function + } +} diff --git a/src/PhysicsInformed/ScientificML/UniversalDifferentialEquations.cs b/src/PhysicsInformed/ScientificML/UniversalDifferentialEquations.cs new file mode 100644 index 000000000..cdeb639cb --- /dev/null +++ b/src/PhysicsInformed/ScientificML/UniversalDifferentialEquations.cs @@ -0,0 +1,167 @@ +using System; +using System.Numerics; +using AiDotNet.LinearAlgebra; +using AiDotNet.NeuralNetworks; + +namespace AiDotNet.PhysicsInformed.ScientificML +{ + /// + /// Implements Universal Differential Equations (UDEs) - ODEs with neural network components. + /// + /// The numeric type used for calculations. + /// + /// For Beginners: + /// Universal Differential Equations combine known physics with machine learning. + /// + /// Traditional ODEs: + /// dx/dt = f(x, t, θ) where f is a known function with parameters θ + /// Example: dx/dt = -kx (exponential decay, k is known) + /// + /// Pure Neural ODEs: + /// dx/dt = NN(x, t, θ) where NN is a neural network + /// - Very flexible, can learn any dynamics + /// - But ignores known physics + /// - May violate physical laws + /// + /// Universal Differential Equations (UDEs): + /// dx/dt = f_known(x, t) + NN(x, t, θ) + /// - Combines known physics (f_known) with learned corrections (NN) + /// - Best of both worlds! + /// + /// Key Idea: + /// Use neural networks to model UNKNOWN parts of the physics while keeping + /// KNOWN parts as explicit equations. + /// + /// Example - Epidemic Model: + /// Known: dS/dt = -βSI, dI/dt = βSI - γI (basic SIR model) + /// Unknown: How β (infection rate) varies with temperature, policy, etc. + /// UDE: dS/dt = -β(T, P)SI where β(T, P) = NN(temperature, policy) + /// + /// Applications: + /// - Climate modeling (known physics + unknown feedback loops) + /// - Epidemiology (known disease spread + unknown interventions) + /// - Chemistry (known reactions + unknown catalysis effects) + /// - Biology (known population dynamics + unknown environmental factors) + /// - Engineering (known mechanics + unknown friction/damping) + /// + public class UniversalDifferentialEquation : NeuralNetworkBase where T : struct, INumber + { + private readonly Func _knownDynamics; + private readonly int _stateDim; + + public UniversalDifferentialEquation( + NeuralNetworkArchitecture architecture, + int stateDim, + Func? knownDynamics = null) + : base(architecture, null, 1.0) + { + _stateDim = stateDim; + _knownDynamics = knownDynamics ?? ((x, t) => new T[stateDim]); // Default to zero + InitializeLayers(); + } + + protected override void InitializeLayers() + { + if (Architecture.CustomLayers != null && Architecture.CustomLayers.Count > 0) + { + foreach (var layer in Architecture.CustomLayers) + { + Layers.Add(layer); + } + } + else + { + var layerSizes = Architecture.HiddenLayerSizes ?? 
new int[] { 32, 32 }; + + for (int i = 0; i < layerSizes.Length; i++) + { + int inputSize = i == 0 ? _stateDim + 1 : layerSizes[i - 1]; // +1 for time + int outputSize = layerSizes[i]; + + var denseLayer = new NeuralNetworks.Layers.DenseLayer( + inputSize, + outputSize, + Enums.ActivationFunctionType.Tanh); + + Layers.Add(denseLayer); + } + + var outputLayer = new NeuralNetworks.Layers.DenseLayer( + layerSizes[layerSizes.Length - 1], + _stateDim, + Enums.ActivationFunctionType.Linear); + + Layers.Add(outputLayer); + } + } + + /// + /// Computes dx/dt = f_known(x, t) + NN(x, t). + /// + public T[] ComputeDerivative(T[] state, T time) + { + // Known physics component + T[] knownPart = _knownDynamics(state, time); + + // Neural network component (learned unknown physics) + T[] input = new T[_stateDim + 1]; + Array.Copy(state, input, _stateDim); + input[_stateDim] = time; + + var inputTensor = new Tensor(new int[] { 1, input.Length }); + for (int i = 0; i < input.Length; i++) + { + inputTensor[0, i] = input[i]; + } + + var output = Forward(inputTensor); + T[] learnedPart = new T[_stateDim]; + for (int i = 0; i < _stateDim; i++) + { + learnedPart[i] = output[0, i]; + } + + // Combine: total derivative = known + learned + T[] totalDerivative = new T[_stateDim]; + for (int i = 0; i < _stateDim; i++) + { + totalDerivative[i] = knownPart[i] + learnedPart[i]; + } + + return totalDerivative; + } + + /// + /// Simulates the UDE forward in time. + /// + public T[,] Simulate(T[] initialState, T tStart, T tEnd, int numSteps) + { + T dt = (tEnd - tStart) / T.CreateChecked(numSteps); + T[,] trajectory = new T[numSteps + 1, _stateDim]; + + for (int i = 0; i < _stateDim; i++) + { + trajectory[0, i] = initialState[i]; + } + + for (int step = 0; step < numSteps; step++) + { + T[] currentState = new T[_stateDim]; + for (int i = 0; i < _stateDim; i++) + { + currentState[i] = trajectory[step, i]; + } + + T currentTime = tStart + T.CreateChecked(step) * dt; + T[] derivative = ComputeDerivative(currentState, currentTime); + + for (int i = 0; i < _stateDim; i++) + { + trajectory[step + 1, i] = currentState[i] + dt * derivative[i]; + } + } + + return trajectory; + } + } +}
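+
+ // Illustrative usage sketch (not part of the class above). Assumes T = double and that a suitable
+ // NeuralNetworkArchitecture<double> named arch has been configured elsewhere; the decay constant,
+ // time span, and step count are hypothetical.
+ //
+ //   // Known physics: exponential decay dx/dt = -0.5 * x; the network is left to learn whatever
+ //   // the data contains beyond this term.
+ //   Func<double[], double, double[]> known = (x, t) => new[] { -0.5 * x[0] };
+ //   var ude = new UniversalDifferentialEquation<double>(arch, stateDim: 1, knownDynamics: known);
+ //   double[,] trajectory = ude.Simulate(new[] { 1.0 }, tStart: 0.0, tEnd: 5.0, numSteps: 500);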