Skip to content

Commit 8bc80ad

Browse files
committed
Improve numerical differentiation in NonlinearObjectiveModel
- Add configurable accuracy with orders 1-6 for both model and residual functions
- Use the NumericalJacobian class for more reliable derivative approximation
1 parent 597917e commit 8bc80ad

File tree

1 file changed

+118
-156
lines changed

1 file changed

+118
-156
lines changed

src/Numerics/Optimization/ObjectiveFunctions/NonlinearObjectiveModel.cs

Lines changed: 118 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using MathNet.Numerics.LinearAlgebra;
1+
using MathNet.Numerics.Differentiation;
2+
using MathNet.Numerics.LinearAlgebra;
23
using System;
34
using System.Collections.Generic;
45
using System.Linq;
@@ -449,6 +450,11 @@ void EvaluateFunction()
449450
_functionValue = 0.5 * _residuals.DotProduct(_residuals);
450451
}
451452

453+
/// <summary>
454+
/// Evaluates the Jacobian matrix, gradient vector, and approximated Hessian matrix at the current parameters.
455+
/// For direct residual mode, gradient is J'R where J is the Jacobian and R is the residual vector.
456+
/// For model function mode, gradient is -J'R since residuals are defined as (observed - predicted).
457+
/// </summary>
452458
void EvaluateJacobian()
453459
{
454460
if (_coefficients == null)
@@ -467,8 +473,8 @@ void EvaluateJacobian()
467473
else
468474
{
469475
// Calculate Jacobian numerically for residual function
470-
_jacobianValue = NumericalJacobianForResidual(Point);
471-
FunctionEvaluations += _accuracyOrder * NumberOfParameters;
476+
_jacobianValue = NumericalJacobianForResidual(Point, out var evaluations);
477+
FunctionEvaluations += evaluations;
472478
}
473479
}
474480
else
@@ -483,8 +489,8 @@ void EvaluateJacobian()
483489
else
484490
{
485491
// numerical jacobian
486-
_jacobianValue = NumericalJacobian(Point, ModelValues, _accuracyOrder);
487-
FunctionEvaluations += _accuracyOrder * NumberOfParameters;
492+
_jacobianValue = NumericalJacobian(Point, out var evaluations);
493+
FunctionEvaluations += evaluations;
488494
}
489495

490496
// Apply weights to jacobian in model function mode
@@ -516,199 +522,155 @@ void EvaluateJacobian()
516522
}
517523
}
518524

519-
// Gradient, g = -J'W(y − f(x; p)) = -J'L(L'E) = -J'LR
520-
_gradientValue = -_jacobianValue.Transpose() * _residuals;
525+
// Gradient calculation with sign dependent on mode
526+
if (_useDirectResiduals)
527+
{
528+
// For direct residual mode: g = J'R
529+
// When using a direct residual function R(p), the gradient is J'R
530+
_gradientValue = _jacobianValue.Transpose() * _residuals;
531+
}
532+
else
533+
{
534+
// For model function mode: g = -J'R
535+
// When using a model function with residuals defined as (observed - predicted),
536+
// the gradient includes a negative sign
537+
_gradientValue = -_jacobianValue.Transpose() * _residuals;
538+
}
521539

522540
// approximated Hessian, H = J'WJ + ∑LRiHi ~ J'WJ near the minimum
523541
_hessianValue = _jacobianValue.Transpose() * _jacobianValue;
524542
}
525543

526544
/// <summary>
527-
/// Calculate numerical Jacobian for model function using finite differences
545+
/// Calculates the Jacobian matrix using numerical differentiation with finite differences.
546+
/// The accuracy order determines which finite difference formula to use.
528547
/// </summary>
529-
/// <param name="parameters">Current parameter values</param>
530-
/// <param name="currentValues">Current model values at the parameters</param>
531-
/// <param name="accuracyOrder">Order of accuracy for finite difference formula</param>
532-
/// <returns>Jacobian matrix of partial derivatives</returns>
533-
Matrix<double> NumericalJacobian(Vector<double> parameters, Vector<double> currentValues, int accuracyOrder = 2)
548+
/// <param name="parameters">The current parameter values</param>
549+
/// <param name="evaluationCount">Returns the number of function evaluations performed</param>
550+
/// <returns>The Jacobian matrix of partial derivatives df(x;p)/dp</returns>
551+
Matrix<double> NumericalJacobian(Vector<double> parameters, out int evaluationCount)
534552
{
535-
const double sqrtEpsilon = 1.4901161193847656250E-8; // sqrt(machineEpsilon)
536-
537-
var derivertives = Matrix<double>.Build.Dense(NumberOfObservations, NumberOfParameters);
553+
// Get appropriate finite difference configuration based on _accuracyOrder
554+
var (points, center) = GetFiniteDifferenceConfiguration(_accuracyOrder);
538555

539-
var d = 0.000003 * parameters.PointwiseAbs().PointwiseMaximum(sqrtEpsilon);
556+
// Initialize NumericalJacobian with appropriate configuration
557+
var jacobianCalculator = new NumericalJacobian(points, center);
558+
var derivatives = Matrix<double>.Build.Dense(NumberOfObservations, NumberOfParameters);
559+
evaluationCount = 0;
540560

541-
var h = Vector<double>.Build.Dense(NumberOfParameters);
542-
for (var j = 0; j < NumberOfParameters; j++)
561+
// Process each observation point separately
562+
for (var i = 0; i < NumberOfObservations; i++)
543563
{
544-
h[j] = d[j];
564+
var obsIndex = i; // Capture observation index for the lambda
545565

546-
if (accuracyOrder >= 6)
566+
// Create adapter function that returns the model value for current observation
567+
// when given parameters array
568+
double funcAdapter(double[] p)
547569
{
548-
// f'(x) = {- f(x - 3h) + 9f(x - 2h) - 45f(x - h) + 45f(x + h) - 9f(x + 2h) + f(x + 3h)} / 60h + O(h^6)
549-
var f1 = _modelFunction(parameters - 3 * h, ObservedX);
550-
var f2 = _modelFunction(parameters - 2 * h, ObservedX);
551-
var f3 = _modelFunction(parameters - h, ObservedX);
552-
var f4 = _modelFunction(parameters + h, ObservedX);
553-
var f5 = _modelFunction(parameters + 2 * h, ObservedX);
554-
var f6 = _modelFunction(parameters + 3 * h, ObservedX);
555-
556-
var prime = (-f1 + 9 * f2 - 45 * f3 + 45 * f4 - 9 * f5 + f6) / (60 * h[j]);
557-
derivertives.SetColumn(j, prime);
570+
var paramsVector = Vector<double>.Build.DenseOfArray(p);
571+
var modelValues = _modelFunction(paramsVector, ObservedX);
572+
return modelValues[obsIndex];
558573
}
559-
else if (accuracyOrder == 5)
560-
{
561-
// f'(x) = {-137f(x) + 300f(x + h) - 300f(x + 2h) + 200f(x + 3h) - 75f(x + 4h) + 12f(x + 5h)} / 60h + O(h^5)
562-
var f1 = currentValues;
563-
var f2 = _modelFunction(parameters + h, ObservedX);
564-
var f3 = _modelFunction(parameters + 2 * h, ObservedX);
565-
var f4 = _modelFunction(parameters + 3 * h, ObservedX);
566-
var f5 = _modelFunction(parameters + 4 * h, ObservedX);
567-
var f6 = _modelFunction(parameters + 5 * h, ObservedX);
568-
569-
var prime = (-137 * f1 + 300 * f2 - 300 * f3 + 200 * f4 - 75 * f5 + 12 * f6) / (60 * h[j]);
570-
derivertives.SetColumn(j, prime);
571-
}
572-
else if (accuracyOrder == 4)
573-
{
574-
// f'(x) = {f(x - 2h) - 8f(x - h) + 8f(x + h) - f(x + 2h)} / 12h + O(h^4)
575-
var f1 = _modelFunction(parameters - 2 * h, ObservedX);
576-
var f2 = _modelFunction(parameters - h, ObservedX);
577-
var f3 = _modelFunction(parameters + h, ObservedX);
578-
var f4 = _modelFunction(parameters + 2 * h, ObservedX);
579-
580-
var prime = (f1 - 8 * f2 + 8 * f3 - f4) / (12 * h[j]);
581-
derivertives.SetColumn(j, prime);
582-
}
583-
else if (accuracyOrder == 3)
584-
{
585-
// f'(x) = {-11f(x) + 18f(x + h) - 9f(x + 2h) + 2f(x + 3h)} / 6h + O(h^3)
586-
var f1 = currentValues;
587-
var f2 = _modelFunction(parameters + h, ObservedX);
588-
var f3 = _modelFunction(parameters + 2 * h, ObservedX);
589-
var f4 = _modelFunction(parameters + 3 * h, ObservedX);
590-
591-
var prime = (-11 * f1 + 18 * f2 - 9 * f3 + 2 * f4) / (6 * h[j]);
592-
derivertives.SetColumn(j, prime);
593-
}
594-
else if (accuracyOrder == 2)
595-
{
596-
// f'(x) = {f(x + h) - f(x - h)} / 2h + O(h^2)
597-
var f1 = _modelFunction(parameters + h, ObservedX);
598-
var f2 = _modelFunction(parameters - h, ObservedX);
599574

600-
var prime = (f1 - f2) / (2 * h[j]);
601-
derivertives.SetColumn(j, prime);
602-
}
603-
else
604-
{
605-
// f'(x) = {- f(x) + f(x + h)} / h + O(h)
606-
var f1 = currentValues;
607-
var f2 = _modelFunction(parameters + h, ObservedX);
575+
// Calculate gradient (which is the row of Jacobian for this observation)
576+
var jacobianRow = jacobianCalculator.Evaluate(funcAdapter, parameters.ToArray());
608577

609-
var prime = (-f1 + f2) / h[j];
610-
derivertives.SetColumn(j, prime);
578+
// Store results in derivatives matrix
579+
for (var j = 0; j < NumberOfParameters; j++)
580+
{
581+
derivatives[i, j] = jacobianRow[j];
611582
}
612-
613-
h[j] = 0;
614583
}
615584

616-
return derivertives;
585+
// Get total function evaluation count
586+
evaluationCount = jacobianCalculator.FunctionEvaluations;
587+
588+
return derivatives;
617589
}
618590

619591
/// <summary>
/// Calculates the Jacobian matrix dR(p)/dp for a direct residual function using
/// numerical differentiation. The finite difference stencil is selected from the
/// configured accuracy order (1-6) via GetFiniteDifferenceConfiguration.
/// </summary>
/// <param name="parameters">Current parameter values</param>
/// <param name="evaluationCount">Returns the number of function evaluations performed</param>
/// <returns>Jacobian matrix of partial derivatives dR(p)/dp</returns>
Matrix<double> NumericalJacobianForResidual(Vector<double> parameters, out int evaluationCount)
{
    // Evaluate once only to determine how many residual components there are.
    var residuals = _residualFunction(parameters);
    var residualSize = residuals.Count;

    // Select the finite difference configuration matching the configured accuracy order.
    var (points, center) = GetFiniteDifferenceConfiguration(_accuracyOrder);

    // A single calculator instance is reused for every residual component
    // (consistent with NumericalJacobian for the model-function mode); its
    // FunctionEvaluations property accumulates the total evaluation count.
    var jacobianCalculator = new NumericalJacobian(points, center);
    var derivatives = Matrix<double>.Build.Dense(residualSize, NumberOfParameters);

    // Differentiate each residual component (each row of the Jacobian) separately.
    for (var i = 0; i < residualSize; i++)
    {
        var resIndex = i; // capture the component index for the adapter below

        // Adapter: maps a raw parameter array to the residual component at resIndex.
        // NOTE(review): each call evaluates the FULL residual vector and keeps
        // only one value, so the cost grows with the square of the residual count.
        double funcAdapter(double[] p)
        {
            var paramsVector = Vector<double>.Build.DenseOfArray(p);
            var resValues = _residualFunction(paramsVector);
            return resValues[resIndex];
        }

        // The gradient of the scalar adapter is exactly row i of the Jacobian.
        var jacobianRow = jacobianCalculator.Evaluate(funcAdapter, parameters.ToArray());

        // Copy the computed row into the result matrix.
        for (var j = 0; j < NumberOfParameters; j++)
        {
            derivatives[i, j] = jacobianRow[j];
        }
    }

    // Total scalar function evaluations accumulated over all rows.
    evaluationCount = jacobianCalculator.FunctionEvaluations;

    return derivatives;
}
711642

643+
/// <summary>
/// Returns the finite difference configuration — the number of evaluation points
/// and the index of the center point — used to build a NumericalJacobian for the
/// requested accuracy order.
/// </summary>
/// <param name="accuracyOrder">Accuracy order; values 1-6 are supported. Any other
/// value falls back to the 6th-order (7-point central) configuration.</param>
/// <returns>Tuple of (points count, center position)</returns>
private static (int points, int center) GetFiniteDifferenceConfiguration(int accuracyOrder)
{
    switch (accuracyOrder)
    {
        case 1:
            // 1st order accuracy: 2-point forward difference
            return (2, 0);
        case 2:
            // 2nd order accuracy: 3-point central difference
            return (3, 1);
        case 3:
            // 3rd order accuracy: 4-point asymmetric difference
            return (4, 1);
        case 4:
            // 4th order accuracy: 5-point central difference
            return (5, 2);
        case 5:
            // 5th order accuracy: 6-point asymmetric difference
            return (6, 2);
        case 6:
        default:
            // 6th order accuracy: 7-point central difference.
            // Also the fallback for out-of-range orders (default label last for clarity).
            return (7, 3);
    }
}
673+
712674
#endregion Private Methods
713675
}
714676
}

0 commit comments

Comments
 (0)