| 1 | +import 'package:ml_algo/src/cost_function/cost_function.dart'; |
| 2 | +import 'package:ml_algo/src/linear_optimizer/linear_optimizer.dart'; |
| 3 | +import 'package:ml_linalg/matrix.dart'; |
| 4 | +import 'package:xrange/xrange.dart'; |
| 5 | + |
| 6 | +class LeastSquaresNewtonOptimizer implements LinearOptimizer { |
| 7 | + LeastSquaresNewtonOptimizer( |
| 8 | + {required Matrix features, |
| 9 | + required Matrix labels, |
| 10 | + required CostFunction costFunction, |
| 11 | + required int iterationLimit, |
| 12 | + required num minCoefficientsUpdate, |
| 13 | + num lambda = 0}) |
| 14 | + : _features = features, |
| 15 | + _labels = labels, |
| 16 | + _costFunction = costFunction, |
| 17 | + _iterations = integers(0, iterationLimit), |
| 18 | + _minCoefficientsUpdate = minCoefficientsUpdate, |
| 19 | + _lambda = lambda; |
| 20 | + |
| 21 | + final Matrix _features; |
| 22 | + final Matrix _labels; |
| 23 | + final CostFunction _costFunction; |
| 24 | + final Iterable<int> _iterations; |
| 25 | + final List<num> _costPerIteration = []; |
| 26 | + final num _lambda; |
| 27 | + final num _minCoefficientsUpdate; |
| 28 | + |
| 29 | + @override |
| 30 | + List<num> get costPerIteration => _costPerIteration; |
| 31 | + |
| 32 | + @override |
| 33 | + Matrix findExtrema( |
| 34 | + {Matrix? initialCoefficients, |
| 35 | + bool isMinimizingObjective = true, |
| 36 | + bool collectLearningData = false}) { |
| 37 | + var dtype = _features.dtype; |
| 38 | + var coefficients = initialCoefficients ?? |
| 39 | + Matrix.column(List.filled(_features.first.length, 0), dtype: dtype); |
| 40 | + var prevCoefficients = coefficients; |
| 41 | + var coefficientsUpdate = double.maxFinite; |
| 42 | + |
| 43 | + final regularizingTerm = |
| 44 | + Matrix.scalar(_lambda.toDouble(), _features.columnsNum, dtype: dtype); |
| 45 | +    // The Hessian of the least squares cost function does not depend on |
| 46 | +    // the coefficient vector, so it stays constant throughout the whole |
| 47 | +    // optimization procedure. Compute it, together with its (regularized) |
| 48 | +    // inverse, only once, before the iterations start: |
| 49 | + final hessian = _costFunction.getHessian(_features, coefficients, _labels); |
| 50 | + final regularizedInverseHessian = _lambda == 0 |
| 51 | + ? hessian.inverse() |
| 52 | + : (hessian + regularizingTerm).inverse(); |
| 53 | + |
| 54 | + for (final _ in _iterations) { |
| 55 | + if (coefficientsUpdate.isNaN || |
| 56 | + coefficientsUpdate <= _minCoefficientsUpdate) { |
| 57 | + break; |
| 58 | + } |
| 59 | + |
| 60 | + final gradient = |
| 61 | + _costFunction.getGradient(_features, coefficients, _labels); |
| 62 | + |
| 63 | + coefficients = coefficients - regularizedInverseHessian * gradient; |
| 64 | + coefficientsUpdate = (coefficients - prevCoefficients).norm(); |
| 65 | + prevCoefficients = coefficients; |
| 66 | + |
| 67 | + if (collectLearningData) { |
| 68 | + final cost = _costFunction.getCost(_features, coefficients, _labels); |
| 69 | + |
| 70 | + _costPerIteration.add(cost); |
| 71 | + } |
| 72 | + } |
| 73 | + |
| 74 | + return coefficients; |
| 75 | + } |
| 76 | +} |
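For reference, the update implemented above is Newton's method applied to the least squares objective, with an optional λI term added to the Hessian to regularize the inversion. A short sketch of the underlying math, assuming the conventional ½‖Xw − y‖² cost (the exact scaling depends on the CostFunction implementation that gets injected):

$$
J(w) = \tfrac{1}{2}\lVert Xw - y \rVert^2,\qquad
\nabla J(w) = X^\top (Xw - y),\qquad
\nabla^2 J(w) = X^\top X
$$

$$
w_{k+1} = w_k - \left(X^\top X + \lambda I\right)^{-1} \nabla J(w_k)
$$

Since the Hessian X^T X contains no coefficients, its regularized inverse can be computed once before the loop, and each iteration only has to recompute the gradient.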
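Below is a minimal, self-contained sketch (not part of the commit) that reproduces the same step with plain ml_linalg matrices. The toy feature and label values are made up, and the ½‖Xw − y‖² cost is an assumption for illustration; the optimizer above instead delegates gradient and Hessian computation to the injected CostFunction.

```dart
import 'package:ml_linalg/matrix.dart';

void main() {
  // Toy data: 3 samples, 2 features (values chosen purely for illustration).
  final x = Matrix.fromList([
    [1.0, 2.0],
    [1.0, 3.0],
    [1.0, 5.0],
  ]);
  final y = Matrix.column([2.0, 3.0, 5.0]);
  const lambda = 0.1;

  // The Hessian of 1/2 * ||Xw - y||^2 is X^T X; it contains no coefficients,
  // so its regularized inverse is computed once, exactly as in the optimizer.
  final hessian = x.transpose() * x;
  final regularizedInverseHessian =
      (hessian + Matrix.scalar(lambda, x.columnsNum)).inverse();

  var coefficients = Matrix.column([0.0, 0.0]);

  // A few Newton steps; only the gradient X^T (Xw - y) changes per iteration.
  for (var i = 0; i < 3; i++) {
    final gradient = x.transpose() * (x * coefficients - y);
    coefficients = coefficients - regularizedInverseHessian * gradient;
  }

  print(coefficients);
}
```

In this quadratic setting, a single step with lambda = 0 lands exactly on the least squares solution, while a positive lambda only damps the step, so several iterations are needed before the coefficient update drops below the stopping threshold.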