
Commit cdfe598

README: LogisticRegressor example corrected (#233)
1 parent 2d2b38d commit cdfe598

4 files changed: +25 additions, −26 deletions

CHANGELOG.md

Lines changed: 3 additions & 0 deletions

````diff
@@ -1,5 +1,8 @@
 # Changelog
 
+## 16.15.1
+- README: LogisticRegressor example corrected
+
 ## 16.15.0
 - `LinearRegressor.BGD` constructor added
 
````

README.md

Lines changed: 18 additions & 22 deletions

````diff
@@ -225,34 +225,30 @@ if the selected hyperparameters are good enough or not:
 
 ```dart
 final createClassifier = (DataFrame samples) =>
-    LogisticRegressor(
+    // BGD stands for "Batch Gradient Descent", meaning that the classifier will use the whole dataset on every
+    // training iteration
+    LogisticRegressor.BGD(
       samples,
       targetColumnName,
-      optimizerType: LinearOptimizerType.gradient,
       iterationsLimit: 90,
       learningRateType: LearningRateType.timeBased,
-      batchSize: samples.rows.length,
       probabilityThreshold: 0.7,
     );
 ```
 
 Let's describe our hyperparameters:
-- `optimizerType` - a type of optimization algorithm that will be used to learn coefficients of our model, this time we
-decided to use a vanilla gradient ascent algorithm
 - `iterationsLimit` - number of learning iterations. The selected optimization algorithm (gradient ascent in our case) will
 be cyclically run this amount of times
 - `learningRateType` - a strategy for learning rate update. In our case, the learning rate will decrease after every
 iteration
-- `batchSize` - the size of data (in rows) that will be used per each iteration. As we have a really small dataset we may use
-full-batch gradient ascent, that's why we used `samples.rows.length` here - the total amount of data.
 - `probabilityThreshold` - lower bound for positive label probability
 
 If we want to evaluate the learning process more thoroughly, we may pass `collectLearningData` argument to the classifier
 constructor:
 
 ```dart
 final createClassifier = (DataFrame samples) =>
-    LogisticRegressor(
+    LogisticRegressor.BGD(
       ...,
       collectLearningData: true,
     );
````
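The `BGD` constructor in this hunk bundles what the removed hyperparameters expressed explicitly: full-batch gradient optimization with a learning rate that shrinks over time. As a rough, library-free sketch of what those settings mean (one-feature toy data and a `rate0 / (1 + decay·t)` schedule are assumptions for illustration, not ml_algo's internals):

```python
import math

def sigmoid(z):
    return 1.0 / (1.0 + math.exp(-z))

def train_logistic_bgd(xs, ys, iterations_limit=90, initial_rate=0.1, decay=0.1):
    """Full-batch gradient descent: every iteration uses the whole dataset."""
    w, b = 0.0, 0.0
    for t in range(iterations_limit):
        # Time-based schedule: the learning rate decreases on every iteration.
        rate = initial_rate / (1.0 + decay * t)
        grad_w = sum((sigmoid(w * x + b) - y) * x for x, y in zip(xs, ys)) / len(xs)
        grad_b = sum((sigmoid(w * x + b) - y) for x, y in zip(xs, ys)) / len(xs)
        w -= rate * grad_w
        b -= rate * grad_b
    return w, b

def predict(w, b, x, probability_threshold=0.7):
    # probabilityThreshold: a sample is labelled positive only if its
    # predicted probability reaches the threshold.
    return 1 if sigmoid(w * x + b) >= probability_threshold else 0

# Toy, linearly separable data (hypothetical).
xs = [-2.0, -1.5, -1.0, 1.0, 1.5, 2.0]
ys = [0, 0, 0, 1, 1, 1]
w, b = train_logistic_bgd(xs, ys)
```

Note the effect of `probabilityThreshold: 0.7`: borderline samples whose predicted probability falls between 0.5 and 0.7 are labelled negative, trading recall for precision on the positive class.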
````diff
@@ -323,22 +319,26 @@ After that we can simply read the model from the file and make predictions:
 ```dart
 import 'dart:io';
 
-final fileName = 'diabetes_classifier.json';
-final file = File(fileName);
-final encodedModel = await file.readAsString();
-final classifier = LogisticRegressor.fromJson(encodedModel);
-final unlabelledData = await fromCsv('some_unlabelled_data.csv');
-final prediction = classifier.predict(unlabelledData);
+void main() async {
+  // ...
+  final fileName = 'diabetes_classifier.json';
+  final file = File(fileName);
+  final encodedModel = await file.readAsString();
+  final classifier = LogisticRegressor.fromJson(encodedModel);
+  final unlabelledData = await fromCsv('some_unlabelled_data.csv');
+  final prediction = classifier.predict(unlabelledData);
 
-print(prediction.header); // ('class variable (0 or 1)')
-print(prediction.rows); // [
+  print(prediction.header); // ('class variable (0 or 1)')
+  print(prediction.rows); // [
 //   (1),
 //   (0),
 //   (0),
 //   (1),
 //   ...,
 //   (1),
 // ]
+  // ...
+}
 ```
 
 Please note that all the hyperparameters that we used to generate the model are persisted as the model's read-only
````
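The restored classifier needs no retraining because the serialized JSON carries the whole model state. A minimal sketch of the same round-trip idea, with a deliberately simplified, hypothetical schema (ml_algo's real JSON layout is richer and also persists every training hyperparameter):

```python
import json
import math
import os
import tempfile

def sigmoid(z):
    return 1.0 / (1.0 + math.exp(-z))

# Hypothetical minimal model state (field names are illustrative only).
model = {
    "coefficients": [0.8, -0.4],
    "intercept": 0.25,
    "probabilityThreshold": 0.7,
}

# Persist the model to a JSON file...
path = os.path.join(tempfile.mkdtemp(), "diabetes_classifier.json")
with open(path, "w") as f:
    json.dump(model, f)

# ...and restore it later, ready to predict without retraining.
with open(path) as f:
    restored = json.load(f)

def predict(m, features):
    z = m["intercept"] + sum(w * x for w, x in zip(m["coefficients"], features))
    return 1 if sigmoid(z) >= m["probabilityThreshold"] else 0
```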
````diff
@@ -368,13 +368,11 @@ void main() async {
   final testData = splits[1];
   final validator = CrossValidator.kFold(validationData, numberOfFolds: 5);
   final createClassifier = (DataFrame samples) =>
-      LogisticRegressor(
+      LogisticRegressor.BGD(
         samples,
         targetColumnName,
-        optimizerType: LinearOptimizerType.gradient,
         iterationsLimit: 90,
         learningRateType: LearningRateType.timeBased,
-        batchSize: samples.rows.length,
         probabilityThreshold: 0.7,
       );
   final scores = await validator.evaluate(createClassifier, MetricType.accuracy);
@@ -413,13 +411,11 @@ void main() async {
   final testData = splits[1];
   final validator = CrossValidator.kFold(validationData, numberOfFolds: 5);
   final createClassifier = (DataFrame samples) =>
-      LogisticRegressor(
+      LogisticRegressor.BGD(
         samples,
         targetColumnName,
-        optimizerType: LinearOptimizerType.gradient,
         iterationsLimit: 90,
         learningRateType: LearningRateType.timeBased,
-        batchSize: samples.rows.length,
         probabilityThreshold: 0.7,
       );
   final scores = await validator.evaluate(createClassifier, MetricType.accuracy);
````
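These hunks keep the `createClassifier` callback pattern: `CrossValidator.kFold` calls the factory once per fold, training a fresh model on k−1 folds and scoring it on the held-out one. A library-free sketch of that loop, with a deliberately trivial majority-vote "classifier" standing in for the factory (all names here are illustrative, not ml_algo's API):

```python
def k_fold_indices(n, k):
    """Split indices 0..n-1 into k near-equal contiguous folds.
    (Real splitters usually shuffle first; omitted here for determinism.)"""
    sizes = [n // k + (1 if i < n % k else 0) for i in range(k)]
    folds, start = [], 0
    for size in sizes:
        folds.append(list(range(start, start + size)))
        start += size
    return folds

def majority_classifier(train_labels):
    """Stand-in for createClassifier: always predicts the majority class."""
    label = 1 if 2 * sum(train_labels) >= len(train_labels) else 0
    return lambda _sample: label

def evaluate_accuracy(samples, labels, create_classifier, k=5):
    """Rough analogue of validator.evaluate(createClassifier, MetricType.accuracy):
    returns one accuracy score per fold."""
    scores = []
    for fold in k_fold_indices(len(samples), k):
        held_out = set(fold)
        train_labels = [labels[i] for i in range(len(labels)) if i not in held_out]
        model = create_classifier(train_labels)           # fresh model per fold
        correct = sum(1 for i in fold if model(samples[i]) == labels[i])
        scores.append(correct / len(fold))
    return scores

samples = list(range(10))  # feature values are irrelevant for the stand-in model
labels = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
scores = evaluate_accuracy(samples, labels, majority_classifier)
```

The per-fold scores make the variance of the estimate visible, which a single train/test split would hide.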

e2e/logistic_regressor/logistic_regressor_bgd_test.dart

Lines changed: 3 additions & 3 deletions

````diff
@@ -4,7 +4,7 @@ import 'package:ml_linalg/vector.dart';
 import 'package:test/test.dart';
 
 Future<Vector> evaluateLogisticRegressor(MetricType metric, DType dtype) {
-  final samples = getPimaIndiansDiabetesDataFrame().shuffle(seed: 12);
+  final samples = getPimaIndiansDiabetesDataFrame().shuffle();
   final numberOfFolds = 5;
   final validator = CrossValidator.kFold(
     samples,
@@ -14,8 +14,8 @@ Future<Vector> evaluateLogisticRegressor(MetricType metric, DType dtype) {
     trainSamples,
     'Outcome',
     iterationsLimit: 50,
-    initialLearningRate: 1e-4,
-    learningRateType: LearningRateType.constant,
+    decay: .1,
+    learningRateType: LearningRateType.timeBased,
     dtype: dtype,
   );
 
````

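The e2e test now uses a time-based schedule with `decay: .1` instead of a constant learning rate. A small sketch of how the two strategies differ over iterations (the `rate0 / (1 + decay·t)` formula is a common convention and an assumption here, not necessarily ml_algo's exact schedule):

```python
def constant_rate(rate0, t):
    # LearningRateType.constant: the step size never changes.
    return rate0

def time_based_rate(rate0, t, decay=0.1):
    # LearningRateType.timeBased: the step size shrinks on every iteration,
    # so early steps explore and later steps fine-tune.
    return rate0 / (1.0 + decay * t)

constant_schedule = [constant_rate(1.0, t) for t in range(4)]
decayed_schedule = [round(time_based_rate(1.0, t), 3) for t in range(4)]
```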
pubspec.yaml

Lines changed: 1 addition & 1 deletion

````diff
@@ -1,6 +1,6 @@
 name: ml_algo
 description: Machine learning algorithms, Machine learning models performance evaluation functionality
-version: 16.15.0
+version: 16.15.1
 homepage: https://github.com/gyrdym/ml_algo
 
 environment:
````
