@@ -188,6 +188,9 @@ abstract class LogisticRegressor
188188 dtype: dtype,
189189 );
190190
 191+ /// Creates a [LogisticRegressor] instance based on the Stochastic
 192+ /// Gradient Descent algorithm
193+ ///
191194 /// Parameters:
192195 ///
193196 /// [trainingData] Observations that will be used by the classifier to learn
@@ -346,6 +349,160 @@ abstract class LogisticRegressor
346349 dtype: dtype,
347350 );
348351
352+ /// Creates a [LogisticRegressor] instance based on Batch Gradient Descent
353+ /// algorithm
354+ ///
355+ /// Parameters:
356+ ///
357+ /// [trainingData] Observations that will be used by the classifier to learn
358+ /// the coefficients. Must contain [targetName] column.
359+ ///
360+ /// [targetName] A string that serves as a name of the target column (a
361+ /// column that contains class labels or outcomes for the associated
362+ /// features).
363+ ///
364+ /// [learningRateType] A value defining a strategy for the learning rate
365+ /// behaviour throughout the whole fitting process.
366+ ///
367+ /// [iterationsLimit] A number of fitting iterations. Uses as a condition of
368+ /// convergence in the optimization algorithm. Default value is `100` .
369+ ///
370+ /// [initialLearningRate] The initial value defining velocity of the convergence of the
371+ /// gradient descent optimizer. Default value is `1e-3` .
372+ ///
373+ /// [decay] The value meaning "speed" of learning rate decrease. Applicable only
374+ /// for [LearningRateType.timeBased] , [LearningRateType.stepBased] , and
375+ /// [LearningRateType.exponential] strategies
376+ ///
377+ /// [dropRate] The value that is used as a number of learning iterations after
378+ /// which the learning rate will be decreased. The value is applicable only for
379+ /// [LearningRateType.stepBased] learning rate; it will be omitted for other
380+ /// learning rate strategies
381+ ///
382+ /// [minCoefficientsUpdate] A minimum distance between coefficient vectors in
383+ /// two contiguous iterations. Uses as a condition of convergence in the
384+ /// optimization algorithm. If a difference between the two vectors is small
385+ /// enough, there is no reason to continue fitting. Default value is `1e-12`
386+ ///
387+ /// [probabilityThreshold] A probability on the basis of which it is decided,
388+ /// whether an observation relates to positive class label (see
389+ /// [positiveLabel] parameter) or to negative class label (see [negativeLabel]
390+ /// parameter). The greater the probability, the more strict the classifier
391+ /// is. Default value is `0.5` .
392+ ///
393+ /// [lambda] A coefficient of regularization. Uses to prevent the regressor's
394+ /// overfitting. The more the value of [lambda] , the more regular the
395+ /// coefficients of the equation of the predicting hyperplane are. Extremely
396+ /// large [lambda] may decrease the coefficients to nothing, otherwise too
397+ /// small [lambda] may be a cause of too large absolute values of the
398+ /// coefficients, that is also bad.
399+ ///
400+ /// [fitIntercept] Whether or not to fit intercept term. Default value is
401+ /// `false` . Intercept in 2-dimensional space is a bias of the line (relative
402+ /// to X-axis).
403+ ///
404+ /// [interceptScale] A value, defining a size of the intercept.
405+ ///
406+ /// [initialCoefficientsType] Defines the coefficients that will be
407+ /// autogenerated at the first optimization iteration. By default
408+ /// all the autogenerated coefficients are equal to zeroes. If
409+ /// [initialCoefficients] are provided, the parameter will be ignored
410+ ///
411+ /// [initialCoefficients] Coefficients to be used in the first iteration of
412+ /// optimization algorithm. [initialCoefficients] is a vector, length of which
413+ /// must be equal to the number of features in [trainingData] : in case of
414+ /// logistic regression only one column from [trainingData] is used as a
415+ /// prediction target column, thus the number of features is equal to
416+ /// the number of columns in [trainingData] minus 1 (target column). Keep in
417+ /// mind, that if your model considers intercept term, [initialCoefficients]
418+ /// should contain an extra element in the beginning of the vector and it
419+ /// denotes the intercept term coefficient
420+ ///
421+ /// [positiveLabel] A value that will be used for the positive class.
422+ /// By default, `1` .
423+ ///
424+ /// [negativeLabel] A value that will be used for the negative class.
425+ /// By default, `0` .
426+ ///
427+ /// [collectLearningData] Whether or not to collect learning data, for
428+ /// instance cost function value per each iteration. Affects performance much.
429+ /// If [collectLearningData] is true, one may access [costPerIteration]
430+ /// getter in order to evaluate learning process more thoroughly. Default value
431+ /// is `false`
432+ ///
433+ /// [dtype] A data type for all the numeric values, used by the algorithm. Can
434+ /// affect performance or accuracy of the computations. Default value is
435+ /// [DType.float32]
436+ ///
437+ /// Example:
438+ ///
439+ /// ```dart
440+ /// import 'package:ml_algo/ml_algo.dart';
441+ /// import 'package:ml_dataframe/ml_dataframe.dart';
442+ ///
443+ /// void main() {
444+ /// final samples = getPimaIndiansDiabetesDataFrame().shuffle(seed: 12);
445+ /// final model = LogisticRegressor.BGD(
446+ /// samples,
447+ /// 'Outcome',
448+ /// iterationsLimit: 50,
449+ /// initialLearningRate: 1e-4,
450+ /// learningRateType: LearningRateType.constant,
451+ /// dtype: dtype,
452+ /// );
453+ /// }
454+ /// ```
455+ ///
456+ /// Keep in mind that you need to select a proper learning rate strategy for
457+ /// every particular model. For more details, refer to [LearningRateType] ,
458+ /// also consider [decay] and [dropRate] parameters.
459+ factory LogisticRegressor .BGD (
460+ DataFrame trainingData,
461+ String targetName, {
462+ required LearningRateType learningRateType,
463+ int iterationsLimit = iterationLimitDefaultValue,
464+ double initialLearningRate = initialLearningRateDefaultValue,
465+ double decay = decayDefaultValue,
466+ int dropRate = dropRateDefaultValue,
467+ double minCoefficientsUpdate = minCoefficientsUpdateDefaultValue,
468+ double probabilityThreshold = probabilityThresholdDefaultValue,
469+ double lambda = lambdaDefaultValue,
470+ bool fitIntercept = fitInterceptDefaultValue,
471+ double interceptScale = interceptScaleDefaultValue,
472+ InitialCoefficientsType initialCoefficientsType =
473+ initialCoefficientsTypeDefaultValue,
474+ num positiveLabel = positiveLabelDefaultValue,
475+ num negativeLabel = negativeLabelDefaultValue,
476+ bool collectLearningData = collectLearningDataDefaultValue,
477+ DType dtype = dTypeDefaultValue,
478+ Vector ? initialCoefficients,
479+ }) =>
480+ initLogisticRegressorModule ().get <LogisticRegressorFactory >().create (
481+ trainData: trainingData,
482+ targetName: targetName,
483+ optimizerType: LinearOptimizerType .gradient,
484+ iterationsLimit: iterationsLimit,
485+ initialLearningRate: initialLearningRate,
486+ decay: decay,
487+ dropRate: dropRate,
488+ minCoefficientsUpdate: minCoefficientsUpdate,
489+ probabilityThreshold: probabilityThreshold,
490+ lambda: lambda,
491+ regularizationType: RegularizationType .L2 ,
492+ batchSize: trainingData.shape.first,
493+ fitIntercept: fitIntercept,
494+ interceptScale: interceptScale,
495+ isFittingDataNormalized: false ,
496+ learningRateType: learningRateType,
497+ initialCoefficientsType: initialCoefficientsType,
498+ initialCoefficients:
499+ initialCoefficients ?? Vector .empty (dtype: dtype),
500+ positiveLabel: positiveLabel,
501+ negativeLabel: negativeLabel,
502+ collectLearningData: collectLearningData,
503+ dtype: dtype,
504+ );
505+
349506 /// Restores previously fitted classifier instance from the [json]
350507 ///
 351508 /// ```dart
0 commit comments