Skip to content

Commit 6488bc5

Browse files
committed
MATH-1671: Update stat.descriptive package to use Commons Statistics
Removes redundant classes.

descriptive.moment:
- FirstMoment
- FourthMoment
- GeometricMean
- Kurtosis
- SecondMoment
- Skewness
- StandardDeviation
- ThirdMoment

Mean + Variance have been changed to only implement the weighted evaluation interface.

descriptive.rank:
- Min
- Max

descriptive.summary:
- Sum
- SumOfLogs
- SumOfSquares

Product has been changed to only implement the weighted evaluation interface.

The utility class StatUtils has been updated to delegate all calls to Commons Statistics. Legacy Math exceptions have been preserved.

Removes methods to compute the variance using an existing mean:

    public static double variance(double[] values, double mean, int begin, int length)
    public static double variance(double[] values, double mean)
    public static double populationVariance(double[] values, double mean, int begin, int length)
    public static double populationVariance(double[] values, double mean)

Note: StatUtils has inconsistent documentation of what to return for an empty array. The documentation states NaN but StatUtilsTest requires otherwise:

    Sum-of-squares = 0
    Product = 1
    Sum-of-logs = 0

This is inconsistent and has been updated to NaN for all statistics.

The class MultivariateSummaryStatistics has been updated with partial implementations of StorelessUnivariateStatistic that delegate to Commons Statistics.

Some test classes have been updated to pass the build after removal of the statistic implementations.
1 parent e7ab52e commit 6488bc5

File tree

56 files changed

+970
-4524
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+970
-4524
lines changed

commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/ml/clustering/KMeansPlusPlusClusterer.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,9 @@
2424
import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
2525
import org.apache.commons.math4.legacy.ml.distance.DistanceMeasure;
2626
import org.apache.commons.math4.legacy.ml.distance.EuclideanDistance;
27-
import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance;
2827
import org.apache.commons.rng.UniformRandomProvider;
2928
import org.apache.commons.rng.simple.RandomSource;
30-
29+
import org.apache.commons.statistics.descriptive.Variance;
3130
import java.util.ArrayList;
3231
import java.util.Collection;
3332
import java.util.Collections;
@@ -431,11 +430,11 @@ private T getPointFromLargestVarianceCluster(final Collection<CentroidCluster<T>
431430

432431
// compute the distance variance of the current cluster
433432
final Clusterable center = cluster.getCenter();
434-
final Variance stat = new Variance();
433+
final Variance stat = Variance.create();
435434
for (final T point : cluster.getPoints()) {
436-
stat.increment(distance(point, center));
435+
stat.accept(distance(point, center));
437436
}
438-
final double variance = stat.getResult();
437+
final double variance = stat.getAsDouble();
439438

440439
// select the cluster with the largest variance
441440
if (variance > maxVariance) {

commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/ml/clustering/evaluation/SumOfClusterVariances.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import org.apache.commons.math4.legacy.ml.clustering.Clusterable;
2424
import org.apache.commons.math4.legacy.ml.clustering.ClusterEvaluator;
2525
import org.apache.commons.math4.legacy.ml.distance.DistanceMeasure;
26-
import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance;
26+
import org.apache.commons.statistics.descriptive.Variance;
2727

2828
/**
2929
* Computes the sum of intra-cluster distance variances according to the formula:
@@ -56,12 +56,12 @@ public double score(List<? extends Cluster<? extends Clusterable>> clusters) {
5656
final Clusterable center = cluster.centroid();
5757

5858
// compute the distance variance of the current cluster
59-
final Variance stat = new Variance();
59+
final Variance stat = Variance.create();
6060
for (final Clusterable point : cluster.getPoints()) {
61-
stat.increment(distance(point, center));
61+
stat.accept(distance(point, center));
6262
}
6363

64-
varianceSum += stat.getResult();
64+
varianceSum += stat.getAsDouble();
6565
}
6666
}
6767
return varianceSum;

commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/optim/nonlinear/scalar/SimulatedAnnealing.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import java.util.function.BiFunction;
2020
import java.util.function.DoublePredicate;
2121
import org.apache.commons.rng.UniformRandomProvider;
22-
import org.apache.commons.math4.legacy.stat.descriptive.moment.StandardDeviation;
22+
import org.apache.commons.statistics.descriptive.StandardDeviation;
2323
import org.apache.commons.math4.legacy.optim.OptimizationData;
2424
import org.apache.commons.math4.legacy.optim.nonlinear.scalar.noderiv.Simplex;
2525

@@ -164,11 +164,11 @@ static CoolingSchedule aarstAndVanLaarhoven(final double delta) {
164164

165165
return (previousTemperature, simplex) -> {
166166
// Standard deviation of the values of the objective function.
167-
final StandardDeviation stddev = new StandardDeviation();
167+
final StandardDeviation stddev = StandardDeviation.create();
168168
for (int i = 0; i < simplex.getSize(); i++) {
169-
stddev.increment(simplex.get(i).getValue());
169+
stddev.accept(simplex.get(i).getValue());
170170
}
171-
final double sigma = stddev.getResult();
171+
final double sigma = stddev.getAsDouble();
172172

173173
final double a = previousTemperature * Math.log(1 + delta);
174174
final double b = 3 * sigma;

0 commit comments

Comments
 (0)