Skip to content

Commit 2c8359d

Browse files
committed
* Add RunningWeightedStatistics class, and tests.
* Extend DescriptiveStatistics to cope with weighted samples.
1 parent 368b598 commit 2c8359d

File tree

5 files changed

+1039
-4
lines changed

5 files changed

+1039
-4
lines changed

src/Numerics.Tests/StatisticsTests/DescriptiveStatisticsTests.cs

Lines changed: 233 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929

3030
using System;
3131
using System.Collections.Generic;
32+
using System.Linq;
3233
#if NET5_0_OR_GREATER
3334
using System.Text.Json;
3435
using System.Text.Json.Serialization;
@@ -75,7 +76,10 @@ public void ConstructorThrowArgumentNullException()
7576
{
7677
const IEnumerable<double> Data = null;
7778
const IEnumerable<double?> NullableData = null;
79+
const IEnumerable<Tuple<double, double>> WeightedData = null;
7880

81+
Assert.That(() => new DescriptiveStatistics(WeightedData), Throws.TypeOf<ArgumentNullException>());
82+
Assert.That(() => new DescriptiveStatistics(WeightedData, true), Throws.TypeOf<ArgumentNullException>());
7983
Assert.That(() => new DescriptiveStatistics(Data), Throws.TypeOf<ArgumentNullException>());
8084
Assert.That(() => new DescriptiveStatistics(Data, true), Throws.TypeOf<ArgumentNullException>());
8185
Assert.That(() => new DescriptiveStatistics(NullableData), Throws.TypeOf<ArgumentNullException>());
@@ -114,6 +118,7 @@ public void IEnumerableDouble(string dataSet, int digits, double skewness, doubl
114118
Assert.AreEqual(stats.Minimum, min);
115119
Assert.AreEqual(stats.Maximum, max);
116120
Assert.AreEqual(stats.Count, count);
121+
Assert.AreEqual(stats.TotalWeight, count);
117122
}
118123

119124
/// <summary>
@@ -145,6 +150,7 @@ public void IEnumerableDoubleHighAccuracy(string dataSet, double skewness, doubl
145150
Assert.AreEqual(stats.Minimum, min);
146151
Assert.AreEqual(stats.Maximum, max);
147152
Assert.AreEqual(stats.Count, count);
153+
Assert.AreEqual(stats.TotalWeight, count);
148154
}
149155

150156
/// <summary>
@@ -177,6 +183,7 @@ public void IEnumerableDoubleLowAccuracy(string dataSet, int digits, double skew
177183
Assert.AreEqual(stats.Minimum, min);
178184
Assert.AreEqual(stats.Maximum, max);
179185
Assert.AreEqual(stats.Count, count);
186+
Assert.AreEqual(stats.TotalWeight, count);
180187
}
181188

182189
/// <summary>
@@ -209,6 +216,7 @@ public void IEnumerableNullableDouble(string dataSet, int digits, double skewnes
209216
Assert.AreEqual(stats.Minimum, min);
210217
Assert.AreEqual(stats.Maximum, max);
211218
Assert.AreEqual(stats.Count, count);
219+
Assert.AreEqual(stats.TotalWeight, count);
212220
}
213221

214222
/// <summary>
@@ -240,6 +248,7 @@ public void IEnumerableNullableDoubleHighAccuracy(string dataSet, double skewnes
240248
Assert.AreEqual(stats.Minimum, min);
241249
Assert.AreEqual(stats.Maximum, max);
242250
Assert.AreEqual(stats.Count, count);
251+
Assert.AreEqual(stats.TotalWeight, count);
243252
}
244253

245254
/// <summary>
@@ -272,6 +281,205 @@ public void IEnumerableNullableDoubleLowAccuracy(string dataSet, int digits, dou
272281
Assert.AreEqual(stats.Minimum, min);
273282
Assert.AreEqual(stats.Maximum, max);
274283
Assert.AreEqual(stats.Count, count);
284+
Assert.AreEqual(stats.TotalWeight, count);
285+
}
286+
287+
/// <summary>
288+
/// <c>IEnumerable</c> of (weight, value) <c>Tuple</c> with unit weights.
289+
/// </summary>
290+
/// <param name="dataSet">Dataset name.</param>
291+
/// <param name="digits">Digits count.</param>
292+
/// <param name="skewness">Skewness value.</param>
293+
/// <param name="kurtosis">Kurtosis value.</param>
294+
/// <param name="median">Median value.</param>
295+
/// <param name="min">Min value.</param>
296+
/// <param name="max">Max value.</param>
297+
/// <param name="count">Count value.</param>
298+
[TestCase("lottery", 12, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
299+
[TestCase("lew", 12, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
300+
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
301+
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
302+
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
303+
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
304+
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
305+
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
306+
[TestCase("meixner", 8, -0.016649617280859657, 0.8171318629552635, -0.002042931016531602, -4.825626912281697, 5.3018298664184913, 10000)]
307+
public void IEnumerableTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
308+
{
309+
var data = _data[dataSet];
310+
var stats = new DescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)));
311+
312+
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10);
313+
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
314+
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 8);
315+
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 8);
316+
Assert.AreEqual(stats.Minimum, min);
317+
Assert.AreEqual(stats.Maximum, max);
318+
Assert.AreEqual(stats.Count, count);
319+
Assert.AreEqual(stats.TotalWeight, count);
320+
}
321+
322+
/// <summary>
323+
/// <c>IEnumerable</c> of (weight, value) <c>Tuple</c> with unit weights, high accuracy.
324+
/// </summary>
325+
/// <param name="dataSet">Dataset name.</param>
326+
/// <param name="skewness">Skewness value.</param>
327+
/// <param name="kurtosis">Kurtosis value.</param>
328+
/// <param name="median">Median value.</param>
329+
/// <param name="min">Min value.</param>
330+
/// <param name="max">Max value.</param>
331+
/// <param name="count">Count value.</param>
332+
[TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
333+
[TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
334+
[TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
335+
[TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
336+
[TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)]
337+
[TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
338+
[TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
339+
[TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
340+
public void IEnumerableTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count)
341+
{
342+
var data = _data[dataSet];
343+
var stats = new DescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)), true);
344+
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
345+
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14);
346+
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9);
347+
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9);
348+
Assert.AreEqual(stats.Minimum, min);
349+
Assert.AreEqual(stats.Maximum, max);
350+
Assert.AreEqual(stats.Count, count);
351+
Assert.AreEqual(stats.TotalWeight, count);
352+
}
353+
354+
/// <summary>
355+
/// <c>IEnumerable</c> of (weight, value) <c>Tuple</c> with unit weights, low accuracy.
356+
/// </summary>
357+
/// <param name="dataSet">Dataset name.</param>
358+
/// <param name="digits">Digits count.</param>
359+
/// <param name="skewness">Skewness value.</param>
360+
/// <param name="kurtosis">Kurtosis value.</param>
361+
/// <param name="median">Median value.</param>
362+
/// <param name="min">Min value.</param>
363+
/// <param name="max">Max value.</param>
364+
/// <param name="count">Count value.</param>
365+
[TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
366+
[TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
367+
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
368+
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
369+
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
370+
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
371+
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
372+
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
373+
public void IEnumerableTupleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
374+
{
375+
var data = _data[dataSet];
376+
var stats = new DescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)), false);
377+
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
378+
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
379+
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7);
380+
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7);
381+
Assert.AreEqual(stats.Minimum, min);
382+
Assert.AreEqual(stats.Maximum, max);
383+
Assert.AreEqual(stats.Count, count);
384+
Assert.AreEqual(stats.TotalWeight, count);
385+
}
386+
387+
/// <summary>
388+
/// <c>IEnumerable</c> of (weight, value) <c>Tuple</c> where null entries are replaced by zero-weight tuples.
389+
/// </summary>
390+
/// <param name="dataSet">Dataset name.</param>
391+
/// <param name="digits">Digits count.</param>
392+
/// <param name="skewness">Skewness value.</param>
393+
/// <param name="kurtosis">Kurtosis value.</param>
394+
/// <param name="median">Median value.</param>
395+
/// <param name="min">Min value.</param>
396+
/// <param name="max">Max value.</param>
397+
/// <param name="count">Count value.</param>
398+
[TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
399+
[TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
400+
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
401+
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
402+
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
403+
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
404+
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
405+
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
406+
public void IEnumerableZeroWeightTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
407+
{
408+
var data = _data[dataSet];
409+
var stats = new DescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)));
410+
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
411+
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
412+
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7);
413+
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7);
414+
Assert.AreEqual(stats.Minimum, min);
415+
Assert.AreEqual(stats.Maximum, max);
416+
Assert.AreEqual(stats.Count, count);
417+
Assert.AreEqual(stats.TotalWeight, count);
418+
}
419+
420+
/// <summary>
421+
/// <c>IEnumerable</c> of (weight, value) <c>Tuple</c> where null entries are replaced by zero-weight tuples, high accuracy.
422+
/// </summary>
423+
/// <param name="dataSet">Dataset name.</param>
424+
/// <param name="skewness">Skewness value.</param>
425+
/// <param name="kurtosis">Kurtosis value.</param>
426+
/// <param name="median">Median value.</param>
427+
/// <param name="min">Min value.</param>
428+
/// <param name="max">Max value.</param>
429+
/// <param name="count">Count value.</param>
430+
[TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
431+
[TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
432+
[TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
433+
[TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
434+
[TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)]
435+
[TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
436+
[TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
437+
[TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
438+
public void IEnumerableZeroWeightTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count)
439+
{
440+
var data = _data[dataSet];
441+
var stats = new DescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)), true);
442+
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
443+
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14);
444+
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9);
445+
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9);
446+
Assert.AreEqual(stats.Minimum, min);
447+
Assert.AreEqual(stats.Maximum, max);
448+
Assert.AreEqual(stats.Count, count);
449+
Assert.AreEqual(stats.TotalWeight, count);
450+
}
451+
452+
/// <summary>
453+
/// <c>IEnumerable</c> of (weight, value) <c>Tuple</c> where null entries are replaced by zero-weight tuples, low accuracy.
454+
/// </summary>
455+
/// <param name="dataSet">Dataset name.</param>
456+
/// <param name="digits">Digits count.</param>
457+
/// <param name="skewness">Skewness value.</param>
458+
/// <param name="kurtosis">Kurtosis value.</param>
459+
/// <param name="median">Median value.</param>
460+
/// <param name="min">Min value.</param>
461+
/// <param name="max">Max value.</param>
462+
/// <param name="count">Count value.</param>
463+
[TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
464+
[TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
465+
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
466+
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
467+
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
468+
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
469+
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
470+
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
471+
public void IEnumerableZeroWeightTupleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
472+
{
473+
var data = _data[dataSet];
474+
var stats = new DescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)), false);
475+
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
476+
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
477+
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7);
478+
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7);
479+
Assert.AreEqual(stats.Minimum, min);
480+
Assert.AreEqual(stats.Maximum, max);
481+
Assert.AreEqual(stats.Count, count);
482+
Assert.AreEqual(stats.TotalWeight, count);
275483
}
276484

277485
[Test]
@@ -296,6 +504,26 @@ public void ShortSequences()
296504
var stats4 = new DescriptiveStatistics(new[] { 1.0, 2.0, -3.0, -4.0 });
297505
Assert.That(stats4.Skewness, Is.Not.NaN);
298506
Assert.That(stats4.Kurtosis, Is.Not.NaN);
507+
508+
var stats5 = new DescriptiveStatistics(new Tuple<double, double>[0]);
509+
Assert.That(stats5.Skewness, Is.NaN);
510+
Assert.That(stats5.Kurtosis, Is.NaN);
511+
512+
var stats6 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0) });
513+
Assert.That(stats6.Skewness, Is.NaN);
514+
Assert.That(stats6.Kurtosis, Is.NaN);
515+
516+
var stats7 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0) });
517+
Assert.That(stats7.Skewness, Is.NaN);
518+
Assert.That(stats7.Kurtosis, Is.NaN);
519+
520+
var stats8 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, -3.0) });
521+
Assert.That(stats8.Skewness, Is.Not.NaN);
522+
Assert.That(stats8.Kurtosis, Is.NaN);
523+
524+
var stats9 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, -3.0), Tuple.Create(1.0, -4.0) });
525+
Assert.That(stats9.Skewness, Is.Not.NaN);
526+
Assert.That(stats9.Kurtosis, Is.Not.NaN);
299527
}
300528

301529
[Test]
@@ -304,6 +532,10 @@ public void ZeroVarianceSequence()
304532
var stats = new DescriptiveStatistics(new[] { 2.0, 2.0, 2.0, 2.0 });
305533
Assert.That(stats.Skewness, Is.NaN);
306534
Assert.That(stats.Kurtosis, Is.NaN);
535+
536+
var stats2 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0) });
537+
Assert.That(stats2.Skewness, Is.NaN);
538+
Assert.That(stats2.Kurtosis, Is.NaN);
307539
}
308540

309541
#if NET5_0_OR_GREATER
@@ -345,6 +577,7 @@ public void JsonDeserializationTest(string dataSet, int digits, double skewness,
345577
Assert.AreEqual(stats.Minimum, min);
346578
Assert.AreEqual(stats.Maximum, max);
347579
Assert.AreEqual(stats.Count, count);
580+
Assert.AreEqual(stats.TotalWeight, count);
348581
}
349582
#endif
350583
}

0 commit comments

Comments
 (0)