Skip to content

Commit 42bad58

Browse files
committed
Skewness and excess kurtosis.
1 parent 40090d8 commit 42bad58

File tree

4 files changed

+127
-34
lines changed

4 files changed

+127
-34
lines changed

ext/stats/moments.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package stats
22

33
import "math"
44

5+
// Fisher–Pearson skewness and kurtosis using
56
// Terriberry's algorithm with Kahan summation:
67
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics
78

@@ -45,6 +46,10 @@ func (m moments) skewness_samp() float64 {
4546
}
4647

4748
func (m moments) kurtosis_pop() float64 {
49+
return m.raw_kurtosis_pop() - 3
50+
}
51+
52+
func (m moments) raw_kurtosis_pop() float64 {
4853
m2 := m.m2.hi
4954
if div := m2 * m2; div != 0 {
5055
return m.m4.hi * float64(m.n) / div
@@ -53,9 +58,15 @@ func (m moments) kurtosis_pop() float64 {
5358
}
5459

5560
func (m moments) kurtosis_samp() float64 {
61+
n := m.n
62+
k := math.FMA(m.raw_kurtosis_pop(), float64(n+1), float64(3-3*n))
63+
return k * float64(n-1) / float64((n-2)*(n-3))
64+
}
65+
66+
func (m moments) raw_kurtosis_samp() float64 {
5667
n := m.n
5768
// https://mathworks.com/help/stats/kurtosis.html#f4975293
58-
k := math.FMA(m.kurtosis_pop(), float64(n+1), float64(3-3*n))
69+
k := math.FMA(m.raw_kurtosis_pop(), float64(n+1), float64(3-3*n))
5970
return math.FMA(k, float64(n-1)/float64((n-2)*(n-3)), 3)
6071
}
6172

ext/stats/moments_test.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ func Test_moments(t *testing.T) {
1414
if !math.IsNaN(s1.skewness_pop()) {
1515
t.Errorf("want NaN")
1616
}
17-
if !math.IsNaN(s1.kurtosis_pop()) {
17+
if !math.IsNaN(s1.raw_kurtosis_pop()) {
1818
t.Errorf("want NaN")
1919
}
2020

@@ -29,16 +29,16 @@ func Test_moments(t *testing.T) {
2929
s1.enqueue(+3.5784)
3030
s1.enqueue(+2.7694)
3131

32-
if got := float32(s1.skewness_pop()); got != 0.106098293 {
32+
if got := s1.skewness_pop(); float32(got) != 0.106098293 {
3333
t.Errorf("got %v, want 0.1061", got)
3434
}
35-
if got := float32(s1.skewness_samp()); got != 0.1258171 {
35+
if got := s1.skewness_samp(); float32(got) != 0.1258171 {
3636
t.Errorf("got %v, want 0.1258", got)
3737
}
38-
if got := float32(s1.kurtosis_pop()); got != 2.3121266 {
38+
if got := s1.raw_kurtosis_pop(); float32(got) != 2.3121266 {
3939
t.Errorf("got %v, want 2.3121", got)
4040
}
41-
if got := float32(s1.kurtosis_samp()); got != 2.7482237 {
41+
if got := s1.raw_kurtosis_samp(); float32(got) != 2.7482237 {
4242
t.Errorf("got %v, want 2.7483", got)
4343
}
4444

@@ -72,16 +72,16 @@ func Test_moments(t *testing.T) {
7272
s1.dequeue(math.E)
7373
s1.dequeue(math.Sqrt2)
7474

75-
if got := float32(s1.skewness_pop()); got != 0.106098293 {
75+
if got := s1.skewness_pop(); float32(got) != 0.106098293 {
7676
t.Errorf("got %v, want 0.1061", got)
7777
}
78-
if got := float32(s1.skewness_samp()); got != 0.1258171 {
78+
if got := s1.skewness_samp(); float32(got) != 0.1258171 {
7979
t.Errorf("got %v, want 0.1258", got)
8080
}
81-
if got := float32(s1.kurtosis_pop()); got != 2.3121266 {
81+
if got := s1.raw_kurtosis_pop(); float32(got) != 2.3121266 {
8282
t.Errorf("got %v, want 2.3121", got)
8383
}
84-
if got := float32(s1.kurtosis_samp()); got != 2.7482237 {
84+
if got := s1.raw_kurtosis_samp(); float32(got) != 2.7482237 {
8585
t.Errorf("got %v, want 2.7483", got)
8686
}
8787
}

ext/stats/stats.go

Lines changed: 89 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
// Package stats provides aggregate functions for statistics.
22
//
33
// Provided functions:
4-
// - stddev_pop: population standard deviation
5-
// - stddev_samp: sample standard deviation
64
// - var_pop: population variance
75
// - var_samp: sample variance
6+
// - stddev_pop: population standard deviation
7+
// - stddev_samp: sample standard deviation
8+
// - skewness_pop: Pearson population skewness
9+
// - skewness_samp: Pearson sample skewness
10+
// - kurtosis_pop: Fisher population excess kurtosis
11+
// - kurtosis_samp: Fisher sample excess kurtosis
812
// - covar_pop: population covariance
913
// - covar_samp: sample covariance
10-
// - corr: correlation coefficient
14+
// - corr: Pearson correlation coefficient
1115
// - regr_r2: correlation coefficient squared
1216
// - regr_avgx: average of the independent variable
1317
// - regr_avgy: average of the dependent variable
@@ -61,6 +65,10 @@ func Register(db *sqlite3.Conn) error {
6165
db.CreateWindowFunction("var_samp", 1, flags, newVariance(var_samp)),
6266
db.CreateWindowFunction("stddev_pop", 1, flags, newVariance(stddev_pop)),
6367
db.CreateWindowFunction("stddev_samp", 1, flags, newVariance(stddev_samp)),
68+
db.CreateWindowFunction("skewness_pop", 1, flags, newMoments(skewness_pop)),
69+
db.CreateWindowFunction("skewness_samp", 1, flags, newMoments(skewness_samp)),
70+
db.CreateWindowFunction("kurtosis_pop", 1, flags, newMoments(kurtosis_pop)),
71+
db.CreateWindowFunction("kurtosis_samp", 1, flags, newMoments(kurtosis_samp)),
6472
db.CreateWindowFunction("covar_pop", 2, flags, newCovariance(var_pop)),
6573
db.CreateWindowFunction("covar_samp", 2, flags, newCovariance(var_samp)),
6674
db.CreateWindowFunction("corr", 2, flags, newCovariance(corr)),
@@ -88,6 +96,10 @@ const (
8896
var_samp
8997
stddev_pop
9098
stddev_samp
99+
skewness_pop
100+
skewness_samp
101+
kurtosis_pop
102+
kurtosis_samp
91103
corr
92104
regr_r2
93105
regr_sxx
@@ -101,6 +113,23 @@ const (
101113
regr_json
102114
)
103115

116+
func special(kind int, n int64) (null, zero bool) {
117+
switch kind {
118+
case var_pop, stddev_pop, regr_sxx, regr_syy, regr_sxy:
119+
return n <= 0, n == 1
120+
case regr_avgx, regr_avgy:
121+
return n <= 0, false
122+
case kurtosis_samp:
123+
return n <= 3, false
124+
case skewness_samp:
125+
return n <= 2, false
126+
case skewness_pop:
127+
return n <= 1, n == 2
128+
default:
129+
return n <= 1, false
130+
}
131+
}
132+
104133
// newVariance returns a factory that builds a fresh variance
// aggregate, preset to compute the given kind, on every call.
func newVariance(kind int) func() sqlite3.AggregateFunction {
	factory := func() sqlite3.AggregateFunction {
		agg := &variance{kind: kind}
		return agg
	}
	return factory
}
@@ -111,14 +140,11 @@ type variance struct {
111140
}
112141

113142
func (fn *variance) Value(ctx sqlite3.Context) {
114-
switch fn.n {
115-
case 1:
116-
switch fn.kind {
117-
case var_pop, stddev_pop:
118-
ctx.ResultFloat(0)
119-
}
143+
switch null, zero := special(fn.kind, fn.n); {
144+
case zero:
145+
ctx.ResultFloat(0)
120146
return
121-
case 0:
147+
case null:
122148
return
123149
}
124150

@@ -166,18 +192,11 @@ func (fn *covariance) Value(ctx sqlite3.Context) {
166192
ctx.ResultInt64(fn.regr_count())
167193
return
168194
}
169-
switch fn.n {
170-
case 1:
171-
switch fn.kind {
172-
case var_pop, stddev_pop, regr_sxx, regr_syy, regr_sxy:
173-
ctx.ResultFloat(0)
174-
return
175-
case regr_avgx, regr_avgy:
176-
break
177-
default:
178-
return
179-
}
180-
case 0:
195+
switch null, zero := special(fn.kind, fn.n); {
196+
case zero:
197+
ctx.ResultFloat(0)
198+
return
199+
case null:
181200
return
182201
}
183202

@@ -234,3 +253,51 @@ func (fn *covariance) Inverse(ctx sqlite3.Context, arg ...sqlite3.Value) {
234253
fn.dequeue(fa, fb)
235254
}
236255
}
256+
257+
func newMoments(kind int) func() sqlite3.AggregateFunction {
258+
return func() sqlite3.AggregateFunction { return &momentfn{kind: kind} }
259+
}
260+
261+
type momentfn struct {
262+
kind int
263+
moments
264+
}
265+
266+
func (fn *momentfn) Value(ctx sqlite3.Context) {
267+
switch null, zero := special(fn.kind, fn.n); {
268+
case zero:
269+
ctx.ResultFloat(0)
270+
return
271+
case null:
272+
return
273+
}
274+
275+
var r float64
276+
switch fn.kind {
277+
case skewness_pop:
278+
r = fn.skewness_pop()
279+
case skewness_samp:
280+
r = fn.skewness_samp()
281+
case kurtosis_pop:
282+
r = fn.kurtosis_pop()
283+
case kurtosis_samp:
284+
r = fn.kurtosis_samp()
285+
}
286+
ctx.ResultFloat(r)
287+
}
288+
289+
func (fn *momentfn) Step(ctx sqlite3.Context, arg ...sqlite3.Value) {
290+
a := arg[0]
291+
f := a.Float()
292+
if f != 0.0 || a.NumericType() != sqlite3.NULL {
293+
fn.enqueue(f)
294+
}
295+
}
296+
297+
func (fn *momentfn) Inverse(ctx sqlite3.Context, arg ...sqlite3.Value) {
298+
a := arg[0]
299+
f := a.Float()
300+
if f != 0.0 || a.NumericType() != sqlite3.NULL {
301+
fn.dequeue(f)
302+
}
303+
}

ext/stats/stats_test.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ func TestRegister_variance(t *testing.T) {
4949
SELECT
5050
sum(x), avg(x),
5151
var_samp(x), var_pop(x),
52-
stddev_samp(x), stddev_pop(x)
52+
stddev_samp(x), stddev_pop(x),
53+
skewness_samp(x), skewness_pop(x),
54+
kurtosis_samp(x), kurtosis_pop(x)
5355
FROM data`)
5456
if err != nil {
5557
t.Fatal(err)
@@ -73,13 +75,26 @@ func TestRegister_variance(t *testing.T) {
7375
if got := stmt.ColumnFloat(5); got != math.Sqrt(22.5) {
7476
t.Errorf("got %v, want √22.5", got)
7577
}
78+
if got := stmt.ColumnFloat(6); got != 0 {
79+
t.Errorf("got %v, want zero", got)
80+
}
81+
if got := stmt.ColumnFloat(7); got != 0 {
82+
t.Errorf("got %v, want zero", got)
83+
}
84+
if got := stmt.ColumnFloat(8); float32(got) != -3.3 {
85+
t.Errorf("got %v, want -3.3", got)
86+
}
87+
if got := stmt.ColumnFloat(9); got != -1.64 {
88+
t.Errorf("got %v, want -1.64", got)
89+
}
7690
}
7791
stmt.Close()
7892

7993
stmt, _, err = db.Prepare(`
8094
SELECT
8195
var_samp(x) OVER (ROWS 1 PRECEDING),
82-
var_pop(x) OVER (ROWS 1 PRECEDING)
96+
var_pop(x) OVER (ROWS 1 PRECEDING),
97+
skewness_pop(x) OVER (ROWS 1 PRECEDING)
8398
FROM data`)
8499
if err != nil {
85100
t.Fatal(err)

0 commit comments

Comments
 (0)