Skip to content

Commit 870fe51

Browse files
committed
feat: add incremental Pearson correlation coefficient accumulator that ignores NaN values
1 parent 8f9dfb3 commit 870fe51

File tree

12 files changed

+1342
-0
lines changed

12 files changed

+1342
-0
lines changed
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
<!--
2+
3+
@license Apache-2.0
4+
5+
Copyright (c) 2018 The Stdlib Authors.
6+
7+
Licensed under the Apache License, Version 2.0 (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
19+
-->
20+
21+
# incrnanpcorr
22+
23+
> Compute a [sample Pearson product-moment correlation coefficient][pearson-correlation] incrementally, ignoring `NaN` values.
24+
25+
<section class="intro">
26+
27+
The [Pearson product-moment correlation coefficient][pearson-correlation] between random variables `X` and `Y` is defined as
28+
29+
<!-- <equation class="equation" label="eq:pearson_correlation_coefficient" align="center" raw="\rho_{X,Y} = \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y}" alt="Equation for the Pearson product-moment correlation coefficient."> -->
30+
31+
```math
32+
\rho_{X,Y} = \frac{\mathop{\mathrm{cov}}(X,Y)}{\sigma_X \sigma_Y}
33+
```
34+
35+
<!-- <div class="equation" align="center" data-raw-text="\rho_{X,Y} = \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y}" data-equation="eq:pearson_correlation_coefficient">
36+
<img src="https://cdn.jsdelivr.net/gh/stdlib-js/stdlib@49d8cabda84033d55d7b8069f19ee3dd8b8d1496/lib/node_modules/@stdlib/stats/incr/nanpcorr/docs/img/equation_pearson_correlation_coefficient.svg" alt="Equation for the Pearson product-moment correlation coefficient.">
37+
<br>
38+
</div> -->
39+
40+
<!-- </equation> -->
41+
42+
where the numerator is the [covariance][covariance] and the denominator is the product of the respective standard deviations.
43+
44+
For a sample of size `n`, the [sample Pearson product-moment correlation coefficient][pearson-correlation] is defined as
45+
46+
<!-- <equation class="equation" label="eq:sample_pearson_correlation_coefficient" align="center" raw="r = \frac{\displaystyle\sum_{i=0}^{n-1} (x_i - \bar{x})(y_i - \bar{y})}{\displaystyle\sqrt{\sum_{i=0}^{n-1} (x_i - \bar{x})^2} \sqrt{\sum_{i=0}^{n-1} (y_i - \bar{y})^2}}" alt="Equation for the sample Pearson product-moment correlation coefficient."> -->
47+
48+
```math
49+
r = \frac{\displaystyle\sum_{i=0}^{n-1} (x_i - \bar{x})(y_i - \bar{y})}{\displaystyle\sqrt{\sum_{i=0}^{n-1} (x_i - \bar{x})^2} \sqrt{\sum_{i=0}^{n-1} (y_i - \bar{y})^2}}
50+
```
51+
52+
<!-- <div class="equation" align="center" data-raw-text="r = \frac{\displaystyle\sum_{i=0}^{n-1} (x_i - \bar{x})(y_i - \bar{y})}{\displaystyle\sqrt{\sum_{i=0}^{n-1} (x_i - \bar{x})^2} \sqrt{\sum_{i=0}^{n-1} (y_i - \bar{y})^2}}" data-equation="eq:sample_pearson_correlation_coefficient">
53+
<img src="https://cdn.jsdelivr.net/gh/stdlib-js/stdlib@49d8cabda84033d55d7b8069f19ee3dd8b8d1496/lib/node_modules/@stdlib/stats/incr/nanpcorr/docs/img/equation_sample_pearson_correlation_coefficient.svg" alt="Equation for the sample Pearson product-moment correlation coefficient.">
54+
<br>
55+
</div> -->
56+
57+
<!-- </equation> -->
58+
59+
</section>
60+
61+
<!-- /.intro -->
62+
63+
<section class="usage">
64+
65+
## Usage
66+
67+
```javascript
68+
var incrnanpcorr = require( '@stdlib/stats/incr/nanpcorr' );
69+
```
70+
71+
#### incrnanpcorr( \[mx, my] )
72+
73+
Returns an accumulator `function` which incrementally computes a [sample Pearson product-moment correlation coefficient][pearson-correlation], ignoring `NaN` values.
74+
75+
```javascript
76+
var accumulator = incrnanpcorr();
77+
```
78+
79+
If the means are already known, provide `mx` and `my` arguments.
80+
81+
```javascript
82+
var accumulator = incrnanpcorr( 3.0, -5.5 );
83+
```
84+
85+
#### accumulator( \[x, y] )
86+
87+
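If provided input values `x` and `y`, the accumulator function returns an updated [sample correlation coefficient][pearson-correlation]. If either `x` or `y` is `NaN`, the pair is ignored and the accumulator function returns the current [sample correlation coefficient][pearson-correlation]. If not provided input values `x` and `y`, the accumulator function returns the current [sample correlation coefficient][pearson-correlation].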
88+
89+
```javascript
90+
var accumulator = incrnanpcorr();
91+
92+
var v = accumulator( 2.0, 1.0 );
93+
// returns 0.0
94+
95+
v = accumulator( NaN, 1.0 );
96+
// returns 0.0
97+
98+
v = accumulator( 1.0, -5.0 );
99+
// returns 1.0
100+
101+
v = accumulator( 1.0, NaN );
102+
// returns 1.0
103+
104+
v = accumulator( 3.0, 3.14 );
105+
// returns ~0.965
106+
107+
v = accumulator();
108+
// returns ~0.965
109+
```
110+
111+
</section>
112+
113+
<!-- /.usage -->
114+
115+
<section class="notes">
116+
117+
## Notes
118+
119+
- Input values are **not** type checked. If provided `NaN` or a value which, when used in computations, results in `NaN`, the value is ignored. You are nevertheless advised to type check and handle non-numeric values accordingly **before** passing them to the accumulator function.
120+
121+
</section>
122+
123+
<!-- /.notes -->
124+
125+
<section class="examples">
126+
127+
## Examples
128+
129+
<!-- eslint no-undef: "error" -->
130+
131+
```javascript
132+
var randu = require( '@stdlib/random/base/randu' );
133+
var incrnanpcorr = require( '@stdlib/stats/incr/nanpcorr' );
134+
var isnan = require( '@stdlib/math/base/assert/is-nan' );
135+
136+
var accumulator;
137+
var r;
138+
var x;
139+
var y;
140+
var i;
141+
142+
// Initialize an accumulator:
143+
accumulator = incrnanpcorr();
144+
145+
// For each simulated datum, update the sample Pearson correlation coefficient...
146+
console.log( '\nx\ty\tCorrelation Coefficient\n' );
147+
for ( i = 0; i < 100; i++ ) {
148+
x = (randu() < 0.2) ? NaN : randu()*100.0; // ~20% NaN values assigned to `x`
149+
y = (randu() < 0.2) ? NaN : randu()*100.0; // ~20% NaN values assigned to `y`
150+
r = accumulator( x, y );
151+
}
152+
console.log( accumulator() );
153+
```
154+
155+
</section>
156+
157+
<!-- /.examples -->
158+
159+
<!-- Section for related `stdlib` packages. Do not manually edit this section, as it is automatically populated. -->
160+
161+
<section class="related">
162+
163+
* * *
164+
165+
## See Also
166+
167+
- <span class="package-name">[`@stdlib/stats/incr/covariance`][@stdlib/stats/incr/covariance]</span><span class="delimiter">: </span><span class="description">compute an unbiased sample covariance incrementally.</span>
168+
- <span class="package-name">[`@stdlib/stats/incr/mpcorr`][@stdlib/stats/incr/mpcorr]</span><span class="delimiter">: </span><span class="description">compute a moving sample Pearson product-moment correlation coefficient incrementally.</span>
169+
- <span class="package-name">[`@stdlib/stats/incr/summary`][@stdlib/stats/incr/summary]</span><span class="delimiter">: </span><span class="description">compute a statistical summary incrementally.</span>
170+
171+
</section>
172+
173+
<!-- /.related -->
174+
175+
<!-- Section for all links. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
176+
177+
<section class="links">
178+
179+
[pearson-correlation]: https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
180+
181+
[covariance]: https://en.wikipedia.org/wiki/Covariance
182+
183+
<!-- <related-links> -->
184+
185+
[@stdlib/stats/incr/covariance]: https://github.com/stdlib-js/stdlib/tree/develop/lib/node_modules/%40stdlib/stats/incr/covariance
186+
187+
[@stdlib/stats/incr/mpcorr]: https://github.com/stdlib-js/stdlib/tree/develop/lib/node_modules/%40stdlib/stats/incr/mpcorr
188+
189+
[@stdlib/stats/incr/summary]: https://github.com/stdlib-js/stdlib/tree/develop/lib/node_modules/%40stdlib/stats/incr/summary
190+
191+
<!-- </related-links> -->
192+
193+
</section>
194+
195+
<!-- /.links -->
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2018 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var bench = require( '@stdlib/bench' );
24+
var randu = require( '@stdlib/random/base/randu' );
25+
var pkg = require( './../package.json' ).name;
26+
var incrnanpcorr = require( './../lib' );
27+
28+
29+
// MAIN //
30+
31+
bench( pkg, function benchmark( b ) {
	var created;
	var k;

	// Timed region: create one accumulator per iteration and sanity-check its type.
	b.tic();
	k = 0;
	while ( k < b.iterations ) {
		created = incrnanpcorr();
		if ( typeof created !== 'function' ) {
			b.fail( 'should return a function' );
		}
		k += 1;
	}
	b.toc();

	// Check the last created value once more, outside of the timed region.
	if ( typeof created !== 'function' ) {
		b.fail( 'should return a function' );
	}
	b.pass( 'benchmark finished' );
	b.end();
});
48+
49+
bench( pkg+'::accumulator', function benchmark( b ) {
	var update;
	var result;
	var k;

	// Create the accumulator (unknown means) before timing starts.
	update = incrnanpcorr();

	// Timed region: feed one pseudorandom pair per iteration.
	b.tic();
	k = 0;
	while ( k < b.iterations ) {
		result = update( randu(), randu() );
		if ( result !== result ) { // only `NaN` is not equal to itself
			b.fail( 'should not return NaN' );
		}
		k += 1;
	}
	b.toc();

	// Check the final result once more, outside of the timed region.
	if ( result !== result ) {
		b.fail( 'should not return NaN' );
	}
	b.pass( 'benchmark finished' );
	b.end();
});
70+
71+
bench( pkg+'::accumulator,known_means', function benchmark( b ) {
	var update;
	var result;
	var k;

	// Create the accumulator with both means supplied up front (`mx = 3.0`, `my = -2.0`).
	update = incrnanpcorr( 3.0, -2.0 );

	// Timed region: feed one pseudorandom pair per iteration.
	b.tic();
	k = 0;
	while ( k < b.iterations ) {
		result = update( randu(), randu() );
		if ( result !== result ) { // only `NaN` is not equal to itself
			b.fail( 'should not return NaN' );
		}
		k += 1;
	}
	b.toc();

	// Check the final result once more, outside of the timed region.
	if ( result !== result ) {
		b.fail( 'should not return NaN' );
	}
	b.pass( 'benchmark finished' );
	b.end();
});
Lines changed: 48 additions & 0 deletions
Loading

0 commit comments

Comments
 (0)