Skip to content

Commit 4eee050

Browse files
committed
Use Heer's visitPoints for all linear and transformed linear regressions
1 parent fb9adb7 commit 4eee050

File tree

7 files changed

+166
-185
lines changed

7 files changed

+166
-185
lines changed

README.md

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Calculate statistical regressions from two-dimensional data. [![Build Status](ht
77
If you use NPM, `npm install d3-regression`. Otherwise, download the [latest release](https://github.com/HarryStevens/d3-regression/raw/master/dist/d3-regression.zip). AMD, CommonJS, and vanilla environments are supported. In vanilla, a d3 global is exported:
88

99
```html
10-
<script src="https://unpkg.com/d3-regression@1.2.7/dist/d3-regression.min.js"></script>
10+
<script src="https://unpkg.com/d3-regression@1.2.8/dist/d3-regression.min.js"></script>
1111
<script>
1212
1313
const regression = d3.regressionLinear()
@@ -33,15 +33,15 @@ Creates a new linear regression generator with default [<em>x</em>-](#linear_x)
3333

3434
[<img alt="Linear regression" src="https://raw.githubusercontent.com/harrystevens/d3-regression/master/img/linear.png" width="250">](https://observablehq.com/@harrystevens/linear-regression)
3535

36-
<a name="_linear" href="#_linear">#</a> <i>linear</i>(<i>data</i>) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/linear.js#L8 "Source")
36+
<a name="_linear" href="#_linear">#</a> <i>linear</i>(<i>data</i>) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/linear.js#L9 "Source")
3737

3838
Computes the linear regression, which takes the form <em>y</em> = <em>ax</em> + <em>b</em>, for the specified *data* points, ignoring points with invalid values (null, undefined, NaN, Infinity).
3939

4040
Returns a line represented as an array of two points, where each point is an array of two numbers representing the point's coordinates.
4141

4242
Also returns properties <em>a</em> and <em>b</em>, representing the equation's coefficients, and <em>rSquared</em>, representing the coefficient of determination. Lastly, returns a <em>predict</em> property, which is a function that outputs a <em>y</em>-coordinate given an input <em>x</em>-coordinate.
4343

44-
<a name="linear_x" href="#linear_x">#</a> <i>linear</i>.<b>x</b>([<i>x</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/linear.js#L66 "Source")
44+
<a name="linear_x" href="#linear_x">#</a> <i>linear</i>.<b>x</b>([<i>x</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/linear.js#L47 "Source")
4545

4646
If <em>x</em> is specified, sets the <em>x</em>-coordinate accessor, which is passed the current datum (<em>d</em>), the current index (<em>i</em>), and the entire data array (<em>data</em>). If <em>x</em> is not specified, returns the current <em>x</em>-coordinate accessor, which defaults to:
4747

@@ -51,7 +51,7 @@ function x(d, i, data) {
5151
}
5252
```
5353

54-
<a name="linear_y" href="#linear_y">#</a> <i>linear</i>.<b>y</b>([<i>y</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/linear.js#L70 "Source")
54+
<a name="linear_y" href="#linear_y">#</a> <i>linear</i>.<b>y</b>([<i>y</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/linear.js#L51 "Source")
5555

5656
If <em>y</em> is specified, sets the <em>y</em>-coordinate accessor, which is passed the current datum (<em>d</em>), the current index (<em>i</em>), and the entire data array (<em>data</em>). If <em>y</em> is not specified, returns the current <em>y</em>-coordinate accessor, which defaults to:
5757

@@ -61,7 +61,7 @@ function y(d, i, data) {
6161
}
6262
```
6363

64-
<a name="linear_domain" href="#linear_domain">#</a> <i>linear</i>.<b>domain</b>([<i>domain</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/linear.js#L62 "Source")
64+
<a name="linear_domain" href="#linear_domain">#</a> <i>linear</i>.<b>domain</b>([<i>domain</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/linear.js#L43 "Source")
6565

6666
If <em>domain</em> is specified, sets the minimum and maximum <em>x</em>-coordinates of the returned line to the specified array of numbers. The array must contain two elements. If the elements in the given array are not numbers, they will be coerced to numbers. If <em>domain</em> is not specified, returns a copy of the regression generator’s current domain.
6767

@@ -73,23 +73,23 @@ Creates a new exponential regression generator with default [<em>x</em>-](#exp_x
7373

7474
[<img alt="Exponential regression" src="https://raw.githubusercontent.com/harrystevens/d3-regression/master/img/exponential-2.png" width="250">](https://observablehq.com/@harrystevens/exponential-regression)
7575

76-
<a name="_exponential" href="#_exponential">#</a> <i>exp</i>(<i>data</i>) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/exponential.js#L9 "Source")
76+
<a name="_exponential" href="#_exponential">#</a> <i>exp</i>(<i>data</i>) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/exponential.js#L10 "Source")
7777

7878
Computes the exponential regression, which takes the form <em>y</em> = <em>ae</em><sup><em>bx</em></sup>, for the specified *data* points, ignoring points with invalid values (null, undefined, NaN, Infinity).
7979

8080
Returns a smooth line represented as an array of points, where each point is an array of two numbers representing the point's coordinates.
8181

8282
Also returns properties <em>a</em> and <em>b</em>, representing the equation's coefficients, and <em>rSquared</em>, representing the coefficient of determination. Lastly, returns a <em>predict</em> property, which is a function that outputs a <em>y</em>-coordinate given an input <em>x</em>-coordinate.
8383

84-
<a name="exp_x" href="#exp_x">#</a> <i>exp</i>.<b>x</b>([<i>x</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/exponential.js#L58 "Source")
84+
<a name="exp_x" href="#exp_x">#</a> <i>exp</i>.<b>x</b>([<i>x</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/exponential.js#L52 "Source")
8585

8686
See [<em>linear</em>.x()](#linear_x).
8787

88-
<a name="exp_y" href="#exp_y">#</a> <i>exp</i>.<b>y</b>([<i>y</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/exponential.js#L62 "Source")
88+
<a name="exp_y" href="#exp_y">#</a> <i>exp</i>.<b>y</b>([<i>y</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/exponential.js#L56 "Source")
8989

9090
See [<em>linear</em>.y()](#linear_y).
9191

92-
<a name="exp_domain" href="#exp_domain">#</a> <i>exp</i>.<b>domain</b>([<i>domain</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/exponential.js#L54 "Source")
92+
<a name="exp_domain" href="#exp_domain">#</a> <i>exp</i>.<b>domain</b>([<i>domain</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/exponential.js#L48 "Source")
9393

9494
See [<em>linear</em>.domain()](#linear_domain).
9595

@@ -99,23 +99,23 @@ Creates a new logarithmic regression generator with default [<em>x</em>-](#log_x
9999

100100
[<img alt="Logarithmic regression" src="https://raw.githubusercontent.com/harrystevens/d3-regression/master/img/logarithmic.png" width="250">](https://observablehq.com/@harrystevens/logarithmic-regression)
101101

102-
<a name="_log" href="#_log">#</a> <i>log</i>(<i>data</i>) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/logarithmic.js#L9 "Source")
102+
<a name="_log" href="#_log">#</a> <i>log</i>(<i>data</i>) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/logarithmic.js#L10 "Source")
103103

104104
Computes the logarithmic regression, which takes the form <em>y</em> = <em>a</em> · ln(<em>x</em>) + <em>b</em>, for the specified *data* points, ignoring points with invalid values (null, undefined, NaN, Infinity).
105105

106106
Returns a smooth line represented as an array of points, where each point is an array of two numbers representing the point's coordinates.
107107

108108
Also returns properties <em>a</em> and <em>b</em>, representing the equation's coefficients, and <em>rSquared</em>, representing the coefficient of determination. Lastly, returns a <em>predict</em> property, which is a function that outputs a <em>y</em>-coordinate given an input <em>x</em>-coordinate.
109109

110-
<a name="log_x" href="#log_x">#</a> <i>log</i>.<b>x</b>([<i>x</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/logarithmic.js#L59 "Source")
110+
<a name="log_x" href="#log_x">#</a> <i>log</i>.<b>x</b>([<i>x</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/logarithmic.js#L49 "Source")
111111

112112
See [<em>linear</em>.x()](#linear_x).
113113

114-
<a name="log_y" href="#log_y">#</a> <i>log</i>.<b>y</b>([<i>y</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/logarithmic.js#L63 "Source")
114+
<a name="log_y" href="#log_y">#</a> <i>log</i>.<b>y</b>([<i>y</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/logarithmic.js#L53 "Source")
115115

116116
See [<em>linear</em>.y()](#linear_y).
117117

118-
<a name="log_domain" href="#log_domain">#</a> <i>log</i>.<b>domain</b>([<i>domain</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/logarithmic.js#L55 "Source")
118+
<a name="log_domain" href="#log_domain">#</a> <i>log</i>.<b>domain</b>([<i>domain</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/logarithmic.js#L45 "Source")
119119

120120
See [<em>linear</em>.domain()](#linear_domain).
121121

@@ -183,23 +183,23 @@ Creates a new power law regression generator with default [<em>x</em>-](#pow_x)
183183

184184
[<img alt="Power law regression" src="https://raw.githubusercontent.com/harrystevens/d3-regression/master/img/power.png" width="250">](https://observablehq.com/@harrystevens/power-law-regression)
185185

186-
<a name="_pow" href="#_pow">#</a> <i>pow</i>(<i>data</i>) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/power.js#L9 "Source")
186+
<a name="_pow" href="#_pow">#</a> <i>pow</i>(<i>data</i>) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/power.js#L10 "Source")
187187

188188
Computes the power law regression, which takes the form <em>y</em> = <em>a</em><em>x</em><sup><em>b</em></sup>, for the specified *data* points, ignoring points with invalid values (null, undefined, NaN, Infinity).
189189

190190
Returns a smooth line represented as an array of points, where each point is an array of two numbers representing the point's coordinates.
191191

192192
Also returns properties <em>a</em> and <em>b</em>, representing the equation's coefficients, and <em>rSquared</em>, representing the coefficient of determination. Lastly, returns a <em>predict</em> property, which is a function that outputs a <em>y</em>-coordinate given an input <em>x</em>-coordinate.
193193

194-
<a name="pow_x" href="#pow_x">#</a> <i>pow</i>.<b>x</b>([<i>x</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/power.js#L61 "Source")
194+
<a name="pow_x" href="#pow_x">#</a> <i>pow</i>.<b>x</b>([<i>x</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/power.js#L51 "Source")
195195

196196
See [<em>linear</em>.x()](#linear_x).
197197

198-
<a name="pow_y" href="#pow_y">#</a> <i>pow</i>.<b>y</b>([<i>y</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/power.js#L65 "Source")
198+
<a name="pow_y" href="#pow_y">#</a> <i>pow</i>.<b>y</b>([<i>y</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/power.js#L55 "Source")
199199

200200
See [<em>linear</em>.y()](#linear_y).
201201

202-
<a name="pow_domain" href="#pow_domain">#</a> <i>pow</i>.<b>domain</b>([<i>domain</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/power.js#L57 "Source")
202+
<a name="pow_domain" href="#pow_domain">#</a> <i>pow</i>.<b>domain</b>([<i>domain</i>]) · [Source](https://github.com/harrystevens/d3-regression/blob/master/src/power.js#L47 "Source")
203203

204204
See [<em>linear</em>.domain()](#linear_domain).
205205

dist/d3-regression.js

Lines changed: 69 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
// Given a dataset, x- and y-accessors, the sum of the y values, and a predict function,
99
// return the coefficient of determination, or R squared.
10-
function determination(data, x, y, ySum, predict) {
10+
function determination(data, x, y, Y, predict) {
1111
var n = data.length;
1212
var SSE = 0,
1313
SST = 0;
@@ -18,7 +18,7 @@
1818
dy = y(d),
1919
yComp = predict(dx);
2020
SSE += Math.pow(dy - yComp, 2);
21-
SST += Math.pow(dy - ySum / n, 2);
21+
SST += Math.pow(dy - Y / n, 2);
2222
}
2323

2424
return 1 - SSE / SST;
@@ -74,6 +74,23 @@
7474
}
7575
}
7676

77+
// Adapted from vega-statistics by Jeffrey Heer
78+
// License: https://github.com/vega/vega/blob/f058b099decad9db78301405dd0d2e9d8ba3d51a/LICENSE
79+
// Source: https://github.com/vega/vega/blob/f058b099decad9db78301405dd0d2e9d8ba3d51a/packages/vega-statistics/src/regression/points.js
80+
function visitPoints(data, x, y, cb) {
81+
var iterations = 0;
82+
83+
for (var i = 0, n = data.length; i < n; i++) {
84+
var d = data[i],
85+
dx = x(d),
86+
dy = y(d);
87+
88+
if (dx != null && isFinite(dx) && dy != null && isFinite(dy)) {
89+
cb(dx, dy, iterations++);
90+
}
91+
}
92+
}
93+
7794
function exponential () {
7895
var x = function x(d) {
7996
return d[0];
@@ -84,37 +101,29 @@
84101
domain;
85102

86103
function exponential(data) {
87-
var n = data.length;
88-
var ySum = 0,
89-
x2ySum = 0,
90-
ylogySum = 0,
91-
xylogySum = 0,
92-
xySum = 0,
104+
var Y = 0,
105+
X2Y = 0,
106+
YLY = 0,
107+
XYLY = 0,
108+
XY = 0,
93109
minX = domain ? +domain[0] : Infinity,
94110
maxX = domain ? +domain[1] : -Infinity;
111+
visitPoints(data, x, y, function (dx, dy) {
112+
Y += dy;
113+
X2Y += dx * dx * dy;
114+
YLY += dy * Math.log(dy);
115+
XYLY += dx * dy * Math.log(dy);
116+
XY += dx * dy;
95117

96-
for (var i = 0; i < n; i++) {
97-
var d = data[i],
98-
dx = x(d, i, data),
99-
dy = y(d, i, data); // filter out points with invalid x or y values
100-
101-
if (dx != null && isFinite(dx) && dy != null && isFinite(dy)) {
102-
ySum += dy;
103-
x2ySum += dx * dx * dy;
104-
ylogySum += dy * Math.log(dy);
105-
xylogySum += dx * dy * Math.log(dy);
106-
xySum += dx * dy;
107-
108-
if (!domain) {
109-
if (dx < minX) minX = dx;
110-
if (dx > maxX) maxX = dx;
111-
}
118+
if (!domain) {
119+
if (dx < minX) minX = dx;
120+
if (dx > maxX) maxX = dx;
112121
}
113-
}
122+
});
114123

115-
var denominator = ySum * x2ySum - xySum * xySum,
116-
a = Math.exp((x2ySum * ylogySum - xySum * xylogySum) / denominator),
117-
b = (ySum * xylogySum - xySum * ylogySum) / denominator,
124+
var denominator = Y * X2Y - XY * XY,
125+
a = Math.exp((X2Y * YLY - XY * XYLY) / denominator),
126+
b = (Y * XYLY - XY * YLY) / denominator,
118127
fn = function fn(x) {
119128
return a * Math.exp(b * x);
120129
},
@@ -123,7 +132,7 @@
123132
out.a = a;
124133
out.b = b;
125134
out.predict = fn;
126-
out.rSquared = determination(data, x, y, ySum, fn);
135+
out.rSquared = determination(data, x, y, Y, fn);
127136
return out;
128137
}
129138

@@ -142,23 +151,6 @@
142151
return exponential;
143152
}
144153

145-
// Adapted from vega-statistics by Jeffrey Heer
146-
// License: https://github.com/vega/vega/blob/f058b099decad9db78301405dd0d2e9d8ba3d51a/LICENSE
147-
// Source: https://github.com/vega/vega/blob/f058b099decad9db78301405dd0d2e9d8ba3d51a/packages/vega-statistics/src/regression/points.js
148-
function visitPoints(data, x, y, cb) {
149-
var iterations = 0;
150-
151-
for (var i = 0, n = data.length; i < n; i++) {
152-
var d = data[i],
153-
dx = x(d),
154-
dy = y(d);
155-
156-
if (dx != null && isFinite(dx) && dy != null && isFinite(dy)) {
157-
cb(dx, dy, iterations++);
158-
}
159-
}
160-
}
161-
162154
function linear () {
163155
var x = function x(d) {
164156
return d[0];
@@ -406,27 +398,27 @@
406398

407399
function logarithmic(data) {
408400
var n = 0,
409-
xlogSum = 0,
410-
yxlogSum = 0,
411-
ySum = 0,
412-
xlog2Sum = 0,
401+
XL = 0,
402+
XLY = 0,
403+
Y = 0,
404+
XL2 = 0,
413405
minX = domain ? +domain[0] : Infinity,
414406
maxX = domain ? +domain[1] : -Infinity;
415407
visitPoints(data, x, y, function (dx, dy) {
416408
++n;
417-
xlogSum += Math.log(dx);
418-
yxlogSum += dy * Math.log(dx);
419-
ySum += dy;
420-
xlog2Sum += Math.pow(Math.log(dx), 2);
409+
XL += Math.log(dx);
410+
XLY += dy * Math.log(dx);
411+
Y += dy;
412+
XL2 += Math.pow(Math.log(dx), 2);
421413

422414
if (!domain) {
423415
if (dx < minX) minX = dx;
424416
if (dx > maxX) maxX = dx;
425417
}
426418
});
427419

428-
var a = (n * yxlogSum - ySum * xlogSum) / (n * xlog2Sum - xlogSum * xlogSum),
429-
b = (ySum - a * xlogSum) / n,
420+
var a = (n * XLY - Y * XL) / (n * XL2 - XL * XL),
421+
b = (Y - a * XL) / n,
430422
fn = function fn(x) {
431423
return a * Math.log(x) + b;
432424
},
@@ -435,7 +427,7 @@
435427
out.a = a;
436428
out.b = b;
437429
out.predict = fn;
438-
out.rSquared = determination(data, x, y, ySum, fn);
430+
out.rSquared = determination(data, x, y, Y, fn);
439431
return out;
440432
}
441433

@@ -711,41 +703,30 @@
711703
domain;
712704

713705
function power(data) {
714-
var n = data.length,
715-
valid = 0,
716-
xlogSum = 0,
717-
xlogylogSum = 0,
718-
ylogSum = 0,
719-
xlog2Sum = 0,
720-
ySum = 0,
706+
var n = 0,
707+
XL = 0,
708+
XLYL = 0,
709+
YL = 0,
710+
XL2 = 0,
711+
Y = 0,
721712
minX = domain ? +domain[0] : Infinity,
722713
maxX = domain ? +domain[1] : -Infinity;
714+
visitPoints(data, x, y, function (dx, dy) {
715+
n++;
716+
XL += Math.log(dx);
717+
XLYL += Math.log(dy) * Math.log(dx);
718+
YL += Math.log(dy);
719+
XL2 += Math.pow(Math.log(dx), 2);
720+
Y += dy;
723721

724-
for (var i = 0; i < n; i++) {
725-
var d = data[i],
726-
dx = x(d, i, data),
727-
dy = y(d, i, data); // Filter out points with invalid x or y values
728-
729-
if (dx != null && isFinite(dx) && dy != null && isFinite(dy)) {
730-
valid++;
731-
xlogSum += Math.log(dx);
732-
xlogylogSum += Math.log(dy) * Math.log(dx);
733-
ylogSum += Math.log(dy);
734-
xlog2Sum += Math.pow(Math.log(dx), 2);
735-
ySum += dy;
736-
737-
if (!domain) {
738-
if (dx < minX) minX = dx;
739-
if (dx > maxX) maxX = dx;
740-
}
722+
if (!domain) {
723+
if (dx < minX) minX = dx;
724+
if (dx > maxX) maxX = dx;
741725
}
742-
} // Update n in case there were invalid x or y values
743-
744-
745-
n = valid;
726+
});
746727

747-
var b = (n * xlogylogSum - xlogSum * ylogSum) / (n * xlog2Sum - Math.pow(xlogSum, 2)),
748-
a = Math.exp((ylogSum - b * xlogSum) / n),
728+
var b = (n * XLYL - XL * YL) / (n * XL2 - Math.pow(XL, 2)),
729+
a = Math.exp((YL - b * XL) / n),
749730
fn = function fn(x) {
750731
return a * Math.pow(x, b);
751732
},
@@ -754,7 +735,7 @@
754735
out.a = a;
755736
out.b = b;
756737
out.predict = fn;
757-
out.rSquared = determination(data, x, y, ySum, fn);
738+
out.rSquared = determination(data, x, y, Y, fn);
758739
return out;
759740
}
760741

0 commit comments

Comments
 (0)