Skip to content

Commit 53c446b

Browse files
committed
revamp Box.calc and Box.plot handling of boxpoints
- generate a 'pts' array of objects (similar to scatter pts) inside box calcdata items, instead of simply keeping track of all values corresponding to each box. - fill in pt objects w/ jitter during Box.plot instead of mapping the box 'val' array to a d3-esque array of objects. - in preparation for box select and 'points' hover, keep track of original val indices in each pt object
1 parent cfc8725 commit 53c446b

File tree

2 files changed

+158
-121
lines changed

2 files changed

+158
-121
lines changed

src/traces/box/calc.js

Lines changed: 136 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@ var isNumeric = require('fast-isnumeric');
1313
var Lib = require('../../lib');
1414
var Axes = require('../../plots/cartesian/axes');
1515

16-
1716
// outlier definition based on http://www.physics.csbsju.edu/stats/box2.html
1817
module.exports = function calc(gd, trace) {
19-
var xa = Axes.getFromId(gd, trace.xaxis || 'x'),
20-
ya = Axes.getFromId(gd, trace.yaxis || 'y'),
21-
orientation = trace.orientation,
22-
cd = [],
23-
valAxis, valLetter, val, valBinned,
24-
posAxis, posLetter, pos, posDistinct, dPos;
25-
26-
// Set value (val) and position (pos) keys via orientation
18+
var xa = Axes.getFromId(gd, trace.xaxis || 'x');
19+
var ya = Axes.getFromId(gd, trace.yaxis || 'y');
20+
var orientation = trace.orientation;
21+
var cd = [];
22+
23+
var i;
24+
var valAxis, valLetter;
25+
var posAxis, posLetter;
26+
2727
if(orientation === 'h') {
2828
valAxis = xa;
2929
valLetter = 'x';
@@ -36,112 +36,146 @@ module.exports = function calc(gd, trace) {
3636
posLetter = 'x';
3737
}
3838

39-
val = valAxis.makeCalcdata(trace, valLetter); // get val
40-
41-
// size autorange based on all source points
42-
// position happens afterward when we know all the pos
43-
Axes.expand(valAxis, val, {padded: true});
44-
45-
// In vertical (horizontal) box plots:
46-
// if no x (y) data, use x0 (y0), or name
47-
// so if you want one box
48-
// per trace, set x0 (y0) to the x (y) value or category for this trace
49-
// (or set x (y) to a constant array matching y (x))
50-
function getPos(gd, trace, posLetter, posAxis, val) {
51-
var pos0;
52-
if(posLetter in trace) pos = posAxis.makeCalcdata(trace, posLetter);
53-
else {
54-
if(posLetter + '0' in trace) pos0 = trace[posLetter + '0'];
55-
else if('name' in trace && (
56-
posAxis.type === 'category' ||
57-
(isNumeric(trace.name) &&
58-
['linear', 'log'].indexOf(posAxis.type) !== -1) ||
59-
(Lib.isDateTime(trace.name) &&
60-
posAxis.type === 'date')
61-
)) {
62-
pos0 = trace.name;
63-
}
64-
else pos0 = gd.numboxes;
65-
pos0 = posAxis.d2c(pos0, 0, trace[posLetter + 'calendar']);
66-
pos = val.map(function() { return pos0; });
67-
}
68-
return pos;
69-
}
70-
71-
pos = getPos(gd, trace, posLetter, posAxis, val);
39+
var val = valAxis.makeCalcdata(trace, valLetter);
40+
var pos = getPos(trace, posLetter, posAxis, val, gd.numboxes);
7241

73-
// get distinct positions and min difference
7442
var dv = Lib.distinctVals(pos);
75-
posDistinct = dv.vals;
76-
dPos = dv.minDiff / 2;
77-
78-
function binVal(cd, val, pos, posDistinct, dPos) {
79-
var posDistinctLength = posDistinct.length,
80-
valLength = val.length,
81-
valBinned = [],
82-
bins = [],
83-
i, p, n, v;
84-
85-
// store distinct pos in cd, find bins, init. valBinned
86-
for(i = 0; i < posDistinctLength; ++i) {
87-
p = posDistinct[i];
88-
cd[i] = {pos: p};
89-
bins[i] = p - dPos;
90-
valBinned[i] = [];
91-
}
92-
bins.push(posDistinct[posDistinctLength - 1] + dPos);
93-
94-
// bin the values
95-
for(i = 0; i < valLength; ++i) {
96-
v = val[i];
97-
if(!isNumeric(v)) continue;
98-
n = Lib.findBin(pos[i], bins);
99-
if(n >= 0 && n < valLength) valBinned[n].push(v);
43+
var posDistinct = dv.vals;
44+
var dPos = dv.minDiff / 2;
45+
var posBins = makeBins(posDistinct, dPos);
46+
47+
var vLen = val.length;
48+
var pLen = posDistinct.length;
49+
var ptsPerBin = initNestedArray(pLen);
50+
51+
// bin pts info per position bins
52+
for(i = 0; i < vLen; i++) {
53+
var v = val[i];
54+
if(!isNumeric(v)) continue;
55+
56+
var n = Lib.findBin(pos[i], posBins);
57+
if(n >= 0 && n < pLen) {
58+
var pt = {v: v, i: i};
59+
ptsPerBin[n].push(pt);
10060
}
101-
102-
return valBinned;
10361
}
10462

105-
valBinned = binVal(cd, val, pos, posDistinct, dPos);
106-
107-
// sort the bins and calculate the stats
108-
function calculateStats(cd, valBinned) {
109-
var v, l, cdi, i;
110-
111-
for(i = 0; i < valBinned.length; ++i) {
112-
v = valBinned[i].sort(Lib.sorterAsc);
113-
l = v.length;
114-
cdi = cd[i];
115-
116-
cdi.val = v; // put all values into calcdata
117-
cdi.min = v[0];
118-
cdi.max = v[l - 1];
119-
cdi.mean = Lib.mean(v, l);
120-
cdi.sd = Lib.stdev(v, l, cdi.mean);
121-
cdi.q1 = Lib.interp(v, 0.25); // first quartile
122-
cdi.med = Lib.interp(v, 0.5); // median
123-
cdi.q3 = Lib.interp(v, 0.75); // third quartile
63+
// build calcdata trace items, one item per distinct position
64+
for(i = 0; i < pLen; i++) {
65+
if(ptsPerBin[i].length > 0) {
66+
var pts = ptsPerBin[i].sort(sortByVal);
67+
var boxVals = pts.map(extractVal);
68+
var bvLen = boxVals.length;
69+
70+
var cdi = {
71+
pos: posDistinct[i],
72+
pts: pts
73+
};
74+
75+
cdi.min = boxVals[0];
76+
cdi.max = boxVals[bvLen - 1];
77+
cdi.mean = Lib.mean(boxVals, bvLen);
78+
cdi.sd = Lib.stdev(boxVals, bvLen, cdi.mean);
79+
80+
// first quartile
81+
cdi.q1 = Lib.interp(boxVals, 0.25);
82+
// median
83+
cdi.med = Lib.interp(boxVals, 0.5);
84+
// third quartile
85+
cdi.q3 = Lib.interp(boxVals, 0.75);
86+
12487
// lower and upper fences - last point inside
12588
// 1.5 interquartile ranges from quartiles
126-
cdi.lf = Math.min(cdi.q1, v[
127-
Math.min(Lib.findBin(2.5 * cdi.q1 - 1.5 * cdi.q3, v, true) + 1, l - 1)]);
128-
cdi.uf = Math.max(cdi.q3, v[
129-
Math.max(Lib.findBin(2.5 * cdi.q3 - 1.5 * cdi.q1, v), 0)]);
89+
cdi.lf = Math.min(
90+
cdi.q1,
91+
boxVals[Math.min(
92+
Lib.findBin(2.5 * cdi.q1 - 1.5 * cdi.q3, boxVals, true) + 1,
93+
bvLen - 1
94+
)]
95+
);
96+
cdi.uf = Math.max(
97+
cdi.q3,
98+
boxVals[Math.max(
99+
Lib.findBin(2.5 * cdi.q3 - 1.5 * cdi.q1, boxVals),
100+
0
101+
)]
102+
);
103+
130104
// lower and upper outliers - 3 IQR out (don't clip to max/min,
131105
// this is only for discriminating suspected & far outliers)
132106
cdi.lo = 4 * cdi.q1 - 3 * cdi.q3;
133107
cdi.uo = 4 * cdi.q3 - 3 * cdi.q1;
108+
109+
cd.push(cdi);
134110
}
135111
}
136112

137-
calculateStats(cd, valBinned);
138-
139-
// remove empty bins
140-
cd = cd.filter(function(cdi) { return cdi.val && cdi.val.length; });
141-
if(!cd.length) return [{t: {emptybox: true}}];
113+
Axes.expand(valAxis, val, {padded: true});
142114

143-
// add numboxes and dPos to cd
144-
cd[0].t = {boxnum: gd.numboxes, dPos: dPos};
145-
gd.numboxes++;
146-
return cd;
115+
if(cd.length > 0) {
116+
cd[0].t = {
117+
boxnum: gd.numboxes,
118+
dPos: dPos
119+
};
120+
gd.numboxes++;
121+
return cd;
122+
} else {
123+
return [{t: {emptybox: true}}];
124+
}
147125
};
126+
127+
// In vertical (horizontal) box plots:
128+
// if no x (y) data, use x0 (y0), or name
129+
// so if you want one box
130+
// per trace, set x0 (y0) to the x (y) value or category for this trace
131+
// (or set x (y) to a constant array matching y (x))
132+
/**
 * Compute the position coordinate associated with each value of a box trace.
 *
 * When the trace carries position data under `posLetter`, that data is
 * converted by the axis. Otherwise a single fallback position is chosen,
 * in order of preference:
 *   1. trace[posLetter + '0'] (e.g. x0 / y0),
 *   2. the trace name, when it is compatible with the position axis type,
 *   3. `numboxes`,
 * and repeated once per entry of `val`.
 *
 * @param {object} trace - trace object
 * @param {string} posLetter - key of the position data in `trace` (e.g. 'x')
 * @param {object} posAxis - axis object; must provide `makeCalcdata`, `d2c` and `type`
 * @param {Array} val - value array; only used to size the fallback result
 * @param {number} numboxes - fallback position when neither position data,
 *   a `<posLetter>0` key, nor a usable name is available
 * @return {Array} position coordinates
 */
function getPos(trace, posLetter, posAxis, val, numboxes) {
    if(posLetter in trace) {
        return posAxis.makeCalcdata(trace, posLetter);
    }

    var pos0;

    if(posLetter + '0' in trace) {
        pos0 = trace[posLetter + '0'];
    } else if('name' in trace && (
        posAxis.type === 'category' || (
            isNumeric(trace.name) &&
            ['linear', 'log'].indexOf(posAxis.type) !== -1
        ) || (
            Lib.isDateTime(trace.name) &&
            posAxis.type === 'date'
        )
    )) {
        pos0 = trace.name;
    } else {
        pos0 = numboxes;
    }

    // convert once, then reuse the same coordinate for every value
    var pos0c = posAxis.d2c(pos0, 0, trace[posLetter + 'calendar']);
    return val.map(function() { return pos0c; });
}
158+
159+
/**
 * Build bin edges around a list of bin centers.
 *
 * Each center x[i] gets a lower edge at x[i] - dx; one final upper edge
 * is appended at x[last] + dx, so the result has x.length + 1 entries.
 *
 * @param {Array<number>} x - bin centers
 * @param {number} dx - half-width applied on either side of the centers
 * @return {Array<number>} bin edges; empty when `x` is empty
 */
function makeBins(x, dx) {
    var len = x.length;

    // no centers -> no edges (avoids producing a lone NaN edge from x[-1])
    if(len === 0) return [];

    var bins = new Array(len + 1);

    for(var i = 0; i < len; i++) {
        bins[i] = x[i] - dx;
    }
    bins[len] = x[len - 1] + dx;

    return bins;
}
170+
171+
/**
 * Create an array of `len` independent empty arrays.
 *
 * @param {number} len - number of inner arrays to create
 * @return {Array<Array>} outer array whose every slot holds its own `[]`
 */
function initNestedArray(len) {
    var arr = new Array(len);
    for(var i = 0; i < len; i++) {
        // a fresh array per slot, so pushes to one slot never leak into another
        arr[i] = [];
    }
    return arr;
}
178+
179+
/**
 * Ascending comparator on the `v` field of two objects,
 * suitable for Array.prototype.sort.
 *
 * @param {{v: number}} a
 * @param {{v: number}} b
 * @return {number} negative when a.v < b.v, positive when a.v > b.v, else 0
 */
function sortByVal(a, b) {
    var delta = a.v - b.v;
    return delta;
}
180+
181+
/**
 * Accessor returning the `v` field of an object.
 *
 * @param {{v: *}} o
 * @return {*} o.v
 */
function extractVal(o) {
    var value = o.v;
    return value;
}

src/traces/box/plot.js

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,6 @@ module.exports = function plot(gd, plotinfo, cdbox) {
132132
.attr('class', 'points')
133133
.selectAll('path')
134134
.data(function(d) {
135-
var pts = (trace.boxpoints === 'all') ? d.val :
136-
d.val.filter(function(v) { return (v < d.lf || v > d.uf); }),
137135
// normally use IQR, but if this is 0 or too small, use max-min
138136
typicalSpread = Math.max((d.max - d.min) / 10, d.q3 - d.q1),
139137
minSpread = typicalSpread * 1e-9,
@@ -147,6 +145,10 @@ module.exports = function plot(gd, plotinfo, cdbox) {
147145
jitterFactor,
148146
newJitter;
149147

148+
var pts = trace.boxpoints === 'all' ?
149+
d.pts :
150+
d.pts.filter(function(pt) { return (pt.v < d.lf || pt.v > d.uf); });
151+
150152
// dynamic jitter
151153
if(trace.jitter) {
152154
if(typicalSpread === 0) {
@@ -179,31 +181,32 @@ module.exports = function plot(gd, plotinfo, cdbox) {
179181
newJitter = trace.jitter * 2 / maxJitterFactor;
180182
}
181183

182-
return pts.map(function(v, i) {
183-
var posOffset = trace.pointpos,
184-
p;
185-
if(trace.jitter) {
186-
posOffset += newJitter * jitterFactors[i] * (rand() - 0.5);
187-
}
184+
// fills in 'x' and 'y' in calcdata 'pts' item
185+
for(i = 0; i < pts.length; i++) {
186+
var pt = pts[i];
187+
var v = pt.v;
188+
189+
var jitterOffset = trace.jitter ?
190+
bdPos * (newJitter * jitterFactors[i] * (rand() - 0.5)) :
191+
0;
192+
193+
var posPx = d.pos + bPos + bdPos * trace.pointpos + jitterOffset;
188194

189195
if(trace.orientation === 'h') {
190-
p = {
191-
y: d.pos + posOffset * bdPos + bPos,
192-
x: v
193-
};
196+
pt.y = posPx;
197+
pt.x = v;
194198
} else {
195-
p = {
196-
x: d.pos + posOffset * bdPos + bPos,
197-
y: v
198-
};
199+
pt.x = posPx;
200+
pt.y = v;
199201
}
200202

201203
// tag suspected outliers
202204
if(trace.boxpoints === 'suspectedoutliers' && v < d.uo && v > d.lo) {
203-
p.so = true;
205+
pt.so = true;
204206
}
205-
return p;
206-
});
207+
}
208+
209+
return pts;
207210
})
208211
.enter().append('path')
209212
.classed('point', true)

0 commit comments

Comments
 (0)