Skip to content

Commit b773d87

Browse files
authored
faster bin transform (#1225)
* bin 1m test
* faster binning
* fix first and last bin
* fix first and last bin, again
* fix last bin, again
* bypass slow data reducer
* data reducer is required
* fix single-value bin
* fix 1d cumulative
1 parent adf8278 commit b773d87

File tree

5 files changed

+440
-93
lines changed

5 files changed

+440
-93
lines changed

src/transforms/bin.js

Lines changed: 131 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
1-
import {bin as binner, extent, thresholdFreedmanDiaconis, thresholdScott, thresholdSturges, utcTickInterval} from "d3";
1+
import {
2+
bisect,
3+
extent,
4+
thresholdFreedmanDiaconis,
5+
thresholdScott,
6+
thresholdSturges,
7+
ticks,
8+
tickIncrement,
9+
utcTickInterval
10+
} from "d3";
211
import {
312
valueof,
4-
range,
513
identity,
614
maybeColumn,
715
maybeInterval,
@@ -11,7 +19,8 @@ import {
1119
mid,
1220
labelof,
1321
isTemporal,
14-
isIterable
22+
isIterable,
23+
map
1524
} from "../options.js";
1625
import {coerceDate, coerceNumber} from "../scales.js";
1726
import {basic} from "./basic.js";
@@ -74,7 +83,7 @@ function binn(
7483
gx, // optionally group on x (exclusive with bx and gy)
7584
gy, // optionally group on y (exclusive with by and gx)
7685
{
77-
data: reduceData = reduceIdentity,
86+
data: reduceData = reduceIdentity, // TODO avoid materializing when unused?
7887
filter = reduceCount, // return only non-empty bins by default
7988
sort,
8089
reverse,
@@ -147,12 +156,11 @@ function binn(
147156
const GZ = Z && setGZ([]);
148157
const GF = F && setGF([]);
149158
const GS = S && setGS([]);
150-
const BX = bx ? bx(data) : [[, , (I) => I]];
151-
const BY = by ? by(data) : [[, , (I) => I]];
152159
const BX1 = bx && setBX1([]);
153160
const BX2 = bx && setBX2([]);
154161
const BY1 = by && setBY1([]);
155162
const BY2 = by && setBY2([]);
163+
const bin = Bin(bx?.(data), by?.(data));
156164
let i = 0;
157165
for (const o of outputs) o.initialize(data);
158166
if (sort) sort.initialize(data);
@@ -164,23 +172,18 @@ function binn(
164172
if (filter) filter.scope("facet", facet);
165173
for (const [f, I] of maybeGroup(facet, G)) {
166174
for (const [k, g] of maybeGroup(I, K)) {
167-
for (const [x1, x2, fx] of BX) {
168-
const bb = fx(g);
169-
for (const [y1, y2, fy] of BY) {
170-
const extent = {x1, x2, y1, y2};
171-
const b = fy(bb);
172-
if (filter && !filter.reduce(b, extent)) continue;
173-
groupFacet.push(i++);
174-
groupData.push(reduceData.reduce(b, data, extent));
175-
if (K) GK.push(k);
176-
if (Z) GZ.push(G === Z ? f : Z[b[0]]);
177-
if (F) GF.push(G === F ? f : F[b[0]]);
178-
if (S) GS.push(G === S ? f : S[b[0]]);
179-
if (BX1) BX1.push(x1), BX2.push(x2);
180-
if (BY1) BY1.push(y1), BY2.push(y2);
181-
for (const o of outputs) o.reduce(b, extent);
182-
if (sort) sort.reduce(b);
183-
}
175+
for (const [b, extent] of bin(g)) {
176+
if (filter && !filter.reduce(b, extent)) continue;
177+
groupFacet.push(i++);
178+
groupData.push(reduceData.reduce(b, data, extent));
179+
if (K) GK.push(k);
180+
if (Z) GZ.push(G === Z ? f : Z[b[0]]);
181+
if (F) GF.push(G === F ? f : F[b[0]]);
182+
if (S) GS.push(G === S ? f : S[b[0]]);
183+
if (BX1) BX1.push(extent.x1), BX2.push(extent.x2);
184+
if (BY1) BY1.push(extent.y1), BY2.push(extent.y2);
185+
for (const o of outputs) o.reduce(b, extent);
186+
if (sort) sort.reduce(b);
184187
}
185188
}
186189
}
@@ -224,39 +227,72 @@ function maybeBin(options) {
224227
if (options == null) return;
225228
const {value, cumulative, domain = extent, thresholds} = options;
226229
const bin = (data) => {
227-
let V = valueof(data, value, Array); // d3.bin prefers Array input
228-
const bin = binner().value((i) => V[i]);
230+
let V = valueof(data, value);
231+
let T; // bin thresholds
229232
if (isTemporal(V) || isTimeThresholds(thresholds)) {
230-
V = V.map(coerceDate);
233+
V = map(V, coerceDate, Float64Array);
231234
let [min, max] = typeof domain === "function" ? domain(V) : domain;
232235
let t = typeof thresholds === "function" && !isInterval(thresholds) ? thresholds(V, min, max) : thresholds;
233236
if (typeof t === "number") t = utcTickInterval(min, max, t);
234237
if (isInterval(t)) {
235238
if (domain === extent) {
236239
min = t.floor(min);
237-
max = t.ceil(new Date(+max + 1));
240+
max = t.offset(t.floor(max));
238241
}
239-
t = t.range(min, max);
242+
t = t.range(min, t.offset(max));
240243
}
241-
bin.thresholds(t).domain([min, max]);
244+
T = t;
242245
} else {
243-
V = V.map(coerceNumber);
244-
let d = domain;
245-
let t = thresholds;
246-
if (isInterval(t)) {
247-
let [min, max] = typeof d === "function" ? d(V) : d;
248-
if (d === extent) {
246+
V = map(V, coerceNumber, Float64Array); // TODO deduplicate with code above
247+
let [min, max] = typeof domain === "function" ? domain(V) : domain;
248+
let t = typeof thresholds === "function" && !isInterval(thresholds) ? thresholds(V, min, max) : thresholds;
249+
if (typeof t === "number") {
250+
// This differs from d3.ticks with regard to exclusive bounds: we want a
251+
// first threshold less than or equal to the minimum, and a last
252+
// threshold (strictly) greater than the maximum.
253+
if (domain === extent) {
254+
let step = tickIncrement(min, max, t);
255+
if (isFinite(step)) {
256+
if (step > 0) {
257+
let r0 = Math.round(min / step);
258+
let r1 = Math.round(max / step);
259+
if (!(r0 * step <= min)) --r0;
260+
if (!(r1 * step > max)) ++r1;
261+
let n = r1 - r0 + 1;
262+
t = new Float64Array(n);
263+
for (let i = 0; i < n; ++i) t[i] = (r0 + i) * step;
264+
} else if (step < 0) {
265+
step = -step;
266+
let r0 = Math.round(min * step);
267+
let r1 = Math.round(max * step);
268+
if (!(r0 / step <= min)) --r0;
269+
if (!(r1 / step > max)) ++r1;
270+
let n = r1 - r0 + 1;
271+
t = new Float64Array(n);
272+
for (let i = 0; i < n; ++i) t[i] = (r0 + i) / step;
273+
} else {
274+
t = [min];
275+
}
276+
} else {
277+
t = [min];
278+
}
279+
} else {
280+
t = ticks(min, max, t);
281+
}
282+
} else if (isInterval(t)) {
283+
if (domain === extent) {
249284
min = t.floor(min);
250285
max = t.offset(t.floor(max));
251-
d = [min, max];
252286
}
253-
t = t.range(min, max);
287+
t = t.range(min, t.offset(max));
254288
}
255-
bin.thresholds(t).domain(d);
289+
T = t;
256290
}
257-
let bins = bin(range(data)).map(binset);
258-
if (cumulative) bins = (cumulative < 0 ? bins.reverse() : bins).map(bincumset);
259-
return bins.map(binfilter);
291+
const E = [];
292+
if (T.length === 1) E.push([T[0], T[0]]); // collapsed domain
293+
else for (let i = 1; i < T.length; ++i) E.push([T[i - 1], T[i]]);
294+
E.bin = (cumulative < 0 ? bin1cn : cumulative > 0 ? bin1cp : bin1)(E, T, V);
295+
return E;
260296
};
261297
bin.label = labelof(value);
262298
return bin;
@@ -305,38 +341,66 @@ function isInterval(t) {
305341
return t ? typeof t.range === "function" : false;
306342
}
307343

308-
function binset(bin) {
309-
return [bin, new Set(bin)];
310-
}
311-
312-
function bincumset([bin], j, bins) {
313-
return [
314-
bin,
315-
{
316-
get size() {
317-
for (let k = 0; k <= j; ++k) {
318-
if (bins[k][1].size) {
319-
return 1; // a non-empty value
344+
function Bin(EX, EY) {
345+
return EX && EY
346+
? function* (I) {
347+
const X = EX.bin(I); // first bin on x
348+
for (const [ix, [x1, x2]] of EX.entries()) {
349+
const Y = EY.bin(X[ix]); // then bin on y
350+
for (const [iy, [y1, y2]] of EY.entries()) {
351+
yield [Y[iy], {x1, y1, x2, y2}];
320352
}
321353
}
322-
return 0;
323-
},
324-
has(i) {
325-
for (let k = 0; k <= j; ++k) {
326-
if (bins[k][1].has(i)) {
327-
return true;
328-
}
354+
}
355+
: EX
356+
? function* (I) {
357+
const X = EX.bin(I);
358+
for (const [i, [x1, x2]] of EX.entries()) {
359+
yield [X[i], {x1, x2}];
329360
}
330-
return false;
331361
}
332-
}
333-
];
362+
: function* (I) {
363+
const Y = EY.bin(I);
364+
for (const [i, [y1, y2]] of EY.entries()) {
365+
yield [Y[i], {y1, y2}];
366+
}
367+
};
334368
}
335369

336-
function binfilter([{x0, x1}, set]) {
337-
return [x0, x1, set.size ? (I) => I.filter(set.has, set) : binempty];
370+
// non-cumulative distribution
371+
function bin1(E, T, V) {
372+
T = T.map(coerceNumber); // for faster bisection; TODO skip if already typed
373+
return (I) => {
374+
const B = E.map(() => []);
375+
for (const i of I) B[bisect(T, V[i]) - 1]?.push(i); // TODO quantization?
376+
return B;
377+
};
338378
}
339379

340-
function binempty() {
341-
return new Uint32Array(0);
380+
// cumulative distribution
381+
function bin1cp(E, T, V) {
382+
const bin = bin1(E, T, V);
383+
return (I) => {
384+
const B = bin(I);
385+
for (let i = 1, n = B.length; i < n; ++i) {
386+
const C = B[i - 1];
387+
const b = B[i];
388+
for (const j of C) b.push(j);
389+
}
390+
return B;
391+
};
392+
}
393+
394+
// complementary cumulative distribution
395+
function bin1cn(E, T, V) {
396+
const bin = bin1(E, T, V);
397+
return (I) => {
398+
const B = bin(I);
399+
for (let i = B.length - 2; i >= 0; --i) {
400+
const C = B[i + 1];
401+
const b = B[i];
402+
for (const j of C) b.push(j);
403+
}
404+
return B;
405+
};
342406
}

0 commit comments

Comments
 (0)