Skip to content

Commit 646055a

Browse files
mbostock and Fil authored
vgplot (#1015)
* document Mosaic * taxi data loader Note: it's not active, only here for reference. To make this data loader work in CI, you have to install the proper duckdb binary on the $PATH, and I'd recommend to use a dedicated $TMPDIR rather than write in the root folder. * Apply suggestions from code review Co-authored-by: Mike Bostock <[email protected]> * vgplot * s/boilerplate// * s/upvote// * vg live update * doc polish * fix transitive preload * re-resolve npm imports * test getDependencyResolver * test import resolutions; comment * optimize resolution; apache-arrow 15 * duckdb needs latest apache-arrow, too --------- Co-authored-by: Philippe Rivière <[email protected]>
1 parent 5222f99 commit 646055a

File tree

13 files changed

+291
-62
lines changed

13 files changed

+291
-62
lines changed

docs/lib/mosaic.md

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
---
2+
sql:
3+
trips: nyc-taxi.parquet
4+
---
5+
6+
# Mosaic vgplot
7+
8+
[Mosaic](https://uwdata.github.io/mosaic/) is a system for linking data visualizations, tables, and inputs, leveraging [DuckDB](./duckdb) for scalable processing. Mosaic includes an interactive grammar of graphics, [Mosaic vgplot](https://uwdata.github.io/mosaic/vgplot/), built on [Observable Plot](./plot). With vgplot, you can interactively visualize and explore millions — even billions — of data points.
9+
10+
The example below shows the pickup and dropoff locations of one million taxi rides in New York City from Jan 1–3, 2010. The dataset is stored in an 8MB [Apache Parquet](./arrow#apache-parquet) file, generated with a [data loader](../loaders).
11+
12+
${maps}
13+
14+
${histogram}
15+
16+
The views above are coordinated: brushing a time window in the histogram, or a region in either map, will filter both maps. _What spatial patterns can you find?_
17+
18+
The code below creates three views, coordinated by Mosaic’s [crossfilter](https://uwdata.github.io/mosaic/api/core/selection.html#selection-crossfilter) helper.
19+
20+
```js echo
21+
// Create a shared filter
22+
const $filter = vg.Selection.crossfilter();
23+
24+
// Shared attributes for the maps
25+
const attributes = [
26+
vg.width(315),
27+
vg.height(550),
28+
vg.margin(0),
29+
vg.xAxis(null),
30+
vg.yAxis(null),
31+
vg.xDomain([297000, 297000 + 28.36 * 315]),
32+
vg.yDomain([57900, 57900 + 28.36 * 550]), // ensure aspect ratio of 1
33+
vg.colorScale("symlog")
34+
];
35+
36+
// Create two side-by-side maps
37+
const maps = vg.hconcat(
38+
vg.plot(
39+
vg.raster(vg.from("trips", {filterBy: $filter}), {x: "px", y: "py", imageRendering: "pixelated"}),
40+
vg.intervalXY({as: $filter}),
41+
vg.text([{label: "Taxi pickups"}], {
42+
dx: 10,
43+
dy: 10,
44+
text: "label",
45+
fill: "white",
46+
frameAnchor: "top-left"
47+
}),
48+
...attributes,
49+
vg.colorScheme("turbo"),
50+
vg.frame({stroke: "black"})
51+
),
52+
vg.hspace(10),
53+
vg.plot(
54+
vg.raster(vg.from("trips", {filterBy: $filter}), {x: "dx", y: "dy", imageRendering: "pixelated"}),
55+
vg.intervalXY({as: $filter}),
56+
vg.text([{label: "Taxi dropoffs"}], {
57+
dx: 10,
58+
dy: 10,
59+
text: "label",
60+
fill: "white",
61+
frameAnchor: "top-left"
62+
}),
63+
...attributes,
64+
vg.colorScheme("turbo"),
65+
vg.frame({stroke: "black"})
66+
)
67+
);
68+
69+
// Create the histogram
70+
const histogram = vg.plot(
71+
vg.rectY(vg.from("trips"), {x: vg.bin("time"), y: vg.count(), insetLeft: 0.5, insetRight: 0.5}),
72+
vg.intervalX({as: $filter}),
73+
vg.yTickFormat("s"),
74+
vg.xLabel("Hour of pickup"),
75+
vg.yLabel("Number of rides"),
76+
vg.width(640),
77+
vg.height(100)
78+
);
79+
```
80+
81+
For more Mosaic examples, see the [Mosaic + Framework](https://uwdata.github.io/mosaic-framework-example/) website.

docs/lib/nyc-taxi.parquet

7.85 MB
Binary file not shown.

docs/lib/nyc-taxi.parquet.sh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
duckdb :memory: << EOF
2+
-- Load spatial extension
3+
INSTALL spatial; LOAD spatial;
4+
5+
-- Project coordinates, following the example at https://github.com/duckdb/duckdb_spatial
6+
CREATE TEMP TABLE rides AS SELECT
7+
pickup_datetime::TIMESTAMP AS datetime,
8+
ST_Transform(ST_Point(pickup_latitude, pickup_longitude), 'EPSG:4326', 'EPSG:32118') AS pick,
9+
ST_Transform(ST_Point(dropoff_latitude, dropoff_longitude), 'EPSG:4326', 'EPSG:32118') AS drop
10+
FROM 'https://uwdata.github.io/mosaic-datasets/data/nyc-rides-2010.parquet';
11+
12+
-- Write output parquet file
13+
COPY (SELECT
14+
HOUR(datetime) + MINUTE(datetime) / 60 AS time,
15+
ST_X(pick)::INTEGER AS px, -- extract pickup x-coord
16+
ST_Y(pick)::INTEGER AS py, -- extract pickup y-coord
17+
ST_X(drop)::INTEGER AS dx, -- extract dropoff x-coord
18+
ST_Y(drop)::INTEGER AS dy -- extract dropoff y-coord
19+
FROM rides
20+
ORDER BY 2,3,4,5,1 -- optimize output size by sorting
21+
) TO 'trips.parquet' (COMPRESSION 'ZSTD', row_group_size 10000000);
22+
EOF
23+
24+
cat trips.parquet >&1 # Copy payload to stdout
25+
rm trips.parquet # Clean up

observablehq.config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ export default {
7070
{name: "Mapbox GL JS", path: "/lib/mapbox-gl"},
7171
{name: "Mermaid", path: "/lib/mermaid"},
7272
{name: "Microsoft Excel (XLSX)", path: "/lib/xlsx"},
73+
{name: "Mosaic vgplot", path: "/lib/mosaic"},
7374
{name: "Observable Generators", path: "/lib/generators"},
7475
{name: "Observable Inputs", path: "/lib/inputs"},
7576
{name: "Observable Plot", path: "/lib/plot"},

src/client/main.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const library = {
1515
...sampleDatasets
1616
};
1717

18-
const runtime = new Runtime(library);
18+
export const runtime = new Runtime(library);
1919
export const main = runtime.module();
2020

2121
const cellsById = new Map();

src/client/preview.js

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import {registerTable} from "npm:@observablehq/duckdb";
22
import {FileAttachment, registerFile} from "npm:@observablehq/stdlib";
3-
import {main, undefine} from "./main.js";
3+
import {main, runtime, undefine} from "./main.js";
44
import {enableCopyButtons} from "./pre.js";
55

66
export * from "./index.js";
@@ -92,7 +92,10 @@ export function open({hash, eval: compile} = {}) {
9292
}
9393
if (message.tables.removed.length || message.tables.added.length) {
9494
const sql = main._resolve("sql");
95-
sql.define(sql._promise); // re-evaluate sql code
95+
runtime._updates.add(sql); // re-evaluate sql code
96+
runtime._compute();
97+
const vg = runtime._builtin._resolve("vg");
98+
vg.define("vg", [], vg._definition); // reload vgplot, then re-evaluate vg code
9699
}
97100
if (message.stylesheets.added.length === 1 && message.stylesheets.removed.length === 1) {
98101
const [newHref] = message.stylesheets.added;

src/client/stdlib/duckdb.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,12 @@ export function registerTable(name, source) {
5959
}
6060

6161
export async function sql(strings, ...args) {
62+
return (await getDefaultClient()).query(strings.join("?"), args);
63+
}
64+
65+
export async function getDefaultClient() {
6266
await Promise.all(inserts);
63-
return (await (db ??= DuckDBClient.of())).query(strings.join("?"), args);
67+
return await (db ??= DuckDBClient.of());
6468
}
6569

6670
export class DuckDBClient {

src/client/stdlib/recommendedLibraries.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@ export const SQLite = () => import("observablehq:stdlib/sqlite").then((sqlite) =
1919
export const SQLiteDatabaseClient = () => import("observablehq:stdlib/sqlite").then((sqlite) => sqlite.SQLiteDatabaseClient); // prettier-ignore
2020
export const tex = () => import("observablehq:stdlib/tex").then((tex) => tex.default);
2121
export const topojson = () => import("npm:topojson-client");
22+
export const vg = () => import("observablehq:stdlib/vgplot").then((vg) => vg.default());
2223
export const vl = () => import("observablehq:stdlib/vega-lite").then((vl) => vl.default);

src/client/stdlib/vgplot.js

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import * as vgplot from "npm:@uwdata/vgplot";
2+
import {getDefaultClient} from "observablehq:stdlib/duckdb";
3+
4+
export default async function vg() {
5+
const coordinator = new vgplot.Coordinator();
6+
const api = vgplot.createAPIContext({coordinator});
7+
const duckdb = (await getDefaultClient())._db;
8+
coordinator.databaseConnector(vgplot.wasmConnector({duckdb}));
9+
return api;
10+
}

src/libraries.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ export function getImplicitInputImports(inputs: Iterable<string>): Set<string> {
3131
if (set.has("tex")) implicits.add("npm:@observablehq/tex");
3232
if (set.has("topojson")) implicits.add("npm:topojson-client");
3333
if (set.has("vl")) implicits.add("observablehq:stdlib/vega-lite");
34+
if (set.has("vg")) implicits.add("observablehq:stdlib/vgplot");
3435
return implicits;
3536
}
3637

@@ -159,6 +160,7 @@ export function getImplicitDependencies(imports: Iterable<string>): Set<string>
159160
if (set.has("npm:@observablehq/xlsx")) implicits.add("npm:exceljs");
160161
if (set.has("npm:@observablehq/zip")) implicits.add("npm:jszip");
161162
if (set.has("observablehq:stdlib/vega-lite")) implicits.add("npm:vega-lite-api").add("npm:vega-lite").add("npm:vega");
163+
if (set.has("observablehq:stdlib/vgplot")) implicits.add("npm:@uwdata/vgplot").add("npm:@observablehq/duckdb").add("npm:@duckdb/duckdb-wasm"); // prettier-ignore
162164
return implicits;
163165
}
164166

0 commit comments

Comments
 (0)