observablehq
diff --git a/‎README.md
Lines changed: 4 additions & 0 deletions b/‎README.md
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/marks/text.js
Lines changed: 117 additions & 3 deletions b/‎src/marks/text.js
Lines changed: 117 additions & 3 deletions
@@ -1111,6 +1111,8 @@ The following text-specific constant options are also supported:
 * **textAnchor** - the [text anchor](https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/text-anchor) for horizontal position; start, end, or middle
 * **lineAnchor** - the line anchor for vertical position; top, bottom, or middle
 * **lineHeight** - the line height in ems; defaults to 1
+* **lineWidth** - the line width in ems, for wrapping; defaults to Infinity
+* **monospace** - if true, changes the default fontFamily and metrics to monospace
 * **fontFamily** - the font name; defaults to [system-ui](https://drafts.csswg.org/css-fonts-4/#valdef-font-family-system-ui)
 * **fontSize** - the font size in pixels; defaults to 10
 * **fontStyle** - the [font style](https://developer.mozilla.org/en-US/docs/Web/CSS/font-style); defaults to normal
@@ -1119,6 +1121,8 @@ The following text-specific constant options are also supported:
 * **frameAnchor** - the frame anchor; top-left, top, top-right, right, bottom-right, bottom, bottom-left, left, or middle (default)
 * **rotate** - the rotation angle in degrees clockwise; defaults to 0
 
+If a **lineWidth** is specified, input text values will be wrapped as needed to fit while preserving existing newlines. The line wrapping implementation is rudimentary; for non-ASCII or non-U.S. English text, or when a different font is used, you may get better results by hard-wrapping the text yourself (by supplying newlines in the input). If the **monospace** option is truthy, the default **fontFamily** changes to “ui-monospace, monospace”, and the **lineWidth** option is interpreted as characters (ch) rather than ems.
+
 The **fontSize** and **rotate** options can be specified as either channels or constants. When fontSize or rotate is specified as a number, it is interpreted as a constant; otherwise it is interpreted as a channel.
 
 If the **frameAnchor** option is not specified, then **textAnchor** and **lineAnchor** default to middle. Otherwise, **textAnchor** defaults to start if **frameAnchor** is on the left, end if **frameAnchor** is on the right, and otherwise middle. Similarly, **lineAnchor** defaults to top if **frameAnchor** is on the top, bottom if **frameAnchor** is on the bottom, and otherwise middle.
 
@@ -19,7 +19,9 @@ export class Text extends Mark {
       textAnchor = /right$/i.test(frameAnchor) ? "end" : /left$/i.test(frameAnchor) ? "start" : "middle",
       lineAnchor = /^top/i.test(frameAnchor) ? "top" : /^bottom/i.test(frameAnchor) ? "bottom" : "middle",
       lineHeight = 1,
-      fontFamily,
+      lineWidth = Infinity,
+      monospace,
+      fontFamily = monospace ? "ui-monospace, monospace" : undefined,
       fontSize,
       fontStyle,
       fontVariant,
@@ -44,6 +46,8 @@ export class Text extends Mark {
     this.textAnchor = impliedString(textAnchor, "middle");
     this.lineAnchor = keyword(lineAnchor, "lineAnchor", ["top", "middle", "bottom"]);
     this.lineHeight = +lineHeight;
+    this.lineWidth = +lineWidth;
+    this.monospace = !!monospace;
     this.fontFamily = string(fontFamily);
     this.fontSize = cfontSize;
     this.fontStyle = string(fontStyle);
@@ -81,11 +85,15 @@ export class Text extends Mark {
   }
 }
 
-function applyMultilineText(selection, {lineAnchor, lineHeight}, T) {
+function applyMultilineText(selection, {monospace, lineAnchor, lineHeight, lineWidth}, T) {
   if (!T) return;
   const format = isTemporal(T) ? isoFormat : isNumeric(T) ? formatNumber() : string;
+  const linesof = isFinite(lineWidth) ? (monospace
+    ? t => lineWrap(t, lineWidth, monospaceWidth)
+    : t => lineWrap(t, lineWidth * 100, defaultWidth))
+    : t => t.split(/\r\n?|\n/g);
   selection.each(function(i) {
-    const lines = format(T[i]).split(/\r\n?|\n/g);
+    const lines = linesof(format(T[i]));
     const n = lines.length;
     const y = lineAnchor === "top" ? 0.71 : lineAnchor === "bottom" ? 1 - n : (164 - n * 100) / 200;
     if (n > 1) {
@@ -162,3 +170,109 @@ function maybeFontSizeChannel(fontSize) {
     ? [undefined, fontSize]
     : [fontSize, undefined];
 }
+
+// This is a greedy algorithm for line wrapping; it returns the wrapped lines as an array of strings, measuring candidate lines with widthof(input, start, end). It would be better to use the
+// Knuth–Plass line breaking algorithm (but that would be much more complex).
+// https://en.wikipedia.org/wiki/Line_wrap_and_word_wrap
+function lineWrap(input, maxWidth, widthof = (_, i, j) => j - i) {
+  const lines = [];
+  let lineStart, lineEnd = 0; // lineStart stays undefined until the next word begins a new line
+  for (const [wordStart, wordEnd, required] of lineBreaks(input)) {
+    // Record the start of a line. This isn’t the same as the previous line’s
+    // end because we often skip spaces between lines.
+    if (lineStart === undefined) lineStart = wordStart;
+
+    // If the current line is not empty, and if adding the current word would
+    // make the line longer than the allowed width, then break the line at the
+    // previous word end. (A single word wider than maxWidth is therefore kept whole.)
+    if (lineEnd > lineStart && widthof(input, lineStart, wordEnd) > maxWidth) {
+      lines.push(input.slice(lineStart, lineEnd));
+      lineStart = wordStart; // the word that did not fit starts the next line
+    }
+
+    // If this is a required break (a newline, or the end of the input), emit the line and reset.
+    if (required) {
+      lines.push(input.slice(lineStart, wordEnd));
+      lineStart = undefined;
+      continue;
+    }
+
+    // Extend the current line to include the new word.
+    lineEnd = wordEnd;
+  }
+  return lines;
+}
+
+// This is a rudimentary (and U.S.-centric) algorithm for finding opportunities
+// to break lines between words; it yields [start, end, required] index triples, where required is true for newlines and for the end of the input. A better and far more comprehensive approach
+// would be to use the official Unicode Line Breaking Algorithm.
+// https://unicode.org/reports/tr14/
+function* lineBreaks(input) {
+  let i = 0, j = 0; // i: start index of the current word; j: scan position
+  const n = input.length;
+  while (j < n) {
+    let k = 1; // length of the newline sequence to skip (2 for \r\n)
+    switch (input[j]) {
+      case "-": // hyphen: break after it, so the hyphen stays with the preceding word
+        ++j;
+        yield [i, j, false];
+        i = j;
+        break;
+      case " ":
+        yield [i, j, false]; // the word ends just before the space
+        while (input[++j] === " "); // skip multiple spaces
+        i = j;
+        break;
+      case "\r": if (input[j + 1] === "\n") ++k; // falls through
+      case "\n":
+        yield [i, j, true]; // the newline itself is excluded from the yielded range
+        j += k;
+        i = j;
+        break;
+      default:
+        ++j;
+        break;
+    }
+  }
+  yield [i, j, true]; // the end of the input always terminates the last line
+}
+
+// Approximate character widths in hundredths of an em, computed as
+// round(measureText(text).width * 10) at 10px system-ui. For characters that are
+// not represented in this map, we’d ideally want to use a weighted average of what we
+// expect to see. But since we don’t really know what that is, using “e” seems reasonable.
+const defaultWidthMap = {
+  a: 56, b: 63, c: 57, d: 63, e: 58, f: 37, g: 62, h: 60, i: 26, j: 26, k: 55, l: 26, m: 88, n: 60, o: 60, p: 62, q: 62, r: 39, s: 54, t: 38, u: 60, v: 55, w: 79, x: 54, y: 55, z: 55,
+  A: 69, B: 67, C: 73, D: 74, E: 61, F: 58, G: 76, H: 75, I: 28, J: 55, K: 67, L: 58, M: 89, N: 75, O: 78, P: 65, Q: 78, R: 67, S: 65, T: 65, U: 75, V: 69, W: 98, X: 69, Y: 67, Z: 67,
+  0: 64, 1: 48, 2: 62, 3: 64, 4: 66, 5: 63, 6: 65, 7: 58, 8: 65, 9: 65,
+  " ": 29, "!": 32, '"': 49, "'": 31, "(": 39, ")": 39, ",": 31, "-": 48, ".": 31, "/": 32, ":": 31, ";": 31, "?": 52, "‘": 31, "’": 31, "“": 47, "”": 47
+};
+
+// This is a rudimentary (and U.S.-centric) algorithm for measuring the width of
+// a string based on a technique of Gregor Aisch; it assumes that individual
+// characters are laid out independently and does not implement the Unicode
+// grapheme cluster breaking algorithm. It does understand code points, though,
+// and so treats things like emoji as having the width of a lowercase e (and
+// should be equivalent to using for-of to iterate over code points, while also
+// being fast). TODO Optimize this by noting that we often re-measure characters
+// that were previously measured?
+// http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
+// https://exploringjs.com/impatient-js/ch_strings.html#atoms-of-text
+function defaultWidth(text, start, end) { // measures text from index start (inclusive) to end (exclusive)
+  let sum = 0;
+  for (let i = start; i < end; ++i) {
+    sum += defaultWidthMap[text[i]] || defaultWidthMap.e; // characters missing from the map count as “e”
+    const first = text.charCodeAt(i);
+    if (first >= 0xd800 && first <= 0xdbff) { // high surrogate
+      const second = text.charCodeAt(i + 1);
+      if (second >= 0xdc00 && second <= 0xdfff) { // low surrogate
+        ++i; // surrogate pair: skip the low surrogate so the pair is measured once
+      }
+    }
+  }
+  return sum; // total width, in the units of defaultWidthMap
+}
+
+function monospaceWidth(text, start, end) { // width of text between indexes start and end, counting one unit per string index
+  return end - start;
+}