Skip to content

Commit 6c29fd9

Browse files
authored
text wrap (lineWidth) (#699)
* text wrap (lineWidth) * tweak metrics; document * lineWidth test
1 parent 362d28d commit 6c29fd9

File tree

5 files changed

+308
-3
lines changed

5 files changed

+308
-3
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,6 +1111,8 @@ The following text-specific constant options are also supported:
11111111
* **textAnchor** - the [text anchor](https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/text-anchor) for horizontal position; start, end, or middle
11121112
* **lineAnchor** - the line anchor for vertical position; top, bottom, or middle
11131113
* **lineHeight** - the line height in ems; defaults to 1
1114+
* **lineWidth** - the line width in ems, for wrapping; defaults to Infinity
1115+
* **monospace** - if true, changes the default fontFamily and metrics to monospace
11141116
* **fontFamily** - the font name; defaults to [system-ui](https://drafts.csswg.org/css-fonts-4/#valdef-font-family-system-ui)
11151117
* **fontSize** - the font size in pixels; defaults to 10
11161118
* **fontStyle** - the [font style](https://developer.mozilla.org/en-US/docs/Web/CSS/font-style); defaults to normal
@@ -1119,6 +1121,8 @@ The following text-specific constant options are also supported:
11191121
* **frameAnchor** - the frame anchor; top-left, top, top-right, right, bottom-right, bottom, bottom-left, left, or middle (default)
11201122
* **rotate** - the rotation angle in degrees clockwise; defaults to 0
11211123

1124+
If a **lineWidth** is specified, input text values will be wrapped as needed to fit while preserving existing newlines. The line wrapping implementation is rudimentary; for text that is not ASCII or not U.S. English, or when a different font is used, you may get better results by hard-wrapping the text yourself (by supplying newlines in the input). If the **monospace** option is truthy, the default **fontFamily** changes to “ui-monospace, monospace”, and the **lineWidth** option is interpreted as characters (ch) rather than ems.
1125+
11221126
The **fontSize** and **rotate** options can be specified as either channels or constants. When fontSize or rotate is specified as a number, it is interpreted as a constant; otherwise it is interpreted as a channel.
11231127

11241128
If the **frameAnchor** option is not specified, then **textAnchor** and **lineAnchor** default to middle. Otherwise, **textAnchor** defaults to start if **frameAnchor** is on the left, end if **frameAnchor** is on the right, and otherwise middle. Similarly, **lineAnchor** defaults to top if **frameAnchor** is on the top, bottom if **frameAnchor** is on the bottom, and otherwise middle.

src/marks/text.js

Lines changed: 117 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ export class Text extends Mark {
1919
textAnchor = /right$/i.test(frameAnchor) ? "end" : /left$/i.test(frameAnchor) ? "start" : "middle",
2020
lineAnchor = /^top/i.test(frameAnchor) ? "top" : /^bottom/i.test(frameAnchor) ? "bottom" : "middle",
2121
lineHeight = 1,
22-
fontFamily,
22+
lineWidth = Infinity,
23+
monospace,
24+
fontFamily = monospace ? "ui-monospace, monospace" : undefined,
2325
fontSize,
2426
fontStyle,
2527
fontVariant,
@@ -44,6 +46,8 @@ export class Text extends Mark {
4446
this.textAnchor = impliedString(textAnchor, "middle");
4547
this.lineAnchor = keyword(lineAnchor, "lineAnchor", ["top", "middle", "bottom"]);
4648
this.lineHeight = +lineHeight;
49+
this.lineWidth = +lineWidth;
50+
this.monospace = !!monospace;
4751
this.fontFamily = string(fontFamily);
4852
this.fontSize = cfontSize;
4953
this.fontStyle = string(fontStyle);
@@ -81,11 +85,15 @@ export class Text extends Mark {
8185
}
8286
}
8387

84-
function applyMultilineText(selection, {lineAnchor, lineHeight}, T) {
88+
function applyMultilineText(selection, {monospace, lineAnchor, lineHeight, lineWidth}, T) {
8589
if (!T) return;
8690
const format = isTemporal(T) ? isoFormat : isNumeric(T) ? formatNumber() : string;
91+
const linesof = isFinite(lineWidth) ? (monospace
92+
? t => lineWrap(t, lineWidth, monospaceWidth)
93+
: t => lineWrap(t, lineWidth * 100, defaultWidth))
94+
: t => t.split(/\r\n?|\n/g);
8795
selection.each(function(i) {
88-
const lines = format(T[i]).split(/\r\n?|\n/g);
96+
const lines = linesof(format(T[i]));
8997
const n = lines.length;
9098
const y = lineAnchor === "top" ? 0.71 : lineAnchor === "bottom" ? 1 - n : (164 - n * 100) / 200;
9199
if (n > 1) {
@@ -162,3 +170,109 @@ function maybeFontSizeChannel(fontSize) {
162170
? [undefined, fontSize]
163171
: [fontSize, undefined];
164172
}
173+
174+
// This is a greedy algorithm for line wrapping. It would be better to use the
175+
// Knuth–Plass line breaking algorithm (but that would be much more complex).
176+
// https://en.wikipedia.org/wiki/Line_wrap_and_word_wrap
177+
// Greedily wraps the given input string into lines.
//
// input    - the string to wrap.
// maxWidth - the maximum line width, in the units returned by widthof.
// widthof  - (text, start, end) => the width of text.slice(start, end);
//            defaults to counting UTF-16 code units.
//
// Returns an array of strings, one per line. Required breaks reported by
// lineBreaks are always honored. A word is never split, so a single word wider
// than maxWidth still occupies a line by itself.
function lineWrap(input, maxWidth, widthof = (_, i, j) => j - i) {
  const lines = [];
  let lineStart;
  let lineEnd = 0;
  for (const [wordStart, wordEnd, required] of lineBreaks(input)) {
    // Record the start of a line. This isn’t the same as the previous line’s
    // end because we often skip spaces between lines.
    if (lineStart === undefined) lineStart = wordStart;

    // If the current line is not empty, and if adding the current word would
    // make the line longer than the allowed width, then break the line at the
    // previous word end. An empty word (as produced by whitespace immediately
    // before a newline or the end of the input, or by a space following a
    // hyphen) adds nothing visible, so it must never force a break: doing so
    // would emit a spurious empty line or leave a leading space on the next
    // line.
    if (
      wordEnd > wordStart &&
      lineEnd > lineStart &&
      widthof(input, lineStart, wordEnd) > maxWidth
    ) {
      lines.push(input.slice(lineStart, lineEnd));
      lineStart = wordStart;
    }

    // If this is a required break (a newline), emit the line and reset.
    if (required) {
      lines.push(input.slice(lineStart, wordEnd));
      lineStart = undefined;
      continue;
    }

    // Extend the current line to include the new word.
    lineEnd = wordEnd;
  }
  return lines;
}
205+
206+
// This is a rudimentary (and U.S.-centric) algorithm for finding opportunities
207+
// to break lines between words. A better and far more comprehensive approach
208+
// would be to use the official Unicode Line Breaking Algorithm.
209+
// https://unicode.org/reports/tr14/
210+
// Scans the input once and yields a [wordStart, wordEnd, required] triple for
// each break opportunity: wordStart and wordEnd are string offsets delimiting
// the word that precedes the break, and required is true for a hard break (a
// newline, or the end of the input) and false for a soft one (after a hyphen,
// or at a run of spaces).
function* lineBreaks(input) {
  const length = input.length;
  let wordStart = 0;
  let cursor = 0;
  while (cursor < length) {
    const ch = input[cursor];
    if (ch === "-") {
      // A hyphen stays attached to the word it terminates.
      ++cursor;
      yield [wordStart, cursor, false];
      wordStart = cursor;
    } else if (ch === " ") {
      // The word ends before the space; the whole run of spaces is skipped.
      yield [wordStart, cursor, false];
      do {
        ++cursor;
      } while (input[cursor] === " ");
      wordStart = cursor;
    } else if (ch === "\r" || ch === "\n") {
      // Treat \r\n as a single newline; a lone \r or \n is one as well.
      const newlineLength = ch === "\r" && input[cursor + 1] === "\n" ? 2 : 1;
      yield [wordStart, cursor, true];
      cursor += newlineLength;
      wordStart = cursor;
    } else {
      ++cursor;
    }
  }
  // The end of the input always terminates the final (possibly empty) word.
  yield [wordStart, cursor, true];
}
239+
240+
// Computed as round(measureText(text).width * 10) at 10px system-ui. For
241+
// characters that are not represented in this map, we’d ideally want to use a
242+
// weighted average of what we expect to see. But since we don’t really know
243+
// what that is, using “e” seems reasonable.
244+
// Per-character widths keyed by single character; characters that are absent
// from this table are expected to be handled by the consumer.
const defaultWidthMap = {
  // Lowercase letters.
  "a": 56, "b": 63, "c": 57, "d": 63, "e": 58, "f": 37, "g": 62,
  "h": 60, "i": 26, "j": 26, "k": 55, "l": 26, "m": 88, "n": 60,
  "o": 60, "p": 62, "q": 62, "r": 39, "s": 54, "t": 38, "u": 60,
  "v": 55, "w": 79, "x": 54, "y": 55, "z": 55,
  // Uppercase letters.
  "A": 69, "B": 67, "C": 73, "D": 74, "E": 61, "F": 58, "G": 76,
  "H": 75, "I": 28, "J": 55, "K": 67, "L": 58, "M": 89, "N": 75,
  "O": 78, "P": 65, "Q": 78, "R": 67, "S": 65, "T": 65, "U": 75,
  "V": 69, "W": 98, "X": 69, "Y": 67, "Z": 67,
  // Digits.
  "0": 64, "1": 48, "2": 62, "3": 64, "4": 66,
  "5": 63, "6": 65, "7": 58, "8": 65, "9": 65,
  // Space and punctuation.
  " ": 29, "!": 32, '"': 49, "'": 31, "(": 39, ")": 39, ",": 31,
  "-": 48, ".": 31, "/": 32, ":": 31, ";": 31, "?": 52,
  "‘": 31, "’": 31, "“": 47, "”": 47
};
250+
251+
// This is a rudimentary (and U.S.-centric) algorithm for measuring the width of
252+
// a string based on a technique of Gregor Aisch; it assumes that individual
253+
// characters are laid out independently and does not implement the Unicode
254+
// grapheme cluster breaking algorithm. It does understand code points, though,
255+
// and so treats things like emoji as having the width of a lowercase e (and
256+
// should be equivalent to using for-of to iterate over code points, while also
257+
// being fast). TODO Optimize this by noting that we often re-measure characters
258+
// that were previously measured?
259+
// http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
260+
// https://exploringjs.com/impatient-js/ch_strings.html#atoms-of-text
261+
// Estimates the width of text.slice(start, end) by summing per-character
// widths from defaultWidthMap.
//
// text  - the string to measure.
// start - the offset of the first code unit to measure; defaults to 0.
// end   - the offset just past the last code unit to measure; defaults to the
//         length of the text, so the whole string is measured when omitted.
//
// Returns the sum of the widths, in the units of defaultWidthMap. A character
// that is missing from the map, including any code point encoded as a
// surrogate pair, counts once with the width of a lowercase “e”.
function defaultWidth(text, start = 0, end = text.length) {
  let sum = 0;
  for (let i = start; i < end; ++i) {
    sum += defaultWidthMap[text[i]] || defaultWidthMap.e;
    const first = text.charCodeAt(i);
    if (first >= 0xd800 && first <= 0xdbff) { // high surrogate
      const second = text.charCodeAt(i + 1);
      if (second >= 0xdc00 && second <= 0xdfff) { // low surrogate
        ++i; // surrogate pair: skip the second half so it isn’t counted twice
      }
    }
  }
  return sum;
}
275+
276+
// Measures text.slice(start, end) assuming every UTF-16 code unit has a width
// of one; note that a surrogate pair therefore counts as two.
//
// text  - the string to measure.
// start - the offset of the first code unit to measure; defaults to 0.
// end   - the offset just past the last code unit to measure; defaults to the
//         length of the text, so the whole string is measured when omitted.
//
// Returns the number of code units in the measured range.
function monospaceWidth(text, start = 0, end = text.length) {
  return end - start;
}

0 commit comments

Comments
 (0)