Skip to content

Commit e118b95

Browse files
Fil and mbostock authored
textOverflow (#1283)
* textOverflow * more textOverflow options; work with utf8 chars rather than string indices * format * the measure for … determines how some strings are clipped * clip-end, ellipsis-end * roll back the changes to lineWrap, treat unknown char width as 1em, fix clipping * monospace emoji is 1 char * tests * monospace emoji * undo changes to the metric linearize the overflow function * tests * restore comment, clean up * clarify the role of this function: it returns the length of the current character; should allow to generalize to multi-code point chars. * cleaner * glyph length * test readCharacter * isPictographic * tweak * cut * more rigorous clip tests * add failing tests * better middle clip; fix names * center ellipsis * comments * splitText * separate splitting from clipping * splitLines, clipLine * inferFontVariant * maybeTextOverflow * widthof(text) shorthand * include ellipsis in default width map * add a multiline film title to the test, and remove obsolete comments * optimize and improve readability * Update README --------- Co-authored-by: Mike Bostock <[email protected]>
1 parent 9c52b94 commit e118b95

File tree

11 files changed

+2628
-30
lines changed

11 files changed

+2628
-30
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2030,6 +2030,7 @@ The following text-specific constant options are also supported:
20302030
* **lineAnchor** - the line anchor for vertical position; top, bottom, or middle
20312031
* **lineHeight** - the line height in ems; defaults to 1
20322032
* **lineWidth** - the line width in ems, for wrapping; defaults to Infinity
2033+
* **textOverflow** - how to wrap or clip lines longer than the specified line width
20332034
* **monospace** - if true, changes the default fontFamily and metrics to monospace
20342035
* **fontFamily** - the font name; defaults to [system-ui](https://drafts.csswg.org/css-fonts-4/#valdef-font-family-system-ui)
20352036
* **fontSize** - the font size in pixels; defaults to 10
@@ -2041,6 +2042,15 @@ The following text-specific constant options are also supported:
20412042
20422043
If a **lineWidth** is specified, input text values will be wrapped as needed to fit while preserving existing newlines. The line wrapping implementation is rudimentary; for non-ASCII, non-U.S. English text, or for when a different font is used, you may get better results by hard-wrapping the text yourself (by supplying newlines in the input). If the **monospace** option is truthy, the default **fontFamily** changes to “ui-monospace, monospace”, and the **lineWidth** option is interpreted as characters (ch) rather than ems.
20432044
2045+
The **textOverflow** option can be used to truncate lines of text longer than the given **lineWidth**. If the mark does not have a **title** channel, a title with the non-truncated text is also added. The following **textOverflow** values are supported:
2046+
2047+
* null (default) - preserve overflowing characters
2048+
* *clip* or *clip-end* - remove characters from the end
2049+
* *clip-start* - remove characters from the start
2050+
* *ellipsis* or *ellipsis-end* - replace characters from the end with an ellipsis (…)
2051+
* *ellipsis-start* - replace characters from the start with an ellipsis (…)
2052+
* *ellipsis-middle* - replace characters from the middle with an ellipsis (…)
2053+
20442054
The **fontSize** and **rotate** options can be specified as either channels or constants. When fontSize or rotate is specified as a number, it is interpreted as a constant; otherwise it is interpreted as a channel.
20452055
20462056
If the **frameAnchor** option is not specified, then **textAnchor** and **lineAnchor** default to middle. Otherwise, **textAnchor** defaults to start if **frameAnchor** is on the left, end if **frameAnchor** is on the right, and otherwise middle. Similarly, **lineAnchor** defaults to top if **frameAnchor** is on the top, bottom if **frameAnchor** is on the bottom, and otherwise middle.

src/marks/axis.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ function axisKy(
132132
fill,
133133
fillOpacity,
134134
...options,
135+
lineWidth: undefined,
136+
textOverflow: undefined,
135137
facet: "super",
136138
x: null,
137139
y: null,
@@ -239,6 +241,8 @@ function axisKx(
239241
fill,
240242
fillOpacity,
241243
...options,
244+
lineWidth: undefined,
245+
textOverflow: undefined,
242246
facet: "super",
243247
x: null,
244248
y: null,

src/marks/text.js

Lines changed: 167 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ export class Text extends Mark {
4949
lineAnchor = /^top/i.test(frameAnchor) ? "top" : /^bottom/i.test(frameAnchor) ? "bottom" : "middle",
5050
lineHeight = 1,
5151
lineWidth = Infinity,
52+
textOverflow,
5253
monospace,
5354
fontFamily = monospace ? "ui-monospace, monospace" : undefined,
5455
fontSize,
@@ -76,17 +77,21 @@ export class Text extends Mark {
7677
this.lineAnchor = keyword(lineAnchor, "lineAnchor", ["top", "middle", "bottom"]);
7778
this.lineHeight = +lineHeight;
7879
this.lineWidth = +lineWidth;
80+
this.textOverflow = maybeTextOverflow(textOverflow);
7981
this.monospace = !!monospace;
8082
this.fontFamily = string(fontFamily);
8183
this.fontSize = cfontSize;
8284
this.fontStyle = string(fontStyle);
8385
this.fontVariant = string(fontVariant);
8486
this.fontWeight = string(fontWeight);
8587
this.frameAnchor = maybeFrameAnchor(frameAnchor);
88+
if (!(this.lineWidth >= 0)) throw new Error(`invalid lineWidth: ${lineWidth}`);
89+
this.splitLines = splitter(this);
90+
this.clipLine = clipper(this);
8691
}
8792
render(index, scales, channels, dimensions, context) {
8893
const {x, y} = scales;
89-
const {x: X, y: Y, rotate: R, text: T, fontSize: FS} = channels;
94+
const {x: X, y: Y, rotate: R, text: T, title: TL, fontSize: FS} = channels;
9095
const {rotate} = this;
9196
const [cx, cy] = applyFrameAnchor(this, dimensions);
9297
return create("svg:g", context)
@@ -100,7 +105,7 @@ export class Text extends Mark {
100105
.enter()
101106
.append("text")
102107
.call(applyDirectStyles, this)
103-
.call(applyMultilineText, this, T)
108+
.call(applyMultilineText, this, T, TL)
104109
.attr(
105110
"transform",
106111
template`translate(${X ? (i) => X[i] : cx},${Y ? (i) => Y[i] : cy})${
@@ -114,15 +119,25 @@ export class Text extends Mark {
114119
}
115120
}
116121

117-
function applyMultilineText(selection, {monospace, lineAnchor, lineHeight, lineWidth}, T) {
122+
// Validates the textOverflow option. Nullish input yields null; otherwise the
// value is checked by keyword against the supported names, and the shorthands
// "clip" and "ellipsis" are expanded to "clip-end" and "ellipsis-end".
function maybeTextOverflow(textOverflow) {
  if (textOverflow == null) return null;
  const allowed = [
    "clip", // shorthand for clip-end
    "ellipsis", // … ellipsis-end
    "clip-start",
    "clip-end",
    "ellipsis-start",
    "ellipsis-middle",
    "ellipsis-end"
  ];
  const value = keyword(textOverflow, "textOverflow", allowed);
  return value.replace(/^(clip|ellipsis)$/, "$1-end");
}
135+
136+
function applyMultilineText(selection, mark, T, TL) {
118137
if (!T) return;
119-
const linesof = isFinite(lineWidth)
120-
? monospace
121-
? (t) => lineWrap(t, lineWidth, monospaceWidth)
122-
: (t) => lineWrap(t, lineWidth * 100, defaultWidth)
123-
: (t) => t.split(/\r\n?|\n/g);
138+
const {lineAnchor, lineHeight, textOverflow, splitLines, clipLine} = mark;
124139
selection.each(function (i) {
125-
const lines = linesof(formatDefault(T[i]));
140+
const lines = splitLines(formatDefault(T[i])).map(clipLine);
126141
const n = lines.length;
127142
const y = lineAnchor === "top" ? 0.71 : lineAnchor === "bottom" ? 1 - n : (164 - n * 100) / 200;
128143
if (n > 1) {
@@ -138,6 +153,11 @@ function applyMultilineText(selection, {monospace, lineAnchor, lineHeight, lineW
138153
if (y) this.setAttribute("y", `${y * lineHeight}em`);
139154
this.textContent = lines[0];
140155
}
156+
if (textOverflow && !TL && lines[0] !== T[i]) {
157+
const title = this.ownerDocument.createElementNS(namespaces.svg, "title");
158+
title.textContent = T[i];
159+
this.appendChild(title);
160+
}
141161
});
142162
}
143163

@@ -165,14 +185,14 @@ function applyIndirectTextStyles(selection, mark, T) {
165185
applyAttr(selection, "font-family", mark.fontFamily);
166186
applyAttr(selection, "font-size", mark.fontSize);
167187
applyAttr(selection, "font-style", mark.fontStyle);
168-
applyAttr(
169-
selection,
170-
"font-variant",
171-
mark.fontVariant === undefined && (isNumeric(T) || isTemporal(T)) ? "tabular-nums" : mark.fontVariant
172-
);
188+
applyAttr(selection, "font-variant", mark.fontVariant === undefined ? inferFontVariant(T) : mark.fontVariant);
173189
applyAttr(selection, "font-weight", mark.fontWeight);
174190
}
175191

192+
// Picks the default font-variant for the text values T: "tabular-nums" when T
// is numeric or temporal, and undefined (no font-variant) otherwise.
function inferFontVariant(T) {
  if (isNumeric(T) || isTemporal(T)) return "tabular-nums";
  return undefined;
}
195+
176196
// https://developer.mozilla.org/en-US/docs/Web/CSS/font-size
177197
const fontSizes = new Set([
178198
// global keywords
@@ -212,7 +232,7 @@ function maybeFontSizeChannel(fontSize) {
212232
// This is a greedy algorithm for line wrapping. It would be better to use the
213233
// Knuth–Plass line breaking algorithm (but that would be much more complex).
214234
// https://en.wikipedia.org/wiki/Line_wrap_and_word_wrap
215-
function lineWrap(input, maxWidth, widthof = (_, i, j) => j - i) {
235+
function lineWrap(input, maxWidth, widthof) {
216236
const lines = [];
217237
let lineStart,
218238
lineEnd = 0;
@@ -362,7 +382,8 @@ const defaultWidthMap = {
362382
"‘": 31,
363383
"’": 31,
364384
"“": 47,
365-
"”": 47
385+
"”": 47,
386+
"…": 82
366387
};
367388

368389
// This is a rudimentary (and U.S.-centric) algorithm for measuring the width of
@@ -375,23 +396,139 @@ const defaultWidthMap = {
375396
// that were previously measured?
376397
// http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
377398
// https://exploringjs.com/impatient-js/ch_strings.html#atoms-of-text
378-
function defaultWidth(text, start, end) {
399+
export function defaultWidth(text, start = 0, end = text.length) {
379400
let sum = 0;
380-
for (let i = start; i < end; ++i) {
381-
sum += defaultWidthMap[text[i]] || defaultWidthMap.e;
382-
const first = text.charCodeAt(i);
383-
if (first >= 0xd800 && first <= 0xdbff) {
384-
// high surrogate
385-
const second = text.charCodeAt(i + 1);
386-
if (second >= 0xdc00 && second <= 0xdfff) {
387-
// low surrogate
388-
++i; // surrogate pair
389-
}
390-
}
401+
for (let i = start; i < end; i = readCharacter(text, i)) {
402+
sum += defaultWidthMap[text[i]] ?? (isPictographic(text, i) ? 120 : defaultWidthMap.e);
403+
}
404+
return sum;
405+
}
406+
407+
// Even for monospaced text, we can’t assume that the number of UTF-16 code
408+
// units (i.e., the length of a string) corresponds to the number of visible
409+
// characters; we still have to count graphemes. And note that pictographic
410+
// characters such as emojis are typically not monospaced!
411+
export function monospaceWidth(text, start = 0, end = text.length) {
  let total = 0;
  let i = start;
  while (i < end) {
    // A pictographic character counts as 200, any other character as 100.
    total += isPictographic(text, i) ? 200 : 100;
    i = readCharacter(text, i); // jump to the start of the next character
  }
  return total;
}
394418

395-
function monospaceWidth(text, start, end) {
396-
return end - start;
419+
// Returns the function used to split a text value into lines. Lines are only
// wrapped (with lineWrap) when no textOverflow is set and lineWidth is finite;
// in every other case the text is split on its existing newlines only.
function splitter({monospace, lineWidth, textOverflow}) {
  const wraps = textOverflow == null && !(lineWidth == Infinity);
  if (!wraps) {
    return (text) => text.split(/\r\n?|\n/g);
  }
  const limit = lineWidth * 100; // lineWidth scaled by 100 before being compared against widthof
  const measure = monospace ? monospaceWidth : defaultWidth;
  return (text) => lineWrap(text, limit, measure);
}
425+
426+
// Returns the function used to clip a single line according to textOverflow.
// Without a textOverflow, or with an infinite lineWidth, lines pass through
// unchanged. An unrecognized textOverflow yields undefined.
function clipper({monospace, lineWidth, textOverflow}) {
  if (textOverflow == null || lineWidth == Infinity) return (text) => text;
  const measure = monospace ? monospaceWidth : defaultWidth;
  const limit = lineWidth * 100; // lineWidth scaled by 100 before being compared against widthof
  // Each mode maps to a clipping function and the marker that replaces the
  // removed characters (empty for plain clipping).
  const modes = new Map([
    ["clip-start", [clipStart, ""]],
    ["clip-end", [clipEnd, ""]],
    ["ellipsis-start", [clipStart, "…"]],
    ["ellipsis-middle", [clipMiddle, "…"]],
    ["ellipsis-end", [clipEnd, "…"]]
  ]);
  const mode = modes.get(textOverflow);
  if (mode === undefined) return undefined;
  const [clip, marker] = mode;
  return (text) => clip(text, limit, measure, marker);
}
443+
444+
// Cuts the given text to the given width, using the specified widthof function;
445+
// the returned [index, error] guarantees text.slice(0, index) fits within the
446+
// specified width with the given error. If the text fits naturally within the
447+
// given width, returns [-1, 0]. If the text needs cutting, the given inset
448+
// specifies how much space (in the same units as width and widthof) to reserve
449+
// for a possible ellipsis character.
450+
function cut(text, width, widthof, inset) {
  const starts = []; // start indexes of the characters accepted so far
  const n = text.length;
  let used = 0; // accumulated width of the accepted characters
  let i = 0;
  while (i < n) {
    let j = readCharacter(text, i); // end of the character starting at i
    const charWidth = widthof(text, i, j);
    if (used + charWidth > width) {
      // The character at i does not fit: reserve the inset, then give back
      // previously accepted characters until the reserved total fits (or
      // there is nothing left to give back).
      used += inset;
      while (used > width && i > 0) {
        j = i;
        i = starts.pop();
        used -= widthof(text, i, j);
      }
      return [i, width - used];
    }
    used += charWidth;
    starts.push(i);
    i = j;
  }
  return [-1, 0]; // everything fits
}
466+
467+
// Clips the end of the given text so that it fits within width (as measured
// by widthof), appending the given ellipsis (possibly empty) when characters
// were removed. Text that already fits is returned trimmed but otherwise intact.
export function clipEnd(text, width, widthof, ellipsis) {
  const trimmed = text.trim(); // ignore leading and trailing whitespace
  const reserve = widthof(ellipsis); // room kept for the ellipsis
  const [end] = cut(trimmed, width, widthof, reserve);
  if (end < 0) return trimmed;
  return trimmed.slice(0, end).trimEnd() + ellipsis;
}
473+
474+
// Clips the middle of the given text so that it fits within width (as
// measured by widthof), replacing the removed characters with the given
// ellipsis. Text that already fits is returned trimmed but otherwise intact.
export function clipMiddle(text, width, widthof, ellipsis) {
  const trimmed = text.trim(); // ignore leading and trailing whitespace
  const total = widthof(trimmed);
  if (total <= width) return trimmed;
  const half = widthof(ellipsis) / 2; // each side reserves half of the ellipsis
  // The first cut selects the kept head; its leftover space is handed to the
  // second cut, which locates where the kept tail begins.
  const [head, slack] = cut(trimmed, width / 2, widthof, half);
  const [tail] = cut(trimmed, total - width / 2 - slack + half, widthof, -half); // TODO read spaces?
  if (tail < 0) return ellipsis;
  const before = trimmed.slice(0, head).trimEnd();
  const after = trimmed.slice(readCharacter(trimmed, tail)).trimStart();
  return before + ellipsis + after;
}
483+
484+
// Clips the start of the given text so that it fits within width (as measured
// by widthof), prepending the given ellipsis (possibly empty) when characters
// were removed. Text that already fits is returned trimmed but otherwise intact.
export function clipStart(text, width, widthof, ellipsis) {
  const trimmed = text.trim(); // ignore leading and trailing whitespace
  const total = widthof(trimmed);
  if (total <= width) return trimmed;
  const reserve = widthof(ellipsis); // room kept for the ellipsis
  // Cut at the width of the part to drop; the kept text starts after the
  // character at the returned index.
  const [start] = cut(trimmed, total - width + reserve, widthof, -reserve); // TODO read spaces?
  if (start < 0) return ellipsis;
  return ellipsis + trimmed.slice(readCharacter(trimmed, start)).trimStart();
}
492+
493+
// Sticky (y) regexes: a match is only attempted at exactly lastIndex, which
// the helpers below set before each test.
const reCombiner = /[\p{Combining_Mark}\p{Emoji_Modifier}]+/uy;
const rePictographic = /\p{Extended_Pictographic}/uy;

// Reads a single “character” element from the given text starting at the given
// index, returning the index after the read character. Ideally, this implements
// the Unicode text segmentation algorithm and understands grapheme cluster
// boundaries, etc., but in practice this is only smart enough to detect UTF-16
// surrogate pairs, combining marks, emoji modifiers (such as skin tones), and
// zero-width joiner (zwj) sequences. https://unicode.org/reports/tr29/
//
// This is iterative rather than recursive so that a long run of joiners cannot
// overflow the call stack, and a joiner dangling at the very end of the text is
// consumed without reading past the end of the string.
export function readCharacter(text, i) {
  let j = i;
  for (;;) {
    j += isSurrogatePair(text, j) ? 2 : 1; // one code point
    if (isCombiner(text, j)) j = reCombiner.lastIndex; // plus any trailing marks/modifiers
    if (!isZeroWidthJoiner(text, j)) return j;
    ++j; // consume the joiner; the next code point belongs to the same character
    if (j >= text.length) return j; // dangling joiner: nothing left to join
  }
}

// We avoid more expensive regex tests involving Unicode property classes by
// first checking for the common case of 7-bit ASCII characters.
function isAscii(text, i) {
  return text.charCodeAt(i) < 0x80;
}

// Returns true if the code unit at i is a high surrogate immediately followed
// by a low surrogate.
function isSurrogatePair(text, i) {
  const hi = text.charCodeAt(i);
  if (hi >= 0xd800 && hi < 0xdc00) {
    const lo = text.charCodeAt(i + 1);
    return lo >= 0xdc00 && lo < 0xe000;
  }
  return false;
}

// Returns true if the code unit at i is U+200D (zero-width joiner).
function isZeroWidthJoiner(text, i) {
  return text.charCodeAt(i) === 0x200d;
}

// Returns true if a run of combining marks or emoji modifiers starts at i. On
// a match, reCombiner.lastIndex is left at the end of the run, which
// readCharacter relies on.
function isCombiner(text, i) {
  return isAscii(text, i) ? false : ((reCombiner.lastIndex = i), reCombiner.test(text));
}

// Returns true if the code point starting at i is Extended_Pictographic.
function isPictographic(text, i) {
  return isAscii(text, i) ? false : ((rePictographic.lastIndex = i), rePictographic.test(text));
}

0 commit comments

Comments
 (0)